In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
svc = SVC(kernel='rbf')
dt = DecisionTreeClassifier(criterion='gini', max_features=6)
rf = RandomForestClassifier(n_estimators=100, criterion='gini', max_features= 6, class_weight='balanced')

In [3]:
os.chdir(r'C:\Users\shoun\Coding\Datasets')

In [4]:
df10 = pd.read_csv('Fish.csv')

In [5]:
df10

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width
0,Bream,242.0,23.2,25.4,30.0,11.5200,4.0200
1,Bream,290.0,24.0,26.3,31.2,12.4800,4.3056
2,Bream,340.0,23.9,26.5,31.1,12.3778,4.6961
3,Bream,363.0,26.3,29.0,33.5,12.7300,4.4555
4,Bream,430.0,26.5,29.0,34.0,12.4440,5.1340
...,...,...,...,...,...,...,...
154,Smelt,12.2,11.5,12.2,13.4,2.0904,1.3936
155,Smelt,13.4,11.7,12.4,13.5,2.4300,1.2690
156,Smelt,12.2,12.1,13.0,13.8,2.2770,1.2558
157,Smelt,19.7,13.2,14.3,15.2,2.8728,2.0672


In [6]:
def label_type(df,arg):
    df[arg] = df.Species.astype('category')
    df['{} Code'.format(arg)] = df.Species.cat.codes

In [7]:
label_type(df10,'Species')

In [8]:
df10 = df10.sample(frac=1)

In [9]:
df10[['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']]

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width
115,690.0,34.6,37.0,39.3,10.5717,6.3666
43,150.0,20.4,22.0,24.7,5.8045,3.7544
54,390.0,29.5,31.7,35.0,9.4850,5.3550
142,1600.0,56.0,60.0,64.0,9.6000,6.1440
23,680.0,31.8,35.0,40.6,15.4686,6.1306
...,...,...,...,...,...,...
4,430.0,26.5,29.0,34.0,12.4440,5.1340
62,60.0,14.3,15.5,17.4,6.5772,2.3142
79,80.0,17.2,19.0,20.2,5.6358,3.0502
89,135.0,20.0,22.0,23.5,5.8750,3.5250


In [10]:
df10.columns

Index(['Species', 'Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width',
       'Species Code'],
      dtype='object')

In [11]:
df10.Species.unique()

['Perch', 'Roach', 'Pike', 'Bream', 'Smelt', 'Parkki', 'Whitefish']
Categories (7, object): ['Perch', 'Roach', 'Pike', 'Bream', 'Smelt', 'Parkki', 'Whitefish']

In [12]:
scaler = StandardScaler()

In [13]:
df10[['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']] = scaler.fit_transform(df10[['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']])

In [14]:
df10.head(2)

Unnamed: 0,Species,Weight,Length1,Length2,Length3,Height,Width,Species Code
115,Perch,0.817354,0.83822,0.803577,0.697527,0.374635,1.159846,2
43,Roach,-0.695883,-0.586773,-0.600578,-0.563956,-0.741098,-0.394578,4


In [24]:
features = df10[['Weight', 'Length1', 'Length2', 'Length3', 'Height', 'Width']].copy()
target = df10['Species Code'].copy()

In [25]:
xtrain, xtest,ytrain, ytest = train_test_split(features, target, test_size = 0.2)

In [26]:
xtrain.shape, ytrain.shape

((127, 6), (127,))

In [27]:
#Initialising the algorithms
logr = LogisticRegression(random_state=8)
svc = SVC(kernel='rbf')
dt = DecisionTreeClassifier(criterion='gini', max_features=6)
rf = RandomForestClassifier(n_estimators=100, criterion='gini', max_features= 6, class_weight='balanced')
knn = KNeighborsClassifier(n_neighbors= 4)
gradboost = GradientBoostingClassifier(n_estimators=100, learning_rate= 0.01, criterion= 'mse')

In [28]:
alg = [logr, svc, dt, rf, knn, gradboost]

In [47]:
def ML (alg):
    lst = []
    for i in alg:
        i.fit(features, target)
        ypred = i.predict(xtest)
        acc = accuracy_score(ytest, ypred)
        a = '{} = {}'.format(str(i), acc)
        lst.append(a)
    return lst

In [48]:
ML(alg)

['LogisticRegression(random_state=8) = 1.0',
 'SVC() = 0.9375',
 'DecisionTreeClassifier(max_features=6) = 0.78125',
 "RandomForestClassifier(class_weight='balanced', max_features=6) = 0.78125",
 'KNeighborsClassifier(n_neighbors=4) = 0.875',
 "GradientBoostingClassifier(criterion='mse', learning_rate=0.01) = 0.78125"]

In [68]:
aboost = AdaBoostClassifier(base_estimator= gradboost, algorithm='SAMME', n_estimators=100)
bag = BaggingClassifier(base_estimator= gradboost, n_estimators=250,)

In [69]:
ML([aboost, bag])

["AdaBoostClassifier(algorithm='SAMME',\n                   base_estimator=GradientBoostingClassifier(criterion='mse',\n                                                             learning_rate=0.01),\n                   n_estimators=100) = 0.78125",
 "BaggingClassifier(base_estimator=GradientBoostingClassifier(criterion='mse',\n                                                            learning_rate=0.01),\n                  n_estimators=250) = 0.8125"]