In [None]:
## Studying Data Science Through the Iris Dataset

In [None]:
import warnings
# Install a blanket "ignore" filter: every warning raised after this point is
# hidden for the rest of the session (including deprecation and convergence
# warnings from the libraries used below).
warnings.filterwarnings('ignore')
# IPython magic: render matplotlib figures inline, below the cell that draws them.
%matplotlib inline

In [None]:
#1. Data Extraction
# Read the Iris measurements straight from the UCI repository into a DataFrame.
# Column labels are supplied explicitly through `names`.

import pandas as pd
import numpy as np

irisColumns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class']
irisUrl = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

irisDataset = pd.read_csv(irisUrl, names=irisColumns)

In [None]:
#2. Data Exploration

# Show the first ten rows as a quick sanity check of the loaded table.
irisDataset.head(10)

In [None]:
# Descriptive summary statistics of the loaded table.
irisDataset.describe()

In [None]:
# DataFrame.info() writes its report (dtypes, non-null counts, memory usage)
# to stdout itself and returns None, so it must not be wrapped in print() --
# doing so only appends a stray "None" line to the output.
irisDataset.info()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pairwise view of all measurements, coloured by class:
#   lower triangle -> scatter plots, diagonal -> histograms, upper triangle -> KDE contours.
pg = sns.PairGrid(irisDataset, hue='class')
pg.map_lower(sns.scatterplot)
pg.map_diag(plt.hist)
pg.map_upper(sns.kdeplot)
# PairGrid does not draw a legend on its own; without it the hue colours
# cannot be matched to the classes they represent.
pg.add_legend()
plt.show()

In [None]:
# Correlation heatmap of the measurements.
# The 'class' column holds strings, so it is excluded explicitly: recent pandas
# releases no longer drop non-numeric columns silently in DataFrame.corr() and
# raise instead. Selecting numeric columns first works on old and new versions
# and yields the same matrix the implicit drop used to produce.
cmap = sns.diverging_palette(10, 220, as_cmap=True)
featureCorr = irisDataset.select_dtypes(include='number').corr()
sns.heatmap(featureCorr, annot=True, cmap=cmap)
plt.show()

In [None]:
#3. Data Preparation

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Features are every column except the last; the target is the last column.
x = irisDataset.iloc[:,:-1]
y = irisDataset.iloc[:,-1]

# 75 % / 25 % train/test split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(x,y, train_size=0.75, random_state=0)

# Fit the scaler on the training rows only. Fitting it on the whole table
# (before splitting) would bake test-set statistics into the transform and
# leak information into any model evaluated on X_test.
# Note: the scaler is only fitted here; X_train / X_test are left unscaled.
ss = StandardScaler().fit(X_train)

In [None]:
#4. Data Modeling & Model Evaluation
#4.1 Logistic Regression

from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Plain logistic regression: fit on the training split, predict the held-out split.
lr = LogisticRegression()
lr.fit(X_train, y_train)
lrPredict = lr.predict(X_test)

# Cross-validated variant: the 3-fold CV happens inside the training split.
# It is then scored on the same held-out split as `lr`, so the two numbers are
# comparable and neither is measured on data the model was fitted on.
lrcv = LogisticRegressionCV(cv=3)
lrcv.fit(X_train, y_train)
lrcvPredict = lrcv.predict(X_test)
lrcvScore = lrcv.score(X_test, y_test)

# scikit-learn metrics take (y_true, y_pred). With the arguments swapped the
# confusion matrix comes out transposed (rows would be predictions, not truth).
print("Logistic Regression:", accuracy_score(y_test, lrPredict))
print( confusion_matrix(y_test, lrPredict))
print("Logistic Regression CV:", lrcvScore)
print( confusion_matrix(y_test, lrcvPredict))

In [None]:
#4.2 Tree classifier

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score

# The estimators below are seeded through `random_state`, which must exist
# before they are built (otherwise the cell stops with a NameError).
# 0 matches the seed already used for the train/test split.
random_state = 0

dt = DecisionTreeClassifier(random_state=random_state)
rf = RandomForestClassifier(random_state=random_state)
et = ExtraTreesClassifier(random_state=random_state)

# Per-fold scores from 5-fold cross-validation on the full feature table.
# cross_validate returns a dict; only its 'test_score' entry is printed.
print("Decision Tree: ", cross_validate(dt, x, y, cv=5)['test_score'])
print("RandomForest: ", cross_val_score(rf, x, y, cv=5))
print("ExtraTreesClassifier: ", cross_val_score(et, x, y, cv=5) )

In [None]:
#4.3 KNN

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

# Tune n_neighbors with 5-fold CV on the training split only, then measure the
# refitted best estimator on the held-out split. Scoring on the rows used for
# fitting would report an optimistic training accuracy instead.
gs = GridSearchCV(KNeighborsClassifier(),param_grid={ 'n_neighbors' : [3,4,5] },cv=5)
gs.fit(X_train, y_train)
knn = gs.best_estimator_
knnPredict = knn.predict(X_test)
# accuracy_score takes (y_true, y_pred).
print("KNN: ", accuracy_score(y_test, knnPredict), "\nBest Params: ", knn.get_params())

In [None]:
#4.4 Naive Bayes

from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

# Per-fold scores of a Gaussian naive Bayes classifier, 5-fold CV on the full feature table.
gnbScores = cross_val_score(GaussianNB(), x, y, cv=5)
print("Gaussian: ", gnbScores)

In [None]:
#4.5 SVM

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Three-stage pipeline: standardise -> PCA -> support vector classifier.
# Step names matter: the grid addresses the SVC's C through the 'svc__' prefix.
pipe = Pipeline([
    ('normalize', StandardScaler()),
    ('pca', PCA()),
    ('svc', SVC()),
])
params = {'svc__C': [1.0, 1.1, 1.2]}

# 3-fold grid search over C on the full feature table.
svcGs = GridSearchCV(pipe, params, cv=3)
svcGs.fit(x, y)

# Last expression of the cell: the per-candidate CV results rendered as a table.
pd.DataFrame(svcGs.cv_results_)

In [None]:
#4.6 Neural Network

from sklearn.neural_network import MLPClassifier

# An empty grid means a single candidate (the estimator as configured below);
# GridSearchCV is used here only to get 3-fold CV results in cv_results_.
params = {}
# random_state pins weight initialisation and shuffling so the scores are
# repeatable across runs. max_iter is raised above the default because
# convergence warnings are silenced globally in this notebook, so a
# prematurely stopped optimiser would otherwise go unnoticed.
pipe = Pipeline([ ("neural", MLPClassifier(max_iter=1000, random_state=0)) ])
gsMLP = GridSearchCV(estimator=pipe, param_grid=params, cv=3)
gsMLP.fit(x,y)
# Last expression of the cell: CV results rendered as a table.
pd.DataFrame(gsMLP.cv_results_)

In [None]:
#4.7 Deep Learning
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Maps the string class labels to integer ids (needed for one-hot encoding later).
lb = LabelEncoder().fit(y)

# Fully connected network: 4 inputs -> 10 -> 8 -> 6 (ReLU) -> 3 (softmax).
model = Sequential([
    Dense(10, input_dim=4, activation='relu'),
    Dense(8, activation='relu'),
    Dense(6, activation='relu'),
    Dense(3, activation='softmax'),
])

# Categorical cross-entropy expects one-hot encoded targets.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# One-hot encode the integer-encoded labels with a fixed width taken from the
# fitted encoder. Without num_classes, to_categorical infers the width from the
# largest label present, so a split lacking the highest class would produce
# targets narrower than the network's output layer.
numClasses = len(lb.classes_)
yTrainOneHot = to_categorical(lb.transform(y_train.values), num_classes=numClasses)
yTestOneHot = to_categorical(lb.transform(y_test.values), num_classes=numClasses)

model.fit(X_train, yTrainOneHot, epochs=110)
# Last expression of the cell: the evaluation result on the held-out split
# (the model was compiled with metrics=['accuracy']).
model.evaluate(X_test, yTestOneHot)