In [11]:
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.datasets import load_iris
from sklearn.datasets import load_wine
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

In [16]:
# iris
# 0: setosa, 1: versicolor, 2: virginica

# wine
# 0: not so good quality, 1: good quality, 2: very good quality

In [17]:
# dataset preparation
D=load_iris()
data_X=D.data
print(len(data_X))
data_X=pd.DataFrame(data_X,columns=D.feature_names)
#print(data_X)
data_y=D.target
split_size=5
class_number=len(D.target_names)

X_train, X_test, y_train, y_test =train_test_split(data_X, data_y, test_size=0.33,shuffle=True) 
X_train, X_test, y_train, y_test =np.asarray(X_train),np.asarray(X_test),np.asarray(y_train),np.asarray(y_test)
k_fold=KFold(n_splits=split_size,shuffle=True)

TRAIN_INDICES=[]
VALIDATION_INDICES=[]

for train_indices, validation_indices in k_fold.split(X_train):
    TRAIN_INDICES.append(train_indices)
    VALIDATION_INDICES.append(validation_indices)

150


In [19]:
# Logistic Regression
LR=LogisticRegression(C=1,solver='lbfgs',multi_class='multinomial',max_iter=10000) 

# C is a parameter for regularization. Regularization can be stronger as C ->+0

train_score=0
validation_score=0

for i in range(split_size):
    train=TRAIN_INDICES[i]
    valid=VALIDATION_INDICES[i]
    LR.fit(X_train[train],y_train[train])
    
    train_score+=LR.score(X_train[train],y_train[train])
    validation_score+=LR.score(X_train[valid],y_train[valid])
    
print("train score in CV: ",train_score/split_size)
print()
print("validation score in CV: ",validation_score/split_size)
print("End of CV")
print()

train_pred=LR.predict(X_train)
train_score=LR.score(X_train,y_train)
    
test_pred=LR.predict(X_test)
test_score=LR.score(X_test,y_test)


print("train score: ",train_score)
print()
print("test_score: ",test_score)
print('\n')
print('Confusion Matrix for train')
print(confusion_matrix(y_train,train_pred,range(class_number)))
print()
print('Confusion Matrix for test')
print(confusion_matrix(y_test,test_pred,range(class_number)))

train score in CV:  0.9875

validation score in CV:  0.9800000000000001
End of CV

train score:  0.99

test_score:  0.94


Confusion Matrix for train
[[37  0  0]
 [ 0 30  1]
 [ 0  0 32]]

Confusion Matrix for test
[[13  0  0]
 [ 0 16  3]
 [ 0  0 18]]


In [22]:
# Linear Dicriminat Analysis
# Cuz there's no hyperparameter, no need for CV

LDA=LinearDiscriminantAnalysis()


LR.fit(X_train,y_train)
    
train_pred=LR.predict(X_train)
train_score=LR.score(X_train,y_train)
    
test_pred=LR.predict(X_test)
test_score=LR.score(X_test,y_test)
    
print("train score: ",train_score)
print("test score: ",test_score)
print('\n')
print('Confusion Matrix for train')
print(confusion_matrix(y_train,train_pred,range(class_number)))
print()
print('Confusion Matrix for test')
print(confusion_matrix(y_test,test_pred,range(class_number)))

train score:  0.99
test score:  0.96


Confusion Matrix for train
[[37  0  0]
 [ 0 30  1]
 [ 0  0 32]]

Confusion Matrix for test
[[13  0  0]
 [ 0 17  2]
 [ 0  0 18]]


In [27]:
# Neural Network
MLPC=MLPClassifier(hidden_layer_sizes=(5,),activation='relu',max_iter=10000,alpha=0)

train_score=0
validation_score=0

for i in range(split_size):
    train=TRAIN_INDICES[i]
    valid=VALIDATION_INDICES[i]
    MLPC.fit(X_train[train],y_train[train])
    
    train_score+=MLPC.score(X_train[train],y_train[train])
    validation_score+=MLPC.score(X_train[valid],y_train[valid])
    
print("train score in CV: ",train_score/split_size)
print()
print("validation score in CV: ",validation_score/split_size)
print("End of CV")
print()

train_pred=MLPC.predict(X_train)
train_score=MLPC.score(X_train,y_train)
    
test_pred=MLPC.predict(X_test)
test_score=MLPC.score(X_test,y_test)


print("train score: ",train_score)
print()
print("test_score: ",test_score)
print('\n')
print('Confusion Matrix for train')
print(confusion_matrix(y_train,train_pred,range(class_number)))
print()
print('Confusion Matrix for test')
print(confusion_matrix(y_test,test_pred,range(class_number)))

train score in CV:  0.9475

validation score in CV:  0.9199999999999999
End of CV

train score:  1.0

test_score:  0.92


Confusion Matrix for train
[[37  0  0]
 [ 0 31  0]
 [ 0  0 32]]

Confusion Matrix for test
[[13  0  0]
 [ 0 15  4]
 [ 0  0 18]]
