<a href="https://colab.research.google.com/github/nisanuro/CNG562-Assignment-1/blob/master/CNG562_Assignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics, datasets, preprocessing

%matplotlib inline

In [0]:
def kFold(foldNumber, X_train, Y_train):
  """Cross-validate logistic and linear regression and print their mean scores.

  Args:
    foldNumber: number of folds, passed to KFold as n_splits.
    X_train: feature matrix handed to cross_val_score.
    Y_train: target values handed to cross_val_score.

  Prints the fold count, the mean cross-validated accuracy of the logistic
  model, and 1 + mean negated MSE (i.e. 1 - mean MSE) of the linear model.
  The latter is printed under an "Accuracy" label although it is an
  error-based score, not a classification accuracy.
  """
  #one unshuffled splitter shared by both models, so they are scored on the same folds
  splitter = KFold(n_splits=foldNumber, shuffle=False)

  #logistic regression is scored by accuracy
  log_scores = cross_val_score(
      LogisticRegression(solver='liblinear', multi_class='ovr'),
      X_train, Y_train, cv=splitter, scoring='accuracy')

  #linear regression is scored by negated mean squared error (higher is better)
  lin_scores = cross_val_score(
      LinearRegression(),
      X_train, Y_train, cv=splitter, scoring='neg_mean_squared_error')

  #report the averages over all folds
  print(str(foldNumber) + "Fold")
  print("Logistic Regression Accuracy: ", log_scores.mean())
  print("Linear Regression Accuracy: ", 1 + lin_scores.mean())

In [0]:
def randomOneHoldout(X_train, Y_train):
  """Evaluate both models on a single random 80/20 hold-out split and print the scores.

  Args:
    X_train: feature matrix to be split into a fitting part and a held-out part.
    Y_train: class labels aligned with X_train.

  Prints the classification accuracy of the logistic model and 1 - MSE of
  the linear model on the held-out 20%. The linear score keeps the file's
  "Accuracy" label but is an error-based score, matching what kFold reports.
  """
  #splitting dataset as %80 train %20 test (fixed seed keeps the split reproducible)
  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=1)

  #creating both linear & logistic regression models
  logReg = LogisticRegression(solver='liblinear', multi_class='ovr')
  linReg = LinearRegression()

  #training the models
  logReg.fit(x_train, y_train)
  linReg.fit(x_train, y_train)

  #predicting values
  y_pred_log = logReg.predict(x_test)
  y_pred_lin = linReg.predict(x_test)

  #displaying results
  print("Random One Hold Out")
  #accuracy_score is the fraction of exactly matching labels; 1 - MSE on class
  #labels would weight a 0-vs-2 mistake as 4 and could even drop below zero
  print("Logistic Regression Accuracy: ", metrics.accuracy_score(y_test, y_pred_log))
  #the linear model outputs continuous values, so it is scored by 1 - MSE as in kFold
  print("Linear Regression Accuracy: ", 1 - metrics.mean_squared_error(y_test, y_pred_lin))


In [0]:
def stratifiedOneHoldout(X_train, Y_train):
  """Evaluate both models on a single stratified 80/20 hold-out split and print the scores.

  Args:
    X_train: feature matrix to be split into a fitting part and a held-out part.
    Y_train: class labels aligned with X_train; also used as the stratification key.

  Prints the classification accuracy of the logistic model and 1 - MSE of
  the linear model on the held-out 20%. The linear score keeps the file's
  "Accuracy" label but is an error-based score, matching what kFold reports.
  """
  #splitting dataset as %80 train %20 test, stratified on the labels (fixed seed)
  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=1, stratify=Y_train)

  #creating both linear & logistic regression models
  logReg = LogisticRegression(solver='liblinear', multi_class='ovr')
  linReg = LinearRegression()

  #training the models
  logReg.fit(x_train, y_train)
  linReg.fit(x_train, y_train)

  #predicting values
  y_pred_log = logReg.predict(x_test)
  y_pred_lin = linReg.predict(x_test)

  #displaying results
  print("Stratified")
  #accuracy_score is the fraction of exactly matching labels; 1 - MSE on class
  #labels would weight a 0-vs-2 mistake as 4 and could even drop below zero
  print("Logistic Regression Accuracy: ", metrics.accuracy_score(y_test, y_pred_log))
  #the linear model outputs continuous values, so it is scored by 1 - MSE as in kFold
  print("Linear Regression Accuracy: ", 1 - metrics.mean_squared_error(y_test, y_pred_lin))

In [0]:
def displayAccuracy(X, Y):
    """Run every evaluation scheme on one version of the dataset and print the results.

    Args:
        X: feature matrix.
        Y: labels aligned with X.

    80% of the data (random_state=0) is kept and passed to 5-fold and
    10-fold cross-validation, the random hold-out, and the stratified
    hold-out, in that order. The other 20% is not used by this function.
    """
    #keep the 80% training portion; the 20% test portion is discarded here
    X_train, _, Y_train, _ = train_test_split(X, Y, test_size=0.2, random_state=0)

    #k-fold cross validation with 5 and then 10 folds
    for foldNumber in (5, 10):
        kFold(foldNumber, X_train, Y_train)

    #single hold-out evaluations: random first, then stratified
    randomOneHoldout(X_train, Y_train)
    stratifiedOneHoldout(X_train, Y_train)

In [0]:
#iris_data.mean(axis=0)
#iris_data.std(axis=0)

In [0]:
#stand_iris_data = preprocessing.scale(iris_data)

In [0]:
#stand_iris_data.mean(axis=0)
#stand_iris_data.std(axis=0)

In [38]:
if __name__ == '__main__':
  #load the iris dataset: X holds the raw feature matrix, Y the class labels
  iris = datasets.load_iris()
  X, Y = iris.data, iris.target

  #two preprocessed variants of the raw features:
  #  l1_norm      - L1 normalization (normalize's default rescales each sample/row)
  #  mean_removal - mean removal via preprocessing.scale
  #NOTE(review): both are computed on the full dataset before any train/test
  #split, so the preprocessing has seen the held-out rows - confirm this is intended
  l1_norm = preprocessing.normalize(X, norm="l1")
  mean_removal = preprocessing.scale(X)

  #per-feature mean & standard deviation, first before and then after mean removal
  for features in (X, mean_removal):
    print(features.mean(axis=0))
    print(features.std(axis=0))

  #display the results of every evaluation method for each variant of the data
  for title, features in (("\nRaw: ", X),
                          ("\nL1 Normalization: ", l1_norm),
                          ("\nMean Removal: ", mean_removal)):
    print(title)
    displayAccuracy(features, Y)



[5.84333333 3.05733333 3.758      1.19933333]
[0.82530129 0.43441097 1.75940407 0.75969263]
[-1.69031455e-15 -1.84297022e-15 -1.69864123e-15 -1.40924309e-15]
[1. 1. 1. 1.]

Raw: 
5Fold
Logistic Regression Accuracy:  0.9333333333333333
Linear Regression Accuracy:  0.9497273362310497
10Fold
Logistic Regression Accuracy:  0.9333333333333332
Linear Regression Accuracy:  0.9502722839017427
Random One Hold Out
Logistic Regression Accuracy:  0.9583333333333334
Linear Regression Accuracy:  0.9717590220680964
Stratified
Logistic Regression Accuracy:  0.9166666666666666
Linear Regression Accuracy:  0.9519051610594429

L1 Normalization: 
5Fold
Logistic Regression Accuracy:  0.6916666666666667
Linear Regression Accuracy:  0.9185691833496094
10Fold
Logistic Regression Accuracy:  0.6916666666666667
Linear Regression Accuracy:  0.9181168874104818
Random One Hold Out
Logistic Regression Accuracy:  0.75
Linear Regression Accuracy:  0.9588673909505209
Stratified
Logistic Regression Accuracy:  0.70833333

In [39]:
  #split the raw data 80/20; only the training portion is used in this cell
  X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

  #fit a linear regression on the raw training split and predict one hand-picked sample
  linReg = LinearRegression()
  linReg.fit(X_train, Y_train)
  Y_pred_lin = linReg.predict([[6, 3, 5, 1.5]])
  #the prediction is a single continuous value, so it cannot be scored with
  #mean_squared_error against the three class labels [0, 1, 2]

  #disabled experiment: 10-fold logistic regression on the L1-normalized data.
  #kept as comments rather than a bare string literal, which the notebook would
  #echo as the cell's output
  #kf = KFold(n_splits=10, shuffle=False)
  #logReg = LogisticRegression(solver='liblinear', multi_class='ovr')
  #cv_result_log = cross_val_score(logReg, l1_norm, Y, cv=kf, scoring='accuracy')
  #print("10Fold")
  #print("Final Logistic Regression Accuracy: ", cv_result_log.mean())



'kf = KFold(n_splits=10, shuffle=False)  \nlogReg = LogisticRegression(solver=\'liblinear\', multi_class=\'ovr\')  \ncv_result_log = cross_val_score(logReg, l1_norm, Y, cv=kf, scoring=\'accuracy\')\nprint("10Fold")\nprint("Final Logistic Regression Accuracy: ", cv_result_log.mean())'

In [40]:
#display the linear-regression prediction for the sample [6, 3, 5, 1.5] from the previous cell
Y_pred_lin

array([1.46624008])