<a href="https://colab.research.google.com/github/nisanuro/CNG562-Assignment-2/blob/naive/CNG562_Assignment2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import matplotlib.pyplot as plt  
from sklearn.model_selection import train_test_split, KFold, StratifiedKFold, cross_val_score
from sklearn import metrics, datasets, preprocessing
%matplotlib inline
from sklearn.naive_bayes import GaussianNB, BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB

In [0]:
def randomOneHoldout(X_train, Y_train):

  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=0)

  return x_train, x_test, y_train, y_test

In [0]:
def stratifiedOneHoldout(X_train, Y_train):
  
  x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, test_size=0.2, random_state=0, stratify=Y_train)
  
  return x_train, x_test, y_train, y_test

In [0]:
def displayAccuracy(X, Y):
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    
    NaiveBayes(X_train, Y_train)

In [0]:
def NaiveBayes(X_train, Y_train):

    gaussian = GaussianNB()
    categorical = CategoricalNB()
    bernoulli = BernoulliNB()
    complement = ComplementNB()
    multinomial = MultinomialNB()

    models = [gaussian, categorical, bernoulli, complement, multinomial]

    # 5-Fold
    print("\n5-Fold")
    for i in models:
        cv = cross_val_score(i, X_train, Y_train, cv=5, scoring='accuracy')
        print(str(i).split('N')[0] + " Naive Bayes Accuracy: ", cv.mean())

    # 10-Fold
    print("\n10-Fold")
    for i in models:
        cv = cross_val_score(i, X_train, Y_train, cv=10, scoring='accuracy')
        print(str(i).split('N')[0] + " Naive Bayes Accuracy: ", cv.mean())

    # Random One Holdout
    x_train, x_test, y_train, y_test = randomOneHoldout(X_train, Y_train)

    print("\nRandom One Holdout")
    for i in models:
        i.fit(x_train, y_train)
        y_pred = i.predict(x_test)
        print(str(i).split('N')[0] + " Naive Bayes Accuracy: ", 1 - metrics.mean_squared_error(y_test, y_pred))


    # Stratified One Holdout
    x_train, x_test, y_train, y_test = stratifiedOneHoldout(X_train, Y_train)
    
    print("\nStratified One Holdout")
    for i in models:
        i.fit(x_train, y_train)
        y_pred = i.predict(x_test)
        print(str(i).split('N')[0] + " Naive Bayes Accuracy: ", 1 - metrics.mean_squared_error(y_test, y_pred))
                         

                

In [60]:
if __name__ == '__main__':

  iris = datasets.load_iris()
  
  X = iris.data
  Y = iris.target
  
  # L1 normalization
  l1_norm = preprocessing.normalize(X, norm="l1")
  # Mean removal
  mean_removal = preprocessing.scale(X)

  '''#mean & standart deviation before mean removal 
  print(X.mean(axis=0))
  print(X.std(axis=0))

  #mean & standart deviation after mean removal 
  print(mean_removal.mean(axis=0))
  print(mean_removal.std(axis=0))'''

  #Displaying result according to each type of methods and regression model
  print("\nRaw: ")
  displayAccuracy(X,Y)
  print("\nL1 Normalization: ")
 # displayAccuracy(l1_norm,Y)
  print("\nMean Removal: ")
 # displayAccuracy(mean_removal,Y)


Raw: 

5-Fold
Gaussian Naive Bayes Accuracy:  0.95
Categorical Naive Bayes Accuracy:  0.9333333333333333
Bernoulli Naive Bayes Accuracy:  0.36666666666666664
Complement Naive Bayes Accuracy:  0.6916666666666667
Multinomial Naive Bayes Accuracy:  0.7166666666666666

10-Fold
Gaussian Naive Bayes Accuracy:  0.9499999999999998
Categorical Naive Bayes Accuracy:  0.9166666666666666
Bernoulli Naive Bayes Accuracy:  0.3666666666666667
Complement Naive Bayes Accuracy:  0.6916666666666667
Multinomial Naive Bayes Accuracy:  0.7083333333333334

Random One Holdout
Gaussian Naive Bayes Accuracy:  0.875
Categorical Naive Bayes Accuracy:  0.875
Bernoulli Naive Bayes Accuracy:  -0.625
Complement Naive Bayes Accuracy:  0.7083333333333333
Multinomial Naive Bayes Accuracy:  0.7083333333333333

Stratified One Holdout
Gaussian Naive Bayes Accuracy:  0.9166666666666666
Categorical Naive Bayes Accuracy:  0.9583333333333334
Bernoulli Naive Bayes Accuracy:  -0.625
Complement Naive Bayes Accuracy:  0.7083333333

In [61]:
X_train, X_test, Y_train, Y_test = train_test_split(l1_norm, Y, test_size=0.2, random_state=0)
stratifiedOneHoldout(X_train, Y_train)


(array([[0.52083333, 0.33333333, 0.125     , 0.02083333],
        [0.44055944, 0.16083916, 0.30769231, 0.09090909],
        [0.38922156, 0.17964072, 0.31137725, 0.11976048],
        [0.37142857, 0.17142857, 0.33142857, 0.12571429],
        [0.37113402, 0.18556701, 0.31443299, 0.12886598],
        [0.39263804, 0.16564417, 0.32515337, 0.11656442],
        [0.3986014 , 0.1958042 , 0.31468531, 0.09090909],
        [0.37869822, 0.16568047, 0.33136095, 0.12426036],
        [0.5       , 0.34042553, 0.13829787, 0.0212766 ],
        [0.37341772, 0.18987342, 0.32278481, 0.11392405],
        [0.38121547, 0.17679558, 0.31491713, 0.12707182],
        [0.40425532, 0.21276596, 0.29787234, 0.08510638],
        [0.52380952, 0.33333333, 0.12380952, 0.01904762],
        [0.5046729 , 0.31775701, 0.1588785 , 0.01869159],
        [0.38461538, 0.19230769, 0.30769231, 0.11538462],
        [0.3583815 , 0.19653179, 0.31213873, 0.13294798],
        [0.37419355, 0.17419355, 0.32903226, 0.12258065],
        [0.504

In [0]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0, stratify=Y)

In [0]:
Train_x, TrainDev_x, Train_y, TrainDev_y = train_test_split(X_train, Y_train, test_size=0.2, random_state=0, stratify=Y_train)
Dev_x, Test_x, Dev_y, Test_y = train_test_split(X_test, Y_test, test_size=0.5, random_state=0, stratify=Y_test)