In [62]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, MinMaxScaler


In [7]:
# Load seaborn's 'iris' dataset: `species` is the target, every other column
# is used as a feature.
df = sns.load_dataset('iris')
y = df['species']
X = df.drop(columns='species')

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# Gaussian naive Bayes on the training split. fit() returns the estimator
# itself, so construction and fitting can be chained.
model = GaussianNB().fit(X_train, y_train)

# Leave the fitted estimator as the cell's displayed value.
model



In [11]:
# Score the Gaussian model on the held-out rows.
y_pred = model.predict(X_test)

# Each entry pairs a printed heading with the metric function whose result
# follows it; all three metrics take (true labels, predicted labels).
for heading, metric in (
    ('Accuracy: ', accuracy_score),
    ('Confusion Matrix: \n', confusion_matrix),
    ('Classification Report: \n', classification_report),
):
    print(heading, metric(y_test, y_pred))

Accuracy:  0.9777777777777777
Confusion Matrix: 
 [[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
Classification Report: 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       1.00      0.92      0.96        13
   virginica       0.93      1.00      0.96        13

    accuracy                           0.98        45
   macro avg       0.98      0.97      0.97        45
weighted avg       0.98      0.98      0.98        45



In [13]:
# Multinomial naive Bayes on the same training split. fit() returns the
# estimator itself, so construction and fitting can be chained.
model2 = MultinomialNB().fit(X_train, y_train)

# Leave the fitted estimator as the cell's displayed value.
model2


In [14]:
# Score the multinomial model on the held-out rows.
y_pred2 = model2.predict(X_test)

# Compute every metric first, then print them under the same headings used
# for the other models so the outputs can be compared side by side.
accuracy2 = accuracy_score(y_test, y_pred2)
confusion2 = confusion_matrix(y_test, y_pred2)
report2 = classification_report(y_test, y_pred2)

print('Accuracy: ', accuracy2)
print('Confusion Matrix: \n', confusion2)
print('Classification Report: \n', report2)

Accuracy:  0.9555555555555556
Confusion Matrix: 
 [[19  0  0]
 [ 0 12  1]
 [ 0  1 12]]
Classification Report: 
               precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        19
  versicolor       0.92      0.92      0.92        13
   virginica       0.92      0.92      0.92        13

    accuracy                           0.96        45
   macro avg       0.95      0.95      0.95        45
weighted avg       0.96      0.96      0.96        45



In [19]:
# Inspect the training labels: at this point they are still the raw species
# strings (object dtype), indexed by their row position in the original frame.
y_train

81     versicolor
133     virginica
137     virginica
75     versicolor
109     virginica
          ...    
71     versicolor
106     virginica
14         setosa
92     versicolor
102     virginica
Name: species, Length: 105, dtype: object

In [36]:
# Turn each label Series into a one-column DataFrame.
# NOTE(review): y_train2 / y_test2 are not used by any cell visible here --
# confirm they are still needed.
y_train2, y_test2 = (labels.to_frame() for labels in (y_train, y_test))

In [64]:
# Despite the `ohe` name this is a LabelEncoder: it maps each species string
# to a single integer code rather than one-hot encoding it.
ohe = LabelEncoder().fit(y_train)

# Apply the mapping learned from the training labels to both splits so the
# integer codes mean the same class in each.
y_train_ohe = ohe.transform(y_train)
y_test_ohe = ohe.transform(y_test)



In [69]:
# Inspect the encoded training labels: one integer class code per row.
y_train_ohe

array([1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1,
       2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0,
       1, 1, 2, 1, 2, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1,
       2, 1, 2, 0, 2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0,
       2, 0, 1, 2, 2, 1, 2, 1, 1, 2, 2, 0, 1, 2, 0, 1, 2])

In [100]:
# MinMaxScaler works on 2-D input, so view the 1-D label codes as single
# columns first.
train_codes_column = y_train_ohe.reshape(-1, 1)
test_codes_column = y_test_ohe.reshape(-1, 1)

# Learn the min/max from the training codes only, then rescale both splits
# with that same fitted scaler.
# NOTE(review): this rescales the *class labels*, turning the integer codes
# into floats (0, 0.5, 1). A classifier's fit() rejects such targets with
# "Unknown label type", so these arrays should not be used as `y`.
mms = MinMaxScaler().fit(train_codes_column)
y_train_mms = mms.transform(train_codes_column)
y_test_mms = mms.transform(test_codes_column)

In [83]:
# Wrap the scaled training labels in a one-column DataFrame named 'species'.
# NOTE(review): this rebinds y_train_mms from the scaler's array output to a
# DataFrame, so what the name holds depends on which cells have been run;
# downstream cells that expect an ndarray should be checked.
y_train_mms = pd.DataFrame(y_train_mms, columns=['species'])


In [106]:
# Re-display the integer-encoded training labels (same values as the earlier
# inspection cell; unchanged by the scaling steps, which write to y_train_mms).
y_train_ohe

array([1, 2, 2, 1, 2, 1, 2, 1, 0, 2, 1, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1,
       2, 0, 1, 2, 0, 2, 2, 1, 1, 2, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 0, 0,
       1, 1, 2, 1, 2, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 2, 0, 2, 2, 0, 1, 1,
       2, 1, 2, 0, 2, 1, 2, 1, 1, 1, 0, 1, 1, 0, 1, 2, 2, 0, 1, 2, 2, 0,
       2, 0, 1, 2, 2, 1, 2, 1, 1, 2, 2, 0, 1, 2, 0, 1, 2])

In [111]:
# Flatten the scaled training labels to a plain 1-D array.
#
# Go through np.asarray first: depending on which cells ran, y_train_mms is
# either the scaler's (n, 1) ndarray or the one-column DataFrame built from
# it, and a DataFrame has no .ravel() method. Converting first makes the cell
# work in both cases and keeps it safe to re-run on an already-flat array.
y_train_mms = np.asarray(y_train_mms).ravel()
y_train_mms

array([0.5, 1. , 1. , 0.5, 1. , 0.5, 1. , 0.5, 0. , 1. , 0.5, 0. , 0. ,
       0. , 0.5, 1. , 0. , 0. , 0. , 0.5, 0. , 0.5, 1. , 0. , 0.5, 1. ,
       0. , 1. , 1. , 0.5, 0.5, 1. , 0.5, 0. , 0.5, 1. , 0. , 0. , 0.5,
       0.5, 0. , 1. , 0. , 0. , 0.5, 0.5, 1. , 0.5, 1. , 1. , 0.5, 0. ,
       0. , 1. , 1. , 0. , 0. , 0. , 0.5, 1. , 0. , 1. , 1. , 0. , 0.5,
       0.5, 1. , 0.5, 1. , 0. , 1. , 0.5, 1. , 0.5, 0.5, 0.5, 0. , 0.5,
       0.5, 0. , 0.5, 1. , 1. , 0. , 0.5, 1. , 1. , 0. , 1. , 0. , 0.5,
       1. , 1. , 0.5, 1. , 0.5, 0.5, 1. , 1. , 0. , 0.5, 1. , 0. , 0.5,
       1. ])

In [112]:
# Third naive Bayes variant. No earlier cell creates `model3`, so build it
# here to keep the notebook runnable from a fresh kernel. BernoulliNB is the
# remaining estimator imported at the top -- presumably the intended one;
# confirm.
# NOTE(review): BernoulliNB thresholds each feature at `binarize` (0.0 by
# default). If every measurement is positive, all features collapse to 1 and
# the model can only predict from class priors -- consider a data-driven
# threshold.
model3 = BernoulliNB()

# Fit on the integer class codes. The MinMax-scaled labels are floats
# (0, 0.5, 1), which scikit-learn classifiers reject with
# "ValueError: Unknown label type". The evaluation cell scores against
# y_test_ohe, so the training target must use the same encoding.
model3.fit(X_train, y_train_ohe)

In [61]:
# Score model3 on the held-out rows against the integer-encoded test labels
# (the same encoding the model is trained on).
y_pred3 = model3.predict(X_test)

print('Accuracy: ', accuracy_score(y_test_ohe, y_pred3))
print('Confusion Matrix: \n', confusion_matrix(y_test_ohe, y_pred3))
# zero_division=0 reports 0 for precision/F1 of classes the model never
# predicts -- the same value the default produces -- without emitting an
# undefined-metric warning for each such class.
print('Classification Report: \n',
      classification_report(y_test_ohe, y_pred3, zero_division=0))

Accuracy:  0.28888888888888886
Confusion Matrix: 
 [[ 0 19  0]
 [ 0 13  0]
 [ 0 13  0]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00        19
           1       0.29      1.00      0.45        13
           2       0.00      0.00      0.00        13

    accuracy                           0.29        45
   macro avg       0.10      0.33      0.15        45
weighted avg       0.08      0.29      0.13        45



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
