### Probabilistic Supervised Learning - Naive Bayes:
- Using the Iris dataset, perform the necessary pre-processing steps.
- Train the model using a Naive Bayes classifier.
- Provide new test data and predict its classification output.
- Analyze the results and write the inference.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the Iris measurements from the CSV at this relative path.
df = pd.read_csv(filepath_or_buffer="./dataset/iris.csv")
# Preview five randomly drawn rows; no seed is passed, so the rows shown vary per run.
df.sample(n=5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
99,5.7,2.8,4.1,1.3,versicolor
130,7.4,2.8,6.1,1.9,virginica
77,6.7,3.0,5.0,1.7,versicolor
98,5.1,2.5,3.0,1.1,versicolor
109,7.2,3.6,6.1,2.5,virginica


In [3]:
# Number of rows that exactly repeat an earlier row (the first occurrence is not counted).
df.duplicated(keep="first").sum()

1

In [4]:
# Keep only the first occurrence of each repeated row.
# The result is rebound to `df` rather than using inplace=True: the frame object
# that was loaded is not mutated behind the reader's back, and the statement can
# be chained or re-run without surprises.
df = df.drop_duplicates(keep="first")

In [5]:
# Discard every row that contains at least one missing value.
# Rebinding `df` (instead of inplace=True) avoids mutating the existing frame
# object in place and keeps the step chainable.
df = df.dropna()

In [6]:
# Features: every column except the class label.
x = df.drop(columns=["species"])
# Target: whatever remains once the feature columns are removed,
# kept as a one-column DataFrame rather than a Series.
y = df.drop(columns=x.columns)

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [8]:
# Map the species names to integer class codes.
le = LabelEncoder()
# Encode from the `species` column of `df` rather than from `y` itself: this cell
# overwrites `y`, so encoding `y` again on a re-run would refit the encoder on the
# integer codes and `le.inverse_transform` would stop returning species names.
# `y` stays a one-column DataFrame, as later cells flatten it with np.ravel.
y = df[["species"]].apply(le.fit_transform)

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes classifier; no class priors are supplied (the library default).
model = GaussianNB(priors=None)

In [11]:
# Flatten the one-column target frame into a 1-D label array before fitting.
model.fit(x_train, y_train.to_numpy().ravel())

In [12]:
# Predicted integer class codes for the held-out rows.
y_pred = model.predict(X=x_test)
y_pred

array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2,
       0, 2, 2, 2, 2, 2, 0, 0])

In [13]:
# Translate the predicted class codes back into species names.
predicted_species = le.inverse_transform(y_pred)
print(predicted_species)

['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor' 'setosa'
 'versicolor' 'virginica' 'versicolor' 'versicolor' 'virginica' 'setosa'
 'setosa' 'setosa' 'setosa' 'versicolor' 'virginica' 'versicolor'
 'versicolor' 'virginica' 'setosa' 'virginica' 'setosa' 'virginica'
 'virginica' 'virginica' 'virginica' 'virginica' 'setosa' 'setosa']


In [14]:
# Species names of the true test labels, for side-by-side comparison with the predictions.
actual_species = le.inverse_transform(y_test.to_numpy().ravel())
print(actual_species)

['versicolor' 'setosa' 'virginica' 'versicolor' 'versicolor' 'setosa'
 'versicolor' 'virginica' 'versicolor' 'versicolor' 'virginica' 'setosa'
 'setosa' 'setosa' 'setosa' 'versicolor' 'virginica' 'versicolor'
 'versicolor' 'virginica' 'setosa' 'virginica' 'setosa' 'virginica'
 'virginica' 'virginica' 'virginica' 'virginica' 'setosa' 'setosa']


In [15]:
# Three hand-made flowers to classify, one list per feature column
# (sepal_length, sepal_width, petal_length, petal_width); no species label is supplied.
data = dict(
    sepal_length=[5.7, 6.2, 6],
    sepal_width=[2.6, 2.9, 3],
    petal_length=[4.4, 4.2, 2.9],
    petal_width=[0.4, 0.8, 1.2],
)

pred = pd.DataFrame.from_dict(data)
pred

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.7,2.6,4.4,0.4
1,6.2,2.9,4.2,0.8
2,6.0,3.0,2.9,1.2


In [16]:
# Classify the hand-made samples with the fitted model (integer class codes).
res = model.predict(X=pred)

In [17]:
# Show the species names predicted for the hand-made samples.
new_sample_species = le.inverse_transform(res)
print(new_sample_species)

['versicolor' 'versicolor' 'versicolor']
