# Day 30

### Naive Bayes classifier

In [60]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report

### Load the data 

In [61]:
# Read the weather observations from a relative CSV path and display the frame.
weather_csv_path = 'Dataset/weather_data.csv'
df = pd.read_csv(weather_csv_path)
df

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


### Encode categorical variables

In [62]:
# Give every categorical feature its own LabelEncoder and keep the fitted
# encoders by column name so the same string -> integer mapping can be reused later.
categorical_features = ['Outlook', 'Temperature', 'Humidity', 'Windy']
feature_encoders = {name: LabelEncoder() for name in categorical_features}
for name, encoder in feature_encoders.items():
    # Replace the column with its integer codes (df is overwritten in place).
    df[name] = encoder.fit_transform(df[name])

# The target gets a dedicated encoder so predictions can be decoded back to labels.
target_encoder = LabelEncoder()
df['Play'] = target_encoder.fit_transform(df['Play'])

In [63]:
# Display the frame after the encoding step; each column now holds integer codes.
df

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play
0,2,1,0,0,0
1,2,1,0,1,0
2,0,1,0,0,1
3,1,2,0,0,1
4,1,0,1,0,1
5,1,0,1,1,0
6,0,0,1,1,1
7,2,2,0,0,0
8,2,0,1,0,1
9,1,2,1,0,1


### Split features and target

In [64]:
# Separate the predictors (every column except the label) from the label column.
target_column = 'Play'
x = df.drop(columns=[target_column])
y = df[target_column]

In [65]:
# Print the feature frame and the target series, each under its own heading.
for heading, values in (('X = \n\n', x), ("\n\nY = \n\n", y)):
    print(heading, values)

X = 

     Outlook  Temperature  Humidity  Windy
0         2            1         0      0
1         2            1         0      1
2         0            1         0      0
3         1            2         0      0
4         1            0         1      0
5         1            0         1      1
6         0            0         1      1
7         2            2         0      0
8         2            0         1      0
9         1            2         1      0
10        2            2         1      1
11        0            2         0      1
12        0            1         1      0
13        1            2         0      1


Y = 

 0     0
1     0
2     1
3     1
4     1
5     0
6     1
7     0
8     1
9     1
10    1
11    1
12    1
13    0
Name: Play, dtype: int32


### Train and test split

In [66]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
test_fraction = 0.3
split_seed = 42
split_parts = train_test_split(x, y, test_size=test_fraction, random_state=split_seed)
x_train, x_test, y_train, y_test = split_parts

In [67]:
# Training features: print the (rows, columns) shape, then display the frame itself.
print(x_train.shape)
x_train

(9, 4)


Unnamed: 0,Outlook,Temperature,Humidity,Windy
8,2,0,1,0
2,0,1,0,0
1,2,1,0,1
13,1,2,0,1
4,1,0,1,0
7,2,2,0,0
10,2,2,1,1
3,1,2,0,0
6,0,0,1,1


In [68]:
# Held-out features: print the (rows, columns) shape, then display the frame itself.
print(x_test.shape)
x_test

(5, 4)


Unnamed: 0,Outlook,Temperature,Humidity,Windy
9,1,2,1,0
11,0,2,0,1
0,2,1,0,0
12,0,1,1,0
5,1,0,1,1


In [69]:
# Training labels: print the shape, then display the encoded Play values.
print(y_train.shape)
y_train

(9,)


8     1
2     1
1     0
13    0
4     1
7     0
10    1
3     1
6     1
Name: Play, dtype: int32

In [70]:
# Held-out labels: print the shape, then display the encoded Play values.
print(y_test.shape)
y_test

(5,)


9     1
11    1
0     0
12    1
5     0
Name: Play, dtype: int32

### Train Naive Bayes model

In [71]:
# CategoricalNB infers the number of categories per feature from the training
# rows only. With so few training rows a category can easily be absent from
# the split, and predicting on a row that contains it then raises IndexError.
# Passing the category counts known to the fitted encoders (which saw the full
# frame) as `min_categories` makes every encoded value valid at predict time.
n_categories = [len(feature_encoders[col].classes_) for col in x_train.columns]
model = CategoricalNB(min_categories=n_categories)
model.fit(x_train,y_train)

### Predict and Evaluate

In [72]:
# Predict the encoded Play class for every held-out row; the bare name displays the array.
y_pred = model.predict(x_test)
y_pred

array([1, 0, 0, 1, 1])

In [73]:
# Display the true held-out labels so they can be compared with the predictions by eye.
y_test

9     1
11    1
0     0
12    1
5     0
Name: Play, dtype: int32

In [74]:
# Score the held-out predictions: overall accuracy first, then the per-class breakdown.
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy = ", test_accuracy)
test_report = classification_report(y_test, y_pred)
print("Classification Report = \n", test_report)

Accuracy =  0.6
Classification Report = 
               precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.67      0.67      0.67         3

    accuracy                           0.60         5
   macro avg       0.58      0.58      0.58         5
weighted avg       0.60      0.60      0.60         5



In [81]:
# One unseen observation, written with the same raw (un-encoded) values as the CSV.
sample_record = {
    'Outlook': 'sunny',
    'Temperature': 'hot',
    'Humidity': 'high',
    'Windy': True,
}
# Wrap the record in a one-row frame so it has the same column layout as the features.
new_sample = pd.DataFrame([sample_record])
new_sample

Unnamed: 0,Outlook,Temperature,Humidity,Windy
0,sunny,hot,high,True


In [82]:
# Encode the new sample with the stored encoders.
# The codes go into a separate frame instead of overwriting `new_sample`:
# LabelEncoder.transform raises on values it was not fitted on, so encoding
# in place would make this cell fail the second time it is run (the raw
# strings would already have been replaced by integers).
new_sample_encoded = pd.DataFrame(
    {col: feature_encoders[col].transform(new_sample[col]) for col in new_sample.columns},
    index=new_sample.index,
)

print(new_sample_encoded)
# Predict the encoded class, then map it back to the original Play label.
predict = model.predict(new_sample_encoded)
predicted_label = target_encoder.inverse_transform(predict)[0]
print("Predicted play = ",predicted_label)

   Outlook  Temperature  Humidity  Windy
0        2            1         0      1
Predicted play =  no


In [83]:
# Re-score the model on the held-out split: refresh the predictions, then
# print accuracy followed by the per-class classification report.
y_pred = model.predict(x_test)
for label, metric in (
    ("Accuracy = ", accuracy_score),
    ("Classification Report = \n", classification_report),
):
    print(label, metric(y_test, y_pred))

Accuracy =  0.6
Classification Report = 
               precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.67      0.67      0.67         3

    accuracy                           0.60         5
   macro avg       0.58      0.58      0.58         5
weighted avg       0.60      0.60      0.60         5

