**Apply Naive Bayes classification to predict whether a customer buys a car or not using CategoricalNB**

In [1]:
import pandas as pd

df=pd.read_excel('training_data.xlsx')
df.head()

Unnamed: 0,Age,Income,Marital Status,Credit rating,buys_Car
0,Youth,High,No,Fair,No
1,Youth,High,No,Excellent,No
2,Middle Aged,High,No,Fair,Yes
3,Senior,Medium,No,Fair,Yes
4,Senior,Low,Yes,Fair,Yes


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import CategoricalNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

df=df.apply(lambda col: LabelEncoder().fit_transform(col))

print(df)

    Age  Income  Marital Status  Credit rating  buys_Car
0     2       0               0              1         0
1     2       0               0              0         0
2     0       0               0              1         1
3     1       2               0              1         1
4     1       1               1              1         1
5     1       1               1              0         0
6     0       1               1              0         1
7     2       2               0              1         0
8     2       1               1              1         1
9     1       2               1              1         1
10    2       2               1              0         1
11    0       2               0              0         1
12    0       0               0              1         1
13    1       2               0              0         0


In [3]:
X=df.values[:,:-1]
y=df.values[:,-1]

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=45)

clf=CategoricalNB()
clf.fit(X_train,y_train)
y_predict=clf.predict(X_test)

print('Actual output: ',y_test)
print('Predicted output : ',y_predict)

print('\nConfusion matrix : \n',confusion_matrix(y_predict,y_test))
print('\nAccuracy : ',accuracy_score(y_predict,y_test))
print('Classification Report : \n',classification_report(y_test,y_predict,zero_division=0))

Actual output:  [1 1 1 1 1]
Predicted output :  [1 0 1 1 0]

Confusion matrix : 
 [[0 2]
 [0 3]]

Accuracy :  0.6
Classification Report : 
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.60      0.75         5

    accuracy                           0.60         5
   macro avg       0.50      0.30      0.37         5
weighted avg       1.00      0.60      0.75         5



In [4]:
print('Class membership probabilites : \n',clf.predict_proba(X_test))

Class membership probabilites : 
 [[0.34526854 0.65473146]
 [0.67839196 0.32160804]
 [0.31914894 0.68085106]
 [0.24788836 0.75211164]
 [0.8490566  0.1509434 ]]


**Apply Gaussian Naive Bayes classifier to predict forest fire based on forest conditions**

In [5]:
df=pd.read_csv('Forest_fire.csv')
df.head()
df=df.drop(columns=['day','month','year'])

In [7]:
df['Classes']=LabelEncoder().fit_transform(df['Classes'])
df.head(10)

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,0
1,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,0
2,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,0
3,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,0
4,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,0
5,31,67,14,0.0,82.6,5.8,22.2,3.1,7.0,2.5,1
6,33,54,13,0.0,88.2,9.9,30.5,6.4,10.9,7.2,1
7,30,73,15,0.0,86.6,12.1,38.3,5.6,13.5,7.1,1
8,25,88,13,0.2,52.9,7.9,38.8,0.4,10.5,0.3,0
9,28,79,12,0.0,73.2,9.5,46.3,1.3,12.6,0.9,0


In [8]:
from sklearn.preprocessing import MinMaxScaler 

scaler=MinMaxScaler(feature_range=(-1, 1))
inputs=df.columns[:-1]
df[inputs]=scaler.fit_transform(df[inputs])

df.head(10)

Unnamed: 0,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,-0.066667,-0.454545,-0.066667,-1.0,0.202593,-0.899065,-0.993443,-0.792,-0.930618,-0.966887,0
1,-0.066667,-0.272727,-0.733333,-0.845238,0.160454,-0.872897,-0.993443,-0.84,-0.915535,-0.97351,0
2,-0.466667,0.681818,0.466667,0.559524,-0.400324,-0.93271,-0.998126,-0.952,-0.951735,-0.993377,0
3,-0.6,1.0,-0.733333,-0.702381,-1.0,-0.97757,-1.0,-1.0,-0.9819,-1.0,0
4,-0.333333,0.454545,-0.333333,-1.0,0.17342,-0.914019,-0.931616,-0.808,-0.915535,-0.966887,0
5,0.2,0.0,-0.6,-1.0,0.750405,-0.809346,-0.856674,-0.504,-0.822021,-0.834437,1
6,0.466667,-0.590909,-0.733333,-1.0,0.931929,-0.656075,-0.778923,0.024,-0.704374,-0.523179,1
7,0.066667,0.272727,-0.466667,-1.0,0.880065,-0.573832,-0.705855,-0.104,-0.625943,-0.529801,1
8,-0.6,0.954545,-0.733333,-0.97619,-0.212318,-0.730841,-0.701171,-0.936,-0.71644,-0.980132,0
9,-0.2,0.545455,-0.866667,-1.0,0.445705,-0.671028,-0.630913,-0.792,-0.653092,-0.940397,0


In [9]:
from sklearn.naive_bayes import GaussianNB
X=df.values[:,:-1]
y=df.values[:,-1]
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=45)

clf=GaussianNB()
clf.fit(X_train,y_train)
y_predict=clf.predict(X_test)

print('Actual output: ',y_test)
print('Predicted output : ',y_predict)

print('\nConfusion matrix : \n',confusion_matrix(y_predict,y_test))
print('\nAccuracy : ',accuracy_score(y_predict,y_test))
print('Classification Report : \n',classification_report(y_test,y_predict,zero_division=0))

Actual output:  [1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0.
 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1.]
Predicted output :  [1. 0. 0. 1. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 0. 0. 0. 0. 1. 0.
 0. 1. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1.]

Confusion matrix : 
 [[22  1]
 [ 0 14]]

Accuracy :  0.972972972972973
Classification Report : 
               precision    recall  f1-score   support

         0.0       0.96      1.00      0.98        22
         1.0       1.00      0.93      0.97        15

    accuracy                           0.97        37
   macro avg       0.98      0.97      0.97        37
weighted avg       0.97      0.97      0.97        37



In [10]:
print('Class membership probabilites : \n',clf.predict_proba(X_test))

Class membership probabilites : 
 [[2.16496122e-123 1.00000000e+000]
 [1.00000000e+000 2.81876672e-063]
 [9.85741736e-001 1.42582637e-002]
 [6.98767210e-234 1.00000000e+000]
 [1.00000000e+000 2.01414010e-039]
 [9.92038655e-109 1.00000000e+000]
 [8.50861021e-001 1.49138979e-001]
 [1.17616869e-060 1.00000000e+000]
 [1.00000000e+000 1.41074651e-016]
 [1.00000000e+000 1.08217821e-189]
 [1.00000000e+000 1.68992166e-017]
 [1.00000000e+000 5.86768222e-104]
 [4.46785695e-031 1.00000000e+000]
 [9.48851516e-030 1.00000000e+000]
 [4.72230005e-089 1.00000000e+000]
 [1.54190080e-013 1.00000000e+000]
 [9.87331170e-001 1.26688299e-002]
 [2.97582902e-009 9.99999997e-001]
 [9.99999627e-001 3.72904403e-007]
 [1.00000000e+000 6.97589030e-041]
 [9.91816711e-001 8.18328916e-003]
 [1.00000000e+000 2.46437254e-033]
 [1.30415186e-010 1.00000000e+000]
 [1.00000000e+000 2.21794006e-011]
 [8.37201422e-001 1.62798578e-001]
 [2.88209275e-005 9.99971179e-001]
 [3.95491672e-009 9.99999996e-001]
 [1.00000000e+000 9.2