In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
cd /content/gdrive/My Drive/Colab_csvs

/content/gdrive/My Drive/Colab_csvs


In [4]:
from sklearn.naive_bayes import CategoricalNB
import numpy as np
import pandas as pd

In [5]:
ds = pd.read_csv("Nephritis.csv")

In [None]:
y = ds.Nephritis.values
new_data = pd.get_dummies(ds.drop(['Nephritis'],axis = 1)) #encoding
print(f"First 5 rows : \n\n{new_data.head(5)}")
x = (new_data-np.min(new_data))/(np.max(new_data)-np.min(new_data)).values


from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=1)

First 5 rows : 

   Temperature  Nausea_no  Nausea_yes  Lumbar pain_no  Lumbar pain_yes  \
0         35.5          1           0               0                1   
1         35.9          1           0               1                0   
2         35.9          1           0               0                1   
3         36.0          1           0               1                0   
4         36.0          1           0               0                1   

   Continous_need_no  Continous_need_yes  Micturition pains_no  \
0                  1                   0                     1   
1                  0                   1                     0   
2                  1                   0                     1   
3                  0                   1                     0   
4                  1                   0                     1   

   Micturition pains_yes  Burning_no  Burning_yes  
0                      0           1            0  
1                      1           0 

In [6]:
help(CategoricalNB())

Help on CategoricalNB in module sklearn.naive_bayes object:

class CategoricalNB(_BaseDiscreteNB)
 |  CategoricalNB(*, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None)
 |  
 |  Naive Bayes classifier for categorical features.
 |  
 |  The categorical Naive Bayes classifier is suitable for classification with
 |  discrete features that are categorically distributed. The categories of
 |  each feature are drawn from a categorical distribution.
 |  
 |  Read more in the :ref:`User Guide <categorical_naive_bayes>`.
 |  
 |  Parameters
 |  ----------
 |  alpha : float, default=1.0
 |      Additive (Laplace/Lidstone) smoothing parameter
 |      (0 for no smoothing).
 |  
 |  fit_prior : bool, default=True
 |      Whether to learn class prior probabilities or not.
 |      If false, a uniform prior will be used.
 |  
 |  class_prior : array-like of shape (n_classes,), default=None
 |      Prior probabilities of the classes. If specified the priors are not
 |      adjusted acco

In [None]:
model = CategoricalNB()
model.fit(x_train, y_train)
print("\n\n\nTrained model parameters:")
print("\nThe class category count is: ",model.category_count_,"\n")
print("The class count is: ",model.class_count_,"\n") 
print("The class log prior is: ",model.class_log_prior_,"\n") 
print("The classes are: ",model.classes_,"\n") 
print("The feature log probability is: ",model.feature_log_prob_,"\n") 
print("The number of features are: ",model.n_features_,"\n") 
print("The categories are: ",model.n_categories_,"\n") 




Trained model parameters:

The class category count is:  [array([[61.,  1.],
       [32.,  2.]]), array([[ 0., 62.],
       [18., 16.]]), array([[62.,  0.],
       [16., 18.]]), array([[19., 43.],
       [34.,  0.]]), array([[43., 19.],
       [ 0., 34.]]), array([[33., 29.],
       [30.,  4.]]), array([[29., 33.],
       [ 4., 30.]]), array([[25., 37.],
       [18., 16.]]), array([[37., 25.],
       [16., 18.]]), array([[19., 43.],
       [23., 11.]]), array([[43., 19.],
       [11., 23.]])] 

The class count is:  [62. 34.] 

The class log prior is:  [-0.43721381 -1.03798767] 

The classes are:  ['no' 'yes'] 

The feature log probability is:  [array([[-0.0317487 , -3.4657359 ],
       [-0.08701138, -2.48490665]]), array([[-4.15888308, -0.01574836],
       [-0.63907996, -0.75030559]]), array([[-0.01574836, -4.15888308],
       [-0.75030559, -0.63907996]]), array([[-1.16315081, -0.37469345],
       [-0.02817088, -3.58351894]]), array([[-0.37469345, -1.16315081],
       [-3.58351894, 

In [None]:
#test data
nb_model = CategoricalNB()
nb_model.fit(x_test,y_test)

CategoricalNB()

In [None]:
#confusion matrix
from sklearn.metrics import confusion_matrix
y_predicted = model.predict(x_test)
cmatrix = confusion_matrix(y_test,y_predicted) 
print(cmatrix)

[[ 8  0]
 [ 0 16]]


In [None]:
# Report accuracy.
accuracy_nb = nb_model.score(x_test,y_test)
print(f"The Accuracy of the Naive Bayes classifier is : {accuracy_nb}")
print(f"Hence, this concludes that {accuracy_nb*100}% of samples are classified correctly")

The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly


### Changing parameters

In [None]:
def CategoricalNBClass(a,mc):
    nb_model = CategoricalNB(alpha = a, min_categories = mc)
    nb_model.fit(x_test,y_test)
    
    # Confusion matrix
    y_predicted = model.predict(x_test)
    cmatrix = confusion_matrix(y_test,y_predicted)
    print(f"Confusion Matrix is : \n{cmatrix}")
    
    #Accuracy
    accuracy_nb = nb_model.score(x_test,y_test)
    print(f"The Accuracy of the Naive Bayes classifier is : {accuracy_nb}")
    print(f"Hence, this concludes that {accuracy_nb*100}% of samples are classified correctly")

In [None]:
CategoricalNBClass(0.25,10)

Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly


In [None]:
CategoricalNBClass(0.5,1)
print("\n\n")
CategoricalNBClass(0.5,200)
print("\n\n")
CategoricalNBClass(0.5,1000)
print("\n\n")
CategoricalNBClass(0.5,2000)

Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly



Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly



Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9166666666666666
Hence, this concludes that 91.66666666666666% of samples are classified correctly



Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9166666666666666
Hence, this concludes that 91.66666666666666% of samples are classified correctly


In [None]:
CategoricalNBClass(0.75,2)

Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly


In [None]:
CategoricalNBClass(1,2)

Confusion Matrix is : 
[[ 8  0]
 [ 0 16]]
The Accuracy of the Naive Bayes classifier is : 0.9583333333333334
Hence, this concludes that 95.83333333333334% of samples are classified correctly


In [None]:
help(CategoricalNB())

Help on CategoricalNB in module sklearn.naive_bayes object:

class CategoricalNB(_BaseDiscreteNB)
 |  CategoricalNB(*, alpha=1.0, fit_prior=True, class_prior=None, min_categories=None)
 |  
 |  Naive Bayes classifier for categorical features
 |  
 |  The categorical Naive Bayes classifier is suitable for classification with
 |  discrete features that are categorically distributed. The categories of
 |  each feature are drawn from a categorical distribution.
 |  
 |  Read more in the :ref:`User Guide <categorical_naive_bayes>`.
 |  
 |  Parameters
 |  ----------
 |  alpha : float, default=1.0
 |      Additive (Laplace/Lidstone) smoothing parameter
 |      (0 for no smoothing).
 |  
 |  fit_prior : bool, default=True
 |      Whether to learn class prior probabilities or not.
 |      If false, a uniform prior will be used.
 |  
 |  class_prior : array-like of shape (n_classes,), default=None
 |      Prior probabilities of the classes. If specified the priors are not
 |      adjusted accor