## Naive Bayees :  Assumption: All the independent features are really indepedent with each other
All the X features are independent with each other => X1vsX2 => independent relationship

BernoulliNB : Binary Classification : numerical outputs

MultinominalNB: Multiclass classification : numerical outputs

categoricalNB: categorical outputs

GaussianNB: assumes that data follows normal distribution

In [1]:
# importing required libraries
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score

# data gathering
path=r'https://raw.githubusercontent.com/sindhura-nk/Datasets/refs/heads/main/train_loan.csv'
df = pd.read_csv(path)
X = df.drop(columns='loan_status')
Y = df[['loan_status']]

# data preprocessing and data cleaning
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

cat = list(X.columns[X.dtypes=='object'])
con = list(X.columns[X.dtypes!='object'])

cat_pipe = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown='ignore',sparse_output=False)
)

con_pipe = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler())

pre = ColumnTransformer([
    ('cat',cat_pipe,cat),
    ('con',con_pipe,con)
]).set_output(transform='pandas')
X_pre = pre.fit_transform(X)

# train test split
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(X_pre,Y,train_size=0.8,random_state=21)

# model building
model = BernoulliNB()

# fit the model with the training data
model.fit(xtrain,ytrain['loan_status'])

# predict the target on the train dataset
predict_train = model.predict(xtrain)
print('Target on train data',predict_train) 

# Evaluation of model
# Accuray Score on train dataset
accuracy_train = accuracy_score(ytrain,predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(xtest)
print('Target on test data',predict_test) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(ytest,predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data [1 0 0 ... 0 1 0]
accuracy_score on train dataset :  0.8413973910819337
Target on test data [0 0 1 ... 0 1 0]
accuracy_score on test dataset :  0.8444027623838349


In [2]:
# importing required libraries
import pandas as pd
from sklearn.naive_bayes import CategoricalNB
from sklearn.metrics import accuracy_score

# data gathering
path=r'https://raw.githubusercontent.com/sindhura-nk/Datasets/refs/heads/main/drug200.csv'
df = pd.read_csv(path)
X = df.drop(columns='Drug')
Y = df[['Drug']]

# data preprocessing and data cleaning
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder,MinMaxScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

cat = list(X.columns[X.dtypes=='object'])
con = list(X.columns[X.dtypes!='object'])

cat_pipe = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown='ignore',sparse_output=False)
)

con_pipe = make_pipeline(
    SimpleImputer(strategy='median'),
    MinMaxScaler())

pre = ColumnTransformer([
    ('cat',cat_pipe,cat),
    ('con',con_pipe,con)
]).set_output(transform='pandas')
X_pre = pre.fit_transform(X)

# train test split
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest = train_test_split(X_pre,Y,train_size=0.8,random_state=21)

# model building
model = CategoricalNB()

# fit the model with the training data
model.fit(xtrain,ytrain['Drug'])

# predict the target on the train dataset
predict_train = model.predict(xtrain)
print('Target on train data',predict_train) 

# Accuray Score on train dataset
accuracy_train = accuracy_score(ytrain,predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(xtest)
print('Target on test data',predict_test) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(ytest,predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Target on train data ['drugA' 'drugX' 'drugX' 'DrugY' 'drugX' 'drugB' 'DrugY' 'drugC' 'drugB'
 'drugX' 'DrugY' 'drugA' 'drugB' 'DrugY' 'drugX' 'drugX' 'drugC' 'drugX'
 'drugC' 'DrugY' 'DrugY' 'drugX' 'drugB' 'DrugY' 'drugX' 'DrugY' 'drugC'
 'drugX' 'drugA' 'DrugY' 'drugA' 'drugX' 'drugC' 'DrugY' 'drugX' 'drugX'
 'DrugY' 'DrugY' 'drugX' 'drugX' 'drugA' 'drugX' 'DrugY' 'drugC' 'drugA'
 'drugX' 'drugC' 'drugX' 'drugB' 'DrugY' 'drugX' 'drugA' 'DrugY' 'drugX'
 'drugC' 'DrugY' 'drugC' 'drugX' 'drugB' 'drugX' 'drugA' 'drugC' 'DrugY'
 'drugA' 'DrugY' 'DrugY' 'drugC' 'drugX' 'drugX' 'drugX' 'DrugY' 'drugX'
 'drugX' 'drugX' 'drugX' 'drugA' 'DrugY' 'drugC' 'drugB' 'drugX' 'drugC'
 'drugX' 'drugX' 'DrugY' 'drugX' 'drugC' 'DrugY' 'drugX' 'drugX' 'drugB'
 'DrugY' 'drugX' 'drugC' 'drugC' 'drugX' 'drugX' 'drugA' 'drugC' 'DrugY'
 'drugC' 'drugB' 'DrugY' 'drugC' 'drugX' 'drugX' 'drugX' 'DrugY' 'DrugY'
 'drugA' 'drugB' 'drugX' 'drugX' 'drugB' 'drugX' 'drugC' 'drugX' 'DrugY'
 'drugX' 'DrugY' 'DrugY' 'Drug