In [1]:
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Load the CSV that supplies both the training and the testing rows.
dataset_path = "pre-train-data.csv"
df = pd.read_csv(dataset_path)

In [3]:
# Keep only the columns used by the model (the target plus its predictors).
model_columns = ['buying', 'maint', 'doors', 'lug_boot', 'safety', 'class']
df = df[model_columns]

In [4]:
# Lookup tables translating each categorical label into an integer code.
buying_m = dict(vhigh=4, high=3, med=2, low=1)
maint_m = dict(vhigh=4, high=3, med=2, low=1)
doors_m = dict(zip(('2', '3', '4', '5more'), (2, 3, 4, 5)))
lug_boot_m = {size: code for code, size in enumerate(('small', 'med', 'big'), start=1)}
safety_m = {level: code for code, level in enumerate(('low', 'med', 'high'), start=1)}
class_m = {label: code for code, label in enumerate(('unacc', 'acc', 'good', 'vgood'), start=1)}

In [5]:
# Convert every string-valued column to its numeric code for ML modeling.
column_mappings = {
    'buying': buying_m,
    'maint': maint_m,
    'doors': doors_m,
    'lug_boot': lug_boot_m,
    'safety': safety_m,
    'class': class_m,
}

# Encode into fresh Series first so `df` is left untouched if validation fails.
encoded_columns = {
    column: df[column].map(mapping) for column, mapping in column_mappings.items()
}

# Series.map yields NaN for any value that is not a key of the mapping.
# That happens for unexpected labels in the CSV, and also when this cell is
# run a second time (the values are then already integers, not labels).
# Fail loudly here instead of passing an all-NaN frame on to the model.
for column, codes in encoded_columns.items():
    unmapped_values = df.loc[codes.isna(), column].unique()
    if len(unmapped_values) > 0:
        raise ValueError(
            "Column '{}' contains values with no numeric mapping: {}. "
            "If this cell was already executed, reload the data before "
            "running it again.".format(column, list(unmapped_values))
        )

# Replace the label columns with their encoded versions (column order is kept).
df = df.assign(**encoded_columns)

In [6]:
# Split the dataframe into train and test datasets with an 80/20 ratio.
# A fixed random_state makes the shuffle (and therefore every accuracy
# reported below) reproducible across kernel restarts.
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [7]:
# The following code is for Naive Bayes

# aliases for the train and test partitions produced by the split above
train_data = train
test_data = test

# shape of each partition
print('Shape of training data :',train_data.shape)
print('Shape of testing data :',test_data.shape)


# target variable - buying
# independent variable - maint,doors,lug_boot,safety,class

# separate the independent and target variable on training data
# (`columns=` already names the axis, so no `axis` argument is needed)
train_x = train_data.drop(columns=['buying'])
train_y = train_data['buying']

# separate the independent and target variable on testing data
test_x = test_data.drop(columns=['buying'])
test_y = test_data['buying']


# Create the object of the Naive Bayes model
# NOTE(review): GaussianNB models each feature as Gaussian-distributed per
# class; the features here are small integer codes, so a categorical variant
# may be a better fit - TODO evaluate.
model = GaussianNB()

# fit the model with the training data
model.fit(train_x,train_y)

# predict the target on the train dataset
predict_train = model.predict(train_x)
print('Target on train data',predict_train) 

# Accuracy Score on train dataset
accuracy_train = accuracy_score(train_y,predict_train)
print('accuracy_score on train dataset : ', accuracy_train)

# predict the target on the test dataset
predict_test = model.predict(test_x)
print('Target on test data',predict_test) 

# Accuracy Score on test dataset
accuracy_test = accuracy_score(test_y,predict_test)
print('accuracy_score on test dataset : ', accuracy_test)

Shape of training data : (1382, 6)
Shape of testing data : (346, 6)
Target on train data [4 4 4 ... 4 4 4]
accuracy_score on train dataset :  0.3263386396526773
Target on test data [1 1 4 2 4 4 2 2 4 4 4 4 4 4 4 4 4 1 1 4 4 4 4 4 4 4 4 4 1 4 4 4 4 4 4 2 2
 4 4 4 4 2 4 4 4 1 2 4 4 4 4 4 4 4 4 1 2 4 2 2 4 4 4 4 4 4 4 4 1 4 2 2 4 4
 4 2 2 2 4 2 2 4 2 4 4 4 4 4 4 4 4 2 4 4 4 4 4 2 2 4 1 4 4 4 4 2 4 2 4 4 2
 4 4 2 4 4 4 4 4 1 4 4 4 4 2 4 4 4 4 4 2 2 4 2 4 4 4 4 4 4 4 4 4 4 4 4 2 4
 4 4 2 4 4 4 1 4 4 4 2 2 4 4 2 4 4 4 4 4 4 2 2 4 4 4 2 4 4 4 1 4 4 4 4 2 4
 4 2 2 4 2 2 4 4 4 2 2 4 2 2 4 4 2 4 4 4 4 4 4 4 1 4 1 4 4 4 4 2 4 2 4 2 4
 1 2 4 4 4 4 4 1 1 4 4 4 1 4 4 2 4 4 2 4 4 4 2 1 4 4 4 2 4 2 2 2 1 4 4 4 2
 4 4 2 1 4 4 4 4 4 4 2 2 4 4 2 4 4 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 1 4 2 4 4 4 4 4 4 1 4 4 2 4 4 4 4 2 4 4 4 2 4 2 2 4 2 4 4 4 4 4 4 4 4 4
 2 4 4 1 4 4 4 4 4 1 4 4 4]
accuracy_score on test dataset :  0.315028901734104
