In [34]:
# Load EDA Pkgs
import pandas as pd 
import numpy as np


# Load Data Vis Pkg
import matplotlib.pyplot as plt 
import seaborn as sns

# Load ML Pkgs
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# For Neural network (MultiLayerPerceptron)
from sklearn.neural_network import MLPClassifier

col_names = ['buying','maint','doors' ,'persons','lug_boot','safety','class']


# Load dataset
df = pd.read_csv("data/car.data",names=col_names)

In [35]:
# We will then label-encode our data set using either of these methods:

# Custom Function
# Label Encoder from Sklearn
# OneHot Encoding
# Pandas Get Dummies

In [36]:
# Custom Function
buying_label = { ni: n for n,ni in enumerate(set(df['buying']))}
maint_label = { ni: n for n,ni in enumerate(set(df['maint']))}
doors_label = { ni: n for n,ni in enumerate(set(df['doors']))}
persons_label = { ni: n for n,ni in enumerate(set(df['persons']))}
lug_boot_label = { ni: n for n,ni in enumerate(set(df['lug_boot']))}
safety_label = { ni: n for n,ni in enumerate(set(df['safety']))}
class_label = { ni: n for n,ni in enumerate(set(df['class']))}

In [37]:
# In our Case we will be using a custom function to help us encode our data set 
# and then map them to our values for each column respectively. 
# We will then save these labels as dictionaries and use it for building the options sections of our ML app.

In [38]:
df1 =df
df1['buying'] = df1['buying'].map(buying_label)
df1['maint'] = df1['maint'].map(maint_label)
df1['doors'] = df1['doors'].map(doors_label)
df1['persons'] = df1['persons'].map(persons_label)
df1['lug_boot'] = df1['lug_boot'].map(lug_boot_label)
df1['safety'] = df1['safety'].map(safety_label)
df1['class'] = df1['class'].map(class_label)

In [39]:
#We can also use the label encoder option.`


In [40]:
from sklearn.preprocessing import LabelEncoder


In [41]:
lb=LabelEncoder()
df2 = df

In [42]:
for i in df2.columns:
    df2[i]=lb.fit_transform(df2[i])

In [43]:
# Building the Model
# To summarize we will be using 3 different ML algorithms 
#(LogisticRegression,Naive Bayes and Multi-Layer Perceptron Classifier).

# We will first split our dataset into training and test dataset.

In [44]:
Xfeatures = df1[['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']]
ylabels = df1['class']

In [45]:
X_train, X_test, Y_train, Y_test = train_test_split(Xfeatures, ylabels, test_size=0.30, random_state=7)


In [46]:
# LOGISTICAL REGRESSION

# Using - Logisitic Regression
logit = LogisticRegression()
logit.fit(X_train, Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [47]:
print("Accuracy Score:",accuracy_score(Y_test, logit.predict(X_test)))


Accuracy Score: 0.7225433526011561


In [48]:
# NAIVE BAYES 
print('Running Naive Bayes')
mnb = MultinomialNB()
mnb.fit(X_train, Y_train)
print("Accuracy Score:",accuracy_score(Y_test, mnb.predict(X_test)))
print('')

Running Naive Bayes
Accuracy Score: 0.7148362235067437



In [27]:
# Using Neural Network
nn_clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
nn_clf.fit(X_train,Y_train)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(5, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

In [28]:
print("Accuracy Score:",accuracy_score(Y_test, nn_clf.predict(X_test)))


Accuracy Score: 0.6994219653179191


In [30]:
# saving models 

# Save Models
import joblib

logit_model = open("logit_car_model.pkl","wb")
joblib.dump(logit,logit_model)
logit_model.close()



In [31]:
nb_model = open("nb_car_model.pkl","wb")
joblib.dump(nb,nb_model)
nb_model.close()


NameError: name 'nb' is not defined

In [32]:
nn_clf_model = open("nn_clf_car_model.pkl","wb")
joblib.dump(nn_clf,nn_clf_model)
nn_clf_model.close()

In [33]:
df

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,1,1,1,0,2,2,3
1,1,1,1,0,2,0,3
2,1,1,1,0,2,1,3
3,1,1,1,0,0,2,3
4,1,1,1,0,0,0,3
...,...,...,...,...,...,...,...
1723,3,3,0,1,0,0,2
1724,3,3,0,1,0,1,0
1725,3,3,0,1,1,2,3
1726,3,3,0,1,1,0,2
