In [2]:
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from chefboost import Chefboost as chef
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical

%matplotlib inline

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# Column layout of the raw file: six categorical inputs followed by the label.
# chefboost requires the label column to be named *Decision*.
features = [
    "buying",
    "maint",
    "doors",
    "persons",
    "lug_boot",
    "safety",
]
target = ["Decision"]
columns = [*features, *target]

# The file carries no header row, so the column names are supplied explicitly.
df = pd.read_csv("data/car.data", names=columns, header=None)
df.head()

In [None]:
# Split into training and testing frames; the fixed random_state makes the
# split identical on every run.
# NOTE(review): test_size=0.95 leaves only 5% of the rows for training —
# confirm this is intentional.
train_df, test_df = train_test_split(
    df,
    test_size=0.95,
    random_state=42,
)

In [None]:
# Testing inputs are every column except the label; the label column is kept
# as a one-column frame because `target` is a list.
y_test = test_df[target]
X_test = test_df.drop(columns=target)

In [None]:
def predict_all(model, data):
    """Return a list with one chefboost prediction per row of ``data``.

    Args:
        model: Fitted model object as returned by ``chef.fit``.
        data: Object exposing ``.values`` that yields one record per
            iteration (presumably a pandas DataFrame of feature columns).

    Returns:
        list: ``chef.predict(model, row)`` for every row, in row order.
    """
    return [chef.predict(model, row) for row in data.values]

In [None]:
%%markdown
# ID3 Model

In [None]:
# Train an ID3 decision tree with chefboost and score it on the held-out split.
ID3_model = chef.fit(train_df, {'algorithm': 'ID3'})
ID3_predictions = predict_all(ID3_model, X_test)
# The printed label says "accuracy", so compute accuracy (fraction of exactly
# matching labels) instead of weighted precision, which is a different metric.
ID3_score = accuracy_score(y_test, ID3_predictions)
print("ID3 test accuracy: {}%".format(round(ID3_score*100, 2)))

In [None]:
# Persist the trained ID3 model as a pickle file via chefboost.
# NOTE(review): the path is relative to the notebook's working directory and
# the ../../models directory is presumably expected to exist — confirm.
chef.save_model(ID3_model, '../../models/ID3_model.pkl')

In [None]:
%%markdown
# CART Model

In [None]:
# Train a CART decision tree with chefboost and score it on the held-out split.
CART_model = chef.fit(train_df, {'algorithm': 'CART'})
CART_predictions = predict_all(CART_model, X_test)
# The printed label says "accuracy", so compute accuracy (fraction of exactly
# matching labels) instead of weighted precision, which is a different metric.
CART_score = accuracy_score(y_test, CART_predictions)
print("CART test accuracy: {}%".format(round(CART_score*100, 2)))

In [None]:
# Persist the trained CART model as a pickle file via chefboost.
# NOTE(review): the path is relative to the notebook's working directory and
# the ../../models directory is presumably expected to exist — confirm.
chef.save_model(CART_model, '../../models/CART_model.pkl')

In [None]:
%%markdown
# C4.5 Model

In [None]:
# Train a C4.5 decision tree with chefboost and score it on the held-out split.
C45_model = chef.fit(train_df, {'algorithm': 'C4.5'})
C45_predictions = predict_all(C45_model, X_test)
# The printed label says "accuracy", so compute accuracy (fraction of exactly
# matching labels) instead of weighted precision, which is a different metric.
C45_score = accuracy_score(y_test, C45_predictions)
print("C45 test accuracy: {}%".format(round(C45_score*100, 2)))

In [None]:
# Persist the trained C4.5 model as a pickle file via chefboost.
# NOTE(review): the path is relative to the notebook's working directory and
# the ../../models directory is presumably expected to exist — confirm.
chef.save_model(C45_model, '../../models/C45_model.pkl')

In [None]:
%%markdown
# Neural Network

In [None]:
# Fit one LabelEncoder per column on the training split and keep each encoder
# in `encoders` so the same label -> integer mapping can be reused later.
# Copy first: train_df comes out of train_test_split, and writing columns into
# that derived frame can raise pandas' SettingWithCopyWarning.
train_df = train_df.copy()
encoders = {}
for column in train_df.columns:
    encoder = LabelEncoder()
    train_df[column] = encoder.fit_transform(train_df[column])
    encoders[column] = encoder
train_df.head()

In [None]:
# Apply the encoders fitted on the training split to the test split so both
# frames share the same label -> integer mapping.
# Copy first: test_df comes out of train_test_split, and writing columns into
# that derived frame can raise pandas' SettingWithCopyWarning.
test_df = test_df.copy()
for column in test_df.columns:
    # transform() raises ValueError for a label the encoder never saw in fit.
    test_df[column] = encoders[column].transform(test_df[column])
test_df.head()

In [None]:
# Separate inputs (X) from the label column (y) for both splits.
y_train, y_test = train_df[target], test_df[target]
X_train = train_df.drop(columns=target)
X_test = test_df.drop(columns=target)

In [None]:
# One-hot encode the integer labels of both splits into 4-column vectors.
y_train, y_test = (
    to_categorical(labels, num_classes=4) for labels in (y_train, y_test)
)

In [None]:
# Build the classifier: two 256-unit sigmoid hidden layers over 6 input
# features, followed by a 4-unit softmax output layer.
model = Sequential([
    Dense(256, activation='sigmoid', input_dim=6, kernel_initializer='he_uniform'),
    Dense(256, activation='sigmoid'),
    Dense(4, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the network for 50 epochs and report how long fitting took.
# time.perf_counter() is a monotonic clock meant for measuring elapsed
# intervals; time.time() can jump if the system clock is adjusted mid-run.
start_time = time.perf_counter()
history = model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=2)
end_time = time.perf_counter()
print("Training Finished in %0.02f seconds" % (end_time - start_time))

In [None]:
# Score the trained network on the test split; evaluate() is unpacked into the
# loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test, y_test, verbose=2)
loss, accuracy = test_metrics
print("Total test loss is: %0.04f" % loss)
print("Total test accuracy is: %0.02f" % (accuracy * 100))

##### Plotting accuracy and loss vs epochs
# Per-epoch training curves recorded by model.fit. Dedicated names are used so
# the scalar test `loss` / `accuracy` from model.evaluate are not overwritten
# by these per-epoch lists.
epoch = history.epoch
train_loss = history.history["loss"]
train_accuracy = history.history["accuracy"]

# Explicit figure/axes with a title and axis label so the figure stands alone.
fig, ax = plt.subplots()
ax.plot(epoch, train_loss, label="loss")
ax.plot(epoch, train_accuracy, label="accuracy")
ax.set_xlabel("epoch")
ax.set_title("Training loss and accuracy per epoch")
ax.legend()
plt.show()