In [166]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

# Fix random seed for reproducibility

In [167]:

seed = 7
np.random.seed(seed)

# load preprocess car dataset

In [168]:
dataset = pd.read_csv('car.data',header = None)

##Create mapping for ordinal value to numeric

In [169]:
dataMapping_rank = {"vhigh":4, "high":3, "med":2, "low":1}
dataMapping_size = {"big":3, "med":2, "small":1}
dataMapping_num = {"2":2, "3":3, "4":4, "5more":5, "more":5}
dataMapping_grade = {"unacc":1, "acc":2, "good":3, "vgood":4}

In [170]:
dataset['buying'] = dataset.iloc[:, 0].map(dataMapping_rank)
dataset['maint'] = dataset.iloc[:, 1].map(dataMapping_rank)
dataset['doors'] = dataset.iloc[:, 2].map(dataMapping_num)
dataset['persons'] = dataset.iloc[:, 3].map(dataMapping_num)
dataset['lug_boot'] = dataset.iloc[:, 4].map(dataMapping_size)
dataset['safety'] = dataset.iloc[:, 5].map(dataMapping_rank)
dataset['class'] = dataset.iloc[:, 6].map(dataMapping_grade)

# split into input (X) and output (y) variables

In [171]:
X = dataset.iloc[:, 8:14]
y = dataset.iloc[:, 7]

# split training and testing set

In [172]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=1/3,random_state=42, stratify=y)

# Random Forest Model

##train and evalutate model

In [173]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=1000)
model.fit(X_train, y_train)
ypred = model.predict(X_test)

## model accuracy

In [174]:
from sklearn import metrics
print(metrics.classification_report(ypred, y_test))

              precision    recall  f1-score   support

           1       0.15      0.13      0.14       167
           2       0.06      0.06      0.06       133
           3       0.07      0.07      0.07       149
           4       0.14      0.16      0.15       127

    accuracy                           0.10       576
   macro avg       0.10      0.10      0.10       576
weighted avg       0.11      0.10      0.10       576



In [175]:
model.predict(pd.DataFrame([[3,4,0,3,3,3]]))

  "X does not have valid feature names, but"


array([1])

Result for 
- Maintenance = High
- Number of doors = 4
- Lug Boot Size = Big
- Safety = High
- Class Value = Good

"low" for prediction of buying

In [180]:
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.utils import to_categorical

#preprocess data for CNN Deep Learning Model

In [183]:
X_train_tup = X_train.to_numpy()
X_test_tup = X_test.to_numpy()

In [184]:
y_train_dl = to_categorical(list(y_train))
y_test_dl = to_categorical(list(y_test))

##function for model deigne base on the input and output size

In [186]:
from keras.engine.training import Model
# fit and evaluate a model
def define_model(n_input, n_out, n_filters):
	model = Sequential()
	model.add(Conv1D(filters=n_filters, kernel_size=3, activation='relu', input_shape=n_input))
	model.add(Conv1D(filters=n_filters, kernel_size=3, activation='relu'))
	model.add(Dropout(0.5))
	model.add(MaxPooling1D(pool_size=2))
	model.add(Flatten())
	model.add(Dense(100, activation='relu'))
	model.add(Dense(n_out, activation='softmax'))
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

##generate CNN model

In [187]:
model = define_model((X_train_tup.shape[1],1),y_train_dl.shape[1], 8)

##illustrartion of model

In [188]:
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_16 (Conv1D)          (None, 4, 8)              32        
                                                                 
 conv1d_17 (Conv1D)          (None, 2, 8)              200       
                                                                 
 dropout_6 (Dropout)         (None, 2, 8)              0         
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 1, 8)             0         
 1D)                                                             
                                                                 
 flatten_6 (Flatten)         (None, 8)                 0         
                                                                 
 dense_11 (Dense)            (None, 100)               900       
                                                     

##train and evaluate the model

In [197]:
# fit network
verbose, epochs, batch_size = 0, 60, 32
model.fit(X_train_tup, y_train_dl, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, score = model.evaluate(X_test_tup, y_test_dl, batch_size=batch_size, verbose=0)

## model accuracy

In [198]:
score

0.3020833432674408

In [216]:
np.argmax(model.predict(pd.DataFrame([[3,4,0,3,3,3]])))

1

Result for 
- Maintenance = High
- Number of doors = 4
- Lug Boot Size = Big
- Safety = High
- Class Value = Good

"low" for prediction of buying