The Car Evaluation Database contains examples with the structural information removed, i.e., directly relates CAR to the six input attributes: buying, maint, doors, persons, lug_boot, safety.

Because of known underlying concept structure, this database may be particularly useful for testing constructive induction and structure discovery methods.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
car = pd.read_csv("car.data", names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "Target"])

In [5]:
car.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Target
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [6]:
car.shape

(1728, 7)

In [7]:
car.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
buying      1728 non-null object
maint       1728 non-null object
doors       1728 non-null object
persons     1728 non-null object
lug_boot    1728 non-null object
safety      1728 non-null object
Target      1728 non-null object
dtypes: object(7)
memory usage: 94.6+ KB


In [8]:
car.isnull().sum()

buying      0
maint       0
doors       0
persons     0
lug_boot    0
safety      0
Target      0
dtype: int64

In [11]:
car["buying"].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [12]:
car["maint"].unique()

array(['vhigh', 'high', 'med', 'low'], dtype=object)

In [14]:
car["doors"].unique()

array(['2', '3', '4', '5more'], dtype=object)

In [16]:
car["persons"].unique()

array(['2', '4', 'more'], dtype=object)

In [17]:
car["lug_boot"].unique()

array(['small', 'med', 'big'], dtype=object)

In [19]:
car["safety"].unique()

array(['low', 'med', 'high'], dtype=object)

In [9]:
car["Target"].unique()

array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [10]:
car["Target"].value_counts()

unacc    1210
acc       384
good       69
vgood      65
Name: Target, dtype: int64

In [129]:
df = car.copy()

In [130]:
# df = pd.get_dummies(df, columns = ['buying', 'maint', 'lug_boot', 'lug_boot', 'safety', 'Target'])
# df = df.map({'small':1, 'med':2, 'big':3, 'vhigh':4,  'high':3, 'med':2, 'low':1, '5more':6, 'more':5, 'unacc':1, 'acc':2, 'good':3, 'vgood':4})

df = df.replace('small',1)
df = df.replace('med', 2)
df = df.replace('big', 3)
df = df.replace('vhigh', 4)
df = df.replace('high', 3)
df = df.replace('med', 2)
df = df.replace('low', 1)
df = df.replace('5more', 6)
df = df.replace('more', 5)
df = df.replace('unacc', 1)
df = df.replace('acc', 2)
df = df.replace('good', 3)
df = df.replace('vgood', 4)

In [131]:
df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,Target
0,4,4,2,2,1,1,1
1,4,4,2,2,1,2,1
2,4,4,2,2,1,3,1
3,4,4,2,2,2,1,1
4,4,4,2,2,2,2,1


In [132]:
y = df['Target']
x = df.drop('Target',1)

In [133]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [134]:
X_train_log = preprocessing.normalize(X_train, norm='l2')

In [150]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
logreg = LogisticRegression(random_state = 0).fit(X_train_log, y_train)

In [136]:
y_pred = logreg.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.22832369942196531


In [156]:
from sklearn.svm import SVC
clf = SVC(C = 100, gamma=1, kernel='rbf', decision_function_shape="ovr")
clf.fit(X_train,y_train)

SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf', max_iter=-1,
    probability=False, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [157]:
y_pred_svm = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_svm)
print(accuracy)

0.9797687861271677


In [158]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test,y_pred_svm)

array([[239,   1,   0,   0],
       [  1,  76,   2,   0],
       [  0,   2,  15,   0],
       [  0,   1,   0,   9]], dtype=int64)

In [151]:
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       240
           2       0.95      0.96      0.96        79
           3       0.88      0.88      0.88        17
           4       1.00      0.90      0.95        10

    accuracy                           0.98       346
   macro avg       0.96      0.94      0.95       346
weighted avg       0.98      0.98      0.98       346

