In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv(r"C:\Users\이민규\Desktop\ai_class_git\week3\car_evaluation.csv")

In [3]:
df

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc
...,...,...,...,...,...,...,...
1722,low,low,5more,more,med,med,good
1723,low,low,5more,more,med,high,vgood
1724,low,low,5more,more,big,low,unacc
1725,low,low,5more,more,big,med,good


In [4]:
df.columns

Index(['vhigh', 'vhigh.1', '2', '2.1', 'small', 'low', 'unacc'], dtype='object')

In [5]:
df['unacc'].value_counts()

unacc
unacc    1209
acc       384
good       69
vgood      65
Name: count, dtype: int64

In [6]:
df.isnull().sum()

vhigh      0
vhigh.1    0
2          0
2.1        0
small      0
low        0
unacc      0
dtype: int64

In [7]:
label_encoder = LabelEncoder()
columns_to_encode = ['vhigh', 'vhigh.1', '2', '2.1', 'small', 'low']
for column in columns_to_encode:
    df.loc[:, column] = label_encoder.fit_transform(df[column])
df

Unnamed: 0,vhigh,vhigh.1,2,2.1,small,low,unacc
0,3,3,0,0,2,2,unacc
1,3,3,0,0,2,0,unacc
2,3,3,0,0,1,1,unacc
3,3,3,0,0,1,2,unacc
4,3,3,0,0,1,0,unacc
...,...,...,...,...,...,...,...
1722,1,1,3,2,1,2,good
1723,1,1,3,2,1,0,vgood
1724,1,1,3,2,0,1,unacc
1725,1,1,3,2,0,2,good


In [8]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df.drop('unacc', axis=1))

In [9]:
y=df['unacc']
y.value_counts()

unacc
unacc    1209
acc       384
good       69
vgood      65
Name: count, dtype: int64

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y,test_size=0.2,random_state=42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
clf_svm = SVC(random_state=0)
clf_svm.fit(X_train_scaled, y_train)

pred_svm = clf_svm.predict(X_test_scaled)

print("\n--- SVM Classifier ---")
print(accuracy_score(y_test, pred_svm))
print(confusion_matrix(y_test, pred_svm))


--- SVM Classifier ---
0.8872832369942196
[[ 69   0   8   0]
 [ 14   0   0   1]
 [ 13   0 224   0]
 [  3   0   0  14]]


In [12]:
clf_lr = LogisticRegression(random_state=42, max_iter=200)
clf_lr.fit(X_train_scaled, y_train)

pred_lr = clf_lr.predict(X_test_scaled)

print ("\n--- Logistic Regression Classifier ---")
print (accuracy_score(y_test, pred_lr))
print (confusion_matrix(y_test, pred_lr))


--- Logistic Regression Classifier ---
0.684971098265896
[[ 12   0  64   1]
 [  2   0  13   0]
 [ 13   0 224   0]
 [ 11   0   5   1]]


In [13]:
clf_dt = DecisionTreeClassifier(random_state=42)
clf_dt.fit(X_train, y_train)

pred_dt = clf_dt.predict(X_test)

print ("\n--- Decision Tree Classifier ---")
print (accuracy_score(y_test, pred_dt))
print (confusion_matrix(y_test, pred_dt))


--- Decision Tree Classifier ---
0.9710982658959537
[[ 73   2   0   2]
 [  2  12   0   1]
 [  1   0 236   0]
 [  0   2   0  15]]


In [14]:
print ("\n--- Radom Forest ---")
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)
pred = rf_clf.predict(X_test)
print(accuracy_score(y_test,pred))
print (confusion_matrix(y_test, pred))


--- Radom Forest ---
0.9624277456647399
[[ 72   1   3   1]
 [  2  10   0   3]
 [  1   0 236   0]
 [  2   0   0  15]]


In [15]:
print("\n--- KNN Classifier ---")
knn_clf = KNeighborsClassifier(n_neighbors=5)
knn_clf.fit(X_train, y_train)
pred_knn = knn_clf.predict(X_test)

print(accuracy_score(y_test, pred_knn))
print(confusion_matrix(y_test, pred_knn))


--- KNN Classifier ---
0.9277456647398844
[[ 73   0   4   0]
 [ 12   2   1   0]
 [  1   0 236   0]
 [  7   0   0  10]]
