In [46]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [47]:
# Load the data. header=None tells pandas not to treat any row as a header,
# so the columns receive integer labels until they are renamed.
# NOTE(review): absolute local path; consider a configurable data directory.
file_path = "C:/AI_File/car_evaluation.csv"   # path to the data file
df = pd.read_csv(filepath_or_buffer=file_path, header=None)
display(df)

Unnamed: 0,0,1,2,3,4,5,6
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc
...,...,...,...,...,...,...,...
1723,low,low,5more,more,med,med,good
1724,low,low,5more,more,med,high,vgood
1725,low,low,5more,more,big,low,unacc
1726,low,low,5more,more,big,med,good


In [48]:
# Give the seven positional columns descriptive names; the last one is the target.
df.columns = [
    "price",
    "maint",
    "doors",
    "persons",
    "lug_capacity",
    "safety",
    "output",
]

In [49]:
# Show the DataFrame's current column labels.
df.columns

Index(['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety',
       'output'],
      dtype='object')

In [50]:
# Check for missing values: print the number of nulls found in each column.
print("\n 결측값 확인:")
print(df.isna().sum())


 결측값 확인:
price           0
maint           0
doors           0
persons         0
lug_capacity    0
safety          0
output          0
dtype: int64


In [51]:


# Encode every categorical column as integer codes.
#
# One LabelEncoder is fitted per column and kept in `label_encoders`, so each
# column's string <-> code mapping stays available afterwards, e.g.
#   label_encoders['safety'].classes_
#   label_encoders['output'].inverse_transform(predictions)
# (Re-using a single encoder for all columns would keep only the last mapping.)
#
# NOTE: LabelEncoder assigns codes in sorted (alphabetical) class order, so
# ordered levels are NOT encoded in their natural order: e.g. high=0, low=1,
# med=2, vhigh=3. If the ordering matters for a model, an encoder with an
# explicit category order should be used instead.
#
# NOTE: re-running this cell on the already-encoded frame refits the encoders
# on integers and loses the original string classes; reload the data first.

# Columns to encode (all features plus the target).
columns_to_encode = ['price', 'maint', 'doors', 'persons', 'lug_capacity', 'safety',
       'output']

label_encoders = {}
for column in columns_to_encode:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Backward compatibility: `label_encoder` still refers to an encoder fitted on
# the last encoded column ('output'), exactly as before.
label_encoder = label_encoders[columns_to_encode[-1]]

display(df)

Unnamed: 0,price,maint,doors,persons,lug_capacity,safety,output
0,3,3,0,0,2,1,2
1,3,3,0,0,2,2,2
2,3,3,0,0,2,0,2
3,3,3,0,0,1,1,2
4,3,3,0,0,1,2,2
...,...,...,...,...,...,...,...
1723,1,1,3,2,1,2,1
1724,1,1,3,2,1,0,3
1725,1,1,3,2,0,1,2
1726,1,1,3,2,0,2,1


In [53]:
# Count how many rows carry each encoded value of the 'output' column.
df['output'].value_counts()

2    1210
0     384
1      69
3      65
Name: output, dtype: int64

In [54]:

# Split the frame into a feature matrix (every column except 'output')
# and the target vector ('output'), both as NumPy arrays.
X = df.drop(columns="output").to_numpy()
y = df["output"].to_numpy()

In [55]:
# Display the feature matrix X.
X

array([[3, 3, 0, 0, 2, 1],
       [3, 3, 0, 0, 2, 2],
       [3, 3, 0, 0, 2, 0],
       ...,
       [1, 1, 3, 2, 0, 1],
       [1, 1, 3, 2, 0, 2],
       [1, 1, 3, 2, 0, 0]])

In [56]:
# Display the target vector y.
y

array([2, 2, 2, ..., 2, 1, 3])

In [57]:

# Split the data: 80% for training, 20% for testing. The fixed random_state
# makes the split reproducible.
# NOTE(review): the split is not stratified; consider stratify=y if the class
# proportions should be preserved in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# Standardization (scaling): learn the statistics from the training split only,
# then apply the same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [58]:
# Show the shapes of the four arrays produced by the train/test split.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((1382, 6), (346, 6), (1382,), (346,))

In [59]:
from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree on the training split (fixed seed for reproducibility)
# and predict labels for the test split.
dt_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    "\nDecision Tree Model",
    f"Accuracy: {accuracy_score(y_test, dt_pred):.4f}",
    "Classification Report:",
    classification_report(y_test, dt_pred),
    "- Confusion Matrix -",
    confusion_matrix(y_test, dt_pred),
    sep="\n",
)


Decision Tree Model
Accuracy: 0.9769
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.92      0.95        79
           1       0.81      1.00      0.89        17
           2       0.99      1.00      0.99       240
           3       1.00      0.90      0.95        10

    accuracy                           0.98       346
   macro avg       0.94      0.95      0.95       346
weighted avg       0.98      0.98      0.98       346

- Confusion Matrix -
[[ 73   4   2   0]
 [  0  17   0   0]
 [  1   0 239   0]
 [  1   0   0   9]]


In [60]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the training split (fixed seed for reproducibility)
# and predict labels for the test split.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    "\nRandom Forest Model",
    f"Accuracy: {accuracy_score(y_test, rf_pred):.4f}",
    "Classification Report:",
    classification_report(y_test, rf_pred),
    "- Confusion Matrix -",
    confusion_matrix(y_test, rf_pred),
    sep="\n",
)


Random Forest Model
Accuracy: 0.9711
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.95      0.94        79
           1       0.88      0.82      0.85        17
           2       1.00      0.99      0.99       240
           3       0.82      0.90      0.86        10

    accuracy                           0.97       346
   macro avg       0.91      0.92      0.91       346
weighted avg       0.97      0.97      0.97       346

- Confusion Matrix -
[[ 75   2   1   1]
 [  2  14   0   1]
 [  2   0 238   0]
 [  1   0   0   9]]


In [61]:
from sklearn.svm import SVC

# Fit a support vector classifier with its default settings on the training
# split and predict labels for the test split.
svm_model = SVC().fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    "\nSupport Vector Machine Model",
    f"Accuracy: {accuracy_score(y_test, svm_pred):.4f}",
    "Classification Report:",
    classification_report(y_test, svm_pred),
    "- Confusion Matrix -",
    confusion_matrix(y_test, svm_pred),
    sep="\n",
)


Support Vector Machine Model
Accuracy: 0.9191
Classification Report:
              precision    recall  f1-score   support

           0       0.77      0.95      0.85        79
           1       1.00      0.47      0.64        17
           2       0.99      0.95      0.97       240
           3       0.70      0.70      0.70        10

    accuracy                           0.92       346
   macro avg       0.87      0.77      0.79       346
weighted avg       0.93      0.92      0.92       346

- Confusion Matrix -
[[ 75   0   3   1]
 [  7   8   0   2]
 [ 12   0 228   0]
 [  3   0   0   7]]


In [62]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the training split and predict labels
# for the test split. max_iter=1000 gives the solver more iterations than the
# library default.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

print("\nLogistic Regression Model")
print(f"Accuracy: {accuracy_score(y_test, lr_pred):.4f}")
print("Classification Report:")
# zero_division=0: when a class is never predicted, its precision is undefined
# (0/0). Report it as 0 explicitly instead of letting scikit-learn emit an
# UndefinedMetricWarning for every affected average.
print(classification_report(y_test, lr_pred, zero_division=0))
print("- Confusion Matrix -")
print(confusion_matrix(y_test, lr_pred))


Logistic Regression Model
Accuracy: 0.6503
Classification Report:
              precision    recall  f1-score   support

           0       0.26      0.11      0.16        79
           1       0.00      0.00      0.00        17
           2       0.71      0.89      0.79       240
           3       0.22      0.20      0.21        10

    accuracy                           0.65       346
   macro avg       0.30      0.30      0.29       346
weighted avg       0.56      0.65      0.59       346

- Confusion Matrix -
[[  9   0  67   3]
 [  1   0  16   0]
 [ 22   0 214   4]
 [  2   0   6   2]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [63]:
import warnings

from sklearn.neighbors import KNeighborsClassifier

# Globally ignore FutureWarning messages from this point on.
warnings.simplefilter("ignore", FutureWarning)

# Fit a k-nearest-neighbours classifier with its default settings on the
# training split and predict labels for the test split.
knn_model = KNeighborsClassifier().fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)

# Report accuracy, the per-class metrics and the confusion matrix.
print(
    "\nK-Nearest Neighbors Model",
    f"Accuracy: {accuracy_score(y_test, knn_pred):.4f}",
    "Classification Report:",
    classification_report(y_test, knn_pred),
    "- Confusion Matrix -",
    confusion_matrix(y_test, knn_pred),
    sep="\n",
)


K-Nearest Neighbors Model
Accuracy: 0.9422
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.91      0.88        79
           1       0.89      0.47      0.62        17
           2       0.98      0.99      0.98       240
           3       1.00      0.90      0.95        10

    accuracy                           0.94       346
   macro avg       0.93      0.82      0.86       346
weighted avg       0.94      0.94      0.94       346

- Confusion Matrix -
[[ 72   1   6   0]
 [  9   8   0   0]
 [  3   0 237   0]
 [  1   0   0   9]]
