### 수업 시작 전 실습(car_evaluation.csv의 결측치 확인, 레이블링, 5가지 분류)

In [1]:
import pandas as pd
import requests
from io import StringIO
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import random

In [2]:
car_local = "C:/AI_Dataset/car_evaluation.csv"
car_local_df = pd.read_csv(car_local)
print(car_local_df.head())

   vhigh vhigh.1  2 2.1  small   low  unacc
0  vhigh   vhigh  2   2  small   med  unacc
1  vhigh   vhigh  2   2  small  high  unacc
2  vhigh   vhigh  2   2    med   low  unacc
3  vhigh   vhigh  2   2    med   med  unacc
4  vhigh   vhigh  2   2    med  high  unacc


In [3]:
# 결측치 확인
print(car_local_df.isnull().sum())
# 결측치 없음

vhigh      0
vhigh.1    0
2          0
2.1        0
small      0
low        0
unacc      0
dtype: int64


In [4]:
# 데이터 레이블링
encoder = LabelEncoder()
for col in car_local_df.select_dtypes(include=['object']).columns:  # 변환할 범주형 컬럼 리스트
    car_local_df[col] = encoder.fit_transform(car_local_df[col])
print(car_local_df.head())

   vhigh  vhigh.1  2  2.1  small  low  unacc
0      3        3  0    0      2    2      2
1      3        3  0    0      2    0      2
2      3        3  0    0      1    1      2
3      3        3  0    0      1    2      2
4      3        3  0    0      1    0      2


In [5]:
# X_train, y_train, X_test, y_test 분리
X = car_local_df.iloc[:, :-1]
y = car_local_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# 모델 학습 및 평가

# 모델 리스트
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "Support Vector Machine": SVC(),
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors": KNeighborsClassifier()
}

# 모델 학습 및 평가
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.4f}")
    print(f"{name} Confusion Matrix:\n{cm}\n")

Decision Tree Accuracy: 0.9711
Decision Tree Confusion Matrix:
[[ 73   2   0   2]
 [  2  12   0   1]
 [  1   0 236   0]
 [  0   2   0  15]]

Random Forest Accuracy: 0.9624
Random Forest Confusion Matrix:
[[ 72   1   3   1]
 [  1  10   0   4]
 [  1   0 236   0]
 [  2   0   0  15]]

Support Vector Machine Accuracy: 0.8988
Support Vector Machine Confusion Matrix:
[[ 61   0  16   0]
 [ 11   2   0   2]
 [  1   0 236   0]
 [  5   0   0  12]]

Logistic Regression Accuracy: 0.6850
Logistic Regression Confusion Matrix:
[[ 12   0  64   1]
 [  2   0  13   0]
 [ 13   0 224   0]
 [ 11   0   5   1]]

K-Nearest Neighbors Accuracy: 0.8757
K-Nearest Neighbors Confusion Matrix:
[[ 54   1  22   0]
 [ 12   2   1   0]
 [  0   0 237   0]
 [  6   0   1  10]]

