In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Compare four classifiers (decision tree, random forest, SVM, logistic
# regression) on the mobile dataset and report test-set accuracy for each.

# Single seed shared by the train/test split and the stochastic estimators so
# that repeated runs of this cell print the same accuracies.
RANDOM_STATE = 42

# 1. Load the data
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/main/mobile.csv"
df = pd.read_csv(url)

# 2. Preprocessing
# Separate X (features) from y (labels); the last column is assumed to be
# the target.
X = df.iloc[:, :-1]  # every column except the last (features)
y = df.iloc[:, -1]   # last column (target)

# Train/test split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# 3. Standardization (needed for good SVM and logistic-regression results).
# The scaler is fitted on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 4. Model training and evaluation
# The tree-based models are seeded because they randomize feature/sample
# selection; without a seed their accuracy changes from run to run.
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(
        n_estimators=100, random_state=RANDOM_STATE
    ),
    "SVM": SVC(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
}

# Estimator types that are trained on the standardized features. Dispatching
# on the type rather than on the display name keeps the choice correct even
# if a key in `models` is renamed.
scale_sensitive = (SVC, LogisticRegression)

# Accuracy per model name
results = {}

for name, model in models.items():
    if isinstance(model, scale_sensitive):
        train_features, test_features = X_train_scaled, X_test_scaled
    else:
        # Tree-based models are trained on the original, unscaled features.
        train_features, test_features = X_train, X_test

    model.fit(train_features, y_train)
    y_pred = model.predict(test_features)

    acc = accuracy_score(y_test, y_pred)
    results[name] = acc
    print(f"{name} Accuracy: {acc:.4f}")

# 5. Final summary
print("\n모델별 정확도 비교:")
# Use a distinct loop variable so `model` keeps referring to an estimator
# instead of being rebound to a string key.
for model_name, acc in results.items():
    print(f"{model_name}: {acc:.4f}")

Decision Tree Accuracy: 0.8250
Random Forest Accuracy: 0.8900
SVM Accuracy: 0.8925
Logistic Regression Accuracy: 0.9750

모델별 정확도 비교:
Decision Tree: 0.8250
Random Forest: 0.8900
SVM: 0.8925
Logistic Regression: 0.9750
