In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

import requests
from io import StringIO

# GitHub의 raw 파일 URL
url = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/main/mobile.csv"

# 파일 다운로드
response = requests.get(url)
response.raise_for_status()  # 요청이 성공했는지 확인

# 문자열 형태로 변환 후 DataFrame으로 로드
data = StringIO(response.text)
df = pd.read_csv(data)

In [2]:
df.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [3]:
# 데이터 분할
X = df.drop("price_range", axis=1)
y = df["price_range"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [4]:
def evaluate(model, X_test, y_test):
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{model.__class__} Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))

In [5]:
# 결정 트리
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
evaluate(dt, X_test, y_test)

<class 'sklearn.tree._classes.DecisionTreeClassifier'> Accuracy: 0.8300
              precision    recall  f1-score   support

           0       0.88      0.92      0.90       100
           1       0.78      0.74      0.76       100
           2       0.75      0.80      0.77       100
           3       0.92      0.86      0.89       100

    accuracy                           0.83       400
   macro avg       0.83      0.83      0.83       400
weighted avg       0.83      0.83      0.83       400



In [6]:
# 랜덤 포레스트
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
evaluate(rf, X_test, y_test)

<class 'sklearn.ensemble._forest.RandomForestClassifier'> Accuracy: 0.8800
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       100
           1       0.82      0.83      0.83       100
           2       0.81      0.80      0.80       100
           3       0.94      0.93      0.93       100

    accuracy                           0.88       400
   macro avg       0.88      0.88      0.88       400
weighted avg       0.88      0.88      0.88       400



In [7]:
# 데이터 정규화
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# 서포트 벡터 머신
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
evaluate(svm, X_test, y_test)

<class 'sklearn.svm._classes.SVC'> Accuracy: 0.9625
              precision    recall  f1-score   support

           0       0.99      0.96      0.97       100
           1       0.95      0.95      0.95       100
           2       0.95      0.95      0.95       100
           3       0.96      0.99      0.98       100

    accuracy                           0.96       400
   macro avg       0.96      0.96      0.96       400
weighted avg       0.96      0.96      0.96       400



In [9]:
# 로지스틱 회귀
lr = LogisticRegression(max_iter=2000, random_state=42)
lr.fit(X_train, y_train)
evaluate(lr, X_test, y_test)

<class 'sklearn.linear_model._logistic.LogisticRegression'> Accuracy: 0.9625
              precision    recall  f1-score   support

           0       0.99      0.98      0.98       100
           1       0.96      0.96      0.96       100
           2       0.95      0.93      0.94       100
           3       0.95      0.98      0.97       100

    accuracy                           0.96       400
   macro avg       0.96      0.96      0.96       400
weighted avg       0.96      0.96      0.96       400



In [14]:
# 최적의 k값 찾기
k_values = range(1, 99)
accuracies = []

for k in k_values:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracies.append(accuracy_score(y_test, y_pred))

# 최적의 k값 찾기
best_k = k_values[np.argmax(accuracies)]
print(f'최적의 k 값: {best_k}')

# KNN 분류 모델 생성
knn = KNeighborsClassifier(n_neighbors=best_k)
knn.fit(X_train, y_train)
evaluate(knn, X_test, y_test)

최적의 k 값: 64
<class 'sklearn.neighbors._classification.KNeighborsClassifier'> Accuracy: 0.7000
              precision    recall  f1-score   support

           0       0.77      0.82      0.79       100
           1       0.63      0.59      0.61       100
           2       0.60      0.68      0.64       100
           3       0.83      0.71      0.76       100

    accuracy                           0.70       400
   macro avg       0.71      0.70      0.70       400
weighted avg       0.71      0.70      0.70       400

