In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier


# 붓꽃 데이터 CSV 파일 읽기
file_path = "https://raw.githubusercontent.com/MyungKyuYi/AI-class/refs/heads/main/mobile.csv"
df = pd.read_csv(file_path)

# 데이터프레임 확인
print(df.head())
print(df.columns)


   battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  m_dep  \
0            842     0          2.2         0   1       0           7    0.6   
1           1021     1          0.5         1   0       1          53    0.7   
2            563     1          0.5         1   2       1          41    0.9   
3            615     1          2.5         0   0       0          10    0.8   
4           1821     1          1.2         0  13       1          44    0.6   

   mobile_wt  n_cores  ...  px_height  px_width   ram  sc_h  sc_w  talk_time  \
0        188        2  ...         20       756  2549     9     7         19   
1        136        3  ...        905      1988  2631    17     3          7   
2        145        5  ...       1263      1716  2603    11     2          9   
3        131        6  ...       1216      1786  2769    16     8         11   
4        141        2  ...       1208      1212  1411     8     2         15   

   three_g  touch_screen  wifi  price_

In [3]:
# 특성과 레이블 분리
X = df.iloc[:, :-1].values  # 마지막 열 제외
y = df.iloc[:, -1].values   # 마지막 열 (클래스)

# 데이터 분할 (훈련: 80%, 테스트: 20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 데이터 정규화 (SVM과 로지스틱 회귀에 유용)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [4]:
# 의사결정나무(DT) 학습 및 평가
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print(f"Decision Tree Accuracy: {dt_accuracy:.4f}")

# y_test와 y_pred 비교 (10개)
comparison_df = pd.DataFrame({'Actual': y_test[:10], 'Predicted': y_pred_dt[:10]})
print(comparison_df)

Decision Tree Accuracy: 0.8350
   Actual  Predicted
0       0          0
1       2          2
2       1          1
3       3          3
4       1          1
5       1          1
6       2          2
7       0          0
8       3          2
9       1          1


In [5]:
# 랜덤 포레스트(RF) 학습 및 평가
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f"Random Forest Accuracy: {rf_accuracy:.4f}")

# y_test와 y_pred 비교 (10개)
comparison_df = pd.DataFrame({'Actual': y_test[:10], 'Predicted': y_pred_dt[:10]})
print(comparison_df)

Random Forest Accuracy: 0.8925
   Actual  Predicted
0       0          0
1       2          2
2       1          1
3       3          3
4       1          1
5       1          1
6       2          2
7       0          0
8       3          2
9       1          1


In [6]:
# 서포트 벡터 머신(SVM) 학습 및 평가
svm_model = SVC()
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f"SVM Accuracy: {svm_accuracy:.4f}")

# y_test와 y_pred 비교 (10개)
comparison_df = pd.DataFrame({'Actual': y_test[:10], 'Predicted': y_pred_dt[:10]})
print(comparison_df)

SVM Accuracy: 0.8925
   Actual  Predicted
0       0          0
1       2          2
2       1          1
3       3          3
4       1          1
5       1          1
6       2          2
7       0          0
8       3          2
9       1          1


In [7]:
# 로지스틱 회귀(LR) 학습 및 평가
lr_model = LogisticRegression(max_iter=200)
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)
lr_accuracy = accuracy_score(y_test, y_pred_lr)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")

# y_test와 y_pred 비교 (10개)
comparison_df = pd.DataFrame({'Actual': y_test[:10], 'Predicted': y_pred_dt[:10]})
print(comparison_df)

Logistic Regression Accuracy: 0.9750
   Actual  Predicted
0       0          0
1       2          2
2       1          1
3       3          3
4       1          1
5       1          1
6       2          2
7       0          0
8       3          2
9       1          1
