In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

file_path = "mobile.csv"
df = pd.read_csv(file_path)

In [2]:
# 데이터프레임 확인
df.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1


In [3]:
df.columns

Index(['battery_power', 'blue', 'clock_speed', 'dual_sim', 'fc', 'four_g',
       'int_memory', 'm_dep', 'mobile_wt', 'n_cores', 'pc', 'px_height',
       'px_width', 'ram', 'sc_h', 'sc_w', 'talk_time', 'three_g',
       'touch_screen', 'wifi', 'price_range'],
      dtype='object')

In [4]:
# 데이터 분할
X = df.drop("price_range", axis=1)
y = df["price_range"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [5]:
def evaluate(model, X_test, y_test):
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{model.__class__} Accuracy: {acc:.4f}")
    print(classification_report(y_test, y_pred))

In [6]:
# 결정 트리
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
evaluate(dt, X_test, y_test)

<class 'sklearn.tree._classes.DecisionTreeClassifier'> Accuracy: 0.8300
              precision    recall  f1-score   support

           0       0.88      0.92      0.90       100
           1       0.78      0.74      0.76       100
           2       0.75      0.80      0.77       100
           3       0.92      0.86      0.89       100

    accuracy                           0.83       400
   macro avg       0.83      0.83      0.83       400
weighted avg       0.83      0.83      0.83       400



In [7]:
# 랜덤 포레스트
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
evaluate(rf, X_test, y_test)

<class 'sklearn.ensemble._forest.RandomForestClassifier'> Accuracy: 0.8800
              precision    recall  f1-score   support

           0       0.95      0.96      0.96       100
           1       0.82      0.83      0.83       100
           2       0.81      0.80      0.80       100
           3       0.94      0.93      0.93       100

    accuracy                           0.88       400
   macro avg       0.88      0.88      0.88       400
weighted avg       0.88      0.88      0.88       400



In [8]:
# 서포트 벡터 머신
svm = SVC(kernel='linear', random_state=42)
svm.fit(X_train, y_train)
evaluate(svm, X_test, y_test)

<class 'sklearn.svm._classes.SVC'> Accuracy: 0.9825
              precision    recall  f1-score   support

           0       0.99      1.00      1.00       100
           1       0.99      0.97      0.98       100
           2       0.95      0.99      0.97       100
           3       1.00      0.97      0.98       100

    accuracy                           0.98       400
   macro avg       0.98      0.98      0.98       400
weighted avg       0.98      0.98      0.98       400



In [9]:
# 로지스틱 회귀
lr = LogisticRegression(max_iter=2000, random_state=42)
lr.fit(X_train, y_train)
evaluate(lr, X_test, y_test)

<class 'sklearn.linear_model._logistic.LogisticRegression'> Accuracy: 0.6725
              precision    recall  f1-score   support

           0       0.90      0.85      0.88       100
           1       0.62      0.58      0.60       100
           2       0.48      0.58      0.52       100
           3       0.75      0.68      0.71       100

    accuracy                           0.67       400
   macro avg       0.69      0.67      0.68       400
weighted avg       0.69      0.67      0.68       400



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
