In [64]:
import pandas as pd

In [65]:
data = pd.read_csv('/Users/sangji/Documents/grad/data-science-computing/week7_to_15/week13/laptop_price.csv', encoding='latin1')
data.head()

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


In [66]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import time

In [67]:
# Price_euros 컬럼 값을 이용하여 Target 컬럼 생성
# Price_euros 컬럼 값이 600 이상 700 이하인 경우 1, 아닌 경우 0을 Target 컬럼에 할당
data['Target'] = data['Price_euros'].apply(lambda x: 1 if 600 <= x <= 700 else 0)

# feature, target 분리
X = data.drop(columns=['laptop_ID', 'Price_euros', 'Target']) # laptop_ID, Price_euros, Target 제외한 나머지 컬럼을 사용하여 모델을 학습시킴
y = data['Target']

In [68]:

label_encoders = {}
for column in X.select_dtypes(include=['object']).columns: # data에서 object 타입인 컬럼들만 추출
    le = LabelEncoder()                                    # 각 object 타입 컬럼에 대해, LabelEncoder 객체 생성
    X[column] = le.fit_transform(X[column])                # 해당 컬럼의 문자열 값을 LabelEncoder를 이용하여 고유 숫자로 변환하고 # 변환된 값을 데이터 X의 해당 컬럼에 할당
    # 예를 들어 ['Apple', 'Dell', 'HP', 'Lenovo'] 라는 값이 ['0', '1', '2', '3']으로 변환됨
    label_encoders[column] = le                            # 각 컬럼에 사용된 LabelEncoder 객체를 label_encoders 딕셔너리에 저장

In [69]:
# train, test 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [70]:

start_time = time.time()
# 모델 생성
model = RandomForestClassifier(random_state=42)
# 모델 학습
model.fit(X_train, y_train)
# 학습 시간 측정
training_time = time.time() - start_time

In [71]:
# 학습된 모델을 이용하여 test 데이터에 대한 예측값 생성
y_pred = model.predict(X_test)
# 정확도 계산
accuracy = accuracy_score(y_test, y_pred)
# classification_report 생성
report = classification_report(y_test, y_pred)

In [72]:

# 학습 시간, 정확도, classification_report 출력
print(f"Training time: {training_time:.2f} seconds")
print(f"Accuracy: {accuracy:.2%}")
print("Classification Report:")
print(report)

Training time: 0.09 seconds
Accuracy: 93.10%
Classification Report:
              precision    recall  f1-score   support

           0       0.93      1.00      0.96       242
           1       0.67      0.11      0.18        19

    accuracy                           0.93       261
   macro avg       0.80      0.55      0.57       261
weighted avg       0.91      0.93      0.91       261

