In [9]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [10]:
# Load data
data = pd.read_csv('/Users/sangji/Documents/grad/data-science-computing/week7_to_15/week13/laptop_price.csv', encoding='latin1')


In [11]:
# Preprocessing the data
# Define the target variable: 1 if 600 <= Price_euros <= 700, else 0
data['Target'] = np.where((data['Price_euros'] >= 600) & (data['Price_euros'] <= 700), 1, 0)

# Drop irrelevant columns for the model
features = data.drop(columns=['laptop_ID', 'Company', 'Product', 'ScreenResolution', 'Cpu', 'Gpu', 'OpSys', 'Price_euros', 'Target'])
features = pd.get_dummies(features)

# Extract target
target = data['Target']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)

start_time = time.time()
model.fit(X_train, y_train)
training_time = time.time() - start_time

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)

# Generate evaluation metrics
classification_rep = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Output the results
evaluation_results = {
    "Training Time (seconds)": training_time,
    "Test Accuracy": accuracy,
    "Classification Report": classification_rep,
    "Confusion Matrix": conf_matrix
}

evaluation_results

{'Training Time (seconds)': 0.10035300254821777,
 'Test Accuracy': 0.9272030651340997,
 'Classification Report': '              precision    recall  f1-score   support\n\n           0       0.93      0.99      0.96       242\n           1       0.50      0.11      0.17        19\n\n    accuracy                           0.93       261\n   macro avg       0.72      0.55      0.57       261\nweighted avg       0.90      0.93      0.90       261\n',
 'Confusion Matrix': array([[240,   2],
        [ 17,   2]])}