# Cell Phone Price Prediction
Data is sourced from https://www.kaggle.com/datasets/atefehmirnaseri/cell-phone-price

In [None]:
import numpy as np
from sklearn import set_config
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Ask scikit-learn to render estimators (e.g. the fitted grid searches
# displayed at the end of the training cells) as diagrams in the notebook.
set_config(display="diagram")

import pandas as pd

In [None]:
# Load the training CSV into a DataFrame. The path is relative, so it is
# resolved against the working directory the notebook is run from.
train_data = pd.read_csv('module 9/cross_validation/data/CellPhone_train.csv')

In [None]:
# Show summary statistics for the columns of the training data.
train_data.describe()

In [None]:
# Print the column names, dtypes and non-null counts of the training data.
train_data.info()

In [None]:
# Fraction of missing values per column (mean of the boolean NaN mask).
train_data.isna().mean()

In [None]:
# Preview five randomly chosen rows. No random_state is passed, so the rows
# shown differ from run to run.
train_data.sample(5)

In [None]:
# The target is the `price_range` column; every other column is a feature.
y = train_data['price_range']
X = train_data.drop(columns='price_range')

# Hold out 20% of the rows for evaluation. The fixed seed keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Decision Tree Classifier (5-Fold Cross-Validation)

In [None]:
# Pipeline: polynomial feature expansion -> standardisation -> decision tree.
decision_tree_pipe = Pipeline([
    ('poly_features', PolynomialFeatures(include_bias=False)),
    ('scaler', StandardScaler()),
    # Seed the tree: scikit-learn permutes features at every split, so ties
    # between equally good splits are otherwise broken differently per run,
    # which makes the scores and the selected hyper-parameters unstable.
    ('decision_tree', DecisionTreeClassifier(random_state=42))
])

# Hyper-parameters to search, addressed as `<step name>__<parameter>`.
param_dict = {
    'poly_features__degree': [2, 3, 4],
    'decision_tree__max_depth': [4, 5, 6],
}

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split only; the test split stays untouched for the final check.
grid_decision_tree = GridSearchCV(estimator=decision_tree_pipe, param_grid=param_dict, cv=5)
grid_decision_tree.fit(X_train, y_train)

In [None]:
# Accuracy of the best pipeline found by the grid search, on the training
# split and on the held-out test split.
# NOTE: despite the `_mse` suffix these variables hold accuracy, not a mean
# squared error; the names are kept so other cells that use them still work.
# accuracy_score expects (y_true, y_pred), so the true labels go first.
best_decision_tree_train_mse = accuracy_score(y_train, grid_decision_tree.best_estimator_.predict(X_train))
best_decision_tree_test_mse = accuracy_score(y_test, grid_decision_tree.best_estimator_.predict(X_test))
print(f'Train Accuracy: {best_decision_tree_train_mse}')
print(f'Test Accuracy: {best_decision_tree_test_mse}')
print(f'Best params: {grid_decision_tree.best_params_}')

Train Accuracy: 0.93375

Test Accuracy: 0.895

Best params: {'decision_tree__max_depth': 5, 'poly_features__degree': 4}

## Random Forest

In [None]:
# Pipeline: polynomial feature expansion -> standardisation -> random forest.
random_forest_pipe = Pipeline([
    ('poly_features', PolynomialFeatures(include_bias=False)),
    ('scaler', StandardScaler()),
    # Seed the forest: bootstrap sampling and per-split feature sampling are
    # random, so without a seed the cross-validation scores (and therefore
    # the selected hyper-parameters) change from run to run.
    ('random_forest', RandomForestClassifier(random_state=42))
])

# Hyper-parameters to search, addressed as `<step name>__<parameter>`.
param_dict = {
    'poly_features__degree': [2, 3, 4],
    'random_forest__max_depth': [4, 5, 6],
    'random_forest__n_estimators': [100, 200],
}

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split only.
grid_random_forest = GridSearchCV(estimator=random_forest_pipe, param_grid=param_dict, cv=5)
grid_random_forest.fit(X_train, y_train)

In [None]:
# Accuracy of the best random-forest pipeline on the training split and on
# the held-out test split.
# NOTE: despite the `_mse` suffix these variables hold accuracy, not a mean
# squared error; the names are kept so other cells that use them still work.
# accuracy_score expects (y_true, y_pred), so the true labels go first.
best_random_forest_train_mse = accuracy_score(y_train, grid_random_forest.best_estimator_.predict(X_train))
best_random_forest_test_mse = accuracy_score(y_test, grid_random_forest.best_estimator_.predict(X_test))

print(f'Train Accuracy: {best_random_forest_train_mse}')
print(f'Test Accuracy: {best_random_forest_test_mse}')
print(f'Best params: {grid_random_forest.best_params_}')

Train Accuracy: 0.959375

Test Accuracy: 0.8925

Best params: {'poly_features__degree': 4, 'random_forest__max_depth': 6, 'random_forest__n_estimators': 100}

## C-Support Vector Classification

In [None]:
# Pipeline: polynomial feature expansion -> standardisation -> SVC with its
# default settings.
svc_pipe = Pipeline(steps=[
    ('poly_features', PolynomialFeatures(include_bias=False)),
    ('scaler', StandardScaler()),
    ('svc', SVC()),
])

# Search space: polynomial degree 2-4, and C starting at 0.01 and increasing
# in steps of 0.1 while staying below 1.
param_dict = dict(
    poly_features__degree=[2, 3, 4],
    svc__C=np.arange(0.01, 1, .1),
)

# Exhaustive search over the grid with 5-fold cross-validation on the
# training split only.
grid_svc = GridSearchCV(svc_pipe, param_dict, cv=5)
grid_svc.fit(X_train, y_train)

In [None]:
# Accuracy of the best SVC pipeline on the training split and on the
# held-out test split.
# NOTE: despite the `_mse` suffix these variables hold accuracy, not a mean
# squared error; the names are kept so other cells that use them still work.
# accuracy_score expects (y_true, y_pred), so the true labels go first.
best_svc_train_mse = accuracy_score(y_train, grid_svc.best_estimator_.predict(X_train))
best_svc_test_mse = accuracy_score(y_test, grid_svc.best_estimator_.predict(X_test))

print(f'Train Accuracy: {best_svc_train_mse}')
print(f'Test Accuracy: {best_svc_test_mse}')
print(f'Best params: {grid_svc.best_params_}')