Mobile Phone Price Prediction Project

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from google.colab import drive

In [2]:
# Mount Google Drive inside the Colab filesystem; the dataset is read from under this directory.
MOUNT_POINT = '/content/drive'
drive.mount(MOUNT_POINT)


Mounted at /content/drive


In [3]:
# Load the raw dataset from the mounted Drive (update DATA_PATH accordingly).
DATA_PATH = '/content/drive/MyDrive/Cellphone.csv'
df = pd.read_csv(DATA_PATH)


In [4]:
# Remove the columns that are not used as model inputs.
non_feature_columns = ['Product_id', 'Sale']
df = df.drop(columns=non_feature_columns)

In [5]:
# Normalise the column labels: every space becomes an underscore.
df.columns = [label.replace(' ', '_') for label in df.columns]

In [6]:
# Every column except the target is forced to a numeric dtype;
# values that cannot be parsed become NaN (errors='coerce').
is_feature = df.columns != 'Price'
numeric_columns = df.columns[is_feature]
coerced = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df[numeric_columns] = coerced

In [8]:
# Handle missing values (if any)
# A missing Price is not imputed: filling the target with its mean would
# fabricate labels for training and evaluation, so those rows are dropped.
# Missing feature values are filled with their per-column mean. The means are
# taken from the feature columns only, so the target never enters the call to
# DataFrame.mean().
# NOTE(review): the means are computed before the train/test split, so test
# rows still influence the imputed values; move imputation after the split if
# the data has more than a handful of gaps.
df = df.dropna(subset=['Price'])
df = df.fillna(df.drop(columns='Price').mean())

In [9]:
# Separate the target (Price) from the predictor columns.
y = df['Price']
X = df.drop(columns=['Price'])

In [10]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [11]:
# Feature scaling: the scaler is fitted on the training rows only and then
# applied unchanged to both the training and the test partition.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [12]:
# Build the random-forest regressor and fit it on the scaled training data.
forest_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train, y_train)

In [13]:
# Make predictions
# y_pred holds the model's output for X_test (the scaled test features);
# it is compared with y_test in the evaluation cell.
y_pred = model.predict(X_test)

In [14]:
# Evaluate model: compare the test-set predictions with the true prices.
print("Model Evaluation:")
mse = mean_squared_error(y_test, y_pred)
metrics = {
    "MAE": mean_absolute_error(y_test, y_pred),
    "MSE": mse,
    "RMSE": np.sqrt(mse),  # square root of the MSE computed above
    "R² Score": r2_score(y_test, y_pred),
}
for label, value in metrics.items():
    print(f"{label}: {value:.2f}")

Model Evaluation:
MAE: 94.78
MSE: 15742.31
RMSE: 125.47
R² Score: 0.97


In [15]:
# Feature importance: pair each training column with the importance the
# fitted forest reports for it, then list them from largest to smallest.
feature_importance = pd.Series(data=model.feature_importances_, index=X.columns)
ranked_importance = feature_importance.sort_values(ascending=False)
print("\nFeature Importance:")
print(ranked_importance)


Feature Importance:
ram             0.319363
ppi             0.290588
internal_mem    0.154877
battery         0.059339
thickness       0.048621
cpu_freq        0.030288
RearCam         0.022790
Front_Cam       0.019584
resoloution     0.018726
cpu_core        0.017951
weight          0.017875
dtype: float64


In [16]:
def predict_price(features):
    """
    Predict the price of a single phone from its feature values.

    The values are arranged in the training column order (X.columns), scaled
    with the already-fitted global `scaler`, and passed to the global `model`.

    features: dictionary mapping every feature name in X.columns to its value;
        keys that are not training columns are ignored.
    Returns: the model's prediction for that single row.
    Raises: KeyError naming the absent feature(s) if any training column is
        missing from `features`.
    """
    expected_columns = list(X.columns)
    missing = [name for name in expected_columns if name not in features]
    if missing:
        # Fail up front with the exact names instead of a pandas indexing error.
        raise KeyError(f"Missing feature(s): {', '.join(map(str, missing))}")

    df_input = pd.DataFrame([features])
    df_input = df_input[expected_columns]  # Ensure correct order
    scaled_input = scaler.transform(df_input)
    return model.predict(scaled_input)[0]

In [19]:
# Example handset with one value per model feature. The key spellings
# (including 'resoloution') must match the dataset's column names exactly.
sample_features = dict(
    weight=150,
    resoloution=5.5,
    ppi=401,
    cpu_core=8,
    cpu_freq=1.5,
    internal_mem=32,
    ram=4,
    RearCam=16,
    Front_Cam=8,
    battery=3000,
    thickness=7.6,
)

In [20]:
# Price estimate for the current sample_features.
predicted_price = predict_price(sample_features)
print(f"\nPredicted Price: ${predicted_price:.2f}")



Predicted Price: $2928.71


In [21]:
# Second example: a heavier phone with more memory and RAM than the first sample.
# Key spellings (including 'resoloution') must match the dataset's column names.
sample_features = dict(
    weight=250,
    resoloution=8.5,
    ppi=301,
    cpu_core=9,
    cpu_freq=1.6,
    internal_mem=128,
    ram=16,
    RearCam=16,
    Front_Cam=8,
    battery=3000,
    thickness=7.6,
)
predicted_price = predict_price(sample_features)
print(f"\nPredicted Price: ${predicted_price:.2f}")



Predicted Price: $3394.57
