In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import joblib


In [None]:
# Attach Google Drive to the Colab filesystem; the paths used by later cells
# all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive/


In [None]:


# Read the cleaned laptop dataset from the mounted Drive folder.
DATA_PATH = "/content/drive/My Drive/Sales Prediction Model/laptop_cleaned2.csv"
data = pd.read_csv(DATA_PATH)

# Show the first few rows as a quick sanity check of the load.
preview = data.head()
print(preview)


   Unnamed: 0                                               Name    Brand  \
0           0  HP Victus 15-fb0157AX Gaming Laptop (AMD Ryzen...       HP   
1           1  Lenovo V15 G4 ‎82YU00W7IN Laptop (AMD Ryzen 3 ...   Lenovo   
2           2  HP 15s-fq5007TU Laptop (12th Gen Core i3/ 8GB/...       HP   
3           3  Samsung Galaxy Book2 Pro 13 Laptop (12th Gen C...  Samsung   
4           4  Tecno Megabook T1 Laptop (11th Gen Core i3/ 8G...    Tecno   

   Price  Rating Processor_brand Processor_name Processor_variant  \
0  50399    4.30             AMD    AMD Ryzen 5             5600H   
1  26690    4.45             AMD    AMD Ryzen 3             7320U   
2  37012    4.65           Intel  Intel Core i3             1215U   
3  69990    4.75           Intel  Intel Core i5             1240P   
4  23990    4.25           Intel  Intel Core i3            1115G4   

   Processor_gen  Core_per_processor  ...           Graphics_name  \
0            5.0                 6.0  ...     AMD Rad

In [None]:
# Train and persist a random-forest price model:
#   report missing values -> impute -> build preprocessing + model pipeline
#   -> train/test split -> fit -> evaluate -> save model and predictions.

# Step 4: Check for Missing Values
print("Missing values in each column:")
print(data.isnull().sum())

# Feature columns, declared before imputation so that every column is filled
# with a statistic that matches how the model treats it (see Step 5).
categorical_cols = ['Brand', 'Processor_brand', 'Processor_name',
                    'Processor_variant', 'Processor_gen',
                    'RAM_type', 'Storage_type',
                    'Graphics_name', 'Graphics_brand',
                    'Operating_system']
numerical_cols = ['Rating', 'Core_per_processor',
                  'Total_processor', 'Threads',
                  'RAM_GB', 'Storage_capacity_GB',
                  'Graphics_GB', 'Display_size_inches',
                  'Horizontal_pixel', 'Vertical_pixel',
                  'ppi', 'Touch_screen']

# Step 5: Handle Missing Values
# The filled column is assigned back to the frame rather than calling
# `data[col].fillna(..., inplace=True)`: that form operates on a temporary
# selection, emits a FutureWarning on pandas 2.x, and leaves `data` unchanged
# once Copy-on-Write is active (the default from pandas 3.0).
#
# NOTE(review): these fill values are computed on the full dataset, i.e. before
# the train/test split in Step 9, so test rows influence the imputation. Moving
# imputation into the pipeline would remove that leakage.
# NOTE(review): the target column 'Price' is numeric and is therefore also
# mean-filled here; dropping rows with a missing target is usually preferable
# -- confirm whether 'Price' ever contains NaN.
object_cols = set(data.select_dtypes(include=['object']).columns)

# Numeric columns get their mean, except features that are one-hot encoded
# below (e.g. the float-typed 'Processor_gen'): a mean there would invent a
# category value that does not occur in the data.
for col in data.select_dtypes(include=[np.number]).columns:
    if col in categorical_cols:
        continue
    data[col] = data[col].fillna(data[col].mean())

# Text columns and all categorical features get their most frequent value.
for col in data.columns:
    if col not in object_cols and col not in categorical_cols:
        continue
    modes = data[col].mode()
    if modes.empty:
        # Column has no observed values at all; there is nothing to fill with.
        continue
    data[col] = data[col].fillna(modes.iloc[0])

# Step 6: Define Features and Target Variable
X = data.drop('Price', axis=1)  # Adjust if another column is the target
y = data['Price']  # Update as per your dataset

# Step 7: Preprocess Data
# Scale the numeric features and one-hot encode the categorical ones. Columns
# of X that appear in neither list are not passed to the model
# (ColumnTransformer drops the remainder by default).
numerical_transformer = StandardScaler()
# handle_unknown='ignore' encodes categories unseen during fit as all zeros
# instead of raising at prediction time.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])

# Step 8: Create a Pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Step 9: Split the Data into Training and Test Sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 10: Train the Model
model.fit(X_train, y_train)

# Step 11: Make Predictions
y_pred = model.predict(X_test)

# Step 12: Evaluate Model Performance
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

# Step 13: Save the Model and Predictions
model_path = '/content/drive/My Drive/sales_prediction_model.joblib'  # Update the path
joblib.dump(model, model_path)

# Save predictions to a CSV file
predictions = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
predictions_path = '/content/drive/My Drive/predictions.csv'  # Update the path
predictions.to_csv(predictions_path, index=False)



In [None]:
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Recover the observed and predicted prices from the predictions frame.
y_actual, y_pred = predictions['Actual'], predictions['Predicted']

# Step 3: Calculate Evaluation Metrics
mse = mean_squared_error(y_actual, y_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike MSE
mae = mean_absolute_error(y_actual, y_pred)
r2 = r2_score(y_actual, y_pred)

# Step 4: Display the Results
report_lines = [
    "Evaluation Metrics:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"Root Mean Squared Error (RMSE): {rmse:.2f}",
    f"R-squared (R²): {r2:.2f}",
]
print("\n".join(report_lines))

Evaluation Metrics:
Mean Squared Error (MSE): 711737621.09
Mean Absolute Error (MAE): 12568.37
Root Mean Squared Error (RMSE): 26678.41
R-squared (R²): 0.80


In [None]:
# Mean absolute percentage error: the average of |actual - predicted| / actual,
# expressed in percent.
relative_errors = np.abs((y_actual - y_pred) / y_actual)
mape = np.mean(relative_errors) * 100

# Step 4: Calculate Efficiency
# "Efficiency" here is simply 100 minus the MAPE.
efficiency = 100 - mape

# Step 5: Display the Results
print("Efficiency of Predictions:", f"Efficiency: {efficiency:.2f}%", sep="\n")

Efficiency of Predictions:
Efficiency: 85.96%
