In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Load the laptop listings from the CSV stored under the Colab Drive path below
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/Laptop Price Prediction/laptop.csv"
laptop = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows to sanity-check the load and see the column layout
laptop.head()

In [None]:
# Column names, dtypes and non-null counts for the loaded frame
laptop.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
laptop.describe()

In [None]:
# List the column labels of the frame
laptop.columns

# Exploratory Data Analysis

In [None]:
# Number of rows per distinct Company value, most frequent first
laptop['Company'].value_counts()

In [None]:
# Share of rows per Company value, drawn as a pie chart

company_counts = laptop['Company'].value_counts()

fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(company_counts, labels=company_counts.index, autopct='%1.1f%%')
ax.set_title('Company Distribution')
plt.show()


In [None]:
# Price per company: seaborn's barplot aggregates Price_euros per bar (mean by default)

import matplotlib.pyplot as plt
ax = sns.barplot(data=laptop, x='Company', y='Price_euros', hue='Company', palette='magma')
# Rotate the company names so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()


In [None]:
# Number of rows per distinct TypeName value, most frequent first
laptop['TypeName'].value_counts()

In [None]:
# Share of rows per TypeName value, drawn as a pie chart

type_counts = laptop['TypeName'].value_counts()

fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(type_counts, labels=type_counts.index, autopct='%1.1f%%')
ax.set_title('TypeName Distribution')
plt.show()


In [None]:
# Price per TypeName: seaborn's barplot aggregates Price_euros per bar (mean by default)

import matplotlib.pyplot as plt
ax = sns.barplot(data=laptop, x='TypeName', y='Price_euros', hue='TypeName', palette='magma')
# Rotate the category names so they do not overlap
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()


In [None]:
# Number of rows per distinct Inches value, most frequent first
laptop['Inches'].value_counts()

In [None]:
# How many rows fall on each Inches value

ax = sns.countplot(data=laptop, x='Inches', hue='Inches', palette='viridis')
# Rotate the tick labels so neighbouring values stay readable
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()


In [None]:
# Joint kernel-density plot of Inches against Price_euros, with filled contours

sns.jointplot(
    data=laptop,
    x='Inches',
    y='Price_euros',
    kind='kde',
    fill=True,
)

In [None]:
# Number of rows per distinct Cpu value, most frequent first
laptop['Cpu'].value_counts()

In [None]:
# Spread of Price_euros at each Ram value, one strip of points per value

sns.stripplot(
    data=laptop,
    x='Ram',
    y='Price_euros',
    hue='Ram',
    palette='magma',
)
plt.show()


# Data Preprocessing

In [None]:
# Count the missing values in every column
laptop.isna().sum()

In [None]:
# Count rows that are exact duplicates of an earlier row (all columns equal)
laptop.duplicated().sum()

In [None]:
# Drop the columns that are not used as model features.
# The frame is rebound instead of mutated with inplace=True, and missing
# columns are ignored, so this cell can be re-run safely: the in-place version
# raised KeyError on a second run because the columns were already gone.
laptop = laptop.drop(columns=['laptop_ID', 'Product'], errors='ignore')

laptop.head()

In [None]:
# Strip the literal 'kg' text from Weight and store the column as float64
weight_text = laptop['Weight'].astype(str)
laptop['Weight'] = weight_text.str.replace('kg', '', regex=False).astype(float)

laptop.head()

In [None]:
# Joint kernel-density plot of Weight against Price_euros, with filled contours
sns.jointplot(
    data=laptop,
    x='Weight',
    y='Price_euros',
    kind='kde',
    fill=True,
)

In [None]:
!pip install category_encoders

In [None]:
# Target encoding of Company, TypeName, ScreenResolution, Cpu, Ram, Memory, Gpu, OpSys.
#
# The encoders are fitted on the training rows only. Fitting them on every row
# lets the prices of the test rows leak into their own encoded features, which
# makes the test metrics reported later look better than they should.

import category_encoders as ce
from sklearn.model_selection import train_test_split

target_cols = ['Company', 'TypeName', 'ScreenResolution', 'Cpu', 'Ram', 'Memory', 'Gpu', 'OpSys']

# Reproduce the row partition of the train/test split cell further down: for a
# given row count, test_size and random_state, train_test_split always selects
# the same positions. Keep these two arguments in sync with that cell.
train_pos, _ = train_test_split(np.arange(len(laptop)), test_size=0.2, random_state=101)

# Snapshot of the raw training rows, taken before any column is overwritten
train_rows = laptop.iloc[train_pos]

for col in target_cols:
    # One encoder per column, learned from the training rows only
    target_encoder = ce.TargetEncoder(cols=[col])
    target_encoder.fit(train_rows[col], train_rows['Price_euros'])

    # Encode every row. Categories that never occur in the training rows fall
    # back to category_encoders' default handling of unknown values (the
    # training target mean).
    laptop[col] = target_encoder.transform(laptop[col])

laptop.head()


In [None]:
# Pairwise correlations between the numeric columns, shown as an annotated heatmap
corr_matrix = laptop.corr(numeric_only=True)
plt.figure(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='YlGnBu')

In [None]:
# Separate the target column (Price_euros) from the feature columns
y = laptop['Price_euros']
X = laptop.drop(columns=['Price_euros'])

In [None]:
# Standardise every feature column; X is replaced by the scaled NumPy array
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
sc.fit(X)
X = sc.transform(X)
# NOTE(review): the scaler is fitted on all rows, before the train/test split;
# consider fitting it on the training rows only.

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the partition repeatable
from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, test_size=0.2, random_state=101)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Importing libraries for evaluating the model
from sklearn.metrics import mean_squared_error, r2_score

# Modeling

### GradientBoostingRegressor

In [None]:
# Fit a gradient-boosted tree ensemble: 50 trees with max_depth 6
from sklearn.ensemble import GradientBoostingRegressor
# random_state pins the estimator's internal randomness so that re-running the
# notebook reproduces the same model; 101 matches the seed of the data split.
gb = GradientBoostingRegressor(max_depth=6, n_estimators=50, random_state=101)
gb.fit(X_train, y_train)

In [None]:
# Training accuracy of the gradient boosting model

# Predictions on the training set
gb_train_pred = gb.predict(X_train)

# RMSE and R2 on the rows the model was fitted on
gb_train_r2 = r2_score(y_train, gb_train_pred)
gb_train_mse = mean_squared_error(y_train, gb_train_pred)
gb_train_rmse = np.sqrt(gb_train_mse)

print('Training RMSE:', gb_train_rmse)
print('Training R2 Score:', gb_train_r2)

In [None]:
# Testing accuracy of the gradient boosting model

# Predictions on the held-out test set
gb_test_pred = gb.predict(X_test)

# RMSE and R2 on the test rows
gb_test_r2 = r2_score(y_test, gb_test_pred)
gb_test_mse = mean_squared_error(y_test, gb_test_pred)
gb_test_rmse = np.sqrt(gb_test_mse)

print('Testing RMSE:', gb_test_rmse)
print('Testing R2 Score:', gb_test_r2)

### CatBoostRegressor

In [None]:
# Installing catboost
!!pip install catboost
from catboost import CatBoostRegressor

In [None]:
# CatBoost regressor


# Define the model
cat = CatBoostRegressor(iterations=945,
                        learning_rate=0.04,
                        depth=7,
                        loss_function='RMSE',
                        eval_metric='RMSE',
                        l2_leaf_reg=3)

# Fit the model.
# The test split is passed as eval_set only to monitor and plot the metric.
# When an eval_set is given CatBoost defaults to use_best_model=True, which
# would keep the iteration count that scores best on that set, i.e. tune the
# model on the test data. use_best_model=False keeps all trained iterations so
# the test metrics computed afterwards stay an honest hold-out estimate.
cat.fit(X_train, y_train,
        eval_set=(X_test, y_test),
        use_best_model=False,
        verbose=200,
        plot=True)


In [None]:
# Training accuracy of the CatBoost model

# Predictions on the training set
cat_train_pred = cat.predict(X_train)

# RMSE and R2 on the rows the model was fitted on
cat_train_r2 = r2_score(y_train, cat_train_pred)
cat_train_mse = mean_squared_error(y_train, cat_train_pred)
cat_train_rmse = np.sqrt(cat_train_mse)

print('Training RMSE:', cat_train_rmse)
print('Training r2_score:', cat_train_r2)

In [None]:
# Testing accuracy of the CatBoost model

# Predictions on the held-out test set
cat_test_pred = cat.predict(X_test)

# RMSE and R2 on the test rows
cat_test_r2 = r2_score(y_test, cat_test_pred)
cat_test_mse = mean_squared_error(y_test, cat_test_pred)
cat_test_rmse = np.sqrt(cat_test_mse)

print('Testing RMSE:', cat_test_rmse)
print('Testing R2 Score:', cat_test_r2)

# Result analysis

In [None]:
# Result analysis of training data: actual vs. predicted price for each model

# (panel title / legend label, predictions) for every model to plot
train_results = [
    ('Gradient Boosting Regressor - Train', gb_train_pred),
    ('cat Boost Regressor - Train', cat_train_pred),
]

# One panel per model, stacked vertically. The previous 2x2 grid only filled
# its left column and left two empty axes in the figure; the figure width is
# halved accordingly so each panel keeps its size.
fig, axes = plt.subplots(len(train_results), 1, figsize=(10, 15))

for ax, (title, pred) in zip(axes, train_results):
    # Scatter of actual vs. predicted values
    ax.scatter(y_train, pred, marker='*', c='orange', label=title)
    ax.set_title(title)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.legend()

    # Least-squares straight line through the (actual, predicted) points
    m, b = np.polyfit(y_train, pred, 1)
    ax.plot(y_train, m * y_train + b, linestyle='--', color='black', linewidth=2)

# Show the plot
plt.show()


In [None]:
# Result analysis of testing data: actual vs. predicted price for each model

# (panel title / legend label, predictions) for every model to plot
test_results = [
    ('Gradient Boosting Regressor - Test', gb_test_pred),
    ('Cat Boost Regressor - Test', cat_test_pred),
]

# One panel per model, stacked vertically. The previous 2x2 grid only filled
# its left column and left two empty axes in the figure; the figure width is
# halved accordingly so each panel keeps its size.
fig, axes = plt.subplots(len(test_results), 1, figsize=(10, 15))

for ax, (title, pred) in zip(axes, test_results):
    # Scatter of actual vs. predicted values
    ax.scatter(y_test, pred, marker='*', c='orange', label=title)
    ax.set_title(title)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.legend()

    # Least-squares straight line through the (actual, predicted) points
    m, b = np.polyfit(y_test, pred, 1)
    ax.plot(y_test, m * y_test + b, linestyle='--', color='black', linewidth=2)

# Show the plot
plt.show()
