In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
dataset = pd.read_csv("laptop_data.csv")
dataset.head()

Unnamed: 0.1,Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,0,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,71378.6832
1,1,Apple,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,47895.5232
2,2,HP,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,30636.0
3,3,Apple,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,135195.336
4,4,Apple,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,96095.808


In [3]:
dataset.isnull().sum()

Unnamed: 0          0
Company             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price               0
dtype: int64

In [4]:
dataset.info

<bound method DataFrame.info of       Unnamed: 0 Company            TypeName  Inches  \
0              0   Apple           Ultrabook    13.3   
1              1   Apple           Ultrabook    13.3   
2              2      HP            Notebook    15.6   
3              3   Apple           Ultrabook    15.4   
4              4   Apple           Ultrabook    13.3   
...          ...     ...                 ...     ...   
1298        1298  Lenovo  2 in 1 Convertible    14.0   
1299        1299  Lenovo  2 in 1 Convertible    13.3   
1300        1300  Lenovo            Notebook    14.0   
1301        1301      HP            Notebook    15.6   
1302        1302    Asus            Notebook    15.6   

                                ScreenResolution  \
0             IPS Panel Retina Display 2560x1600   
1                                       1440x900   
2                              Full HD 1920x1080   
3             IPS Panel Retina Display 2880x1800   
4             IPS Panel Retina Disp

df = pd.get_dummies(df, columns=['Company', 'TypeName', 'Inches', 'ScreenResolution', 'Cpu', 'Ram','Memory','Gpu','OpSys','Weight'])
df

In [5]:
le = LabelEncoder()
dataset['Company'] = le.fit_transform(dataset['Company'])
dataset['TypeName'] = le.fit_transform(dataset['TypeName'])
dataset['Inches'] = le.fit_transform(dataset['Inches'])
dataset['ScreenResolution'] = le.fit_transform(dataset['ScreenResolution'])
dataset['Cpu'] = le.fit_transform(dataset['Cpu'])
dataset['Ram'] = le.fit_transform(dataset['Ram'])
dataset['Memory'] = le.fit_transform(dataset['Memory'])
dataset['Gpu'] = le.fit_transform(dataset['Gpu'])
dataset['OpSys'] = le.fit_transform(dataset['OpSys'])
dataset['Weight'] = le.fit_transform(dataset['Weight'])
dataset.head()

Unnamed: 0.1,Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,0,1,4,7,23,65,8,4,58,8,38,71378.6832
1,1,1,4,7,1,63,8,2,51,8,35,47895.5232
2,2,7,3,14,8,74,8,16,53,4,74,30636.0
3,3,1,4,13,25,85,1,29,9,8,71,135195.336
4,4,1,4,7,23,67,8,16,59,8,38,96095.808


In [6]:
df = dataset.drop(columns=['Unnamed: 0','OpSys'])
df.head()

Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,Weight,Price
0,1,4,7,23,65,8,4,58,38,71378.6832
1,1,4,7,1,63,8,2,51,35,47895.5232
2,7,3,14,8,74,8,16,53,74,30636.0
3,1,4,13,25,85,1,29,9,71,135195.336
4,1,4,7,23,67,8,16,59,38,96095.808


In [7]:
df.isnull().sum()

Company             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
Weight              0
Price               0
dtype: int64

In [8]:
X = df.drop(columns=['Price'])
X.head()

Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,Weight
0,1,4,7,23,65,8,4,58,38
1,1,4,7,1,63,8,2,51,35
2,7,3,14,8,74,8,16,53,74
3,1,4,13,25,85,1,29,9,71
4,1,4,7,23,67,8,16,59,38


In [9]:
y = df['Price']
y.head()

0     71378.6832
1     47895.5232
2     30636.0000
3    135195.3360
4     96095.8080
Name: Price, dtype: float64

In [10]:
X_train,X_test,y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=2)

# Linear Regression

In [21]:
model = LinearRegression()
model.fit(X_train, y_train)

In [22]:
pred = model.predict(X_test)
model.score(X_test,y_test)

0.46409007576015016

In [23]:
model.predict([[1,4,7,23,65,8,4,58,38]])



array([51102.70692138])

In [24]:
mse = mean_squared_error(y_test, pred)
r2 = r2_score(y_test, pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 567499279.7791002
R-squared: 0.46409007576015016


# K Nearest Neighbour(KNN)

In [29]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(X_train)
x_test_scaled = scaler.transform(X_test)

k = 3  
knn = KNeighborsRegressor(n_neighbors=k)
knn.fit(x_train_scaled, y_train)

y_pred = knn.predict(x_test_scaled)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)


Mean Squared Error: 319055408.8391647
R-squared: 0.6987045339584094


In [30]:
knn.predict([[1,4,13,25,85,1,29,9,71]])

array([188420.4576])

# Gradient Boosting Regressors (e.g., XGBoost)

In [149]:
xg = xgb.XGBRegressor(
    objective='reg:squarederror',  
    n_estimators=150,             
    learning_rate=1,            
    max_depth=3                   
)

xg.fit(X_train, y_train)
y_pred = xg.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 163067079.13041538
R-squared: 0.8460099084939616


In [150]:
predict = np.array([[1, 4, 13, 25, 85, 1, 29, 9, 71]]) 
predicted_price = xg.predict(predict)
print("Predicted Price:", predicted_price[0])

Predicted Price: 135854.84


In [151]:
predict = np.array([[1,4,7,23,65,8,4,58,38]])

predicted_price = xg.predict(predict)
print("Predicted Price:", predicted_price[0])

Predicted Price: 75392.76


In [152]:
diff = y - predicted_price
diff.head()

0    -4014.074613
1   -27497.234612
2   -44756.757812
3    59802.578187
4    20703.050188
Name: Price, dtype: float64