In [105]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler , OneHotEncoder,LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,r2_score
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA


In [69]:
df=pd.read_csv('Laptops.csv')
df.head()

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"
2,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"
3,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"
4,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"


In [19]:
df.isnull().sum()

Brand                0
Model Name           0
Processor            0
Operating System     0
Storage             12
RAM                  0
Screen Size          0
Touch_Screen         0
Price                0
dtype: int64

In [70]:
df['Storage']=df['Storage'].map({'512 GB':512,'256 GB':256,'1 TB':1000,'2 TB':2000,'4 TB':4000,'128 GB':128,'64 GB':64,'3 TB':3000,'6 TB':6000})
df.dropna(inplace=True)
print(df.shape)


(825, 9)


In [71]:
df['RAM']=df['RAM'].map({'8 GB':8, '16 GB':16, '4 GB':4, '12 GB':12, '32 GB':32, '64 GB':64, '18 GB':18}).astype(int)
print(df["RAM"])

0       8
1       8
2       8
3       8
4       8
       ..
832     8
833     8
834    16
835     8
836    16
Name: RAM, Length: 825, dtype: int64


In [72]:
df.rename(columns={'Screen Size':'Screen_Size' ,'Operating System':'Operating_System', 'Model Name':'Model_Name'},inplace=True)
df.drop(columns=['Model_Name'],inplace=True)
df.head()

Unnamed: 0,Brand,Processor,Operating_System,Storage,RAM,Screen_Size,Touch_Screen,Price
0,HP,Core i3,Windows 11 Home,512.0,8,39.62 cm (15.6 Inch),No,"₹38,990"
1,HP,Core i3,Windows 11 Home,512.0,8,39.62 cm (15.6 Inch),No,"₹37,990"
2,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,"₹70,990"
3,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,"₹70,990"
4,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,"₹70,990"


In [73]:
df['Price'] = df['Price'].str.replace('₹', '', regex=False) \
                         .str.replace(',', '', regex=False) \
                         .astype(int)
df.head()

Unnamed: 0,Brand,Processor,Operating_System,Storage,RAM,Screen_Size,Touch_Screen,Price
0,HP,Core i3,Windows 11 Home,512.0,8,39.62 cm (15.6 Inch),No,38990
1,HP,Core i3,Windows 11 Home,512.0,8,39.62 cm (15.6 Inch),No,37990
2,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,70990
3,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,70990
4,Apple,M1,Mac OS Big Sur,256.0,8,33.78 cm (13.3 inch),No,70990


In [88]:
le=LabelEncoder()
df['Touch_Screen']=le.fit_transform(df['Touch_Screen'])
df['Storage']=df['Storage'].astype(int)
# df.drop(columns='Screen_Size',inplace=True)


df.head()


Unnamed: 0,Brand,Processor,Operating_System,Storage,RAM,Touch_Screen,Price
0,HP,Core i3,Windows 11 Home,512,8,0,38990
1,HP,Core i3,Windows 11 Home,512,8,0,37990
2,Apple,M1,Mac OS Big Sur,256,8,0,70990
3,Apple,M1,Mac OS Big Sur,256,8,0,70990
4,Apple,M1,Mac OS Big Sur,256,8,0,70990


In [360]:
preprocess=ColumnTransformer(
    transformers=[
        ('num',StandardScaler(),['Storage','RAM']),
        ('cat',OneHotEncoder(handle_unknown='ignore'),['Brand','Processor','Operating_System','Touch_Screen'])
    ]
)
pipe=Pipeline([
    ('step',preprocess),
    ('pca',PCA(n_components=2)),
    ('model',RandomForestClassifier(n_estimators=100))
])



x=df[['Brand','Processor','Operating_System','Storage','RAM','Touch_Screen']]
y=df['Price']


x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)
pipe.fit(x_train,y_train)
pred=pipe.predict(x_test)
print("Accuracy : ",np.round(r2_score(y_test,pred),2)*100,'%')


Accuracy :  80.0 %


In [337]:
data = {
    'Brand':'HP',
    'Processor':'Core i3',
    'Operating_System':'Windows 11 Home',
    'Storage':4000,
    'RAM':32,
    'Touch_Screen':1
}

df_input = pd.DataFrame([data])

result = pipe.predict(df_input)
print('price : ',result[0])


price :  74990


In [338]:
import joblib
joblib.dump(pipe,'LaptopPricePredict.pkl')

['LaptopPricePredict.pkl']

In [339]:
pipe=joblib.load('LaptopPricePredict.pkl')
data = {
    'Brand':'HP',
    'Processor':'Core i3',
    'Operating_System':'Windows 11 Home',
    'Storage':4000,
    'RAM':32,
    'Touch_Screen':1
}

df_input = pd.DataFrame([data])

result = pipe.predict(df_input)
print('price : ',result[0])


price :  74990


In [362]:
df.to_csv('Cleaned_Laptop_data.csv',index=False)