## Import The Data

In [366]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [367]:
df = pd.read_csv("laptop_price.csv")

## Understand The Data

In [368]:
print(df.shape[0])
print(df.shape[1])

1303
13


In [369]:
df.isnull().sum()

laptop_ID           0
Company             0
Product             0
TypeName            0
Inches              0
ScreenResolution    0
Cpu                 0
Ram                 0
Memory              0
Gpu                 0
OpSys               0
Weight              0
Price_euros         0
dtype: int64

In [370]:
df.dtypes

laptop_ID             int64
Company              object
Product              object
TypeName             object
Inches              float64
ScreenResolution     object
Cpu                  object
Ram                  object
Memory               object
Gpu                  object
OpSys                object
Weight               object
Price_euros         float64
dtype: object

In [371]:
df.describe()

Unnamed: 0,laptop_ID,Inches,Price_euros
count,1303.0,1303.0,1303.0
mean,660.155794,15.017191,1123.686992
std,381.172104,1.426304,699.009043
min,1.0,10.1,174.0
25%,331.5,14.0,599.0
50%,659.0,15.6,977.0
75%,990.5,15.6,1487.88
max,1320.0,18.4,6099.0


In [372]:
for column in df.columns:
    print(f"{column} : {len(df[column].unique())}")

laptop_ID : 1303
Company : 19
Product : 618
TypeName : 6
Inches : 18
ScreenResolution : 40
Cpu : 118
Ram : 9
Memory : 39
Gpu : 110
OpSys : 9
Weight : 179
Price_euros : 791


In [373]:
df.head(5)

Unnamed: 0,laptop_ID,Company,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros
0,1,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 2.3GHz,8GB,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69
1,2,Apple,Macbook Air,Ultrabook,13.3,1440x900,Intel Core i5 1.8GHz,8GB,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34kg,898.94
2,3,HP,250 G6,Notebook,15.6,Full HD 1920x1080,Intel Core i5 7200U 2.5GHz,8GB,256GB SSD,Intel HD Graphics 620,No OS,1.86kg,575.0
3,4,Apple,MacBook Pro,Ultrabook,15.4,IPS Panel Retina Display 2880x1800,Intel Core i7 2.7GHz,16GB,512GB SSD,AMD Radeon Pro 455,macOS,1.83kg,2537.45
4,5,Apple,MacBook Pro,Ultrabook,13.3,IPS Panel Retina Display 2560x1600,Intel Core i5 3.1GHz,8GB,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.6


## Clean And Prepare The Data

In [374]:
df.rename(columns = {'Price_euros' : 'Price'}, inplace= True)  

In [375]:
df = df.join(pd.get_dummies(df['Company']))
df = df.drop("Company", axis= 1)

In [376]:
df = df.join(pd.get_dummies(df['TypeName']))
df = df.drop("TypeName", axis= 1)

In [377]:
df["ScreenResolution"] = df.ScreenResolution.str.split(" ").apply(lambda x: x[-1])
df["ScreenWitdh"] = df.ScreenResolution.str.split("x").apply(lambda x: x[0])
df["ScreenHeight"] = df.ScreenResolution.str.split("x").apply(lambda x: x[1])

df = df.drop("ScreenResolution", axis= 1)

In [378]:
df["CpuBrand"] = df.Cpu.str.split(" ").apply(lambda x: x[0])
df["CpuFrequency"] = df.Cpu.str.split(" ").apply(lambda x: x[-1])

df["CpuFrequency"] = df["CpuFrequency"].str[:-3]

df= df.drop("Cpu", axis= 1)

In [379]:
df["Ram"] = df["Ram"].str[:-2]

In [380]:
df['MemoryAmount'] = df.Memory.str.split(" ").apply(lambda x: x[0])
df['MemoryType'] = df.Memory.str.split(" ").apply(lambda x: x[1])

In [381]:
def turnMemoryIntoMb(value):
    if "GB" in value:
        return float(value[:value.find("GB")]) * 1000
    elif "TB" in value:
        return float(value[:value.find("TB")]) * 1000000


In [382]:
df["MemoryAmount"] = df["MemoryAmount"].apply(turnMemoryIntoMb)

df.drop("Memory", axis = 1)

Unnamed: 0,laptop_ID,Product,Inches,Ram,Gpu,OpSys,Weight,Price,Acer,Apple,...,Netbook,Notebook,Ultrabook,Workstation,ScreenWitdh,ScreenHeight,CpuBrand,CpuFrequency,MemoryAmount,MemoryType
0,1,MacBook Pro,13.3,8,Intel Iris Plus Graphics 640,macOS,1.37kg,1339.69,False,True,...,False,False,True,False,2560,1600,Intel,2.3,128000.0,SSD
1,2,Macbook Air,13.3,8,Intel HD Graphics 6000,macOS,1.34kg,898.94,False,True,...,False,False,True,False,1440,900,Intel,1.8,128000.0,Flash
2,3,250 G6,15.6,8,Intel HD Graphics 620,No OS,1.86kg,575.00,False,False,...,False,True,False,False,1920,1080,Intel,2.5,256000.0,SSD
3,4,MacBook Pro,15.4,16,AMD Radeon Pro 455,macOS,1.83kg,2537.45,False,True,...,False,False,True,False,2880,1800,Intel,2.7,512000.0,SSD
4,5,MacBook Pro,13.3,8,Intel Iris Plus Graphics 650,macOS,1.37kg,1803.60,False,True,...,False,False,True,False,2560,1600,Intel,3.1,256000.0,SSD
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,4,Intel HD Graphics 520,Windows 10,1.8kg,638.00,False,False,...,False,False,False,False,1920,1080,Intel,2.5,128000.0,SSD
1299,1317,Yoga 900-13ISK,13.3,16,Intel HD Graphics 520,Windows 10,1.3kg,1499.00,False,False,...,False,False,False,False,3200,1800,Intel,2.5,512000.0,SSD
1300,1318,IdeaPad 100S-14IBR,14.0,2,Intel HD Graphics,Windows 10,1.5kg,229.00,False,False,...,False,True,False,False,1366,768,Intel,1.6,64000.0,Flash
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,6,AMD Radeon R5 M330,Windows 10,2.19kg,764.00,False,False,...,False,True,False,False,1366,768,Intel,2.5,1000000.0,HDD


In [383]:
df["Weight"] = df["Weight"].str[:-2]
df["Weight"] = df["Weight"].astype("float")

In [384]:
df["GpuBrand"] = df["Gpu"].str.split(" ").apply(lambda x: x[0])
df.drop("Gpu", axis= 1)

Unnamed: 0,laptop_ID,Product,Inches,Ram,Memory,OpSys,Weight,Price,Acer,Apple,...,Notebook,Ultrabook,Workstation,ScreenWitdh,ScreenHeight,CpuBrand,CpuFrequency,MemoryAmount,MemoryType,GpuBrand
0,1,MacBook Pro,13.3,8,128GB SSD,macOS,1.37,1339.69,False,True,...,False,True,False,2560,1600,Intel,2.3,128000.0,SSD,Intel
1,2,Macbook Air,13.3,8,128GB Flash Storage,macOS,1.34,898.94,False,True,...,False,True,False,1440,900,Intel,1.8,128000.0,Flash,Intel
2,3,250 G6,15.6,8,256GB SSD,No OS,1.86,575.00,False,False,...,True,False,False,1920,1080,Intel,2.5,256000.0,SSD,Intel
3,4,MacBook Pro,15.4,16,512GB SSD,macOS,1.83,2537.45,False,True,...,False,True,False,2880,1800,Intel,2.7,512000.0,SSD,AMD
4,5,MacBook Pro,13.3,8,256GB SSD,macOS,1.37,1803.60,False,True,...,False,True,False,2560,1600,Intel,3.1,256000.0,SSD,Intel
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,4,128GB SSD,Windows 10,1.80,638.00,False,False,...,False,False,False,1920,1080,Intel,2.5,128000.0,SSD,Intel
1299,1317,Yoga 900-13ISK,13.3,16,512GB SSD,Windows 10,1.30,1499.00,False,False,...,False,False,False,3200,1800,Intel,2.5,512000.0,SSD,Intel
1300,1318,IdeaPad 100S-14IBR,14.0,2,64GB Flash Storage,Windows 10,1.50,229.00,False,False,...,True,False,False,1366,768,Intel,1.6,64000.0,Flash,Intel
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,6,1TB HDD,Windows 10,2.19,764.00,False,False,...,True,False,False,1366,768,Intel,2.5,1000000.0,HDD,AMD


In [385]:
df = df.join(pd.get_dummies(df.OpSys))
df.drop("OpSys", axis = 1)

Unnamed: 0,laptop_ID,Product,Inches,Ram,Memory,Gpu,Weight,Price,Acer,Apple,...,GpuBrand,Android,Chrome OS,Linux,Mac OS X,No OS,Windows 10,Windows 10 S,Windows 7,macOS
0,1,MacBook Pro,13.3,8,128GB SSD,Intel Iris Plus Graphics 640,1.37,1339.69,False,True,...,Intel,False,False,False,False,False,False,False,False,True
1,2,Macbook Air,13.3,8,128GB Flash Storage,Intel HD Graphics 6000,1.34,898.94,False,True,...,Intel,False,False,False,False,False,False,False,False,True
2,3,250 G6,15.6,8,256GB SSD,Intel HD Graphics 620,1.86,575.00,False,False,...,Intel,False,False,False,False,True,False,False,False,False
3,4,MacBook Pro,15.4,16,512GB SSD,AMD Radeon Pro 455,1.83,2537.45,False,True,...,AMD,False,False,False,False,False,False,False,False,True
4,5,MacBook Pro,13.3,8,256GB SSD,Intel Iris Plus Graphics 650,1.37,1803.60,False,True,...,Intel,False,False,False,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,4,128GB SSD,Intel HD Graphics 520,1.80,638.00,False,False,...,Intel,False,False,False,False,False,True,False,False,False
1299,1317,Yoga 900-13ISK,13.3,16,512GB SSD,Intel HD Graphics 520,1.30,1499.00,False,False,...,Intel,False,False,False,False,False,True,False,False,False
1300,1318,IdeaPad 100S-14IBR,14.0,2,64GB Flash Storage,Intel HD Graphics,1.50,229.00,False,False,...,Intel,False,False,False,False,False,True,False,False,False
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,6,1TB HDD,AMD Radeon R5 M330,2.19,764.00,False,False,...,AMD,False,False,False,False,False,True,False,False,False


In [386]:
cpuCategories = pd.get_dummies(df.CpuBrand)
cpuCategories.columns = [col + "_CPU" for col in cpuCategories.columns]

df = df.join(cpuCategories)
df.drop("CpuBrand", axis = 1)

Unnamed: 0,laptop_ID,Product,Inches,Ram,Memory,Gpu,OpSys,Weight,Price,Acer,...,Linux,Mac OS X,No OS,Windows 10,Windows 10 S,Windows 7,macOS,AMD_CPU,Intel_CPU,Samsung_CPU
0,1,MacBook Pro,13.3,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,False,...,False,False,False,False,False,False,True,False,True,False
1,2,Macbook Air,13.3,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34,898.94,False,...,False,False,False,False,False,False,True,False,True,False
2,3,250 G6,15.6,8,256GB SSD,Intel HD Graphics 620,No OS,1.86,575.00,False,...,False,False,True,False,False,False,False,False,True,False
3,4,MacBook Pro,15.4,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83,2537.45,False,...,False,False,False,False,False,False,True,False,True,False
4,5,MacBook Pro,13.3,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37,1803.60,False,...,False,False,False,False,False,False,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,4,128GB SSD,Intel HD Graphics 520,Windows 10,1.80,638.00,False,...,False,False,False,True,False,False,False,False,True,False
1299,1317,Yoga 900-13ISK,13.3,16,512GB SSD,Intel HD Graphics 520,Windows 10,1.30,1499.00,False,...,False,False,False,True,False,False,False,False,True,False
1300,1318,IdeaPad 100S-14IBR,14.0,2,64GB Flash Storage,Intel HD Graphics,Windows 10,1.50,229.00,False,...,False,False,False,True,False,False,False,False,True,False
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,6,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19,764.00,False,...,False,False,False,True,False,False,False,False,True,False


In [387]:
gpuCategories = pd.get_dummies(df.GpuBrand)
gpuCategories.columns = [col + "_GPU" for col in gpuCategories.columns]

df = df.join(gpuCategories)
df.drop("GpuBrand", axis = 1)

Unnamed: 0,laptop_ID,Product,Inches,Ram,Memory,Gpu,OpSys,Weight,Price,Acer,...,Windows 10 S,Windows 7,macOS,AMD_CPU,Intel_CPU,Samsung_CPU,AMD_GPU,ARM_GPU,Intel_GPU,Nvidia_GPU
0,1,MacBook Pro,13.3,8,128GB SSD,Intel Iris Plus Graphics 640,macOS,1.37,1339.69,False,...,False,False,True,False,True,False,False,False,True,False
1,2,Macbook Air,13.3,8,128GB Flash Storage,Intel HD Graphics 6000,macOS,1.34,898.94,False,...,False,False,True,False,True,False,False,False,True,False
2,3,250 G6,15.6,8,256GB SSD,Intel HD Graphics 620,No OS,1.86,575.00,False,...,False,False,False,False,True,False,False,False,True,False
3,4,MacBook Pro,15.4,16,512GB SSD,AMD Radeon Pro 455,macOS,1.83,2537.45,False,...,False,False,True,False,True,False,True,False,False,False
4,5,MacBook Pro,13.3,8,256GB SSD,Intel Iris Plus Graphics 650,macOS,1.37,1803.60,False,...,False,False,True,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1298,1316,Yoga 500-14ISK,14.0,4,128GB SSD,Intel HD Graphics 520,Windows 10,1.80,638.00,False,...,False,False,False,False,True,False,False,False,True,False
1299,1317,Yoga 900-13ISK,13.3,16,512GB SSD,Intel HD Graphics 520,Windows 10,1.30,1499.00,False,...,False,False,False,False,True,False,False,False,True,False
1300,1318,IdeaPad 100S-14IBR,14.0,2,64GB Flash Storage,Intel HD Graphics,Windows 10,1.50,229.00,False,...,False,False,False,False,True,False,False,False,True,False
1301,1319,15-AC110nv (i7-6500U/6GB/1TB/Radeon,15.6,6,1TB HDD,AMD Radeon R5 M330,Windows 10,2.19,764.00,False,...,False,False,False,False,True,False,True,False,False,False


In [388]:
df["Ram"] = df["Ram"].astype("int")
df["CpuFrequency"] = df["CpuFrequency"].astype("float64")
df["ScreenWitdh"] = df["ScreenWitdh"].astype("int")
df["ScreenHeight"] = df["ScreenHeight"].astype("int")