In [424]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib as plt
import plotly.express as px
import seaborn as sns
from plotly.offline import iplot,plot
from plotly.subplots import make_subplots
from warnings import filterwarnings

In [425]:
# Disable filter warning
filterwarnings('ignore')

In [426]:
# Read dataframe
data = pd.read_csv('laptopi.csv')
data.sample(5)

Unnamed: 0.1,Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
160,160,Lenovo,15ARH7,Ryzen 5 Hexa Core,Windows 11 Home,512 GB,16 GB,39.62 cm (15.6 Inch),No,"₹56,990"
820,820,HP,16-n0050AX,Ryzen 7 Octa Core,Windows 11 Home,512 GB,16 GB,40.89 cm (16.1 Inch),No,"₹88,149"
498,498,HP,15-fd0022TU,Core i5,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹60,490"
591,591,Acer,EX215-54,Core i3,Windows 11 Home,256 GB,4 GB,39.62 cm (15.6 inch),No,"₹29,500"
569,569,GIGABYTE,AORUS 15 BKF,Core i7,Windows 11 Pro,1 TB,16 GB,39.62 cm (15.6 Inch),No,"₹1,48,990"


In [427]:
# Get data info
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 837 entries, 0 to 836
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   Unnamed: 0        837 non-null    int64 
 1   Brand             837 non-null    object
 2   Model Name        837 non-null    object
 3   Processor         837 non-null    object
 4   Operating System  837 non-null    object
 5   Storage           825 non-null    object
 6   RAM               837 non-null    object
 7   Screen Size       837 non-null    object
 8   Touch_Screen      837 non-null    object
 9   Price             837 non-null    object
dtypes: int64(1), object(9)
memory usage: 65.5+ KB


In [428]:
# Check duplicated value
data.duplicated().sum()

0

In [429]:
# Check null values
data.isnull().sum()

Unnamed: 0           0
Brand                0
Model Name           0
Processor            0
Operating System     0
Storage             12
RAM                  0
Screen Size          0
Touch_Screen         0
Price                0
dtype: int64

In [430]:
# Remove row with null values
data.dropna(inplace=True)
data.shape

(825, 10)

In [431]:
# Remove unnamed column
data.drop(columns='Unnamed: 0',inplace=True)

In [432]:
# Pre Edit Processeur Column
data['Processor'].value_counts()

Processor
Core i5                   361
Core i3                   159
Ryzen 5 Hexa Core          81
Core i7                    68
Ryzen 7 Octa Core          67
Celeron Dual Core          15
Ryzen 3 Quad Core          13
Celeron Quad Core          10
Ryzen 5 Quad Core          10
Ryzen 3 Dual Core           7
Core i9                     4
Pentium Silver              4
M2                          3
Athlon Dual Core            3
M1                          3
Ryzen 9 Octa Core           2
Ryzen 5                     2
M1 Max                      1
Ryzen 5 Dual Core           1
Ryzen 7 Quad Core           1
M1 Pro                      1
M3 Pro                      1
M2 Max                      1
MediaTek MT8788             1
Ryzen Z1 Octa Core          1
MediaTek Kompanio 520       1
MediaTek Kompanio 500       1
Ryzen Z1 HexaCore           1
MediaTek Kompanio 1200      1
Ryzen 9 16 Core             1
Name: count, dtype: int64

In [433]:
# Edit Processor Column
data["Processor"] = data['Processor'].apply(lambda x:" ".join(x.split()[0:2]))

In [434]:
# Post Edit Processor Column
data['Processor'].value_counts()

Processor
Core i5              361
Core i3              159
Ryzen 5               94
Core i7               68
Ryzen 7               68
Ryzen 3               20
Celeron Dual          15
Celeron Quad          10
Pentium Silver         4
Core i9                4
Ryzen 9                3
Athlon Dual            3
MediaTek Kompanio      3
M1                     3
M2                     3
Ryzen Z1               2
MediaTek MT8788        1
M1 Max                 1
M2 Max                 1
M3 Pro                 1
M1 Pro                 1
Name: count, dtype: int64

In [435]:
# Adding Processor Manufacturer Column
data['Processor Manufcaturer']=data['Processor']

In [436]:
# Edit  Processor Manufcaturer Column
def processor_manufactuer(data):
    if 'core' in data.lower():
        return 'Intel'
    elif 'ryzen' in data.lower():
        return 'Amd'
    elif any (m in data for m in ["M1", "M2", "M3"]):
        return 'Mac'
    else:
        return 'Others'

data['Processor Manufcaturer']=data['Processor Manufcaturer'].apply(processor_manufactuer)

In [437]:
# Post Edit Processor Manufacturer Column
data['Processor Manufcaturer'].value_counts()

Processor Manufcaturer
Intel     592
Amd       187
Others     36
Mac        10
Name: count, dtype: int64

In [438]:
# Pre Edit Operation System Column
data['Operating System'].value_counts()

Operating System
Windows 11 Home    747
Windows 10 Home     27
Chrome              13
Windows 10          10
DOS                  7
Mac OS Monterey      5
Windows 11 Pro       5
Mac OS Big Sur       3
Windows 10 Pro       3
Prime OS             1
Ubuntu               1
macOS Ventura        1
macOS Sonoma         1
Mac OS Mojave        1
Name: count, dtype: int64

In [439]:
# Edit Operation System Column
def operationsystem(data):
    if 'windows' in data.lower():
        return 'Windows OS'
    elif 'mac' in data.lower():
        return 'Mac OS'
    elif 'chrome' in  data.lower():
        return 'Chrome OS'
    else:
        return 'Others'

data['Operating System']=data['Operating System'].apply(operationsystem)

In [440]:
# Post Edit Operation System Column
data['Operating System'].value_counts()

Operating System
Windows OS    792
Chrome OS      13
Mac OS         11
Others          9
Name: count, dtype: int64

In [441]:
# Pre Edit Storage Column
data['Storage'].value_counts()

Storage
512 GB    627
1 TB      101
256 GB     49
2 TB       20
128 GB     12
4 TB       10
64 GB       4
3 TB        1
6 TB        1
Name: count, dtype: int64

In [442]:
# Edit Storage Column
data['Storage']=data['Storage'].str.replace('GB','')
data['Storage']=data['Storage'].str.replace('TB','000')
data['Storage']=data['Storage'].str.replace(' ','')
data['Storage']=data['Storage'].astype(int)

In [443]:
# Post Edit Storage Column
data['Storage'].value_counts()

Storage
512     627
1000    101
256      49
2000     20
128      12
4000     10
64        4
3000      1
6000      1
Name: count, dtype: int64

In [444]:
# Pre Edit Ram Column
data['RAM'].value_counts()

RAM
8 GB     414
16 GB    373
4 GB      24
32 GB      9
12 GB      2
64 GB      2
18 GB      1
Name: count, dtype: int64

In [445]:
# Edit Ram Column
data['RAM']=data['RAM'].str.replace('GB','')
data['RAM']=data['RAM'].astype(int)

In [446]:
# Post Edit Ram Column
data['RAM'].value_counts()

RAM
8     414
16    373
4      24
32      9
12      2
64      2
18      1
Name: count, dtype: int64

In [447]:
# Pre Edit Screen Size Column
data['Screen Size'].value_counts()

Screen Size
39.62 cm (15.6 Inch)     363
39.62 cm (15.6 inch)     184
35.56 cm (14 inch)       124
35.56 cm (14 Inch)        49
40.64 cm (16 Inch)        19
33.78 cm (13.3 inch)      10
40.64 cm (16 inch)         8
40.89 cm (16.1 Inch)       7
43.94 cm (17.3 Inch)       6
33.78 cm (13.3 Inch)       6
96.52 cm (38 cm)           6
38.1 cm (15 inch)          4
35.81 cm (14.1 inch)       3
40.89 cm (16.1 inch)       3
39.01 cm (15.36 inch)      3
34.29 cm (13.5 inch)       3
35.81 cm (14.1 Inch)       2
33.02 cm (13 inch)         2
100.63 cm (39.62 cm)       2
17.78 cm (7 Inch)          2
43.94 cm (17.3 inch)       2
34.29 cm (13.5 Inch)       2
38.86 cm (15.3 Inch)       1
31.5 cm (12.4 Inch)        1
36.07 cm (14.2 inch)       1
38.0 cm (14.96 cm)         1
90.32 cm (35.56 cm)        1
30.48 cm (12 inch)         1
29.46 cm (11.6 Inch)       1
41.15 cm (16.2 inch)       1
34.04 cm (13.4 inch)       1
35.0 cm (13.78 inch)       1
34.04 cm (13.4 Inch)       1
26.67 cm (10.5 inch)       1
38

In [448]:
# Edit Screen Size Column
data['Screen Size'] = data['Screen Size'].apply(lambda x:''.join(x.split(' ')[0]))
data['Screen Size']= data['Screen Size'].astype(float)

In [449]:
# Post Edit Screen Size Column
data['Screen Size'].value_counts()

Screen Size
39.62     547
35.56     173
40.64      27
33.78      16
40.89      10
43.94       8
96.52       6
35.81       5
34.29       5
38.10       4
39.01       3
17.78       2
33.02       2
34.04       2
29.46       2
38.00       2
100.63      2
26.67       1
34.54       1
35.00       1
41.15       1
90.32       1
30.48       1
38.86       1
36.07       1
31.50       1
Name: count, dtype: int64

In [450]:
# Pre Edit Price Column
data['Price'].value_counts()

Price
₹53,990      65
₹37,990      42
₹54,990      41
₹49,990      40
₹38,990      38
             ..
₹52,890       1
₹1,47,743     1
₹64,600       1
₹1,99,990     1
₹70,500       1
Name: count, Length: 269, dtype: int64

In [451]:
# Edit Price Column
data['Price']=data['Price'].str.replace('₹','')
data['Price']=data['Price'].str.replace(',','')
data['Price']=data['Price'].astype(int)
data['Price']=data['Price']*0.0120648 #Conversion From Indian Rupees to US Dollar
data['Price']=round(data['Price'],2) #Round values

In [452]:
# Post Edit Price Column
data['Price'].value_counts()

Price
651.38     65
458.34     42
663.44     41
603.12     40
470.41     38
           ..
638.11      1
1782.49     1
779.39      1
2412.84     1
850.57      1
Name: count, Length: 269, dtype: int64

In [453]:
data.head()

Unnamed: 0,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price,Processor Manufcaturer
0,HP,15s-fq5007TU,Core i3,Windows OS,512,8,39.62,No,470.41,Intel
1,HP,15s-fy5003TU,Core i3,Windows OS,512,8,39.62,No,458.34,Intel
2,Apple,2020 Macbook Air,M1,Mac OS,256,8,33.78,No,856.48,Mac
3,Apple,2020 Macbook Air,M1,Mac OS,256,8,33.78,No,856.48,Mac
4,Apple,2020 Macbook Air,M1,Mac OS,256,8,33.78,No,856.48,Mac
