### 1. Importing Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
## Reading the scraped laptop data with appropriate column names
col_names = ['ProductName' , 'Processor' , 'RAM' , 'OS' , 'Disk' , 'Dim', 'Category']
data  = pd.read_csv("csvfiles/catalogue_laptop.csv",names = col_names,index_col = False)

In [3]:
## SAMPLE OF SCRAPED DATA
data.head()

Unnamed: 0,ProductName,Processor,RAM,OS,Disk,Dim,Category
0,Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 T...,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
1,Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 T...,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
2,HP 14q Core i3 7th Gen - (8 GB/256 GB SSD/Wind...,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,256 GB SSD,35.56 cm (14 inch) Display,Laptop
3,Dell Vostro 3000 Core i3 8th Gen - (4 GB/1 TB ...,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop
4,HP 14q Core i5 8th Gen - (8 GB/1 TB HDD/Window...,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop


### 2. Identifying Brand and Model from data

In [4]:
## Scraped Product Name containing the Brand, Model and features all together
prodName = data.ProductName
prodName.values

array(['Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 TB HDD/Windows 10 Home) 130-15IKB Laptop',
       'Lenovo Ideapad 130 Core i3 7th Gen - (4 GB/1 TB HDD/Windows 10 Home) 130-15IKB Laptop',
       'HP 14q Core i3 7th Gen - (8 GB/256 GB SSD/Windows 10 Home) 14q-cs0023TU Thin and Light Laptop',
       'Dell Vostro 3000 Core i3 8th Gen - (4 GB/1 TB HDD/Linux) 3480 Laptop',
       'HP 14q Core i5 8th Gen - (8 GB/1 TB HDD/Windows 10 Home) 14q-cs0017tu Thin and Light Laptop',
       'Lenovo Ideapad 130 Core i5 8th Gen - (8 GB/1 TB HDD/Windows 10 Home/2 GB Graphics) 130-15IKB Laptop',
       'Lenovo Ideapad 330 Core i3 7th Gen - (8 GB/1 TB HDD/Windows 10 Home) 330-15IKB Laptop',
       'Asus VivoBook Gaming Core i5 9th Gen - (8 GB + 32 GB Optane/512 GB SSD/Windows 10 Home/4 GB Graphics/N...',
       'MSI Modern 14 Core i5 10th Gen - (8 GB/512 GB SSD/Windows 10 Home) A10M-652IN Thin and Light Laptop',
       'HP DA Core i3 7th Gen - (4 GB/1 TB HDD/Windows 10 Home) 15-DA0410TU Laptop',
      

In [5]:
## Splitting the brand from the Product Name (it is the first string in the Product Name)
brand = prodName.str.split(' ',1)
brand

0      [Lenovo, Ideapad 130 Core i3 7th Gen - (4 GB/1...
1      [Lenovo, Ideapad 130 Core i3 7th Gen - (4 GB/1...
2      [HP, 14q Core i3 7th Gen - (8 GB/256 GB SSD/Wi...
3      [Dell, Vostro 3000 Core i3 8th Gen - (4 GB/1 T...
4      [HP, 14q Core i5 8th Gen - (8 GB/1 TB HDD/Wind...
                             ...                        
411    [Lenovo, Ideapad 500 Core i5 6th Gen - (4 GB/1...
412    [Lenovo, Ideapad 330 Core i7 8th Gen - (8 GB/1...
413    [Lenovo, Ideapad 320 APU Quad Core A6 - (4 GB/...
414    [Lenovo, G50-30 Pentium Quad Core 4th Gen - (4...
415    [Lenovo, Ideapad 330 Core i3 7th Gen - (4 GB/1...
Name: ProductName, Length: 416, dtype: object

In [6]:
## Creating a new column with only Brand names
brands = brand.loc[:].map(lambda x: x[0])
brands

0      Lenovo
1      Lenovo
2          HP
3        Dell
4          HP
        ...  
411    Lenovo
412    Lenovo
413    Lenovo
414    Lenovo
415    Lenovo
Name: ProductName, Length: 416, dtype: object

In [7]:
## Extracting the model from the Product Name (generalizing that )
model = brand.loc[:].map(lambda x: x[1])
models = model.str.split('Core')
models

0      [Ideapad 130 ,  i3 7th Gen - (4 GB/1 TB HDD/Wi...
1      [Ideapad 130 ,  i3 7th Gen - (4 GB/1 TB HDD/Wi...
2      [14q ,  i3 7th Gen - (8 GB/256 GB SSD/Windows ...
3      [Vostro 3000 ,  i3 8th Gen - (4 GB/1 TB HDD/Li...
4      [14q ,  i5 8th Gen - (8 GB/1 TB HDD/Windows 10...
                             ...                        
411    [Ideapad 500 ,  i5 6th Gen - (4 GB/1 TB HDD/Wi...
412    [Ideapad 330 ,  i7 8th Gen - (8 GB/1 TB HDD/Wi...
413    [Ideapad 320 APU Quad ,  A6 - (4 GB/1 TB HDD/W...
414    [G50-30 Pentium Quad ,  4th Gen - (4 GB/500 GB...
415    [Ideapad 330 ,  i3 7th Gen - (4 GB/1 TB HDD/DO...
Name: ProductName, Length: 416, dtype: object

In [8]:
## Creating a new column with only Model names
models = models.loc[:].map(lambda x: x[0])
models

0               Ideapad 130 
1               Ideapad 130 
2                       14q 
3               Vostro 3000 
4                       14q 
               ...          
411             Ideapad 500 
412             Ideapad 330 
413    Ideapad 320 APU Quad 
414     G50-30 Pentium Quad 
415             Ideapad 330 
Name: ProductName, Length: 416, dtype: object

In [9]:
## Inserting the Brand and Model as separate columns and deleting the original Product Name
data = data.drop(columns = 'ProductName')
data.insert(0, "Brand", brands, True) 
data.insert(1, "Model", models, True)
data["Temp"] = False ## inserting this column for flag value required in the next step

data.head()

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
0,Lenovo,Ideapad 130,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
1,Lenovo,Ideapad 130,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
2,HP,14q,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,256 GB SSD,35.56 cm (14 inch) Display,Laptop,False
3,Dell,Vostro 3000,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False
4,HP,14q,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False


In [10]:
## Sample of Acer Laptops
data.loc[data.Brand == 'Acer'].head()

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
15,Acer,Aspire 5,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
16,Acer,Nitro 5,Intel Core i7 Processor (9th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|256 GB SSD,39.62 cm (15.6 inch) Display,Laptop,False
38,Acer,Nitro 5,Intel Core i7 Processor (9th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|256 GB SSD,39.62 cm (15.6 inch) Display,Laptop,False
40,Acer,Aspire 5,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
49,Acer,Nitro 5 Ryzen 5 Quad,AMD Ryzen 5 Quad Core Processor,8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|256 GB SSD,39.62 cm (15.6 inch) Display,Laptop,False


### 3. Standarizing and Cleaning the Model Names

In [11]:
## Reading the file containing the Model Names
names = pd.read_csv("csvfiles/laptop_models.csv")

In [12]:
## Sample of the Models present
names.head()

Unnamed: 0,Brand,Model
0,Apple,MacBook Air
1,Apple,MacBook Pro
2,HP,EliteBook
3,HP,Zbook
4,HP,Notebook


In [13]:
## Replacing the Model names according to the above list

## converting to lowercase to make comparisons easy
data.Model = data.Model.str.lower()
#print(data.Model)
for i in range(len(names)):
    b = names.Brand[i]
    m = names.Model[i]  
    #print(m)
    data.Model[data.Brand.str.contains(str(b)) & data.Model.str.contains(str(m.lower()))] = str(m)
    
    ## maintaining a flag to see all the rows that have been modified
    data.Temp[data.Brand.str.contains(str(b)) & data.Model.str.contains(str(m))] = True
   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.Model[data.Brand.str.contains(str(b)) & data.Model.str.contains(str(m.lower()))] = str(m)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.Temp[data.Brand.str.contains(str(b)) & data.Model.str.contains(str(m))] = True


In [14]:
## Sample of replaced data. We can now see all the Model categories are uniform
data.loc[data.Brand == 'Dell']

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
3,Dell,Vostro,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,True
19,Dell,14 3000,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False
26,Dell,Vostro,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,True
44,Dell,14 3000,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False
79,Dell,G,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|128 GB SSD,39.62 cm (15.6 inch) Display,Laptop,True
100,Dell,Inspiron,Intel Core i7 Processor (9th Gen),16 GB DDR4 RAM,64 bit Windows 10 Operating System,512 GB SSD,39.62 cm (15.6 inch) Display,Laptop,True
135,Dell,Inspiron,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|512 GB SSD,39.62 cm (15.6 inch) Display,Laptop,True
154,Dell,Inspiron,Intel Core i7 Processor (9th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,512 GB SSD,39.62 cm (15.6 inch) Display,Laptop,True
161,Dell,XPS,Intel Core i9 Processor (9th Gen),32 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB SSD,39.62 cm (15.6 inch) Display,Laptop,True
165,Dell,Inspiron,AMD APU Dual Core A6 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True


In [15]:
## Sample of Lenovo Data
data.loc[data.Brand == 'Lenovo']

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
0,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
1,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
5,Lenovo,Ideapad,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
6,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
10,Lenovo,Ideapad,Intel Celeron Dual Core Processor,4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
...,...,...,...,...,...,...,...,...,...
411,Lenovo,Ideapad,Intel Core i5 Processor (6th Gen),4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,True
412,Lenovo,Ideapad,Intel Core i7 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
413,Lenovo,Ideapad,AMD APU Quad Core A6 Processor,4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
414,Lenovo,g50-30 pentium quad,Intel Pentium Quad Core Processor (4th Gen),4 GB DDR3 RAM,DOS Operating System,500 GB HDD,39.62 cm (15.6 inch) Display,Laptop,False


In [16]:
## Sample of Microsoft data (need to check Surface Pro)
data.loc[data.Brand == 'Microsoft']

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
123,Microsoft,Surface,Intel Core i7 Processor (7th Gen),8 GB DDR3 RAM,64 bit Windows 10 Operating System,256 GB SSD,34.29 cm (13.5 inch) Touchscreen Display,Laptop,True
139,Microsoft,SurfacePro,Intel Core i5 Processor (8th Gen),8 GB DDR3 RAM,64 bit Windows 10 Operating System,256 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True
178,Microsoft,Surface,Intel Core i7 Processor (7th Gen),16 GB DDR3 RAM,64 bit Windows 10 Operating System,512 GB SSD,34.29 cm (13.5 inch) Touchscreen Display,Laptop,True
185,Microsoft,SurfacePro,Intel Core i7 Processor (8th Gen),8 GB DDR3 RAM,64 bit Windows 10 Operating System,256 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True
192,Microsoft,SurfacePro,Intel Core i5 Processor (10th Gen),8 GB LPDDR4X RAM,64 bit Windows 10 Operating System,256 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True
250,Microsoft,SurfacePro,Intel Core i3 Processor (10th Gen),4 GB LPDDR4X RAM,64 bit Windows 10 Operating System,128 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True
266,Microsoft,SurfacePro,Intel Core i5 Processor (7th Gen),8 GB DDR3 RAM,64 bit Windows 10 Operating System,128 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True
384,Microsoft,SurfacePro,Intel Core i5 Processor (8th Gen),8 GB DDR3 RAM,64 bit Windows 10 Operating System,128 GB SSD,31.24 cm (12.3 inch) Touchscreen Display,Laptop,True


In [17]:
data2 = data

data2

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
0,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
1,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
2,HP,14q,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,256 GB SSD,35.56 cm (14 inch) Display,Laptop,False
3,Dell,Vostro,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,True
4,HP,14q,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False
...,...,...,...,...,...,...,...,...,...
411,Lenovo,Ideapad,Intel Core i5 Processor (6th Gen),4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,True
412,Lenovo,Ideapad,Intel Core i7 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
413,Lenovo,Ideapad,AMD APU Quad Core A6 Processor,4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,True
414,Lenovo,g50-30 pentium quad,Intel Pentium Quad Core Processor (4th Gen),4 GB DDR3 RAM,DOS Operating System,500 GB HDD,39.62 cm (15.6 inch) Display,Laptop,False


In [18]:
## Random Assign of models in the remaining list
ran_name = []

b = data2.loc[data["Temp"] == False].Brand
for i in b:
    ran_name.append(names.Model[names.Brand == i].sample(n=1).to_string(index = False)) 

idx=0
for i in data2.loc[data2["Temp"] == False].index:

    data2.Model[i] = ran_name[idx]
    data2.at[i,'Model']= ran_name[idx]
   
    idx = idx+1



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data2.Model[i] = ran_name[idx]


In [19]:
## these are the columns with unknown model names
data[data.Temp == False]

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category,Temp
2,HP,Pavilion,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,256 GB SSD,35.56 cm (14 inch) Display,Laptop,False
4,HP,ProBook,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop,False
9,HP,Pavilion,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
12,Lenovo,Ideapad,Intel Core i7 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
17,HP,Pavilion,AMD Ryzen 3 Dual Core Processor,4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
...,...,...,...,...,...,...,...,...,...
402,HP,Notebook,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
404,Acer,Nitro,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False
405,HP,Zbook,Intel Core i7 Processor (8th Gen),16 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD|256 GB SSD,39.62 cm (15.6 inch) Display,Laptop,False
410,Lenovo,ThinkPad P Series,Intel Core i5 Processor (4th Gen),4 GB DDR3 RAM,DOS Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop,False


In [20]:
## removing the flag column, --> Complete Data
data = data.drop(columns = 'Temp')
data

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category
0,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
1,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
2,HP,Pavilion,Intel Core i3 Processor (7th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,256 GB SSD,35.56 cm (14 inch) Display,Laptop
3,Dell,Vostro,Intel Core i3 Processor (8th Gen),4 GB DDR4 RAM,Linux/Ubuntu Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop
4,HP,ProBook,Intel Core i5 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop
...,...,...,...,...,...,...,...,...
411,Lenovo,Ideapad,Intel Core i5 Processor (6th Gen),4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,35.56 cm (14 inch) Display,Laptop
412,Lenovo,Ideapad,Intel Core i7 Processor (8th Gen),8 GB DDR4 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
413,Lenovo,Ideapad,AMD APU Quad Core A6 Processor,4 GB DDR3 RAM,64 bit Windows 10 Operating System,1 TB HDD,39.62 cm (15.6 inch) Display,Laptop
414,Lenovo,Yoga,Intel Pentium Quad Core Processor (4th Gen),4 GB DDR3 RAM,DOS Operating System,500 GB HDD,39.62 cm (15.6 inch) Display,Laptop


In [21]:
# trimming some names to make it look neat
data['RAM'] = data['RAM'].map(lambda x: x.rstrip('RAM'))
data['OS'] = data['OS'].map(lambda x: x.rstrip('Operating System'))
data['Dim'] = data['Dim'].map(lambda x: x.rstrip('Display'))

data.head()

Unnamed: 0,Brand,Model,Processor,RAM,OS,Disk,Dim,Category
0,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4,64 bit Windows 10,1 TB HDD,39.62 cm (15.6 inch),Laptop
1,Lenovo,Ideapad,Intel Core i3 Processor (7th Gen),4 GB DDR4,64 bit Windows 10,1 TB HDD,39.62 cm (15.6 inch),Laptop
2,HP,Pavilion,Intel Core i3 Processor (7th Gen),8 GB DDR4,64 bit Windows 10,256 GB SSD,35.56 cm (14 inch),Laptop
3,Dell,Vostro,Intel Core i3 Processor (8th Gen),4 GB DDR4,Linux/Ubuntu,1 TB HDD,35.56 cm (14 inch),Laptop
4,HP,ProBook,Intel Core i5 Processor (8th Gen),8 GB DDR4,64 bit Windows 10,1 TB HDD,35.56 cm (14 inch),Laptop


In [22]:
# writing into a csv file
data.to_csv("cleanedfiles/final_laptops.csv")