### 1. Importing Libraries

In [1]:
import pandas as pd
import numpy as np

In [2]:
## Load the scraped monitor catalogue. Column names are supplied explicitly via `names`,
## so pandas treats the first line of the file as data rather than as a header.
col_names = [
    'ProductName',
    'DisplayType',
    'Dim',
    'RespTime',
    'HDMI',
    'Category',
]
data = pd.read_csv(
    "csvfiles/catalogue_monitor1.csv",
    names=col_names,
    index_col=False,
)

In [3]:
## Preview the first five rows of the scraped data
data.head(n=5)

Unnamed: 0,ProductName,DisplayType,Dim,RespTime,HDMI,Category
0,Lenovo 27 inch Full HD Monitor (D27-20),Display Type: LED,68.58 cm (27 inch) Full HD Display,Response Time: 4 ms | VGA,HDMI,Monitor
1,HP 23.8 inch Full HD LED Backlit IPS Panel Mon...,Display Type: LED,60.45 cm (23.8 inch) Full HD Display,Response Time: 5 ms | VGA,HDMI Ports - 1,Monitor
2,HP 21.5 inch Full HD LED Backlit IPS Panel Mon...,Display Type: LED Backlit,54.61 cm (21.5 inch) Full HD Display,Response Time: 14 ms | VGA,HDMI Ports - 1,Monitor
3,Samsung 21.5 inch Full HD LED Backlit Monitor ...,Display Type: backlit led,54.61 cm (21.5 inch) Full HD Display,Response Time: 4 ms | VGA,HDMI,Monitor
4,LG 18.5 inch HD LED Backlit Monitor (19M38HB -...,Display Type: LED Backlit,46.99 cm (18.5 inch) HD Display,Response Time: 5 ms | VGA,HDMI Ports - 1,Monitor


### 2. Identifying Brand and Model from data

In [4]:
## The scraped ProductName packs brand, model and features into a single string
prodName = data['ProductName']
prodName

0                Lenovo 27 inch Full HD Monitor (D27-20)
1      HP 23.8 inch Full HD LED Backlit IPS Panel Mon...
2      HP 21.5 inch Full HD LED Backlit IPS Panel Mon...
3      Samsung 21.5 inch Full HD LED Backlit Monitor ...
4      LG 18.5 inch HD LED Backlit Monitor (19M38HB -...
                             ...                        
233               Dell 22 inch Full HD Monitor (E2218HN)
234    LG 22 inch Full HD LED Backlit IPS Panel Monit...
235    MarQ by Flipkart 23.8 inch Full HD LED Backlit...
236    Philips 27 inch Full HD LED Backlit IPS Panel ...
237           Lenovo 23.8 inch Full HD Monitor (Q24i-10)
Name: ProductName, Length: 238, dtype: object

In [5]:
## Separate the brand (the first space-delimited token) from the rest of the product name.
## `n` is passed by keyword: pandas 2.0 made every argument of `str.split` after `pat`
## keyword-only, so the positional form `split(' ', 1)` raises TypeError there.
## NOTE: multi-word brands are cut to their first word ("MarQ by Flipkart" -> "MarQ").
brand = prodName.str.split(' ', n=1)
brand

0             [Lenovo, 27 inch Full HD Monitor (D27-20)]
1      [HP, 23.8 inch Full HD LED Backlit IPS Panel M...
2      [HP, 21.5 inch Full HD LED Backlit IPS Panel M...
3      [Samsung, 21.5 inch Full HD LED Backlit Monito...
4      [LG, 18.5 inch HD LED Backlit Monitor (19M38HB...
                             ...                        
233            [Dell, 22 inch Full HD Monitor (E2218HN)]
234    [LG, 22 inch Full HD LED Backlit IPS Panel Mon...
235    [MarQ, by Flipkart 23.8 inch Full HD LED Backl...
236    [Philips, 27 inch Full HD LED Backlit IPS Pane...
237        [Lenovo, 23.8 inch Full HD Monitor (Q24i-10)]
Name: ProductName, Length: 238, dtype: object

In [6]:
## Build the Brand series by taking the first piece of every split product name
def _first_token(tokens):
    """Return the leading element of one split product name."""
    return tokens[0]

brands = brand.map(_first_token)
brands

0       Lenovo
1           HP
2           HP
3      Samsung
4           LG
        ...   
233       Dell
234         LG
235       MarQ
236    Philips
237     Lenovo
Name: ProductName, Length: 238, dtype: object

In [7]:
## Swap the raw ProductName column for the extracted Brand, placed as the first column.
## (Only the brand is inserted; no separate model column is created here.)
data = data.drop(columns=['ProductName'])
data.insert(loc=0, column="Brand", value=brands, allow_duplicates=True)
data.head(n=5)

Unnamed: 0,Brand,DisplayType,Dim,RespTime,HDMI,Category
0,Lenovo,Display Type: LED,68.58 cm (27 inch) Full HD Display,Response Time: 4 ms | VGA,HDMI,Monitor
1,HP,Display Type: LED,60.45 cm (23.8 inch) Full HD Display,Response Time: 5 ms | VGA,HDMI Ports - 1,Monitor
2,HP,Display Type: LED Backlit,54.61 cm (21.5 inch) Full HD Display,Response Time: 14 ms | VGA,HDMI Ports - 1,Monitor
3,Samsung,Display Type: backlit led,54.61 cm (21.5 inch) Full HD Display,Response Time: 4 ms | VGA,HDMI,Monitor
4,LG,Display Type: LED Backlit,46.99 cm (18.5 inch) HD Display,Response Time: 5 ms | VGA,HDMI Ports - 1,Monitor


### 3. Standardizing and Cleaning data in columns

In [8]:
## Strip the scraped label text from the DisplayType, RespTime and Dim columns.
## Anchored regexes remove the literal labels only. str.lstrip/str.rstrip take a *set of
## characters*, not a prefix/suffix, so lstrip('Display Type:') would also eat the start of
## the value itself (e.g. "TN Panel" -> "N Panel", "ips" -> "").
## The .str accessor also passes missing values through instead of raising on them.
data['DisplayType'] = data['DisplayType'].str.replace(r'^\s*Display Type:\s*', '', regex=True)
data['RespTime'] = (
    data['RespTime']
    .str.replace(r'^\s*Response Time:\s*', '', regex=True)   # leading label
    .str.replace(r'\s*\|\s*VGA\s*$', '', regex=True)         # trailing "| VGA"
)
## Also drops the space left before "Display", so values end in e.g. "Full HD" with no trailing blank
data['Dim'] = data['Dim'].str.replace(r'\s*Display\s*$', '', regex=True)

data.head()

Unnamed: 0,Brand,DisplayType,Dim,RespTime,HDMI,Category
0,Lenovo,LED,68.58 cm (27 inch) Full HD,4 ms,HDMI,Monitor
1,HP,LED,60.45 cm (23.8 inch) Full HD,5 ms,HDMI Ports - 1,Monitor
2,HP,LED Backlit,54.61 cm (21.5 inch) Full HD,14 ms,HDMI Ports - 1,Monitor
3,Samsung,backlit led,54.61 cm (21.5 inch) Full HD,4 ms,HDMI,Monitor
4,LG,LED Backlit,46.99 cm (18.5 inch) HD,5 ms,HDMI Ports - 1,Monitor


In [9]:
## Persist the cleaned monitor table; the row index is written too (to_csv default)
data.to_csv(path_or_buf="cleanedfiles/final_monitors.csv")