In [1]:
import pandas as pd
import numpy as np
from numpy import nan
import json
import re


In [2]:
pd.set_option('display.max_colwidth', None)

In [3]:
filename = '/Users/rupalgandhre/SpringBoard/DataScience_Capstone2/data/raw/effy_all_jewelry_cat.csv'
raw_data = pd.read_csv(filename )


In [None]:
raw_data.info()

In [None]:
raw_data.columns

In [None]:
raw_data.head(1)

## Explore The Data

In [4]:
#Count the number of missing values in each column and sort them.
missing = pd.concat([raw_data.isnull().sum(), 100 * raw_data.isnull().mean()], axis=1)
missing.columns=['count', '%']
missing.sort_values(by='count', ascending=True).head(20)


Unnamed: 0,count,%
Description,0,0.0
Sex,0,0.0
Jewelry_Type,0,0.0
Metal,2,0.080939
Discount_Price,13,0.526103
Stones,17,0.687981
Price,40,1.618778
Metal Color,221,8.943747
Top Width,2094,84.743019
Length,2109,85.350061


In [None]:
raw_data.select_dtypes('object')

In [5]:
#Check if there are duplicate products
raw_data['Description'].value_counts().head(20)

Effy Pave Classica 14K White Gold Diamond Ring, 0.32 TCW                      3
Effy Blush 14K Rose Gold Morganite and Diamond Ring, 2.64 TCW                 3
Effy 14K Yellow Gold Turquoise and Diamond Ring, 0.39 TCW                     2
Effy Royale Bleu 14K White Gold Sapphire and Diamond Ring, 2.04 TCW           2
Effy Ruby Royale 14K Yellow Gold Ruby Stud Earrings, 1.14 TCW                 2
Effy Pave Classica 14K White Gold Diamond Pendant, 0.32 TCW                   2
Effy 14K Yellow Gold Cultured Fresh Water Pearl Earrings                      2
Effy 14K Yellow Gold Cultured Fresh Water Pearl Necklace                      2
Effy 925 Sterling Silver & 18K Yellow Gold Cultured Fresh Water Pearl Ring    2
Effy Pave Classica 14K White Gold Diamond Ring, 0.59 TCW                      2
Effy Seaside 14K White Gold Blue Sapphire & Diamond Starfish Pendant          2
Effy Ruby Royale 14K Yellow Gold Ruby and Diamond Ring, 1.36 TCW              2
Effy Trio 14K Tri Color Gold Diamond Rin

In [6]:
raw_data.loc[raw_data['Description'] =='Effy Pave Classica 14K White Gold Diamond Ring, 0.32 TCW'][['Description','Discount_Price', 'Price','Metal','Metal Color','Stones']]


Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones
598,"Effy Pave Classica 14K White Gold Diamond Ring, 0.32 TCW",1326.5,1895.0,14K Gold,White,Multi Shape Diamond 0.32
915,"Effy Pave Classica 14K White Gold Diamond Ring, 0.32 TCW",1125.0,2250.0,14K Gold,White,"Round Diamond 0.32,"
921,"Effy Pave Classica 14K White Gold Diamond Ring, 0.32 TCW",1547.5,3095.0,14K Gold,White,"Round Diamond 0.32,"


In [7]:
raw_data.loc[raw_data['Description'] == 'Effy Blush 14K Rose Gold Morganite and Diamond Ring, 2.64 TCW'][['Description','Discount_Price', 'Price','Metal','Metal Color','Stones']]



Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones
617,"Effy Blush 14K Rose Gold Morganite and Diamond Ring, 2.64 TCW",1676.5,2395.0,14K Gold,Rose,"Emerald Cut Morganite 2.55, Round Diamond 0.09,"
704,"Effy Blush 14K Rose Gold Morganite and Diamond Ring, 2.64 TCW",1956.5,2795.0,14K Gold,Rose,"Oval Morganite 2.35, Round Diamond 0.29"
1495,"Effy Blush 14K Rose Gold Morganite and Diamond Ring, 2.64 TCW",1347.5,2695.0,14K Gold,Rose,"Oval Morganite 2.33, Round Cognac Diamond 0.17, Round Diamond 0.14,"


## Observation from Price being null: 
If Discount_Price and Price both are NaN --> Product is sold out --> These records are dropped 
If only Price is Nan --> Discount_Price is final price and product is not on sale --> The 'Price' for this product is updated with 'Discount_Price'

In [8]:
#Check for records with null price
raw_data.loc[raw_data['Price'].isnull() | raw_data['Discount_Price'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
13,"Effy Pave Classica 14K White Gold Diamond 2"" Hoop Earrings, 2.65 TCW",,,14K Gold,White,"Round Diamond 2.65,",,,,,...,,,,,,,,,,
57,"Effy Ruby Royale 14K Rose Gold Ruby and Diamond Hoop Earrings, 0.55 TCW",,,14K Gold,Rose,"Round Ruby 0.31, Round Diamond 0.24","5/8""",,,,...,,,,,,,,,,
159,"Effy 925 Sterling Silver Pink Sapphire Splash Small Hoop Earrings, 1.80 TCW",295.0,,Sterling Silver,,Round Pink Sapphire 1.8,,,,"5/8""",...,,,,,,,,,,
262,"Effy 925 Sterling Silver Blue Sapphire Splash 1/2"" Hoop Earrings, 1.77 TCW",295.0,,Sterling Silver,,Round Blue Sapphire 1.77,,,,"5/8""",...,,,,,,,,,,
338,"Effy 14K White Gold Diamond Stud Earring, 2.00 TCW",6995.0,,14K Gold,White,Round Diamond 2,,,,,...,,,,,,,,,,
433,"Effy D'Oro 14K Yellow Gold Diamond Flower Ring, 1.61 TCW",,,14K Gold,Yellow,Round Diamond 1.61,,,,,...,,,,,,,,,,
471,"Effy 14K White Gold Diamond Band, 0.58 TCW",1395.0,,14K Gold,White,"Round Diamond 0.58,",,,,,...,,,,,,,,,,
481,"Effy Bridal 14K White Gold Diamond Solitaire Ring, 0.20 TCW",850.0,,14K Gold,White,Round Diamond 0.2,,,,,...,,,,,,,,,,
506,"Effy Pave Classica 14K White Gold Diamond Ring, 2.18 TCW",,,14K Gold,White,Round Diamond 2.18,,,,,...,,,,,,,,,,
521,"Effy Pave Classica 14K White Gold Diamond Ring, 1.42 TCW",6250.0,,14K Gold,White,"Round Diamond 0.77, Baguette Diamond 0.65,",,,,,...,,,,,,,,,,


In [9]:
#If both the prices are NaN then the product is sold-out. Remove these records
missing_price = raw_data[['Price', 'Discount_Price']].isnull().sum(axis=1)
missing_price.value_counts()/len(missing_price) * 100

0    98.381222
1     1.092675
2     0.526103
dtype: float64

In [10]:
raw_data = raw_data[missing_price != 2]

In [11]:
raw_data.loc[raw_data['Price'].isnull() & raw_data['Discount_Price'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"


In [12]:
#If Price is NaN then the product is not on sale. Update these records with 'Discount_Price'
raw_data.loc[raw_data['Price'].isnull(), 'Price'] = raw_data['Discount_Price']

In [13]:
raw_data.loc[raw_data['Description']=='Effy 14K White Gold Diamond Stud Earring, 2.00 TCW']

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
338,"Effy 14K White Gold Diamond Stud Earring, 2.00 TCW",6995.0,6995.0,14K Gold,White,Round Diamond 2,,,,,...,,,,,,,,,,


## Observation on Metal Color being null
All Sterling Silver products have Metal Color as Nan --> The Metal Color for this products is replaced with their respective Metal

If 'Metal' and 'Metal Color' both are Nan --> This products are made of Pearls

In [19]:
raw_data.loc[raw_data['Metal Color'].isnull() & raw_data['Metal'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
2414,"Effy Cultured Fresh Water Pearl 40"" Necklace",245.0,350.0,,,Round Pearl,,,,,...,,,,,,,,,,
2440,Effy Multi Color Cultured Fresh Water Pearl Necklace,206.5,295.0,,,Round Pearl,"36""",,,,...,,,,,,,,,,


In [21]:
raw_data.loc[raw_data['Metal Color'].isnull() & raw_data['Metal'].isnull(), ['Metal', 'Metal Color']] =raw_data['Stones']

In [22]:
raw_data.loc[raw_data['Description'] == 'Effy Cultured Fresh Water Pearl 40" Necklace']

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
2414,"Effy Cultured Fresh Water Pearl 40"" Necklace",245.0,350.0,Round Pearl,Round Pearl,Round Pearl,,,,,...,,,,,,,,,,


In [23]:
 raw_data.loc[raw_data['Metal Color'].isnull(), 'Metal'].unique()

array(['Sterling Silver', 'Sterling Silver & 18K Gold',
       'Stainless Steel & 18K Gold', '14K Gold'], dtype=object)

In [25]:
raw_data.loc[raw_data['Metal Color'].isnull() | raw_data['Metal'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
15,"Effy Splash Sterling Silver Pink Sapphire Circle Earrings, 3.60 TCW",665.0,950.0,Sterling Silver,,Round Pink Sapphire 3.6,,,,"1/2""",...,,,,,,,,,,
16,"Effy Splash Sterling Silver Multi Color Sapphire Square Earrings, 3.75 TCW",836.5,1195.0,Sterling Silver,,Round Multi Color 3.75,,,,,...,,,,,,,,,,
25,"Effy Tanzanite Royale Sterling Silver Tanzanite Stud Earrings, 1.52 TCW",367.5,525.0,Sterling Silver,,Trillion Tanzanite 1.52,"1/4""",,,,...,,,,,,,,,,
30,"Effy Splash Silver Blue Sapphire 1"" Hoop Earrings, 3.97 TCW",696.5,995.0,Sterling Silver,,"Round Blue Sapphire 3.97,",,,,,...,,,,,,,,,,
36,Effy 925 Sterling Silver & 18K Yellow Gold Cultured Fresh Water Pearl Stud Earrings,332.5,475.0,Sterling Silver & 18K Gold,,Round Pearl,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2433,"Effy Sterling Silver and 18K Yellow Gold Round Onyx Pendant, 36.50 TCW",547.5,1095.0,Sterling Silver & 18K Gold,,Round Onyx 36.5,,,,,...,,,,,,"1.75""",,,,
2436,"Effy 925 Sterling Silver Blue Sapphire Double Triangle Necklace, 1.30 TCW",287.5,575.0,Sterling Silver,,Round Blue Sapphire 1.3,"18""",,,,...,,,,,,,,,,
2441,"Effy 925 Sterling Silver Sapphire and Diamond Necklace, 0.66 TCW",357.5,715.0,Sterling Silver,,"Round Blue Sapphire 0.57, Round Diamond 0.09","19""",,,,...,,,,,,,,,,
2443,"Effy Seaside Sterling Silver Multi Gemstone Crab Pendant, 1.75 TCW",402.5,575.0,Sterling Silver,,"Oval Amethyst 1.1, Round Pink Sapphire 0.65",,,,,...,,,,,,,,,,


In [26]:
 raw_data.loc[raw_data['Metal Color'].isnull(), 'Metal Color'] = raw_data['Metal']

In [28]:
raw_data.loc[raw_data['Metal Color'].isnull() | raw_data['Metal'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"


In [29]:
raw_data.loc[raw_data['Description'] == 'Effy Splash Sterling Silver Pink Sapphire Circle Earrings, 3.60 TCW' ]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
15,"Effy Splash Sterling Silver Pink Sapphire Circle Earrings, 3.60 TCW",665.0,950.0,Sterling Silver,Sterling Silver,Round Pink Sapphire 3.6,,,,"1/2""",...,,,,,,,,,,


## Observation on Stones
The 'Stones' value is Nan, if there are no stones in the products -- The 'Stones' column is updated with 'No Stones'

In [33]:
raw_data.loc[raw_data['Stones'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
135,Effy 925 Classic Sterling Silver and 18K Yellow Gold Earrings,486.5,695.0,Sterling Silver & 18K Gold,Yellow,,"3/4""",,,,...,,,,,,,,,,
153,Effy 925 Sterling Silver & 18K Gold Cultured Pearl Earrings,332.5,475.0,Sterling Silver,Sterling Silver,,,,,,...,,,,,,,,,,
263,Effy 925 Sterling Silver and 18K Yellow Gold Earrings,346.5,495.0,Sterling Silver & 18K Gold,Yellow,,"3/4""",,,,...,,,,,,,,,,
373,Effy 925 Sterling Silver and 18K Yellow Gold Accented Hoop Earrings,385.0,550.0,Sterling Silver & 18K Gold,Yellow,,,,,"1.25""",...,,,,,,,,,,
905,Effy 925 Classic Sterling Silver and 18K Yellow Gold Ring,455.0,650.0,Sterling Silver & 18K Gold,Yellow,,,,,,...,,,,,,,,,,
1307,Effy Seaside Sterling Silver and 18K Gold Turtle Ring,385.0,550.0,Sterling Silver & 18K Gold,Sterling Silver & 18K Gold,,,,,,...,,,,,,,,,,
1430,Effy 925 Sterling Silver and 18K Gold Ring,206.5,295.0,Sterling Silver & 18K Gold,Yellow,,,,,,...,,,,,,,,,,
1489,Effy 925 Sterling Silver and 18K Yellow Gold Accented Ring,242.5,485.0,Sterling Silver & 18K Gold,Yellow,,,,,,...,,,,,,,,,,
1613,Effy Seaside Sterling Silver & 18K Gold Anchor Bangle,385.0,550.0,Sterling Silver & 18K Gold,Sterling Silver & 18K Gold,,,,,,...,,,,,,,,,,
1745,Effy 925 Sterling Silver and 18K Gold Bangle,626.5,895.0,Sterling Silver & 18K Gold,Yellow,,,,,,...,,,,,,,,,,


In [35]:
raw_data.loc[raw_data['Stones'].isnull(), 'Stones'] = 'No Stones'

In [36]:
raw_data.loc[raw_data['Description'] == 'Effy 925 Sterling Silver and 18K Yellow Gold Accented Hoop Earrings']

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"
373,Effy 925 Sterling Silver and 18K Yellow Gold Accented Hoop Earrings,385.0,550.0,Sterling Silver & 18K Gold,Yellow,No Stones,,,,"1.25""",...,,,,,,,,,,


In [37]:
raw_data.loc[raw_data['Stones'].isnull()]

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Length,Hoop Size,"Height 3/8"" x Length",Diameter,...,"18"" Chain Included. Heart Length","18"" Chain Included. Size of Heart","16"" - 18"" Adjustable Chain. Butterfly Size",Necklace Length,"16""-18"" Adjustable Chain Included. Pendant Legnth","17"" Chain Included. Pendant Length","17"" Attached Chain. Pendant Length","4 Strand 18"" Necklace. Pearl Sizes",Pendant Length,"25"" Gemstone"


## Observation on Gemstones column
Only 9 rows have additional information on Gemstones column. 'Gemstones' column can be ignored from the dataframe

In [41]:
raw_data.loc[raw_data['Gemstones'].notnull()][['Description','Discount_Price', 'Price','Metal','Metal Color','Stones','Gemstones']]


Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones,Gemstones
89,"Effy Mosaic Sterling Silver & 18K Gold Multi Gemstone Earrings, 2.75 TCW",455.0,650.0,Sterling Silver & 18K Gold,Yellow,Multi Shape Multi Color 2.75,"Amethyst, Blue Topaz, Citrine, Garnet, Pink Tourmaline, Peridot"
382,"Effy Mosaic 14K Yellow Gold Multi Gemstone Hoop Earrings, 4.05 TCW",945.0,1350.0,14K Gold,Yellow,Oval Multi Color 4.05,"Amethyst, Blue Topaz, Citrine & Peridot"
513,"Effy Watercolors 14K White Gold Multi Gemstone and Diamond Ring, 2.45 TCW",1886.5,2695.0,14K Gold,White,"Multi Shape Multi Color 2.29, Round Diamond 0.16","Amethyst, Sapphire and Tsavorite"
514,"Effy Watercolors 14K White Gold Multi Gemstone and Diamond Ring, 3.52 TCW",2586.5,3695.0,14K Gold,White,"Multi Shape Multi Color 3.38, Round Diamond 0.14","Amethyst, Sapphire and Tsavorite"
825,"Effy Mosaic 14K Yellow Gold Multi Gemstone Ring, 1.90 TCW",556.5,795.0,14K Gold,Yellow,Oval Multi Color 1.9,"Amethyst, Blue Topaz, Citrine & Peridot"
1304,"Effy Mosaic 14K Yellow Gold Multi Gemstone Ring, 1.40 TCW",945.0,1350.0,14K Gold,Yellow,Round Multi Color 1.4,"Amethyst, Blue Topaz, Citrine, Peridot & Garnet."
1448,"Effy Seaside Sterling Silver Multi Gemstone Starfish Ring, 5.66 TCW",497.5,995.0,Sterling Silver & 18K Gold,Yellow,Multi Shape Multi Color 5.66,"Amethyst, Blue Topaz, Citrine, Rhodolite, Peridot"
1891,"Effy Seaside Sterling Silver Multi Gemstone Starfish Pendant, 4.50 TCW",556.5,795.0,Sterling Silver & 18K Gold,Yellow,Multi Shape Multi Color 4.5,"Amethyst, Blue Topaz, Citrine, Rhodolite, Peridot"
2291,"Effy 14K Yellow Gold Multi Gemstone Station Necklace, 28.93 TCW",2695.0,3850.0,14K Gold,Yellow,Round Multi Color 28.93,"Blue Topaz, Citrine, Garnet and Peridot"


## Creating new dataframe

In [63]:
new_raw_data = pd.DataFrame(raw_data[['Description','Discount_Price', 'Price','Metal','Metal Color','Stones']])

In [44]:
new_raw_data.head()

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones
0,"Effy Brasilica 14K Yellow Gold Emerald and Diamond Earrings, 4.62 TCW",5596.5,7995.0,14K Gold,Yellow,"Multi Shape Emerald 4.01, Round Diamond 0.61"
1,"Effy Pave Classica 14K White Gold Diamond Hoop Earrings, 0.98 TCW",2796.5,3995.0,14K Gold,White,"Round Diamond 0.98,"
2,"Effy Brasilica 14K White Gold Emerald and Diamond Drop Earrings, 4.04 TCW",6996.5,9995.0,14K Gold,Yellow,"Round Diamond 1.38, Pear Emerald 2.66,"
3,"Effy Canare 14K Yellow Gold Yellow Diamond Hoop Earrings, 1.15 TCW",3955.0,5650.0,14K Gold,Yellow,Round Diamond 1.15
4,"Effy Tanzanite Royale 14K Gold Tanzanite and Diamond Earrings, 2.90 TCW",2905.0,4150.0,14K Gold,White,"Multi Shape Tanzanite 2.6, Round Diamond 0.3"


In [64]:
type(new_raw_data)

pandas.core.frame.DataFrame

In [88]:
len(new_raw_data)

2458

## Extract the product weight from the Description

In [72]:
product_description = new_raw_data['Description']

In [155]:

desc = []

for product in product_description:
    if 'TCW' in product:
        pass
    else:
        desc.append(product) 


In [151]:
type(product_description)

pandas.core.series.Series

In [156]:
desc

['Effy 925 Sterling Silver & 18K Yellow Gold Cultured Fresh Water Pearl Stud Earrings',
 'Effy 14K Yellow Gold Pearl Dangle Earrings',
 'Effy Watercolors 14K Yellow Gold Sapphire and Diamond Earrings, 7.56 CW',
 'Effy 14K Yellow Gold Cultured Fresh Water Pearl Earrings',
 'Effy 14K Yellow Gold Cultured Fresh Water Pearl Earrings',
 'Effy 925 Classic Sterling Silver and 18K Yellow Gold Earrings',
 'Effy 14K Rose Gold Black, Espresso and White Diamond Earrings',
 'Espresso 14K Rose Gold Cognac & White Diamond Earrings',
 'Effy Watercolors 14K Yellow Gold Multi Sapphire and Diamond Earrings',
 'Effy 925 Sterling Silver & 18K Gold Cultured Pearl Earrings',
 'Effy 14K Yellow Gold Fresh Water Cultured Pearl Earrings',
 'Effy 925 Splash Sapphire Heart Earrings',
 'Effy Espresso 14K Yellow Gold Cognac and White Diamond Earrings',
 'Effy Watercolors 14K Yellow Gold Multi Sapphire & Diamond Earrings, 2.13 TC',
 'Effy Watercolors 14K Yellow Gold Multi Sapphire and Diamond Earrings',
 'Effy 925 St

In [131]:
product_description[0]

'Effy Brasilica 14K Yellow Gold Emerald and Diamond Earrings, 4.62 TCW'

In [108]:
prod_split

['Effy Pave Classica 14K White Gold Diamond Necklace',
 ' 0.53 TCW',
 ['Effy Brasilica 14K Yellow Gold Emerald and Diamond Earrings', ' 4.62 TCW'],
 ['Effy Pave Classica 14K White Gold Diamond Hoop Earrings', ' 0.98 TCW'],
 ['Effy Brasilica 14K White Gold Emerald and Diamond Drop Earrings',
  ' 4.04 TCW'],
 ['Effy Canare 14K Yellow Gold Yellow Diamond Hoop Earrings', ' 1.15 TCW'],
 ['Effy Tanzanite Royale 14K Gold Tanzanite and Diamond Earrings',
  ' 2.90 TCW'],
 ['Effy Signature 14K Yellow Gold Diamond Emerald Panther Earrings',
  ' 0.93 TCW'],
 ['Effy Brasilica 14K White Gold Emerald and Diamond Earrings', ' 2.30 TCW'],
 ['Effy Brasilica 14K Yellow Gold Emerald Stud Earrings', ' 1.62 TCW'],
 ['Effy Canare 18K Two Tone Gold Yellow and White Diamond Earrings',
  ' 0.36 TCW'],
 ['Effy Canare 18K Two-Tone Gold Yellow and White Diamond Earrings',
  ' 0.51 TCW'],
 ['Effy Novelty 14K White Gold Sapphire & Diamond Evil Eye Earrings',
  ' 0.57 TCW'],
 ['Effy Signature 14K Rose Gold Diamond an

In [97]:
for item in prod_split:
    print(item[len(item)-1])

e
W
 4.62 TCW
 0.98 TCW
 4.04 TCW
 1.15 TCW
 2.90 TCW
 0.93 TCW
 2.30 TCW
 1.62 TCW
 0.36 TCW
 0.51 TCW
 0.57 TCW
 1.57 TCW
 0.71 TCW
 0.55 TCW
 3.60 TCW
 3.75 TCW
 0.49 TCW
 6.04 TCW
 6.18 TCW
 2.74 TCW
 2.23 TCW
 2.44 TCW
 1.54 TCW
 2.19 TCW
 1.52 TCW
 0.93 TCW
 0.45 TCW
 0.72 TCW
 0.61 TCW
 3.97 TCW
 0.41 TCW
 1.26 TCW
 0.71 TCW
 0.37 TCW
 1.27 TCW
Effy 925 Sterling Silver & 18K Yellow Gold Cultured Fresh Water Pearl Stud Earrings
 2.22 TCW
 1.27 TCW
 0.87 TCW
 3.18 TCW
 0.93 TCW
 0.50 TCW
 0.50 TCW
 4.28 TCW
 3.26 TCW
 1.08 TCW
 2.63 TCW
 5.78 TCW
 0.05 TCW
 1.19 TCW
 1.19 TCW
 1.57 TCW
 0.48 TCW
 0.49 TCW
 1.06 TCW
 2.13 TCW
 9.25 TCW
 8.64 TCW
 2.06 TCW
 3.92 TCW
 1.38 TCW
 2.28 TCW
 0.47 TCW
 0.39 TCW
 2.58 TCW
 0.61 TCW
 0.73 TCW
 2.34 TCW
 0.94 TCW
 0.49 TCW
 2.72 TCW
 4.80 TCW
Effy 14K Yellow Gold Pearl Dangle Earrings
 0.50 TCW
 0.82 TCW
 0.75 TCW
 22.46 TCW
 2.95 TCW
 0.47 TCW
 1.10 TCW
 7.56 CW
 0.79 TCW
 6.95 TCW
 1.03 TCW
 0.76 TCW
 1.70 TCW
Effy 14K Yellow Gold Cultured

 1.59 TCW
 2.45 TCW
Effy Seaside Sterling Silver and 18K Gold Turtle Ring
 3.62 TCW
 1.75 TCW
 0.73 TCW
 0.56 TCW
 0.57 TCW
 1.28 TCW
 1.50 TCW
 3.50 TCW
 2.53 TCW
 2.29 TCW
 0.84 TCW
 0.67 TCW
 1.93 TCW
 2.11 TCW
 1.03 TCW
 2.13 TCW
 2.04 TCW
 1.72 TCW
 0.41 TCW
 8.52 TCW
 1.00 TCW
 0.84 TCW
 0.47 TCW
 0.34 TCW
 2.02 TCW
 0.44 TCW
 0.57 TCW
 2.03 TCW
 0.52 TCW
 1.79 TCW
 3.10 TCW
 2.44 TCW
 2.35 TCW
 3.49 TCW
 4.76 TCW
 1.95 TCW
 1.34 TCW
 1.84 TCW
 2.22 TCW
 0.91 TCW
 2.28 TCW
 0.84 TCW
 1.83 TCW
 3.58 TCW
 2.72 TCW
 1.08 TCW
 7.03 TCW
 0.61 TCW
 1.00 TCW
 2.12 TCW
 2.00 TCW
 4.25 TCW
 1.66 TCW
 2.34 TCW
 1.76 TCW
 0.26 TCW
 0.79 TCW
 0.75 TCW
 13.65 TCW
 0.31 TCW
 6.57 TCW
 0.54 TCW
 0.35 TCW
 1.06 TCW
 0.33 TCW
 4.60 TCW
 1.28 TCW
 8.02 TCW
 6.95 TW
 7.79 TCW
 0.55 TCW
 8.28 TCW
 0.82 TCW
 1.61 TCW
 0.23 TCW
 7.82 TCW
 0.56 TCW
 2.09 TCW
 0.44 TCW
 1.50 TCW
 2.49 TCW
 3.41 TCW
 0.13 TCW
 12.93
 2.98 TCW
 0.20 TCW
 .07 TCW
 0.14 TCW
 0.74 TCW
 4.70 TCW
 2.07 TCW
 0.94 TCW
 1.47 TCW


 3.04 TCW
 3.60 TCW
 0.71 TCW
 0.51 TCW
 0.09 TCW
 0.21 TCW
 2.54 TCW
 0.12 TCW
 0.23 TCW
 1.55 TCW
 0.47 TCW
 0.58 TCW
 1.73 TWC
 0.59 TCW
 12.94 TCW
 2.20 TCW
 0.02 TCW
Effy 925 Sterling Silver & 18K Yellow Gold Cultured Fresh Water Pearl Pendant
Effy 14K Yellow Gold Pearl Necklace
Effy 14K White Gold Cultured Fresh Water Pearl Necklace
 10.00 TCW
 27.69 TCW
 0.25 TCW
 0.04 TCW
 5.40 TCW
 0.21 TCW
 6.29 TCW
 0.47 TCW
 0.47 TCW
 1.62 TCW
 0.27 TCW
 4.52 TCW
 0.31 TCW
 2.29 TCW
 0.28 TCW
 1.21 TCW
 1.15 TCW
 3.80 TCW
 0.58 TCW
 20.40 TCW
 3.26 TCW
 1.09 TCW
 2.71 TCW
 0.55 TCW
 1.34 TCW
 1.33 TCW
 1.47 TCW
 2.10 TCW
 2.23 TCW
 0.45 TCW
 1.00 TCW
 0.26 TCW
 6.62 TCW
 0.21 TCW
 2.05 TCW
 1.09 TCW
 0.11 TCW
 0.18 TCW
 3.53 TCW
 0.45 TCW
 0.60 TCW
 0.12 TCW
 8.08 TCW
 0.81 TCW
 0.62 TCW
 0.66 TCW
 0.26 TCW
 .36 TCW
 0.24 TCW
 0.18 TCW
 0.42 TCW
 0.45 TCW
 6.52 TCW
 0.65 TCW
 1.20 TCW
 1.37 TCW
 0.89 TCW
 1.41 TCW
 4.52 TCW
 2.05 TCW
 1.04 TCW
 0.35 TCW
 7.92 TCW
 0.42 TCW
 1.08 TCW
 .5 TCW

 0.31 TCW
 0.92 TCW
 0.23 TCW
 0.62 TCW
 1.91 TCW
 1.55 TCW
 2.75 TCW
 0.64 TCW
 0.73 TCW
 0.64 TCW
 1.59 TCW
 0.37 TCW
 1.27 TCW
 1.95 TCW
 9.65 TCW
 1.35 TCW
 0.82 TCW
 1.64 TCW
 2.21 TCW
 0.35 TCW
 2.17 TCW
 1.75 TCW
 0.74 TCW
 0.01 TCW
 1.22 TCW
 0.63 TCW
 1.63 TCW
 1.22 TCW
 3.63 TCW
 0.39 TCW
 1.57 TCW
 0.39 TCW
 1.14 TCW
 2.16
 1.12 TCW
 2.16 TCW
 1.31 TCW
 1.42 TCW
 2.98 TCW
 2.39 TCW
 2.55 TCW
 1.54 TCW
 0.78 TCW
 2.25 TCW
 1.43 TCW
 4.30 TCW
 2.04 TCW
 4.97 TCW
 2.10 TCW
 3.68 TCW
 6.49 TCW
 3.43 TCW
 1.48 TCW
 0.49 TCW
 1.34 TCW
 2.64 TCW
 1.49 TCW
 3.74 TCW
 0.34 TCW
 1.48 TCW
 0.56 TCW
 1.95 TCW
Effy 925 Classic Sterling Silver and 18K Yellow Gold Ring
 0.18 TCW
 0.41 TCW
 1.33 TCW
 0.44 TCW
 0.70 TCW
 0.77 TCW
 1.10 TCW
 1.21 TCW
 5.42 TCW
 0.32 TCW
 1.75 TCW
 0.47 TCW
 2.92 TCW
 0.46 TCW
 0.11 TCW
 0.32 TCW
 0.97 TCW
 0.39 TCW
 1.17 TCW
 1.66 TCW
 2.01 TCW
 5.05 TCW
 2.48 TCW
 0.46 TCW
 0.46 TCW
 1.66 TCW
 2.00 TCW
 1.00 TCW
 2.20 TCW
 0.67 TCW
 1.85 TCW
 2.54 TCW
 2.27 

 0.82 TCW
 0.46 TCW
 13.51 TCW
 0.53 TCW
 9.46 TCW
 0.15 TCW
 1.28 TCW
 2.70 TCW
 0.73 TCW
 0.24 TCW
 1.79 TCW
 1.73 TCW
 1.49 TCW
 2.68 TCW
 1.29 TCW
 0.65 TCW
 4.18 TCW
 0.47 TCW
 0.04 TCW
 3.62 TCW
 0.54 TCW
 2.09 TCW
 0.62 TCW
 0.50 TCW
 0.47 TCW
 0.47 TCW
 0.43 TCW
 3.66 TCW
 5.33 TCW
 1.62 TWC
 0.22 TCW
 0.19 TCW
Effy 14K Yellow Gold Pearl Tassel Necklace
 2.24 TCW
 1.90 TCW
 0.49 TCW
 0.41 TCW
 0.09 TCW
 0.29 TCW
 1.27 TCW
 1.56 TCW
 7.73 TCW
 2.21 TCW
 0.19 TCW
 4.50 TCW
Effy Watercolors 14K Yellow Gold Multi Sapphire and Diamond Necklace
 2.52 TCW
 1.24 TCW
 0.87 TCW
 1.57 TCW
 0.45 TCW
 0.18 TCW
 0.44 TCW
Effy Nature 14K Yellow Gold Cognac and White Diamond Butterfly Pendant
 2.94 TCW
 0.86 TCW
 3.96 TCW
 1.62 TCW
Effy Seaside 14K White Gold Sapphire and Diamond Whale's Tail Pendant
 1.71 TCW
 0.15 TCW
 0.14 TCW
 1.37 TCW
 1.46 TCW
 0.27 TCW
 0.28 TCW
 3.86 TCW
 3.85 TCW
 Citrine & Diamond Pendant
 0.95 TCW
 0.48 TCW
 1.48 TCW
 1.68 TCW
 2.43 TCW
 0.47 TCW
 9.60 TCW
 2.13 TCW

 0.41 TCW
 4.52 TCW
 1.09 TCW
 2.43 TCW
 1.21 TCW
 0.99 TCW
 0.50 TCW
 1.64 TCW
 0.53 TCW
 1.12 TCW
 0.39 TCW
 0.53 TCW
 1.16 TCW
 0.96 TCW
 0.98 TCW
 0.20 TCW
 1.90 TCW
 1.91 TCW
 2.54 TCW
 0.53 TCW
 2.45 TCW
 3.52 TCW
 0.81 TCW
 1.31 TCW
 3.46 TCW
 0.05 TCW
 1.48 TCW
 0.98 TCW
 1.42 TCW
 0.51 TCW
 0.26 TCW
 0.12 TCW
 1.69 TCW
 3.14 TCW
 0.18 TCW
 0.18 TCW
 0.81 TCW
 0.95 TCW
 4.00 TCW
 3.00 TCW
 2.00 TCW
 1.00 TCW
 0.50 TCW
 4.00 TCW
 3.00 TCW
 2.00 TCW
 1.00 TCW
 0.50 TCW
 0.56 TCW
 0.66 TCW
 0.66 TCW
 2.47 TCW
 0.41 TCW
 0.12 TCW
 0.23 TCW
 0.42 TCW
 0.29 TCW
 0.93 TCW
 0.70 TCW
 0.96 TCW
 0.41 TCW
 1.61 TCW
 3.78 TCW
 1.14 TCW
 0.37 TCW
 0.74 TCW
 4.38 TCW
 2.65 TCW
 1.18 TCW
 2.32 TCW
 0.05 TCW
 0.07
 0.25 TCW
 0.59 TCW
 1.73 TCW
 1.45 TCW
 2.55 TCW
 1.95 TCW
 0.36 TCW
 3.00 TCW
 0.53 TCW
 0.18 TCW
 0.03 TCW
 0.24 TCW
 1.76 TCW
 0.38 TCW
Effy Limited Edition 14K Rose Gold Cognac and White Diamond Butterfly Ring
 1.12 TCW
 0.65 TCW
 0.95 TCW
 0.88 TCW
 1.15 TCW
 0.62 TCW
 0.57 TCW

 0.44 TCW
 1.50 TCW
 2.49 TCW
 3.41 TCW
 0.13 TCW
 12.93
 2.98 TCW
 0.20 TCW
 .07 TCW
 0.14 TCW
 0.74 TCW
 4.70 TCW
 2.07 TCW
 0.94 TCW
 1.47 TCW
 3.96 TCW
 0.71 TCW
 1.89 TCW
 1.21 TCW
 0.73 TCW
 0.54 TCW
 1.64 TCW
 1.48 TCW
 0.65 TCW
 4.01 TCW
 0.40 TCW
 13.85 TCW
 0.45 TCW
 0.59 TCW
 3.95 TCW
 2.95 TCW
 1.29 TCW
 1.15 TCW
 1.05 TCW
 1.63 TCW
 0.15 TCW
 1.72 TCW
 1.02 TCW
 0.47 TCW
 1.45 TCW
 6.00 TCW
 3.48 TCW
 1.22 TCW
Effy 925 Sterling Silver and 18K Gold Ring
 2.32 TCW
 1.14 TCW
 3.07 TCW
 3.43 TCW
 10.04 TCW
 1.40 TCW
 0.86 TCW
 2.64 TCW
 0.40 TCW
 1.36 TCW
 3.26 TCW
 3.91 TCW
 0.26 TCW
 1.03 TCW
 0.77 TCW
 1.94 TCW
 1.01 TCW
 5.66 TCW
 2.30 TCW
 3.33 TCW
 0.68 TCW
 1.70 TCW
 0.99 TCW
 0.08 TCW
 0.82 TCW
 0.05 TCW
 2.47
 1.12 TCW
 1.55 TCW
 1.43 TCW
 1.43 TCW
 2.51 TCW
 3.53 TCW
 13.35 TCW
 1.57 TCW
 1.19 TCW
Effy 925 Sterling Silver and 18K Gold Fresh Water Pearl Ring
 2.41 TCW
 5.80 TCW
 2.86 TCW
 1.21 TCW
 0.51 RCW
 0.75 TCW
 1.25 TCW
 0.42 TCW
 0.47 TCW
 2.24 TCW
 4.05 TCW
 

In [107]:
new_raw_data.loc[new_raw_data['Description'] == 'Effy Seaside 14K White Gold Sapphire & Diamond Starfish Ring']

Unnamed: 0,Description,Discount_Price,Price,Metal,Metal Color,Stones
1208,Effy Seaside 14K White Gold Sapphire & Diamond Starfish Ring,2497.5,4995.0,14K Gold,White,"Round Blue Sapphire 2.71, Round Diamond 0.24"


In [103]:
Stones = new_raw_data['Stones']
for stone in Stones:
    stone_split.append(stone.split(','))

In [104]:
stone_split

['Round Diamond 0.53',
 ['Multi Shape Emerald 4.01', ' Round Diamond 0.61'],
 ['Round Diamond 0.98', ''],
 ['Round Diamond 1.38', ' Pear Emerald 2.66', ''],
 ['Round Diamond 1.15'],
 ['Multi Shape Tanzanite 2.6', ' Round Diamond 0.3'],
 ['Round Diamond 0.91', ' Round Emerald 0.02', ''],
 ['Emerald Cut Emerald 1.9', ' Round Diamond 0.4'],
 ['Round Emerald 1.62', ''],
 ['Pear Yellow Diamond 0.26', ' Round Diamond 0.1'],
 ['Multi Shape Yellow Diamond 0.51', ''],
 ['Round Blue Sapphire 0.25', ' Round Diamond 0.32'],
 ['Round Diamond', ' Round Black Diamond ', ' Round Emerald 0.1'],
 ['Round Blue Diamond 0.6', ' Round Diamond 0.11'],
 ['Round Blue Diamond 0.55'],
 ['Round Pink Sapphire 3.6'],
 ['Round Multi Color 3.75'],
 ['Round Diamond 0.49'],
 ['Emerald Cut Garnet 5.85', ' Round Diamond 0.19'],
 ['Oval Multi Sapphire 6.18'],
 ['Round Diamond 2.74', ''],
 ['Pear Blue Sapphire 1.42', ' Round Diamond 0.81'],
 ['Pear Ruby 2.2', ' Round Diamond 0.24'],
 ['Pear Tanzanite 1.14', ' Round Diamond

In [None]:
len(new_raw_data)

In [None]:
product_weight

In [None]:

clean_raw_data = new_raw_data.copy()

In [None]:
clean_raw_data.head(1)

In [None]:
clean_raw_data['Product_Weight'] = product_weight

In [None]:
clean_raw_data.head()

In [None]:
df = clean_raw_data.copy()

In [None]:
df.to_csv("effy_clean_raw_data.csv", index=False, header=True)

In [None]:
df