## Analyze Watch Description Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy

# Show every column when a wide DataFrame is displayed (no column truncation).
pd.set_option("display.max_columns",None)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

"Main" Watch Info

In [2]:
# Load the listing tables (one CSV per gender) in a single pass.
mens, womens = map(
    pd.read_csv,
    ['watch_page_list_mens.csv', 'watch_page_list_womens.csv'],
)

Additional Info from Watch's Page

In [3]:
# Load the per-page detail tables (one CSV per gender) in a single pass.
mens_additional, womens_additional = map(
    pd.read_csv,
    ['additional_stats_mens.csv', 'additional_stats_womens.csv'],
)

Approx Number of Unique Mens Watches

In [4]:
# Number of distinct image URLs among the men's listings
# (a missing URL, if any, counts as one distinct value).
mens['image_url'].nunique(dropna=False)

6501

__Explore Product Details__ 

Combine all data into a single DF

In [5]:
# Join each listing table to its per-page details on the shared 'url' key.
# Inner join: listings without a matching details row are dropped.
mens_combo = mens.merge(mens_additional, how='inner', on='url')
womens_combo = womens.merge(womens_additional, how='inner', on='url')

# Add a Gender Label
mens_combo['gender'] = 'mens'
womens_combo['gender'] = 'womens'

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat stacks the rows the same way (original row labels are kept).
combo = pd.concat([mens_combo, womens_combo])

Exclude Listings Without Photo

In [6]:
# Keep only rows whose image is not the site's "picture unavailable" placeholder.
has_photo = combo['image_url'].ne(
    'https://www.prestigetime.com/images/watches/pic-unavailable_main.jpg')
combo = combo[has_photo]

Exclude "Erotic" Watches with blocked out photos

In [7]:
# Image names of the two listings to exclude.
bad_list = [
    'PerreletTurbine44mmA40204TURBINEEROTIC',
    'PerreletTurbine44mmA40211TURBINEEROTIC',
]

# Keep every row whose image_name is not on the exclusion list.
combo = combo[~combo['image_name'].isin(bad_list)]

Exclude watches that have the exact same picture as another watch

In [8]:
# Keep exactly one listing per image_url.
#
# groupby('image_url').first() returns the first *non-null* value of each
# column within a group, so a kept row could be stitched together from several
# different listings. drop_duplicates keeps one real row instead.
combo = (
    combo
    .dropna(subset=['image_url'])        # groupby also discarded rows with a null key
    .drop_duplicates(subset='image_url', keep='first')
    .sort_values('image_url')            # groupby returned rows sorted by key
    .reset_index(drop=True)
)
# groupby + reset_index left image_url as the leading column; keep that layout.
combo = combo[['image_url'] + [col for col in combo.columns if col != 'image_url']]

### Let's Make Simplified Product Descriptions

#### Create Additional labels

In [9]:
# Band materials that count as a strap; any band material not listed here
# gets is_strap = 0.
strap_words = [
    'Alligator & Rubber',
    'Alligator/Crocodile Leather',
    'Calfskin Leather',
    'Fabric',
    'Leather',
    'Ostrich',
    'Python Leather',
    'Rubber',
    'Satin',
    'Stingray',
]

For now, white gold and platinum will be considered "stainless steel" (flagged as `is_silver`). Watches with any yellow or rose gold will be considered "gold."

In [10]:
# Case materials flagged as gold: anything containing yellow or rose gold,
# including two-tone cases. Two-tone materials appear in both lists on purpose.
gold_words = ['18kt Rose Gold','Stainless Steel & Rose Gold',
             '18kt Yellow Gold','18kt Everose Gold','Gold Plated',
             'Stainless Steel & Plated Rose Gold','Stainless Steel & Plated Yellow Gold',
             'Stainless Steel & Gold','Titanium & Rose Gold',
             '18kt Honey Gold','18kt Rose & White Gold',
             'Ceramic and Gold',
             # Was listed under silver only, unlike every other steel-and-gold material.
             'Stainless Steel & Yellow Gold']

# Case materials flagged as silver-toned: steel, titanium, white gold, platinum,
# plus the two-tone materials that also contain one of those.
silver_words = ['Stainless Steel',
    'Stainless Steel & Gold',
    'Stainless Steel & Plated Rose Gold',
    'Stainless Steel & Plated Yellow Gold',
    'Stainless Steel & Rose Gold',
    'Stainless Steel & Yellow Gold',
    'Stainless Steel Brushed & Polished',
    'Titanium',
    'Titanium & Platinum',
    'Titanium & Rose Gold',
    'Titanium & Steel',
    'Titanium & White Gold',
    '18kt White Gold',
    'PVD & Steel',
    'Platinum']

In [11]:
# 0/1 flags for case material: 1 when the material is in the matching word list.
# .to_numpy() makes the assignment positional, like assigning a plain list.
case_material = combo['Case Material']
combo['is_gold'] = case_material.isin(gold_words).astype('int64').to_numpy()
combo['is_silver'] = case_material.isin(silver_words).astype('int64').to_numpy()

Strap or Bracelet

In [12]:
# 1 when the band material is one of strap_words, else 0 (positional assignment).
combo['is_strap'] = combo['Band Material'].isin(strap_words).astype('int64').to_numpy()

Let's do the same for shapes

In [13]:
# Indicator Variables for Shape
square_words = ['square','rectangle','tonneau']
combo['is_square'] = [1 if shape in square_words else 0 for shape in combo['Case Shape']]
combo['is_round'] = [1 if shape == 'Round' else 0 for shape in combo['Case Shape']]

Rotating or Fixed Bezel

In [14]:
# Bezel categories. Only the rotating list is used below; the fixed list is
# kept for reference.
fixed_bezel_words = ['None','Fixed']
rotating_bezel_words = ['Bi-Directional Rotating','Uni-Directional Rotating']

# 1 when the bezel is one of the rotating kinds, else 0 (positional assignment).
combo['is_rotating'] = (
    combo['Bezel'].isin(rotating_bezel_words).astype('int64').to_numpy()
)

Chronograph

In [15]:
# Replace missing complications with the literal string 'None' so the substring
# test in the next step only ever sees strings.
# Assign the result back: fillna(inplace=True) on a column selection is chained
# assignment, which warns in pandas 2.x and leaves combo unchanged under
# Copy-on-Write.
combo['Complications'] = combo['Complications'].fillna('None')

In [16]:
# 1 when the complications text mentions 'Chronograph', else 0.
# Every value must be a string (missing values are filled before this step).
combo['is_chronograph'] = combo['Complications'].map(
    lambda complication: 1 if 'Chronograph' in complication else 0
).to_numpy()

Gender of Watch

In [17]:
# 1 for rows labelled 'mens', 0 for everything else (positional assignment).
combo['is_male'] = combo['gender'].isin(['mens']).astype('int64').to_numpy()

__Make Hashed File Names__

My workaround for using "flow_from_directory" in Keras for multilabel data requires that the pictures are in order. However, this is problematic because each batch will be highly correlated with itself (file names are alphabetical by brand, and brands tend to have consistent designs).

Thus, I will randomize the file names while keeping track of which new name corresponds to which watch.

In [18]:
# Prefix every image name with a random number in 0..9999 so that an
# alphabetical listing of the new file names no longer groups watches by brand.
# The generator is seeded so the old-name -> new-name mapping is the same on
# every run; without a seed it changed each time.
NAME_SHUFFLE_SEED = 42
chars = list(map(str, range(0, 10000)))
name_rng = np.random.default_rng(NAME_SHUFFLE_SEED)

# Draw all prefixes in one call instead of one np.random.choice call per row,
# each of which re-converted the 10,000-element list.
prefixes = name_rng.choice(chars, size=len(combo))
new_names = [
    str(prefix) + '_' + name
    for prefix, name in zip(prefixes, combo['image_name'])
]
combo['new_image_name'] = new_names

In [20]:
# Show the rows flagged as chronographs.
combo[combo['is_chronograph'].eq(1)]

Unnamed: 0,image_url,brand,image_name,model_name,model_num,price,url,Band Material,Band Name,Band Notes,Bezel,COSC Certified,Case Back,Case Dimensions,Case Material,Case Shape,Case Thickness,Clasp,Clasp Features,Clasp Notes,Color/Finish,Complications,Condition,Crystal,Dial Color,Edition,Lug Width,Movement,Screw-in Crown,Water Resistance,gender,is_gold,is_silver,is_strap,is_square,is_round,is_rotating,is_chronograph,is_male,new_image_name
48,https://www.prestigetime.com/images/watches/01...,Oris,OrisChronoris40mm0167275644154Set,Chronoris 40mm,01 672 7564 4154-Set,3390.0,https://www.prestigetime.com/item/Oris/Chronor...,Calfskin Leather,,,Fixed,,Transparent,40mm,Stainless Steel,Round,14.50mm,Push Button Deployant Buckle,,,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,20mm,Automatic,Yes,50m/150ft,mens,0,1,1,0,1,0,1,1,1034_OrisChronoris40mm0167275644154Set
49,https://www.prestigetime.com/images/watches/01...,Oris,OrisBC4Chronograph01674763347940752458BFC,BC4 Chronograph,01 674 7633 4794-07 5 24 58BFC,4100.0,https://www.prestigetime.com/item/Oris/BC4-Chr...,Calfskin Leather,,,Fixed,,Transparent,45mm,DLC Coated Stainless Steel,Cushion,15.60mm,Deployant Buckle,,,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,24mm,Automatic,Yes,100m/330ft,mens,0,0,1,0,0,0,1,1,4460_OrisBC4Chronograph01674763347940752458BFC
50,https://www.prestigetime.com/images/watches/01...,Oris,OrisAquisTitanChronograph46mm01674765572530742...,Aquis Titan Chronograph 46mm,01 674 7655 7253-07 4 26 34TEB,3750.0,https://www.prestigetime.com/item/Oris/Aquis-T...,Rubber,,,Uni-Directional Rotating,,Solid,46mm,Titanium,Round,18.20mm,Push Button Deployant Buckle,,with extension,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Grey,,26mm,Automatic,Yes,500m,mens,0,1,1,0,1,1,1,1,183_OrisAquisTitanChronograph46mm0167476557253...
51,https://www.prestigetime.com/images/watches/01...,Oris,OrisAquisTitanChronograph46mm01674765572530782...,Aquis Titan Chronograph 46mm,01 674 7655 7253-07 8 26 75PEB,3900.0,https://www.prestigetime.com/item/Oris/Aquis-T...,Titanium,,,Uni-Directional Rotating,,Solid,46mm,Titanium,Round,18.20mm,Push Button Folding Clasp,,with extension,,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Grey,,26mm,Automatic,Yes,500m,mens,0,1,0,0,1,1,1,1,2723_OrisAquisTitanChronograph46mm016747655725...
52,https://www.prestigetime.com/images/watches/01...,Oris,OrisArtixGTChronograph44mm01674766144340742220FC,Artix GT Chronograph 44mm,01 674 7661 4434-07 4 22 20FC,3450.0,https://www.prestigetime.com/item/Oris/Artix-G...,Rubber,,,Uni-Directional Rotating,,Transparent,44mm,Stainless Steel,Round,14.90mm,Push Button Deployant Buckle,,,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,22mm,Automatic,Yes,100m,mens,0,1,1,0,1,1,1,1,370_OrisArtixGTChronograph44mm0167476614434074...
53,https://www.prestigetime.com/images/watches/01...,Oris,OrisArtixGTChronograph44mm01674766144340752282FC,Artix GT Chronograph 44mm,01 674 7661 4434-07 5 22 82FC,3450.0,https://www.prestigetime.com/item/Oris/Artix-G...,Calfskin Leather,,,Uni-Directional Rotating,,Transparent,44mm,Stainless Steel,Round,14.90mm,Push Button Deployant Buckle,,,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,22mm,Automatic,Yes,100m,mens,0,1,1,0,1,1,1,1,7462_OrisArtixGTChronograph44mm016747661443407...
54,https://www.prestigetime.com/images/watches/01...,Oris,OrisArtixGTChronograph44mm01674766144340782285,Artix GT Chronograph 44mm,01 674 7661 4434-07 8 22 85,3650.0,https://www.prestigetime.com/item/Oris/Artix-G...,Stainless Steel,,,Uni-Directional Rotating,,Transparent,44mm,Stainless Steel,Round,14.90mm,Push Button Folding Clasp,,,,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,22mm,Automatic,Yes,100m,mens,0,1,0,0,1,1,1,1,7508_OrisArtixGTChronograph44mm016747661443407...
55,https://www.prestigetime.com/images/watches/01...,Oris,OrisWilliamsF1TeamChronographDate44mm016747725...,Williams F1 Team Chronograph Date 44mm,01 674 7725 8764-07 4 24 50BT,4500.0,https://www.prestigetime.com/Oris-Williams-F1-...,Rubber,,,Fixed,,Transparent,44mm,PVD Coated Titanium,Round,15mm,Push Button Deployant Buckle,,,Black,Chronograph,New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,24mm,Automatic,Yes,100m,mens,0,0,1,0,1,0,1,1,6011_OrisWilliamsF1TeamChronographDate44mm0167...
56,https://www.prestigetime.com/images/watches/01...,Oris,OrisBigCrownTimerChronograph46mm01675764842340...,Big Crown Timer Chronograph 46mm,01 675 7648 4234-07 5 23 77,3700.0,https://www.prestigetime.com/item/Oris/Big-Cro...,Calfskin Leather,,,Fixed,,Transparent,46mm,Stainless Steel,Round,15.50mm,Tang Buckle,,,Black,"Chronograph, Day/Date",New/Unworn,Scratch Resistant Sapphire,Black,,23mm,Automatic,Yes,30m/100ft,mens,0,1,1,0,1,0,1,1,6452_OrisBigCrownTimerChronograph46mm016757648...
57,https://www.prestigetime.com/images/watches/01...,Oris,OrisBigCrownX1Calculator46mm01675764842640752377,Big Crown X1 Calculator 46mm,01 675 7648 4264-07 5 23 77,3950.0,https://www.prestigetime.com/item/Oris/Big-Cro...,Calfskin Leather,,,Bi-Directional Rotating,,Transparent,46mm,Black PVD,Round,15.50mm,Tang Buckle,,,Brown,"Chronograph, Day/Date",New/Unworn,Anti-Reflective Scratch Resistant Sapphire,Black,,23mm,Automatic,Yes,30m/100ft,mens,0,0,1,0,1,1,1,1,9527_OrisBigCrownX1Calculator46mm0167576484264...


### Save Combined Data Frame

In [None]:
# Write the combined table, including the indicator columns, without the row index.
combo.to_csv(path_or_buf='all_watch_info_with_indicators.csv', index=False)