## UK Clocks

In [1]:
import pandas as pd
from db_utils import Query, QueryAll, QueryAllFinished, UpdateItem

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

# Column labels applied to the queried records.
# NOTE(review): assumes each record's fields arrive in exactly this order — confirm against db_utils.
cols = [
    "id", "title", "globalId", "categoryId", "categoryName", "url",
    "location", "shippingType", "shippingLocations", "shippingTime",
    "startTime", "endTime", "returnsAccepted", "conditionId",
    "listingIsTopRated", "sellerFeedbackScore", "sellerPositivePercent",
    "sellerName", "sellerIsTopRated", "price", "currency", "bids",
]

# Fetch every record of the "clock_us" collection and wrap it in a DataFrame.
data = QueryAll("clock_us")
clocks = pd.DataFrame(data, columns=cols)


In [4]:
# Display the distinct values found in the price column.
clocks["price"].unique()

array(['US $12.00', 'US $0.99', 'US $15.50', 'US $9.90', 'US $14.99',
       'US $30.00', 'US $86.00', 'US $94.99', 'US $5.99', 'US $46.00',
       'US $38.00', 'US $12.50', 'US $18.50', 'US $399.00', 'US $5.00',
       'US $15.25', 'US $41.00', 'US $6.16', 'US $15.00', 'US $480.00',
       'US $40.00', -1.0, 'US $70.00', 'US $22.50', 'US $20.00',
       'US $123.50', 'US $9.00', 'US $525.00', 'US $32.01', 'US $81.00',
       'US $3.00', 'US $8.99', 'US $56.00', 'US $82.50', 'US $29.00',
       'US $9.99', 'US $11.50', 'US $19.50', 'US $49.99', 'US $3.69',
       'US $16.00', 'US $37.05', 'US $29.20', 'US $61.00', 'US $55.00',
       'AU $45.00', 'US $7.99', 'US $89.00', 'US $37.95', 'US $149.99',
       'US $149.95', 'US $39.00', 'US $15.99', 'AU $35.00', 'US $30.20',
       'C $80.00', -2.0, 'US $18.99', 'US $32.20', 'US $29.99',
       'US $7.30', 'US $65.55', 'US $21.99', 'US $20.70', 'US $79.99',
       'US $124.99', 'US $36.00', 'US $17.99', 'US $10.00', 'US $70.75',
       'US $

In [2]:
# Every value in these three columns is -1 (per the author's inspection),
# so they carry no information and are removed.
clocks = clocks.drop(columns=['currency', 'returnsAccepted', 'shippingTime'])

In [3]:
# Discard listings whose bid count is -1 or -2.
# NOTE(review): these look like sentinel codes for missing data — confirm.
has_sentinel_bids = clocks['bids'].isin([-1, -2])
clocks = clocks[~has_sentinel_bids]

In [4]:
# Fixed exchange rates used to express every price in pounds sterling.
EUR_TO_GBP = 0.87
USD_TO_GBP = 0.84

def convert_currency(original):
    """Convert a raw price string to a float amount in GBP.

    Recognised formats (thousands separators are stripped first):
      * '£<amount>'     -> amount unchanged
      * 'EUR <amount>'  -> amount * EUR_TO_GBP
      * 'US $<amount>'  -> amount * USD_TO_GBP

    Parameters
    ----------
    original : str or any
        Raw price value taken from the ``price`` column.

    Returns
    -------
    float
        Price in GBP, or NaN when the value is not a string (e.g. the
        numeric placeholders -1.0 / -2.0) or uses a currency prefix with
        no configured rate (e.g. 'AU $', 'C $').

    Raises
    ------
    ValueError
        If a recognised prefix is followed by text that is not a number.
    """
    # Numeric placeholders have no .replace(); report them as missing
    # instead of raising AttributeError.
    if not isinstance(original, str):
        return float('nan')

    original = original.replace(',', '')
    if original.startswith('£'):
        return float(original[1:])
    elif original.startswith('EUR'):
        # Skip the 4-character prefix 'EUR '.
        return float(original[4:]) * EUR_TO_GBP
    elif original.startswith('US'):
        # Skip the 4-character prefix 'US $'.
        return float(original[4:]) * USD_TO_GBP

    # Unknown currency: return an explicit NaN rather than an implicit None.
    return float('nan')
    
# Replace each raw price with the value convert_currency returns for it.
clocks['price'] = clocks['price'].map(convert_currency)

In [5]:
# Standardise the prices and keep only rows whose z-score is below 2.5.
# The test is one-sided: only unusually expensive listings are removed.
# Rows with a NaN price also fail the comparison and are dropped.
price = clocks['price']
zscore = price.sub(price.mean()).div(price.std())
clocks = clocks.loc[zscore.lt(2.5)]

In [6]:
# Day-of-week codes (Monday=0 ... Sunday=6) treated as "weekend".
# Friday, Saturday and Sunday are exactly the days that the previous
# hard-coded list of October-November 2022 dates enumerated; deriving the
# flag from the weekday keeps those results and also handles listings that
# end outside that date window.
WEEKEND_DAYS = [4, 5, 6]

# The first 10 characters of endTime are the 'YYYY-MM-DD' date part.
# Unparseable values become NaT and therefore get flag 0, matching the
# old behaviour for strings that were not in the list.
end_dates = pd.to_datetime(
    clocks["endTime"].str.slice(0, 10), format="%Y-%m-%d", errors="coerce"
)
clocks['endAtWeekend'] = end_dates.dt.dayofweek.isin(WEEKEND_DAYS).astype(int)

In [7]:
# Characters 11-12 of endTime are read as the hour; flag listings ending after 16:xx.
end_hour = clocks["endTime"].str[11:13].astype(int)
clocks['endAtEvening'] = (end_hour > 16).astype(int)

In [8]:
# Listing duration in whole days (end timestamp minus start timestamp).
listing_start = pd.to_datetime(clocks['startTime'])
listing_end = pd.to_datetime(clocks['endTime'])
clocks['length'] = (listing_end - listing_start).dt.days

In [9]:
# Condition-id groups. All three lists hold integers so they can be used
# interchangeably with isin(); NEW was previously a list of strings.
# NOTE(review): presumably eBay condition IDs — confirm the grouping.
USED = [3000, 6000, 2500, 5000, 2750]
NEW = [1000, 1500]
BROKEN = [7000]

# Coerce the ids to numbers first so the membership tests work whether the
# column holds ints or numeric strings; non-numeric values become NaN and
# match neither group.
condition_ids = pd.to_numeric(clocks['conditionId'], errors='coerce')

clocks['isBroken'] = condition_ids.isin(BROKEN).astype(int)
clocks['isUsed'] = condition_ids.isin(USED).astype(int)

In [10]:
# Flag listings whose title mentions "large" (case-insensitive).
title_lower = clocks['title'].str.lower()
clocks['isLarge'] = title_lower.str.contains('large').astype(int)

In [11]:
# 1 when the shipping type is exactly 'Free', otherwise 0.
clocks['freeShipping'] = clocks['shippingType'].eq('Free').astype(int)

In [12]:
# A listing counts as sold when it received at least one bid.
clocks['isSold'] = clocks['bids'].gt(0).astype(int)

In [13]:
# Remove the raw columns now that the engineered features have been derived.
clocks = clocks.drop(columns=[
    'title', 'url', 'globalId', 'categoryId', 'categoryName', 'location',
    'shippingType', 'shippingLocations', 'startTime', 'endTime',
    'conditionId', 'sellerName', 'bids',
])

In [14]:
# Renumber the rows 0..n-1 (earlier filtering left gaps), then write the
# cleaned frame to disk; the new index is written as the first CSV column.
OUTPUT_CSV = 'uk_clocks.csv'
clocks = clocks.reset_index(drop=True)
clocks.to_csv(OUTPUT_CSV)