# Part 1: Identifying best sellers

In [1]:
import pandas as pd
import time
import datetime
import matplotlib.pyplot as plt
%matplotlib inline

In [72]:
df = pd.read_csv("Online Retail.csv", encoding = "ISO-8859-1")
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/10 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/10 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/10 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/10 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/10 8:26,3.39,17850.0,United Kingdom


In [74]:
# create a "data" column so we can identify best selling items for a given range
df["date"] = df.apply(lambda row: row.InvoiceDate.split(" ")[0], axis = 1) 
df["timestamp"] = df.apply(lambda x: time.mktime(datetime.datetime.strptime(x['date'], "%m/%d/%y").timetuple()), axis = 1) 
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country,date,timestamp
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/10 8:26,2.55,17850.0,United Kingdom,12/1/10,1291180000.0
1,536365,71053,WHITE METAL LANTERN,6,12/1/10 8:26,3.39,17850.0,United Kingdom,12/1/10,1291180000.0
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/10 8:26,2.75,17850.0,United Kingdom,12/1/10,1291180000.0
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/10 8:26,3.39,17850.0,United Kingdom,12/1/10,1291180000.0
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/10 8:26,3.39,17850.0,United Kingdom,12/1/10,1291180000.0


In [75]:
# we want best selling items for thanksgiving week

start = "11/27/11"
end = "12/3/11"

select = (df['timestamp'] <= time.mktime(datetime.datetime.strptime(end, "%m/%d/%y").timetuple())) & (df['timestamp'] >= time.mktime(datetime.datetime.strptime(start, "%m/%d/%y").timetuple()))
df_small = df[select]

StockCode
23084     4588
22197     3195
23582     1851
20668     1661
22086     1611
          ... 
21110     -151
84859C    -153
84598     -247
16045     -700
85204    -1119
Name: Quantity, Length: 2341, dtype: int64

In [94]:
# return a list of best sellers by stock id over that thanksgiving
best_sellers = list(df_small.groupby("StockCode")['Quantity'].sum().sort_values(ascending=False)[:3].index)
best_sellers

['23084', '22197', '23582']

# part 2 - by country
given a stockcode id, return a dataframe of predictions by country

In [None]:
import fbprophet
from fbprophet import Prophet
from pandas import to_datetime

def make_predictions(df, stockcode_id):
    
    # segment by Stockcode id
    df_23084 = df[df['StockCode'] == stockcode_id]

    # fit model and make predictions
    period = {"ds":['11/27/11', '11/28/11', '11/29/11', '11/30/11', '12/1/11', '12/2/11', '12/3/11', '12/4/11', '12/5/11', '12/6/11', '12/7/11', '12/8/11', '12/9/11', '12/10/11']}
    data = {time: period['ds']}
    
    countries = set(list(df_23084['Country']))
    for country in countries:
        # print(country, df_23084[df_23084['Country'] == country].shape)

        # ignore countries that have few sales
        if df_23084[df_23084['Country'] == country].shape[0] < 10:
            pass
        else:
            # segment by country: 
            df_23084_uk = df_23084[df_23084['Country'] == country]

            # format data and fit model
            x = df_23084_uk[['date', 'Quantity']]
            x.columns = ['ds', 'y']
            model = Prophet()
            model.fit(x)

            # make predictions
            test = pd.DataFrame(period)
            predictions = model.predict(test)
            pred =  {country: predictions['yhat'].values}
            data[country] = pred[country]
    return pd.DataFrame(data)

In [100]:
make_predictions(df, stockcode_id='22197')

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 16.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:n_changepoints greater than number of observations. Using 7.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disablin

Unnamed: 0,<module 'time' (built-in)>,France,Spain,United Kingdom,EIRE,Germany
0,11/27/11,30.154601,13.867651,35.755949,11.29711,14.875822
1,11/28/11,6.848325,5.359058,49.494122,65.474168,7.993204
2,11/29/11,13.134136,21.409805,47.636298,9.843861,-11.135121
3,11/30/11,16.80507,-14.659657,50.424573,61.048749,9.518483
4,12/1/11,7.177688,9.532038,65.212645,16.591143,7.97036
5,12/2/11,9.455002,17.011043,91.790714,10.255611,-11.17766
6,12/3/11,-32.162959,-14.702124,100.680646,-73.53318,-11.191839
7,12/4/11,29.911531,13.768561,37.972639,10.713759,14.776564
8,12/5/11,6.605255,5.259969,51.710813,64.890817,7.893946
9,12/6/11,12.891066,21.310715,49.852988,9.260511,-11.234379
