In [1]:
import pandas as pd
import numpy as np
import json
from constants import BASE_DIR

In [2]:
import pickle

def compute_duration(current_df, start_df, freq):
    """Elapsed time between two (sub-period, year) pairs, counted in sub-periods.

    Parameters
    ----------
    current_df : pandas.Series
        Two entries read by position: ``[0]`` is the month or week number and
        ``[1]`` is the year of the later point in time.
    start_df : pandas.Series
        Same positional layout as ``current_df`` for the starting point.
    freq : str
        ``'M'`` when position 0 holds months (12 per year) or ``'W'`` when it
        holds weeks (52 per year).

    Returns
    -------
    float
        Number of months/weeks from ``start_df`` to ``current_df``; negative
        spans (start after current) are clamped to 0.

    Raises
    ------
    ValueError
        If ``freq`` is neither ``'M'`` nor ``'W'``.
    """
    if freq == 'M':
        divisor = 12
    elif freq == 'W':
        divisor = 52
    else:
        # Previously fell through and crashed later with an unbound `divisor`.
        raise ValueError(
            "Unsupported freq {!r}; expected 'M' (months) or 'W' (weeks).".format(freq)
        )

    current_vals = current_df.values
    start_vals = start_df.values

    # Work in years first (sub-period difference as a fraction of a year plus
    # the whole-year difference), then convert back to sub-periods.
    duration = (current_vals[0] - start_vals[0]) / divisor + (current_vals[1] - start_vals[1])
    return np.maximum(duration, 0) * divisor

def onehot_encoder_match(keys,vals,df,store_df):
    """Fill a one-hot indicator group on ``df`` and return it.

    ``keys`` and ``vals`` are parallel sequences: ``keys[i]`` is a raw category
    code and ``vals[i]`` is the label in ``df`` that should be switched on for
    that code. Every label in ``vals`` is set to 0, then the label paired with
    ``store_df`` (the observed category code) is set to 1.

    ``df`` is modified in place and also returned. A ``KeyError`` is raised if
    ``store_df`` is not one of ``keys``.
    """
    # Pair each category code with the indicator label it activates.
    label_for_code = {code: label for code, label in zip(keys, vals)}

    # Clear the whole group before raising the single matching flag.
    df[vals] = 0
    df[label_for_code[store_df]] = 1
    return df

def load_model(fname):
    """Load and return the object stored in the pickle file ``fname``.

    Parameters
    ----------
    fname : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only load files from a trusted
    source.
    """
    # The context manager closes the handle even when unpickling fails.
    with open(fname, 'rb') as model_file:
        return pickle.load(model_file)

In [3]:
# Example request payload used to exercise the feature pipeline below.
post_request = {
    "Store": 1111,
    "DayOfWeek": 4,
    "Date": "2014-07-10",
    "Customers": 410,
    "Open": 1,
    "Promo": 0,
    "StateHoliday": "0",  # string code; validated against '0'/'a'/'b'/'c' later
    "SchoolHoliday": 1,
}

In [4]:
# Read a single row of the processed data only to obtain its column layout;
# 'Id' and 'Sales' are dropped so they are not part of the request vector.
df = pd.read_csv('../../data/processed_data.csv', nrows=1).drop(columns=['Id', 'Sales'])

# Empty template with one NaN slot per remaining column. The dtype is given
# explicitly because recent pandas versions no longer default a data-less
# Series to float64.
df_request = pd.Series(index=df.columns, dtype='float64')

In [5]:
# Display the freshly created template; every entry is still NaN at this point.
df_request

Store                          NaN
DayOfWeek                      NaN
Customers                      NaN
Open                           NaN
Promo                          NaN
SchoolHoliday                  NaN
CompetitionDistance            NaN
Promo2                         NaN
No_Promo                       NaN
Jan,Apr,Jul,Oct                NaN
Feb,May,Aug,Nov                NaN
Mar,Jun,Sept,Dec               NaN
Year                           NaN
Month                          NaN
Week                           NaN
CompetitionOpenSinceDuration   NaN
Promo2SinceDuration            NaN
no_holiday                     NaN
public_holiday                 NaN
easter_holiday                 NaN
christmas                      NaN
basic                          NaN
extra                          NaN
extended                       NaN
store_type_a                   NaN
store_type_b                   NaN
store_type_c                   NaN
store_type_d                   NaN
dtype: float64

In [6]:
# Copy the fields that come straight from the request into the feature vector.
# 'Date' is not one of the template labels, so assigning it appends a new entry.
for request_field in ('Store', 'DayOfWeek', 'Customers', 'Open', 'Promo', 'SchoolHoliday', 'Date'):
    df_request[request_field] = post_request[request_field]

In [7]:
# Display the vector after copying the request fields into it.
df_request

Store                               1111.0
DayOfWeek                              4.0
Customers                            410.0
Open                                   1.0
Promo                                  0.0
SchoolHoliday                          1.0
CompetitionDistance                    NaN
Promo2                                 NaN
No_Promo                               NaN
Jan,Apr,Jul,Oct                        NaN
Feb,May,Aug,Nov                        NaN
Mar,Jun,Sept,Dec                       NaN
Year                                   NaN
Month                                  NaN
Week                                   NaN
CompetitionOpenSinceDuration           NaN
Promo2SinceDuration                    NaN
no_holiday                             NaN
public_holiday                         NaN
easter_holiday                         NaN
christmas                              NaN
basic                                  NaN
extra                                  NaN
extended   

In [8]:
# One-hot encode the request's StateHoliday code.
hol_keys = ['0', 'a', 'b', 'c']
hol_vals = ['no_holiday', 'public_holiday', 'easter_holiday', 'christmas']

hold_dict = dict(zip(hol_keys, hol_vals))

# Reject anything that is not one of the known holiday codes up front.
if post_request["StateHoliday"] not in hol_keys:
    raise ValueError("State Holiday is invalid !")

state_holiday = hold_dict[post_request["StateHoliday"]]
df_request[hol_vals] = 0
df_request[state_holiday] = 1


In [9]:
# Look up the static store attributes for the requested store.
df_store_info = pd.read_csv("../../data/raw/store.csv").drop_duplicates()
store_matches = df_store_info[df_store_info['Store'] == df_request['Store']]

# Fail loudly on an unknown (or ambiguous) store id: without this check an
# empty match would flow into the following cells and produce a garbage
# feature vector instead of an error.
if len(store_matches) != 1:
    raise ValueError(
        "Expected exactly one row for Store {!r} in store.csv, found {}.".format(
            post_request['Store'], len(store_matches)
        )
    )

# Single matching row as a Series indexed by column name.
store_vals = store_matches.iloc[0]
store_vals

Store                                   1111
StoreType                                  a
Assortment                                 a
CompetitionDistance                   1900.0
CompetitionOpenSinceMonth                6.0
CompetitionOpenSinceYear              2014.0
Promo2                                     1
Promo2SinceWeek                         31.0
Promo2SinceYear                       2013.0
PromoInterval                Jan,Apr,Jul,Oct
Name: 1110, dtype: object

In [10]:
# Fields taken over unchanged from the store record.
for store_field in ('CompetitionDistance', 'Promo2'):
    df_request[store_field] = store_vals[store_field]

In [11]:
# One-hot encode the store's promo interval.
promo_interval_list = ['No_Promo', 'Jan,Apr,Jul,Oct', 'Feb,May,Aug,Nov', 'Mar,Jun,Sept,Dec']
df_request[promo_interval_list] = 0

promo_interval = store_vals['PromoInterval']
if pd.isna(promo_interval):
    # A missing interval must activate 'No_Promo'. Indexing with NaN directly
    # would append a NaN-labelled entry and leave every indicator at 0.
    # NOTE(review): assumes missing PromoInterval means "no recurring promo" —
    # confirm against how processed_data.csv was built.
    promo_interval = 'No_Promo'
elif promo_interval not in promo_interval_list:
    # An unexpected label would otherwise silently add an extra feature.
    raise ValueError("Unknown PromoInterval {!r}".format(promo_interval))

df_request[promo_interval] = 1

In [12]:
# Derive the calendar features from the request date. The date is parsed from
# post_request rather than from df_request['Date'] so the cell can be re-run
# after 'Date' has already been dropped.
request_date = pd.Period(post_request['Date'])
df_request['Year'] = request_date.year
df_request['Month'] = request_date.month
df_request['Week'] = request_date.week

# The raw date is not kept in the feature vector; errors='ignore' keeps the
# drop idempotent on re-execution.
df_request = df_request.drop('Date', errors='ignore')
df_request

Store                           1111.0
DayOfWeek                          4.0
Customers                        410.0
Open                               1.0
Promo                              0.0
SchoolHoliday                      1.0
CompetitionDistance             1900.0
Promo2                               1
No_Promo                             0
Jan,Apr,Jul,Oct                      1
Feb,May,Aug,Nov                      0
Mar,Jun,Sept,Dec                     0
Year                              2014
Month                                7
Week                                28
CompetitionOpenSinceDuration       NaN
Promo2SinceDuration                NaN
no_holiday                           1
public_holiday                       0
easter_holiday                       0
christmas                            0
basic                              NaN
extra                              NaN
extended                           NaN
store_type_a                       NaN
store_type_b             

In [13]:
# Duration between the competitor's opening and the request date,
# computed by compute_duration with monthly frequency.
date_str = ['Month', 'Year']
com_open_str = ['CompetitionOpenSinceMonth', 'CompetitionOpenSinceYear']
df_request['CompetitionOpenSinceDuration'] = compute_duration(
    current_df=df_request[date_str],
    start_df=store_vals[com_open_str],
    freq='M',
)

In [14]:
# Duration between the start of Promo2 and the request date,
# computed by compute_duration with weekly frequency.
week_str = ['Week', 'Year']
promo2_str = ['Promo2SinceWeek', 'Promo2SinceYear']
df_request['Promo2SinceDuration'] = compute_duration(
    current_df=df_request[week_str],
    start_df=store_vals[promo2_str],
    freq='W',
)

In [15]:
# One-hot encode the store's assortment code.
assort_keys = ['a', 'b', 'c']
assort_vals = ['basic', 'extra', 'extended']

df_request = onehot_encoder_match(
    keys=assort_keys,
    vals=assort_vals,
    df=df_request,
    store_df=store_vals['Assortment'],
)

In [16]:
# One-hot encode the store's type code.
storetype_keys = ['a', 'b', 'c', 'd']
storetype_vals = ['store_type_a', 'store_type_b', 'store_type_c', 'store_type_d']

df_request = onehot_encoder_match(
    keys=storetype_keys,
    vals=storetype_vals,
    df=df_request,
    store_df=store_vals['StoreType'],
)

In [17]:
# Display the feature vector after the duration and one-hot entries were filled in.
df_request

Store                           1111.0
DayOfWeek                          4.0
Customers                        410.0
Open                               1.0
Promo                              0.0
SchoolHoliday                      1.0
CompetitionDistance             1900.0
Promo2                               1
No_Promo                             0
Jan,Apr,Jul,Oct                      1
Feb,May,Aug,Nov                      0
Mar,Jun,Sept,Dec                     0
Year                              2014
Month                                7
Week                                28
CompetitionOpenSinceDuration       1.0
Promo2SinceDuration               49.0
no_holiday                           1
public_holiday                       0
easter_holiday                       0
christmas                            0
basic                                1
extra                                0
extended                             0
store_type_a                         1
store_type_b             

In [18]:
# Work on a copy so the scaled values written into X_test_tr do not overwrite
# the unscaled df_request, which is still read as the scalers' input.
X_test_tr = df_request.copy()

In [19]:
# Rescale the listed entries with the pickled scaler from transform_minmax.pkl.
minmax_str = ['Store', 'Year', 'Week', 'Month', 'DayOfWeek']
scaler = load_model(BASE_DIR / 'models' / 'transform_minmax.pkl')

# The selected entries are presented to the scaler as a one-row frame; the
# first (only) row of the result is written back into the working copy.
minmax_row = df_request[minmax_str].to_frame().T
X_test_tr.loc[minmax_str] = scaler.transform(minmax_row)[0]

In [20]:
# Show the entries that were just overwritten with the scaler's output.
X_test_tr.loc[minmax_str]

Store        0.996409
Year              0.5
Week         0.529412
Month        0.545455
DayOfWeek         0.5
dtype: object

In [21]:
# Rescale the listed entries with the pickled scaler from transform_std.pkl.
std_str = ['Customers', 'CompetitionDistance', 'CompetitionOpenSinceDuration', 'Promo2SinceDuration']
scaler_std = load_model(BASE_DIR / 'models' / 'transform_std.pkl')

# The selected entries are presented to the scaler as a one-row frame; the
# first (only) row of the result is written back into the working copy.
std_row = df_request[std_str].to_frame().T
X_test_tr.loc[std_str] = scaler_std.transform(std_row)[0]

In [22]:
# Show the entries that were just overwritten with scaler_std's output.
X_test_tr.loc[std_str]

Customers                      -0.483072
CompetitionDistance            -0.456466
CompetitionOpenSinceDuration   -0.619428
Promo2SinceDuration            -0.055918
dtype: object

In [23]:
# Unpickle the model stored at models/tuned_model.pkl under BASE_DIR.
tuned_model = load_model(BASE_DIR / 'models' / 'tuned_model.pkl')

In [24]:
# Present the feature vector as a one-row frame and run the model on it.
tuned_model.predict(X_test_tr.to_frame().T)

array([3589.85854654])