In [None]:
# Disabled baseline, kept for reference only. When enabled it loaded the sample
# submission, filled 'Channel' with random draws from {0, 1, 2} using
# probabilities 0.25 / 0.50 / 0.25 for 10000 rows, and wrote './task_1.csv'.
# import numpy as np
# import pandas as pd

# X = pd.read_csv('./task_1_sample_submission.csv')
# rows=10000
# X['Channel'] = np.random.choice(a=[0, 1, 2], size=rows, p=[0.25, 0.50, 0.25])

# X.to_csv('./task_1.csv', index=False)

In [None]:
import numpy as np
np.random.seed(42)
import pandas as pd
import hts

import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter("ignore")

# settings
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# deprecated the old names, so prefer the new name whenever it is available
# and fall back to the legacy name only on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
plt.rcParams["figure.figsize"] = (16, 8)

import pickle

In [None]:
# Load the workbook; 'Time_Period' is parsed as dates at read time.
df = pd.read_excel(
    './HCP_Data_KDAG_Hackathon/HCP_Data_KDAG_Hackathon.xlsx',
    parse_dates=['Time_Period'],
)
# Preview the first rows.
df.head()

In [None]:
# Number of distinct values in each column of df.
df.nunique()

In [None]:
# Keep only the columns used below. Selecting by name already leaves out
# 'Speaker_Programs_Attended' and 'Vouchers_Dropped', so no separate drop is
# needed, and the cell no longer fails with KeyError when it is run again.
#
# The long category labels are shortened with `replace` rather than `map`:
# labels that are not in the mapping (including ones that were already
# shortened by a previous run of this cell) are left untouched instead of
# being turned into NaN. `assign` builds a new frame, so nothing is written
# into a slice of the original.
# df['Percent_Market_Share'] = df['Brand_Rx'] / df['Market_Rx'] * 100.0
df = df[
    [
        'Physician_ID', 'Time_Period', 'Physician_Segment', 'Specialty',
        'Sales_Rep_Calls', 'Samples_Dropped', 'Emails_Delivered',
        'Brand_Rx', 'Market_Rx',
    ]
].assign(
    Specialty=lambda frame: frame['Specialty'].replace(
        {'Dermatologist': 'D', 'General Physician': 'GP', 'Nurse Practitioner': 'NP'}
    ),
    Physician_Segment=lambda frame: frame['Physician_Segment'].replace(
        {'3-Low': 'L', '2-Medium': 'M', '1-High': 'H'}
    ),
)
df.head()

In [None]:
# Integer code for each channel column, assigned in listing order (0, 1, 2).
out_dict = {
    channel: code
    for code, channel in enumerate(['Sales_Rep_Calls', 'Samples_Dropped', 'Emails_Delivered'])
}

In [None]:
# For every row, count how many of the three channel columns are non-zero
# (0 to 3), then tabulate how many rows fall into each count. The entry for 0
# is the number of rows where Sales_Rep_Calls, Samples_Dropped and
# Emails_Delivered are all 0.
# Uses a vectorised comparison instead of a row-wise apply; the result is the same.
df[['Sales_Rep_Calls', 'Samples_Dropped', 'Emails_Delivered']].ne(0).sum(axis=1).value_counts()

In [None]:
# Build the node labels with vectorised string concatenation instead of a
# row-wise apply. `astype(str)` renders each value the same way the original
# f-string interpolation did.
# create the bottom level id: "<segment>_<specialty>_<physician id>"
df["seg_specialty_id"] = (
    df["Physician_Segment"].astype(str)
    + "_" + df["Specialty"].astype(str)
    + "_" + df["Physician_ID"].astype(str)
)
# create the l1 level id: "<segment>_<specialty>"
df["seg_specialty"] = df["Physician_Segment"].astype(str) + "_" + df["Specialty"].astype(str)

In [None]:
# Bottom level: one column per seg_specialty_id, one row per Time_Period,
# cells holding Brand_Rx. A duplicated (Time_Period, seg_specialty_id) pair
# raises, exactly as with DataFrame.pivot.
df_bottom_level = (
    df.set_index(["Time_Period", "seg_specialty_id"])["Brand_Rx"]
      .unstack("seg_specialty_id")
)
df_bottom_level.head()

In [None]:
# create the l1 level df: Brand_Rx summed per Time_Period and seg_specialty,
# one column per seg_specialty.
# Only Brand_Rx is aggregated. Summing the whole frame also aggregated columns
# that are never read afterwards (and, on pandas versions where
# groupby().sum() no longer skips non-numeric columns, the string id columns).
df_l1_level = (
    df.groupby(["Time_Period", "Physician_Segment", "Specialty", "seg_specialty"])["Brand_Rx"]
      .sum()
      .reset_index(drop=False)
      .pivot(index="Time_Period", columns="seg_specialty", values="Brand_Rx")
)
df_l1_level.head()

In [None]:
# create the l2 level df: Brand_Rx summed per Time_Period and Physician_Segment,
# one column per segment.
# Only Brand_Rx is aggregated. Summing the whole frame also aggregated columns
# that are never read afterwards (and, on pandas versions where
# groupby().sum() no longer skips non-numeric columns, the string label columns).
df_l2_level = (
    df.groupby(["Time_Period", "Physician_Segment"])["Brand_Rx"]
      .sum()
      .reset_index(drop=False)
      .pivot(index="Time_Period", columns="Physician_Segment", values="Brand_Rx")
)
df_l2_level.head()

In [None]:
# Root of the hierarchy: Brand_Rx summed over all rows of each Time_Period,
# returned as a one-column frame named "total".
df_total = df.groupby(["Time_Period"]).agg(total=("Brand_Rx", "sum"))
df_total.head()

In [None]:
# Put every level of the hierarchy side by side in one wide frame, joined on
# the Time_Period index of the bottom-level frame.
hierarchy_df = (
    df_bottom_level
    .join(df_l1_level)
    .join(df_l2_level)
    .join(df_total)
)
# Make sure the index is datetime-typed before resampling.
hierarchy_df.index = pd.to_datetime(hierarchy_df.index)
# Re-bin onto the 'W-FRI' weekly frequency, summing within each bin.
hierarchy_df = hierarchy_df.resample('W-FRI').sum()

hierarchy_df.head(100)
# if(hierarchy_df1 == hierarchy_df).all().all():
#     print("True")

In [None]:
# Distinct node labels at each level below the root.
segments = df["Physician_Segment"].unique()
specialties = df["seg_specialty"].unique()
ids = df["seg_specialty_id"].unique()

# Parent -> children mapping for every non-leaf node.
# Children are matched on "<parent>_" rather than on the bare parent label:
# child labels are built as "<parent>_<suffix>", and including the separator
# stops a label from also matching a different parent that merely shares its
# leading characters.
total = {'total': list(segments)}
segment = {k: [v for v in specialties if v.startswith(k + "_")] for k in segments}
# NOTE(review): `id` shadows the builtin of the same name. The name is kept so
# that any later cell reading it keeps working; consider renaming it.
id = {k: [v for v in ids if v.startswith(k + "_")] for k in specialties}
hierarchy = {**total, **segment, **id}

hierarchy

In [None]:
# Disabled alternative, kept for reference only: an HTSRegressor with
# model='prophet' and revision_method='OLS', fitted on the same inputs,
# pickled to 'model_ols_prophet.pickle', then used for a one-step-ahead prediction.
# model_ols_prophet = hts.HTSRegressor(model='prophet', revision_method='OLS', n_jobs=0)
# model_ols_prophet = model_ols_prophet.fit(hierarchy_df, hierarchy)
# with open('model_ols_prophet.pickle', 'wb') as f:
#     pickle.dump(model_ols_prophet, f)
# pred_ols_prophet = model_ols_prophet.predict(steps_ahead=1)

In [None]:
# Fit an HTSRegressor (auto_arima base model, 'AHP' revision, 4 jobs) on the
# wide hierarchy frame and the parent -> children mapping.
model_ahp_arima = hts.HTSRegressor(
    model='auto_arima',
    revision_method='AHP',
    n_jobs=4,
).fit(hierarchy_df, hierarchy)

# Save the fitted model next to the notebook.
with open('model_ahp_arima.pickle', 'wb') as model_file:
    pickle.dump(model_ahp_arima, model_file)

# Predict one step ahead.
pred_ahp_arima = model_ahp_arima.predict(steps_ahead=1)

In [None]:
# Take the last row of the prediction frame and turn it back into a one-row
# DataFrame (to_frame() makes it a single column; .T flips it into a row).
# The forecast produced above is `pred_ahp_arima`. The previous code read
# `pred_ahp_prophet`, which is never defined in this notebook and raised
# NameError on a fresh kernel.
pred_ahp_arima = pred_ahp_arima.iloc[-1].to_frame().T
