# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.exponential_smoothing.ets import ETSModel

from sklearn.metrics import mean_squared_error

import os
import warnings

# Suppress every warning for the rest of the session. This also hides
# deprecation and convergence warnings from pandas/statsmodels.
warnings.simplefilter("ignore")
# Show floats with two decimal places whenever pandas renders a frame/series.
pd.options.display.float_format = '{:.2f}'.format

In [2]:
# Begin from matplotlib's built-in 'default' style sheet.
plt.style.use('default')

# Four-colour palette cycled through by successive lines on an axes.
custom_colors = ['#512d6d', '#e6a2b4', '#6b7d96', '#b3cde0']

# Wide default figure size plus the colour cycle, applied in one update.
plt.rcParams.update({
    'figure.figsize': (16, 4),
    'axes.prop_cycle': plt.cycler(color=custom_colors),
})

# Download dataset

In [3]:
# Inputs for the download step. Both names are set before the helper notebook
# runs; presumably download.ipynb reads `file_id` and `file_name` from this
# namespace — TODO confirm against that notebook.
file_id = "1atOZX3YXmxx-_QebbTfndeC6U_DPTL2e" # ID of the file on Google Drive
# Name of the CSV; reused by the import cell to build the local file path.
file_name = 'Updated_data_2021&2022.csv'

# IPython magic: execute the helper notebook from within this session.
%run download.ipynb

# Import dataset

In [None]:
# Build <parent of the working directory>/Data/<file_name>.
current_dir = os.getcwd()
parent_dir = os.path.split(current_dir)[0]
file_path = os.path.join(parent_dir, 'Data', file_name)

# Read the raw CSV and preview its first rows.
df = pd.read_csv(file_path)
df.head()

In [None]:
# Each combined turnover column is the row-wise sum of its FOB and PARI
# components (missing values are skipped by `sum`).
turnover_components = {
    'RACING_TURNOVER': ['FOB_RACING_TURNOVER', 'PARI_RACING_TURNOVER'],
    'SPORT_TURNOVER': ['FOB_SPORT_TURNOVER', 'PARI_SPORT_TURNOVER'],
}
for combined_col, component_cols in turnover_components.items():
    df[combined_col] = df[component_cols].sum(axis=1)

# Columns retained for the rest of the analysis.
cols = [
    'DATE_DIM',
    'BET_ACCOUNT_NUM_HASH',
    'TENURE_IN_DAYS',
    'RACING_TURNOVER',
    'SPORT_TURNOVER',
    'TOTAL_TURNOVER',
]

In [None]:
# Keep only the analysis columns and the rows dated on or after Monday
# 4 January 2021. If DATE_DIM is still text at this point, ISO 'YYYY-MM-DD'
# strings compare in chronological order, so the filter is valid either way.
#
# A single `.loc` selection followed by `.copy()` gives `df` its own data.
# Without the copy, `df` is a slice of the raw frame and later column
# assignments (e.g. the DATE_DIM conversion) trigger SettingWithCopyWarning,
# which the global warning filter would silently hide.
df = df.loc[df['DATE_DIM'] >= '2021-01-04', cols].copy()

In [None]:
# Parse the 'YYYY-MM-DD' date column into datetime values.
df = df.assign(DATE_DIM=pd.to_datetime(df['DATE_DIM'], format='%Y-%m-%d'))

# Aggregate

In [None]:
# Ensure the date column is datetime before it becomes the index. This is a
# no-op when an earlier cell has already converted it.
df['DATE_DIM'] = pd.to_datetime(df['DATE_DIM'], format='%Y-%m-%d')

# Metrics to total per calendar day. `df` was narrowed to `cols` earlier, so
# several of these labels are no longer present and selecting them directly
# raises KeyError. Only the metrics that still exist in `df` are aggregated.
daily_metrics = [
    'RACING_TURNOVER', 'SPORT_TURNOVER', 'FOB_TURNOVER', 'PARI_TURNOVER',
    'TOTAL_TURNOVER', 'DIVIDENDS_PAID', 'GROSS_MARGIN', 'TICKETS',
]
available_metrics = [metric for metric in daily_metrics if metric in df.columns]

# One row per calendar day, summed over all accounts.
df_agg = df.set_index('DATE_DIM')[available_metrics].resample('D').sum()

# Per-year views used by the plots and models below.
df_agg_2021 = df_agg[df_agg.index.year == 2021]
df_agg_2022 = df_agg[df_agg.index.year == 2022]

df_agg_2021.head()

In [None]:
# Single wide panel with the 2021 daily racing turnover.
racing_2021 = df_agg_2021['RACING_TURNOVER']

fig, ax = plt.subplots(nrows=1, figsize=(16, 4))
ax.plot(racing_2021)
ax.set_title('2021 TURNOVER')

# RACING vs SPORT

In [None]:
# Daily 2021 racing and sport turnover series used as model inputs.
X_racing, X_sport = (
    df_agg_2021[column] for column in ('RACING_TURNOVER', 'SPORT_TURNOVER')
)

In [None]:

# Two stacked panels, one per product line, each titled with its column name.
fig, ax = plt.subplots(nrows=2, figsize=(16, 8))

for panel, column in zip(ax, ('RACING_TURNOVER', 'SPORT_TURNOVER')):
    panel.plot(df_agg_2021[column])
    panel.set_title('2021 ' + column)

plt.show()

In [None]:
# Re-select the 2021 daily series (same assignment as the earlier cell in
# this section).
X_racing = df_agg_2021.loc[:, 'RACING_TURNOVER']
X_sport = df_agg_2021.loc[:, 'SPORT_TURNOVER']

# Racing

In [None]:
# Rolling-origin evaluation of the racing-turnover model.
# For every cut-off `index`, fit on the first `index` days and forecast the
# next `offset` days. The mean squared forecast error is stored in `MSE_list`.
offset = 30
MSE_list = []

for index in range(30, 336):
    X_train = X_racing[:index]
    X_test = X_racing[index:index+offset]

    # Additive trend with a multiplicative 7-day seasonal component.
    exp_smth = ETSModel(X_train, seasonal_periods=7, trend="add", seasonal="mul")
    # disp=False stops the optimiser printing a convergence report per fit.
    result = exp_smth.fit(disp=False)

    # Forecast window: the `offset` calendar days right after the training data.
    start = X_train.index[-1] + pd.DateOffset(1)
    end = X_train.index[-1] + pd.DateOffset(offset)

    X_forecast = result.predict(start=start, end=end)

    # Subtraction aligns on dates. When fewer than `offset` actual days remain,
    # the unmatched forecast dates become NaN; drop them so the score is
    # computed on the overlapping dates instead of turning into NaN.
    diff = (X_test - X_forecast).dropna()
    # True mean squared error. The previous `norm(diff) / offset` was
    # sqrt(sum of squares) / n, which is neither MSE nor RMSE.
    MSE = (diff ** 2).mean()

    MSE_list.append(MSE)

MSE_list

In [None]:
# Error per training cut-off, in the order the cut-offs were evaluated.
plt.gca().plot(MSE_list)

In [None]:
# One-off fit for the plots below: train on the first `index` days of racing
# turnover and forecast the following `offset` days.
offset, index = 30, 120

X_train = X_racing.iloc[:index]
X_test = X_racing.iloc[index:index + offset]

# Additive trend with a multiplicative 7-day seasonal component.
exp_smth = ETSModel(X_train, trend="add", seasonal="mul", seasonal_periods=7)
result = exp_smth.fit()

# Forecast window: the `offset` calendar days right after the training data.
last_train_day = X_train.index[-1]
start = last_train_day + pd.DateOffset(1)
end = last_train_day + pd.DateOffset(offset)

X_forecast = result.predict(start=start, end=end)

In [None]:
# Training history, forecast and held-out actuals on a single axis.
plt.figure(figsize=(16, 4))
forecast_ax = plt.gca()

forecast_ax.plot(X_train, label='train')
forecast_ax.plot(X_forecast, color='orange', label='predicted')
forecast_ax.plot(X_test, color='green', label='real')

forecast_ax.set_ylabel("TURNOVER")
forecast_ax.set_title("Daily Turnover")
forecast_ax.legend(loc='upper left')

plt.show()

In [None]:
# Same comparison restricted to the forecast window (no training history).
plt.figure(figsize=(16, 4))
zoom_ax = plt.gca()

zoom_ax.plot(X_forecast, color='orange', label='predicted')
zoom_ax.plot(X_test, color='green', label='real')
zoom_ax.legend(loc='upper left')

plt.show()

# Individual

In [None]:
# Weekly ETS forecast for a single account.
df_3636408132 = df.loc[df['BET_ACCOUNT_NUM_HASH'] == 3636408132, ['DATE_DIM', 'TOTAL_TURNOVER']]

# Split by ROW position, not by date: the first `offset` rows train the model
# and the next 28 rows are held out.
# NOTE(review): this presumes one row per day in date order — confirm.
offset = 87 + 28*11

# Weekly totals. The former `.squeeze()` is dropped: it is a no-op on a
# multi-week series but collapses a one-week series to a bare scalar, which
# breaks the summing and plotting below.
X_train = df_3636408132[:offset].set_index('DATE_DIM').resample('W')['TOTAL_TURNOVER'].sum()
X_test = df_3636408132[offset:offset+28].set_index('DATE_DIM').resample('W')['TOTAL_TURNOVER'].sum()

# Additive-trend model on the weekly series (no seasonal component).
exp_smth = ETSModel(X_train, trend="add", freq='W')

result = exp_smth.fit()

# Forecast the four weekly bins that follow the last training week.
start = X_train.index[-1] + pd.DateOffset(7)
end = X_train.index[-1] + pd.DateOffset(28)
X_forecast = result.predict(start=start, end=end)

# Actual vs. predicted turnover totals over the hold-out window.
print(X_test.sum())
print(X_forecast.sum())


plt.figure(figsize=(16,4))

plt.plot(X_train)
plt.plot(X_forecast, c='orange')
plt.plot(X_test, c='green')
plt.ylabel("TURNOVER")
# The series are weekly totals, so the chart is labelled accordingly.
plt.title("Weekly Turnover")
plt.legend(['train', 'predicted', 'real'], loc = 'upper left')

plt.show()

In [None]:
# Calendar covered by the per-account experiment.
start_date, end_date = '2021-01-04', '2021-12-22'

date_range = pd.date_range(start=start_date, end=end_date)
# Labels of the weekly bins spanning that calendar (one timestamp per week).
last_days = date_range.to_frame().resample('W').first().index

# Train up to the bin labelled last_days[week_offset]; test on the four
# weekly bins after it.
week_offset = 8
last_day = last_days[week_offset]
test_end_day = last_days[week_offset + 4]

in_train = df['DATE_DIM'] <= last_day
in_test = (df['DATE_DIM'] > last_day) & (df['DATE_DIM'] <= test_end_day)
train_df = df[in_train]
test_df = df[in_test]

# Accounts whose largest tenure seen in the training window exceeds 28 days.
max_tenure = train_df.groupby('BET_ACCOUNT_NUM_HASH')['TENURE_IN_DAYS'].max().reset_index()
cus = max_tenure.loc[max_tenure['TENURE_IN_DAYS'] > 28, 'BET_ACCOUNT_NUM_HASH']

# Restrict both windows to those accounts.
train_df = train_df[train_df['BET_ACCOUNT_NUM_HASH'].isin(cus)]
test_df = test_df[test_df['BET_ACCOUNT_NUM_HASH'].isin(cus)]

In [None]:
# Per-account weekly ETS forecast over the four weeks after the training
# cutoff, collected into `res` with columns BET_ACCOUNT_NUM_HASH / real / pred.

# Forecast window: the four weekly bins that follow the training cutoff.
start = last_days[week_offset] + pd.DateOffset(7)
end = last_days[week_offset] + pd.DateOffset(28)

# Actual turnover per account over the test window.
X_test = test_df.groupby('BET_ACCOUNT_NUM_HASH').TOTAL_TURNOVER.sum()

# Weekly grid of the TRAINING period only (bins 0..week_offset). The previous
# grid, last_days[:week_offset+4], reached three weeks into the test window.
# Those weeks were zero-filled and fed to the model, so most of the
# "forecast" was an in-sample fit to padding rather than a real prediction.
train_weeks = last_days[:week_offset + 1]

# Rows are gathered in a list and turned into a frame once at the end.
# `DataFrame.append` was removed in pandas 2.0 and re-copied the frame on
# every iteration.
records = []

for i in cus:
    cus_df = train_df[train_df['BET_ACCOUNT_NUM_HASH'] == i]

    # Weekly totals for this account; weeks without activity count as 0.
    X_train = cus_df.set_index('DATE_DIM').resample('W').TOTAL_TURNOVER.sum()
    X_train = X_train.reindex(train_weeks).fillna(0)

    # Additive-trend model on the weekly series (no seasonal component).
    exp_smth = ETSModel(X_train, trend="add", freq='W')

    # disp=False stops the optimiser printing a report for every account.
    result = exp_smth.fit(disp=False)

    X_forecast = result.predict(start=start, end=end)

    # Accounts with no activity in the test window have no entry in X_test;
    # treat that as zero turnover (replaces the former bare `except`).
    real = X_test.get(i, 0)

    pred = X_forecast.sum()

    records.append({'BET_ACCOUNT_NUM_HASH': i, 'real': real, 'pred': pred})

res = pd.DataFrame(records, columns=['BET_ACCOUNT_NUM_HASH', 'real', 'pred'])

In [None]:
# Turnover cannot be negative, so floor the forecasts at zero. Vectorised
# `clip` replaces the row-by-row lambda; `to_numeric` guards against the
# column having been stored with object dtype.
res['fixed_pred'] = pd.to_numeric(res['pred']).clip(lower=0)

In [None]:
# Display the per-account results table (columns: real, pred, fixed_pred).
res

In [None]:
# Actual vs. floored prediction per account, on a square canvas with the same
# limits on both axes.
axis_limits = (-5000, 100000)

plt.figure(figsize=(8, 8))
scatter_ax = plt.gca()
scatter_ax.scatter(res['real'], res['fixed_pred'], alpha=0.5)
scatter_ax.set_ylim(axis_limits)
scatter_ax.set_xlim(axis_limits)

In [None]:
# Mean squared error after dividing both totals by 28
# (presumably the 4-week window expressed per day — confirm).
mean_squared_error(res['real'].div(28), res['fixed_pred'].div(28))

In [None]:
# Accounts whose raw (unfloored) forecast is at least 80,000.
res.loc[res['pred'].ge(80000)]