In [13]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

def bet_eval_metrics(d):
    """Aggregate a bet-level frame into a single row of headline metrics.

    Parameters
    ----------
    d : pandas.DataFrame
        Must contain numeric ``gpl`` and ``stake`` columns.

    Returns
    -------
    pandas.DataFrame
        One row (index ``0``) with the summed ``gpl``, the summed ``stake``
        and ``pot`` (``gpl / stake``). The row is dropped, giving an empty
        frame, when the total stake is exactly 0.
    """
    # Column totals as a Series indexed by 'gpl' / 'stake'.
    totals = d.agg({"gpl": "sum", "stake": "sum"})

    # Turn the Series into a one-row frame so the ratio can sit beside the totals.
    totals_row = pd.DataFrame(totals).transpose()
    metrics = totals_row.assign(pot=totals_row['gpl'] / totals_row['stake'])

    # A zero total stake makes the ratio meaningless, so that row is filtered out.
    has_stake = metrics['stake'] != 0
    return metrics[has_stake]

# Cumulative PL by market to visually see trend and consistency
def bet_eval_chart_cPl(d):
    """Build a line chart of cumulative gross profit across markets.

    Parameters
    ----------
    d : pandas.DataFrame
        Must contain ``market_id`` and ``gpl`` columns.

    Returns
    -------
    plotly figure
        Line chart of ``cGpl`` (running sum of per-market ``gpl``) against
        ``market_number`` (0-based position of the market in ``groupby`` order).
    """
    # Total gpl per market; groupby leaves market_id as the (sorted) index.
    per_market = d.groupby('market_id').agg({'gpl': 'sum'})

    # Sequential x-axis position and running profit, added in that column order.
    per_market = per_market.assign(
        market_number=np.arange(len(per_market)),
        cGpl=lambda frame: frame.gpl.cumsum(),
    )

    return px.line(
        per_market,
        x="market_number",
        y="cGpl",
        title='Cumulative Gross Profit',
        template='simple_white',
    )

def output_eval(summary):
    """Print the overall gross profit/loss percentage and the metrics table.

    Parameters
    ----------
    summary : pandas.DataFrame
        Passed unchanged to ``bet_eval_metrics``, which sums its ``gpl`` and
        ``stake`` columns.

    Returns
    -------
    None
        The results are printed only.
    """
    eval_df = bet_eval_metrics(summary)

    if eval_df.empty:
        # bet_eval_metrics drops the totals row when the total stake is 0, so
        # there is no percentage to report; indexing .values[0] on the empty
        # result would raise IndexError.
        print('Total Gross Profit/Loss % = n/a (total stake is 0)')
    else:
        print(f'Total Gross Profit/Loss % = {(eval_df["gpl"]/eval_df["stake"]*100).values[0]}')

    print(eval_df)

def all_summary(months=('2023_12', '2024_01', '2024_02'), base_dir='trade_result_2'):
    """Load the per-month trade summaries, stack them and add ``gpl``/``stake``.

    For every month the file ``{base_dir}/{month}/{month}_summary.csv`` is read
    with ``market_id`` and ``selection_id`` kept as strings, and the rows are
    tagged with a ``month`` column. The overall gross profit/loss percentage is
    printed as a side effect.

    Parameters
    ----------
    months : sequence of str or str, optional
        Month labels used as both folder name and file prefix, loaded in the
        given order. Defaults to the three months the notebook was written for.
    base_dir : str, optional
        Root results directory (e.g. ``'trade_result_all_tracks'``).

    Returns
    -------
    pandas.DataFrame
        The concatenated rows (each file's own row index is kept, as before)
        with ``gpl = back_return + lay_return`` and
        ``stake = back_v_sum + lay_v_sum``.

    Raises
    ------
    ValueError
        If ``months`` is empty.
    """
    # Accept a single label as well as a sequence of labels.
    months = [months] if isinstance(months, str) else list(months)
    if not months:
        raise ValueError('months must contain at least one month label')

    # IDs are read as strings so they are never parsed as numbers.
    id_dtypes = {'market_id': 'string', 'selection_id': 'string'}

    monthly_frames = []
    for month in months:
        frame = pd.read_csv(f'{base_dir}/{month}/{month}_summary.csv', dtype=id_dtypes)
        frame['month'] = month
        monthly_frames.append(frame)

    summary = pd.concat(monthly_frames, axis=0)
    summary['gpl'] = summary['back_return'] + summary['lay_return']
    summary['stake'] = summary['back_v_sum'] + summary['lay_v_sum']

    eval_df = bet_eval_metrics(summary)
    if eval_df.empty:
        # The totals row is dropped when total stake is 0; .values[0] would raise.
        print('Total Gross Profit/Loss % = n/a (total stake is 0)')
    else:
        print(f'Total Gross Profit/Loss % = {(eval_df["gpl"]/eval_df["stake"]*100).values[0]}')

    return summary

def all_raw_data(months=('2023_12', '2024_01', '2024_02'), base_dir='extracted_data'):
    """Load the per-month preprocessed raw data and stack it into one frame.

    For every month the file ``{base_dir}/{month}/{month}_preprocessed.csv`` is
    read with ``market_id`` and ``selection_id`` kept as strings, and the rows
    are tagged with a ``month`` column.

    Parameters
    ----------
    months : sequence of str or str, optional
        Month labels used as both folder name and file prefix, loaded in the
        given order. Defaults to the three months the notebook was written for.
    base_dir : str, optional
        Root directory holding the extracted data.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows; each file's own row index is kept, as before.

    Raises
    ------
    ValueError
        If ``months`` is empty.
    """
    # Accept a single label as well as a sequence of labels.
    months = [months] if isinstance(months, str) else list(months)
    if not months:
        raise ValueError('months must contain at least one month label')

    # IDs are read as strings so they are never parsed as numbers.
    id_dtypes = {'market_id': 'string', 'selection_id': 'string'}

    monthly_frames = []
    for month in months:
        frame = pd.read_csv(f'{base_dir}/{month}/{month}_preprocessed.csv', dtype=id_dtypes)
        frame['month'] = month
        monthly_frames.append(frame)

    return pd.concat(monthly_frames, axis=0)

In [4]:
# Load the combined monthly trade summary (all_summary also prints the overall
# gross profit/loss %) and the combined preprocessed raw data.
summary = all_summary()
raw = all_raw_data()

Total Gross Profit/Loss % = 2.241958489669226


In [5]:
# Print the aggregate metrics, then show the cumulative gross-profit chart
# (the chart is the cell's last expression, so it is rendered as the output).
output_eval(summary)
bet_eval_chart_cPl(summary)

Total Gross Profit/Loss % = 2.241958489669226
           gpl          stake      pot
0  4118.936774  183720.474428  0.02242


In [24]:
# Per-row change in traded_volume within each market; rows whose diff is NaN
# (e.g. the first row of a market) fall back to their own traded_volume.
# NOTE(review): this treats traded_volume as a running total per market_id in
# row order - confirm against the preprocessing step.
raw['vol'] = (
    raw
    .groupby('market_id')['traded_volume']
    .diff()
    .fillna(raw['traded_volume'])
)

# Total volume per venue, with venue as an ordinary column for the merge below.
venue_raw = raw.groupby('venue')['vol'].sum().reset_index()

# Total GPL per venue, worst venue first, with the venue volume attached.
venue_summary = (
    summary
    .groupby('venue')[['gpl']]
    .sum()
    .sort_values('gpl')
    .merge(venue_raw, on='venue', how='left')
)
venue_summary

Unnamed: 0,venue,gpl,vol
0,Sandown,-1816.944193,827559.07
1,Caulfield,362.384768,1631728.15
2,Flemington,650.60899,946681.99
3,Moonee Valley,1500.945817,860479.49
4,Bendigo,3421.941391,89374.49


In [25]:
# Display the venue-level GPL / volume table again.
venue_summary

Unnamed: 0,venue,gpl,vol
0,Sandown,-1816.944193,827559.07
1,Caulfield,362.384768,1631728.15
2,Flemington,650.60899,946681.99
3,Moonee Valley,1500.945817,860479.49
4,Bendigo,3421.941391,89374.49


In [28]:
from plotly.subplots import make_subplots

# Dual-axis figure: GPL as bars on the primary y-axis, volume as a line on the
# secondary y-axis, both keyed by venue.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(name="GPL", x=venue_summary['venue'], y=venue_summary['gpl']),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(name="Volume", x=venue_summary['venue'], y=venue_summary['vol']),
    secondary_y=True,
)

# One explicit tick per venue, labelled with the venue name itself.
tickvals = venue_summary['venue'].tolist()
ticktext = venue_summary['venue'].tolist()

# Figure title and an enlarged canvas.
fig.update_layout(
    title_text="Total GPL and Volume by Venue",
    width=1300,
    height=600,
)

# x-axis: title, explicit ticks and a small tick font so the labels fit.
fig.update_xaxes(
    title_text="Venue",
    tickvals=tickvals,
    ticktext=ticktext,
    tickfont=dict(size=8),
)

# y-axis titles, one per side.
fig.update_yaxes(title_text="<b>GPL</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume</b>", secondary_y=True)

fig.show()

In [37]:
n = 5  # top and bottom 5

# venue_summary is ordered by ascending GPL, so its first n rows are the worst
# venues and its last n rows the best. The two slices overlap when there are
# fewer than 2 * n venues, which only repeats names in the list.
signif_venues = [
    *venue_summary['venue'].head(n),
    *venue_summary['venue'].tail(n),
]

# Keep only the summary rows that belong to those venues.
signif_summary = summary.loc[summary['venue'].isin(signif_venues)]
signif_summary

Unnamed: 0,market_id,selection_id,selection_name,venue,win,back_trades,lay_trades,lay_v_sum,back_v_sum,back_liability,back_return,lay_liability,lay_return,month,gpl,stake
0,1.221906108,54632509,3. Field Of Mars,Moonee Valley,0,"{'p': [40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 42....","{'p': [42.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42....",80.312500,72.187500,72.187500,-72.187500,0.000000,80.312500,2023_12,8.125000,152.500000
1,1.221906108,1436299,4. New Hampshire,Moonee Valley,1,"{'p': [23.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23....","{'p': [21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21....",20.625000,27.500000,27.500000,583.437500,381.015625,-381.015625,2023_12,202.421875,48.125000
2,1.221906108,63363608,6. Warmosa,Moonee Valley,0,"{'p': [16.0, 16.0, 16.5, 16.5, 16.5, 16.5, 16....","{'p': [19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19....",13.437500,76.968750,76.968750,-76.968750,0.000000,13.437500,2023_12,-63.531250,90.406250
3,1.221906108,63363609,7. Steel King,Moonee Valley,0,"{'p': [7.2, 7.4, 5.6, 5.6, 5.6, 5.6, 5.6, 5.6,...","{'p': [], 'v': []}",0.000000,10.968750,10.968750,-10.968750,0.000000,0.000000,2023_12,-10.968750,10.968750
4,1.221906108,63363610,8. Vienna Lights,Moonee Valley,0,"{'p': [20.0, 20.0, 21.0, 22.0, 22.0, 22.0, 22....","{'p': [22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22....",33.125000,40.937500,40.937500,-40.937500,0.000000,33.125000,2023_12,-7.812500,74.062500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062,1.225361859,66487428,11. Hooah Havana,Sandown,0,"{'p': [12.0, 12.5, 12.0, 12.5, 12.5, 14.5, 13....","{'p': [], 'v': []}",0.000000,3.035714,3.035714,-3.035714,0.000000,0.000000,2024_02,-3.035714,3.035714
1063,1.225361859,45023136,12. Gwan So,Sandown,0,"{'p': [18.5, 18.5, 19.5, 19.5, 19.5, 19.5, 19....","{'p': [24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24....",1.785714,90.000000,90.000000,-90.000000,0.000000,1.785714,2024_02,-88.214286,91.785714
1064,1.225361859,66487431,13. Kodiak,Sandown,0,"{'p': [11.5, 11.5, 11.5, 12.5, 12.5, 12.5, 12....","{'p': [14.5, 14.5, 14.5, 14.5, 14.5, 14.5, 14....",2.321429,96.142857,96.142857,-96.142857,0.000000,2.321429,2024_02,-93.821429,98.464286
1065,1.225361859,60816398,14. Eye Of The Eagle,Sandown,0,"{'p': [5.7, 5.7, 5.6, 5.9, 5.9, 5.7, 5.6, 5.5,...","{'p': [], 'v': []}",0.000000,7.607143,7.607143,-7.607143,0.000000,0.000000,2024_02,-7.607143,7.607143


In [49]:
# Monthly GPL per venue, restricted to the significant (top/bottom) venues.
# This groups `signif_summary` rather than the full `summary`; otherwise the
# venue filter computed earlier is never applied and every venue would be
# charted once more tracks are loaded.
signif_summary_month = (
    signif_summary
    .groupby(['venue', 'month'])
    .agg({'gpl': 'sum'})
    .reset_index()
)
signif_summary_month

Unnamed: 0,venue,month,gpl
0,Bendigo,2023_12,-1122.351179
1,Bendigo,2024_02,4544.292569
2,Caulfield,2023_12,-1854.004464
3,Caulfield,2024_01,-181.78319
4,Caulfield,2024_02,2398.172423
5,Flemington,2023_12,-1294.60873
6,Flemington,2024_01,2350.863514
7,Flemington,2024_02,-405.645794
8,Moonee Valley,2023_12,-2475.00891
9,Moonee Valley,2024_01,926.952821


In [57]:
def ensure_all_months_present(df, months=('2023_12', '2024_01', '2024_02')):
    """Return ``df`` padded so that every venue has a row for every month.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``venue``, ``month`` and ``gpl`` columns. It is not
        modified.
    months : sequence of str or str, optional
        Month labels every venue should have. Defaults to the three months
        the notebook was written for.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index: the original rows in their
        original order, followed by one ``gpl = 0`` row per missing
        (venue, month) pair, ordered by venue and then by ``months``. Any
        other columns are left missing (NaN) on the added rows.
    """
    # Accept a single label as well as a sequence of labels.
    months = [months] if isinstance(months, str) else list(months)

    # Months already present for each venue (venues come out sorted).
    seen_by_venue = df.groupby('venue')['month'].unique()

    missing_rows = [
        {'venue': venue, 'month': month, 'gpl': 0}
        for venue, seen in seen_by_venue.items()
        for month in months
        if month not in set(seen)
    ]

    # Build the padding once and concatenate, instead of writing to
    # df.loc[len(df)]: that label can already exist when the index is not
    # 0..n-1, in which case an existing row would be overwritten.
    frames = [df]
    if missing_rows:
        frames.append(pd.DataFrame(missing_rows))

    return pd.concat(frames, ignore_index=True)

# Pad the venue/month table so every venue has a row for each month (missing
# months get gpl = 0), then display it.
signif_summary_month = ensure_all_months_present(signif_summary_month)
signif_summary_month

Unnamed: 0,venue,month,gpl
0,Bendigo,2023_12,-1122.351179
1,Bendigo,2024_02,4544.292569
2,Caulfield,2023_12,-1854.004464
3,Caulfield,2024_01,-181.78319
4,Caulfield,2024_02,2398.172423
5,Flemington,2023_12,-1294.60873
6,Flemington,2024_01,2350.863514
7,Flemington,2024_02,-405.645794
8,Moonee Valley,2023_12,-2475.00891
9,Moonee Valley,2024_01,926.952821


In [58]:
# Create a bar chart with grouped bars for each venue and month
# Grouped bar chart of GPL per venue, with one bar per month in each group.
fig = go.Figure()

colors = ['blue', 'green', 'red']

# 'YYYY_MM' labels sort chronologically as plain strings, so sorting gives a
# stable month -> colour mapping that does not depend on row order.
months = sorted(signif_summary_month['month'].unique())

# Keep venues on the x-axis in the order they first appear in the table.
venues = signif_summary_month['venue'].unique().tolist()

# One trace per month, not one per (venue, month) pair: with barmode='group'
# every trace takes its own slot inside each category, so per-pair traces give
# thin, staggered bars. One trace per month also means the legend has exactly
# one entry per month and toggles that month across all venues.
for month_index, month in enumerate(months):
    month_data = signif_summary_month[signif_summary_month['month'] == month]
    fig.add_trace(
        go.Bar(
            x=month_data['venue'],
            y=month_data['gpl'],
            name=month,
            # Cycle through the palette so more than three months cannot
            # index past the end of the list.
            marker_color=colors[month_index % len(colors)],
        )
    )

# Update layout for better visualization
fig.update_layout(barmode='group', title_text="GPL by Venue and Month")
fig.update_xaxes(categoryorder='array', categoryarray=venues)

# Display the plot
fig.show()