In [28]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

def bet_eval_metrics(d):
    """Collapse a frame of bets into a single row of headline totals.

    The ``gpl`` and ``stake`` columns of ``d`` are summed, and ``pot`` is
    added as total ``gpl`` divided by total ``stake``.

    Returns a one-row DataFrame (index ``0``; columns ``gpl``, ``stake``,
    ``pot``).  If the total stake is exactly 0 the row is filtered out and
    an empty frame is returned.
    """
    totals = d.agg({"gpl": "sum", "stake": "sum"})

    # Series of totals -> single-row frame with one column per total.
    metrics = totals.to_frame().T
    metrics["pot"] = metrics["gpl"] / metrics["stake"]

    has_stake = metrics["stake"] != 0
    return metrics[has_stake]

# Cumulative PL by market to visually see trend and consistency
def bet_eval_chart_cPl(d):
    """Build a line chart of cumulative gross profit across markets.

    ``gpl`` is summed per ``market_id``; the markets are then numbered
    0..n-1 in the order the groupby yields them, and the running total of
    ``gpl`` (``cGpl``) is plotted against that number.

    Returns the Plotly Express figure; the caller's frame is not modified.
    """
    per_market = d.groupby('market_id').agg({'gpl': 'sum'})
    per_market = per_market.assign(
        market_number=np.arange(len(per_market)),
        cGpl=per_market['gpl'].cumsum(),
    )

    return px.line(
        per_market,
        x="market_number",
        y="cGpl",
        title='Cumulative Gross Profit',
        template='simple_white',
    )

def output_eval(summary):
    """Print the overall gross profit/loss percentage and the metrics row.

    The metrics come from ``bet_eval_metrics(summary)``; the percentage is
    total ``gpl`` over total ``stake`` times 100, taken from its first row.
    """
    metrics = bet_eval_metrics(summary)
    gpl_pct = (metrics["gpl"] / metrics["stake"] * 100).values[0]

    print(f'Total Gross Profit/Loss % = {gpl_pct}')
    print(metrics)

def all_summary(months=('2023_12', '2024_01', '2024_02'),
                base_dir='trade_result_all_tracks1'):
    """Load the monthly trade-summary CSVs and combine them into one frame.

    For each month ``m`` the file ``{base_dir}/{m}/{m}_summary.csv`` is read
    with ``market_id`` and ``selection_id`` kept as strings, tagged with a
    ``month`` column, and the frames are stacked in the order given.  Two
    derived columns are then added:

    * ``gpl``   = ``back_return + lay_return``
    * ``stake`` = ``back_v_sum + lay_v_sum``

    As a side effect the overall gross profit/loss percentage is printed.

    Parameters
    ----------
    months : iterable of str
        Month folder names / file prefixes to load.  Defaults to the three
        months this notebook analyses.
    base_dir : str
        Directory holding one sub-folder per month.  The default keeps the
        notebook's current location (``trade_result_all_tracks1``); pass
        ``'trade_result_all_tracks'`` to switch to the other result set.

    Returns
    -------
    pandas.DataFrame
        The combined summary.  Row labels are those of the individual files
        (they are not reset), so labels repeat across months.

    Raises
    ------
    FileNotFoundError
        If one of the monthly files does not exist.
    ValueError
        If ``months`` is empty (nothing to concatenate).
    IndexError
        If the total stake is 0, because ``bet_eval_metrics`` then returns
        no row to report.
    """
    frames = []
    for month in months:
        month_df = pd.read_csv(
            f'{base_dir}/{month}/{month}_summary.csv',
            dtype={'market_id': 'string', 'selection_id': 'string'},
        )
        month_df['month'] = month
        frames.append(month_df)

    summary = pd.concat(frames, axis=0)
    summary['gpl'] = summary['back_return'] + summary['lay_return']
    summary['stake'] = summary['back_v_sum'] + summary['lay_v_sum']

    # To persist the combined frame:
    # summary.to_csv(f'{base_dir}/all_months_summary.csv', index=False)

    eval_df = bet_eval_metrics(summary)
    print(f'Total Gross Profit/Loss % = {(eval_df["gpl"]/eval_df["stake"]*100).values[0]}')

    return summary

def all_raw_data(months=('2023_12', '2024_01', '2024_02'),
                 base_dir='extracted_data'):
    """Load the monthly preprocessed CSVs and combine them into one frame.

    For each month ``m`` the file ``{base_dir}/{m}/{m}_preprocessed.csv`` is
    read with ``market_id`` and ``selection_id`` kept as strings, tagged
    with a ``month`` column, and the frames are stacked in the order given.

    Parameters
    ----------
    months : iterable of str
        Month folder names / file prefixes to load.  Defaults to the three
        months this notebook analyses.
    base_dir : str
        Directory holding one sub-folder per month; change accordingly.

    Returns
    -------
    pandas.DataFrame
        The combined data.  Row labels are those of the individual files
        (they are not reset), so labels repeat across months.

    Raises
    ------
    FileNotFoundError
        If one of the monthly files does not exist.
    ValueError
        If ``months`` is empty (nothing to concatenate).
    """
    frames = []
    for month in months:
        month_df = pd.read_csv(
            f'{base_dir}/{month}/{month}_preprocessed.csv',
            dtype={'market_id': 'string', 'selection_id': 'string'},
        )
        month_df['month'] = month
        frames.append(month_df)

    raw = pd.concat(frames, axis=0)

    # To persist the combined frame:
    # raw.to_csv(f'{base_dir}/all_months_raw_data.csv', index=False)
    return raw

In [29]:
# Load the concatenated monthly summary frame (this call also prints the
# overall gross profit/loss percentage) and the concatenated monthly
# preprocessed frame.
summary = all_summary()
raw = all_raw_data()

Total Gross Profit/Loss % = -2.1979417807072


In [30]:
# Print the aggregate gpl / stake / pot row, then show the cumulative
# gross-profit line chart as the cell's output (last expression).
output_eval(summary)
bet_eval_chart_cPl(summary)

Total Gross Profit/Loss % = -2.1979417807072
            gpl         stake       pot
0 -54014.204178  2.457490e+06 -0.021979


In [31]:
# Row-to-row change in traded_volume within each market_id; the first row of
# a market has no predecessor, so it falls back to its own traded_volume.
# NOTE(review): this assumes traded_volume is a running total per market and
# that rows are in time order - confirm against the preprocessing step.
raw['vol'] = (
    raw
    .groupby('market_id')['traded_volume']
    .diff()
    .fillna(raw['traded_volume'])
)

# Total volume per venue, with venue as a regular column.
venue_raw = (
    raw
    .groupby('venue')
    .agg({'vol': 'sum'})
    .reset_index()
)

# Total GPL per venue, lowest first, with the venue's volume attached.
venue_summary = (
    summary
    .groupby('venue')
    .agg({'gpl': 'sum'})
    .sort_values('gpl', ascending=True)
    .merge(venue_raw, on='venue', how='left')
)
venue_summary

Unnamed: 0,venue,gpl,vol
0,Port Lincoln,-11317.541815,106821.85
1,Murray Bridge,-6394.641772,93426.63
2,Strathalbyn,-5539.844956,111443.89
3,Ascot,-4928.014638,509395.84
4,Belmont,-4722.631006,87793.82
...,...,...,...
124,Scone,5413.739269,25856.51
125,Geraldton,5661.326802,84515.09
126,Kilcoy,6128.946480,12569.93
127,Seymour,6982.497578,95093.65


In [32]:
# raw['vol'], venue_raw and venue_summary are already built by the previous
# cell from the same inputs; repeating that computation here would produce
# identical values, so this cell only displays the result.
venue_summary

Unnamed: 0,venue,gpl,vol
0,Port Lincoln,-11317.541815,106821.85
1,Murray Bridge,-6394.641772,93426.63
2,Strathalbyn,-5539.844956,111443.89
3,Ascot,-4928.014638,509395.84
4,Belmont,-4722.631006,87793.82
...,...,...,...
124,Scone,5413.739269,25856.51
125,Geraldton,5661.326802,84515.09
126,Kilcoy,6128.946480,12569.93
127,Seymour,6982.497578,95093.65


In [33]:
# Display venue_summary again; no statement has modified it since it was built.
venue_summary

Unnamed: 0,venue,gpl,vol
0,Port Lincoln,-11317.541815,106821.85
1,Murray Bridge,-6394.641772,93426.63
2,Strathalbyn,-5539.844956,111443.89
3,Ascot,-4928.014638,509395.84
4,Belmont,-4722.631006,87793.82
...,...,...,...
124,Scone,5413.739269,25856.51
125,Geraldton,5661.326802,84515.09
126,Kilcoy,6128.946480,12569.93
127,Seymour,6982.497578,95093.65


In [34]:
from plotly.subplots import make_subplots

# Dual-axis chart over all venues: GPL as bars on the primary y-axis and
# traded volume as a line on the secondary y-axis.

# The venue names serve as both tick positions and tick labels, presumably
# so that every venue gets its own label on the crowded axis.
tickvals = venue_summary['venue'].tolist()
ticktext = venue_summary['venue'].tolist()

fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Bar(x=venue_summary['venue'], y=venue_summary['gpl'], name="GPL"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=venue_summary['venue'], y=venue_summary['vol'], name="Volume"),
    secondary_y=True,
)

# Title and an enlarged canvas.
fig.update_layout(
    title_text="Total GPL and Volume by Venue",
    width=1300,
    height=600,
)

# X-axis: title, one tick per venue, and a small font to fit the labels.
fig.update_xaxes(
    title_text="Venue",
    tickvals=tickvals,
    ticktext=ticktext,
    tickfont=dict(size=8),
)

# One title per y-axis.
fig.update_yaxes(title_text="<b>GPL</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume</b>", secondary_y=True)

fig.show()

In [35]:
n = 5  # number of venues taken from each end of the GPL ranking

# venue_summary is ordered by ascending total gpl, so the first n rows are
# the n lowest-GPL venues and the last n rows the n highest.
signif_venues = (
    venue_summary['venue'].head(n).tolist()
    + venue_summary['venue'].tail(n).tolist()
)

# Keep only the summary rows that belong to those venues.
signif_summary = summary.loc[summary['venue'].isin(signif_venues)]
signif_summary

Unnamed: 0,market_id,selection_id,selection_name,venue,win,back_trades,lay_trades,lay_v_sum,back_v_sum,back_liability,back_return,lay_liability,lay_return,month,gpl,stake
1009,1.221908162,49261314,1. Playhouse Patron,Ascot,0,"{'p': [8.4, 8.4, 8.6, 8.6, 8.6, 8.6, 8.6, 8.6,...","{'p': [8.8, 8.4, 8.6, 8.6, 9.2, 9.0, 9.0, 9.0,...",6.150000,14.250000,14.250000,-14.250000,0.0,6.150000,2023_12,-8.100000,20.400000
1010,1.221908162,63085656,2. Tonis Spirit,Ascot,0,"{'p': [24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 26....","{'p': [28.0, 29.0, 28.0, 28.0, 28.0, 28.0, 28....",3.600000,44.300000,44.300000,-44.300000,0.0,3.600000,2023_12,-40.700000,47.900000
1011,1.221908162,61792136,3. Diamond Scene,Ascot,1,"{'p': [2.78, 2.74, 2.86, 2.86, 2.74, 2.86, 2.8...","{'p': [], 'v': []}",0.000000,15.550000,15.550000,24.703000,0.0,-0.000000,2023_12,24.703000,15.550000
1012,1.221908162,28720435,4. Bandalera Miss,Ascot,0,"{'p': [160.0, 100.0, 100.0, 100.0, 100.0, 100....","{'p': [110.0, 110.0, 110.0, 110.0, 110.0, 110....",2.550000,20.025000,20.025000,-20.025000,0.0,2.550000,2023_12,-17.475000,22.575000
1013,1.221908162,19945970,5. Friar Away,Ascot,0,"{'p': [170.0, 210.0, 210.0, 210.0, 210.0, 210....","{'p': [200.0, 200.0, 200.0, 200.0, 200.0, 200....",31.050000,36.650000,36.650000,-36.650000,0.0,31.050000,2023_12,-5.600000,67.700000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12275,1.225317315,66365203,6. Awesome Tycoon,Scone,0,"{'p': [18.5, 19.0, 20.0, 20.0, 21.0, 20.0, 20....","{'p': [30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30....",23.500000,32.125000,32.125000,-32.125000,0.0,23.500000,2024_02,-8.625000,55.625000
12276,1.225317315,65052155,12. Sabana,Scone,0,"{'p': [16.0, 18.0, 19.5, 19.5, 19.5, 19.5, 19....","{'p': [21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21....",3.333333,44.333333,44.333333,-44.333333,0.0,3.333333,2024_02,-41.000000,47.666667
12277,1.225317315,66365208,14. Himorher,Scone,0,"{'p': [13.5, 13.5, 13.0, 13.0, 12.5, 13.0, 13....","{'p': [18.5, 18.5, 18.5, 18.5, 18.5, 18.5, 18....",10.500000,56.333333,56.333333,-56.333333,0.0,10.500000,2024_02,-45.833333,66.833333
12278,1.225317315,66365209,15. Akauwheo,Scone,0,"{'p': [160.0, 130.0, 170.0, 140.0, 130.0, 130....","{'p': [170.0, 170.0, 170.0, 170.0, 170.0, 170....",15.000000,31.166667,31.166667,-31.166667,0.0,15.000000,2024_02,-16.166667,46.166667


In [36]:
# Total GPL per (venue, month) for the selected venues, as a flat frame with
# columns venue, month, gpl.
signif_summary_month = (
    signif_summary
    .groupby(['venue', 'month'])['gpl']
    .sum()
    .reset_index()
)
signif_summary_month

Unnamed: 0,venue,month,gpl
0,Ascot,2023_12,-4347.052349
1,Ascot,2024_01,1611.577859
2,Ascot,2024_02,-2192.540148
3,Belmont,2023_12,-1796.672088
4,Belmont,2024_01,-1372.889556
5,Belmont,2024_02,-1553.069362
6,Geraldton,2023_12,2468.971351
7,Geraldton,2024_01,1032.951557
8,Geraldton,2024_02,2159.403894
9,Kilcoy,2023_12,5728.191643


In [37]:
def ensure_all_months_present(df, months=('2023_12', '2024_01', '2024_02')):
    """Return ``df`` padded so every venue has a row for every month.

    For each venue present in ``df`` and each entry of ``months`` that the
    venue has no row for, a row with that venue, that month and ``gpl`` 0 is
    appended after the existing rows.  Appended rows are ordered by venue
    (sorted) and then by the order of ``months``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least ``venue`` and ``month`` columns (and normally
        ``gpl``).  It is not modified.
    months : iterable of str
        Months every venue must have.  Defaults to the three months this
        notebook analyses.

    Returns
    -------
    pandas.DataFrame
        A new frame.  If nothing was missing it is a copy of ``df``.
        Otherwise the existing rows are followed by the filler rows and the
        index is renumbered 0..n-1; new rows cannot safely be labelled
        ``len(df)``, because that label may already belong to another row.
        Any columns other than ``venue``/``month``/``gpl`` are NaN in the
        filler rows.
    """
    months = list(months)
    present = set(zip(df['venue'], df['month']))

    # Sorted, NaN-free venues: the same keys, in the same order, that
    # df.groupby('venue') would iterate over.
    venues = sorted(df['venue'].dropna().unique())

    missing = [
        {'venue': venue, 'month': month, 'gpl': 0}
        for venue in venues
        for month in months
        if (venue, month) not in present
    ]

    if not missing:
        return df.copy()

    # Build all filler rows at once instead of growing the frame row by row.
    return pd.concat([df, pd.DataFrame(missing)], ignore_index=True)

# Pad the per-venue monthly totals so that every venue has a row for each
# month (missing months get gpl 0), then display the result.
signif_summary_month = ensure_all_months_present(signif_summary_month)
signif_summary_month

Unnamed: 0,venue,month,gpl
0,Ascot,2023_12,-4347.052349
1,Ascot,2024_01,1611.577859
2,Ascot,2024_02,-2192.540148
3,Belmont,2023_12,-1796.672088
4,Belmont,2024_01,-1372.889556
5,Belmont,2024_02,-1553.069362
6,Geraldton,2023_12,2468.971351
7,Geraldton,2024_01,1032.951557
8,Geraldton,2024_02,2159.403894
9,Kilcoy,2023_12,5728.191643


In [38]:
# Grouped bar chart of GPL per venue, with one bar per month inside each venue.
#
# One trace is added per month (x = venues, y = that month's GPL) rather than
# one single-bar trace per venue/month pair. With barmode='group' Plotly
# reserves a slot for every trace at every x position, so many one-bar traces
# produce very thin bars that sit away from their venue label. One trace per
# month also makes each legend entry toggle that month across all venues.
fig = go.Figure()

colors = ['blue', 'green', 'red']

# Months and venues in order of first appearance in the frame.
month_order = signif_summary_month['month'].unique().tolist()
venue_order = signif_summary_month['venue'].unique().tolist()

for month_index, month in enumerate(month_order):
    month_data = signif_summary_month[signif_summary_month['month'] == month]
    fig.add_trace(
        go.Bar(
            x=month_data['venue'],
            y=month_data['gpl'],
            name=month,
            legendgroup=month,
            # Cycle through the palette so more than three months cannot
            # raise an IndexError.
            marker_color=colors[month_index % len(colors)],
        )
    )

# Update layout for better visualization
fig.update_layout(barmode='group', title_text="GPL by Venue and Month")

# Pin the venue order on the x-axis so it does not depend on which venues
# happen to appear first in the first month's trace.
fig.update_xaxes(categoryorder='array', categoryarray=venue_order)

# Display the plot
fig.show()

In [40]:
# Same dual-axis chart as for all venues, restricted to the first n and last
# n rows of venue_summary.
signif_venues_vol = pd.concat([venue_summary.head(n), venue_summary.tail(n)])

# The venue names serve as both tick positions and tick labels.
tickvals = signif_venues_vol['venue'].tolist()
ticktext = signif_venues_vol['venue'].tolist()

fig = make_subplots(specs=[[{"secondary_y": True}]])

# GPL as bars on the primary y-axis, volume as a line on the secondary one.
fig.add_trace(
    go.Bar(x=signif_venues_vol['venue'], y=signif_venues_vol['gpl'], name="GPL"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(x=signif_venues_vol['venue'], y=signif_venues_vol['vol'], name="Volume"),
    secondary_y=True,
)

# Title and an enlarged canvas.
fig.update_layout(
    title_text="Total GPL and Volume by Venue",
    width=1300,
    height=600,
)

# X-axis: title, one tick per venue, small label font.
fig.update_xaxes(
    title_text="Venue",
    tickvals=tickvals,
    ticktext=ticktext,
    tickfont=dict(size=8),
)

# One title per y-axis.
fig.update_yaxes(title_text="<b>GPL</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Volume</b>", secondary_y=True)

fig.show()