## Import libraries

In [116]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os


## Read Data

In [355]:
# Load the actual match results from disk and show them
df_results = pd.read_csv(filepath_or_buffer='data/results.csv')
df_results

Unnamed: 0,match_code,home,away,actual_score,matchday,stage,stage_detail,match_time
0,M01,Germany,Scotland,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00
1,M03,Spain,Croatia,2-0,1,Groups,01. Groups - Matchday 1,15/06/2024 17:00:00
2,M05,Serbia,England,3-0,1,Groups,01. Groups - Matchday 1,16/06/2024 20:00:00
3,M08,Austria,France,1-3,1,Groups,01. Groups - Matchday 1,17/06/2024 20:00:00
4,M12,Portugal,Czech Republic,4-1,1,Groups,01. Groups - Matchday 1,18/06/2024 20:00:00
5,M13,Scotland,Switzerland,0-0,2,Groups,02. Groups - Matchday 2,19/06/2024 20:00:00
6,M16,Spain,Italy,1-0,2,Groups,02. Groups - Matchday 2,20/06/2024 17:00:00
7,M17,Denmark,England,0-1,2,Groups,02. Groups - Matchday 2,20/06/2024 20:00:00
8,M20,Netherlands,France,2-1,2,Groups,02. Groups - Matchday 2,21/06/2024 20:00:00
9,M22,Belgium,Romania,1-2,2,Groups,02. Groups - Matchday 2,22/06/2024 20:00:00


In [None]:
# Load the raw prediction responses, one CSV per matchday
md1_responses, md2_responses = (
    pd.read_csv(responses_path)
    for responses_path in ('data/md1_responses.csv', 'data/md2_responses.csv')
)
# md3_responses = pd.read_csv('data/md3_responses.csv')

## Transform Data

- wide to long format using melt
- union the different matchday dataframes to form one
- create new points multiplier column based on user selected bonus match
- create new columns for home and away teams 
- inner join the predictions and results dataframes on match_code, home and away

In [356]:
# Rename the raw response columns to snake_case, then reshape wide -> long:
# every column other than the three id columns becomes one row whose header
# is stored in match_code and whose cell value is stored in predicted_score.
md1_responses = (
    md1_responses
    .rename(columns={
        'Timestamp': 'timestamp',
        'Name': 'name',
        'Which game would you like to give double points to?': 'bonus_match_code',
    })
    .melt(
        id_vars=['timestamp', 'name', 'bonus_match_code'],
        var_name='match_code',
        value_name='predicted_score',
    )
)
# preview
md1_responses.head()

Unnamed: 0,timestamp,name,bonus_match_code,match_code,predicted_score
0,27/05/2024 20:29:29,Corfe,M01. Germany v Scotland,M01. Germany v Scotland,1-0
1,27/05/2024 20:29:29,Ed,M03. Spain v Croatia,M01. Germany v Scotland,1-0
2,27/05/2024 20:29:29,Jay,M08. Austria v France,M01. Germany v Scotland,1-1
3,27/05/2024 20:29:29,Jonny,M08. Austria v France,M01. Germany v Scotland,1-0
4,27/05/2024 20:29:29,Larry,M08. Austria v France,M01. Germany v Scotland,1-0


In [357]:
# Column headers of the raw matchday 2 responses mapped to snake_case names
md2_column_names = {
    'Timestamp': 'timestamp',
    'Name': 'name',
    'Which game would you like to give double points to?': 'bonus_match_code',
}
# Keep the three id columns and unpivot every other column into
# (match_code, predicted_score) rows
md2_id_columns = ['timestamp', 'name', 'bonus_match_code']
md2_responses = md2_responses.rename(columns=md2_column_names).melt(
    id_vars=md2_id_columns,
    var_name='match_code',
    value_name='predicted_score',
)
# preview
md2_responses.head()

Unnamed: 0,timestamp,name,bonus_match_code,match_code,predicted_score
0,27/05/2024 20:29:29,Corfe,M17. Denmark v England,M13. Scotland v Switzerland,1-0
1,27/05/2024 20:29:29,Ed,M22. Belgium v Romania,M13. Scotland v Switzerland,1-0
2,27/05/2024 20:29:29,Jay,M16. Spain v Italy,M13. Scotland v Switzerland,1-1
3,27/05/2024 20:29:29,Jonny,M17. Denmark v England,M13. Scotland v Switzerland,1-0
4,27/05/2024 20:29:29,Larry,M17. Denmark v England,M13. Scotland v Switzerland,1-0


In [None]:
# md3_responses = md3_responses.rename(columns={'Timestamp': 'timestamp', 'Name': 'name', 'Which game would you like to give double points to?': 'bonus_match_code'})
# # transform dataframe from wide to long
# md3_responses = md3_responses.melt(id_vars=['timestamp', 'name', 'bonus_match_code'],
#                                      var_name='match_code', value_name='predicted_score')
# # preview
# md3_responses.head()

In [358]:
# Union the per-matchday prediction frames row-wise and give the result a
# fresh 0..n-1 index, then display it
df_predictions = pd.concat([md1_responses, md2_responses], axis=0).reset_index(drop=True)
df_predictions

Unnamed: 0,timestamp,name,bonus_match_code,match_code,predicted_score
0,27/05/2024 20:29:29,Corfe,M01. Germany v Scotland,M01. Germany v Scotland,1-0
1,27/05/2024 20:29:29,Ed,M03. Spain v Croatia,M01. Germany v Scotland,1-0
2,27/05/2024 20:29:29,Jay,M08. Austria v France,M01. Germany v Scotland,1-1
3,27/05/2024 20:29:29,Jonny,M08. Austria v France,M01. Germany v Scotland,1-0
4,27/05/2024 20:29:29,Larry,M08. Austria v France,M01. Germany v Scotland,1-0
...,...,...,...,...,...
95,27/05/2024 20:29:29,Luke,M20. Netherlands v France,M22. Belgium v Romania,0-0
96,27/05/2024 20:29:29,Marc,M17. Denmark v England,M22. Belgium v Romania,1-1
97,27/05/2024 20:29:29,Tom,M17. Denmark v England,M22. Belgium v Romania,1-1
98,27/05/2024 20:29:29,Rando Randal,M17. Denmark v England,M22. Belgium v Romania,1-1


In [359]:
# Create a new points_multiplier column: 2 on rows where the melted match
# header equals the player's chosen bonus match, 1 on every other row.
# The comparison is vectorised instead of a row-wise apply, so it also works
# when df_predictions has no rows (apply(axis=1) on an empty frame does not
# return a Series that can be assigned to a single column).
is_bonus_match = df_predictions['bonus_match_code'] == df_predictions['match_code']
df_predictions['points_multiplier'] = is_bonus_match.astype('int64') + 1

In [360]:
# Split the melted column header (e.g. "M01. Germany v Scotland") into the
# match code, home team and away team.
# The dot after the code is escaped so it matches only a literal "." rather
# than any character. Headers that do not fit the pattern yield NaN in all
# three columns.
df_predictions[['match_code', 'home', 'away']] = df_predictions['match_code'].str.extract(r'(M\d{2})\. (.+) v (.+)')
# Keep only the columns needed downstream (bonus_match_code is already
# folded into points_multiplier)
df_predictions = df_predictions[['timestamp', 'name', 'match_code', 'home', 'away', 'predicted_score', 'points_multiplier']]
df_predictions


Unnamed: 0,timestamp,name,match_code,home,away,predicted_score,points_multiplier
0,27/05/2024 20:29:29,Corfe,M01,Germany,Scotland,1-0,2
1,27/05/2024 20:29:29,Ed,M01,Germany,Scotland,1-0,1
2,27/05/2024 20:29:29,Jay,M01,Germany,Scotland,1-1,1
3,27/05/2024 20:29:29,Jonny,M01,Germany,Scotland,1-0,1
4,27/05/2024 20:29:29,Larry,M01,Germany,Scotland,1-0,1
...,...,...,...,...,...,...,...
95,27/05/2024 20:29:29,Luke,M22,Belgium,Romania,0-0,1
96,27/05/2024 20:29:29,Marc,M22,Belgium,Romania,1-1,1
97,27/05/2024 20:29:29,Tom,M22,Belgium,Romania,1-1,1
98,27/05/2024 20:29:29,Rando Randal,M22,Belgium,Romania,1-1,1


In [362]:
# Attach the actual result to every prediction. The inner join keeps only
# predictions whose (match_code, home, away) triple also appears in df_results.
df_merged = df_predictions.merge(
    df_results,
    how='inner',
    on=['match_code', 'home', 'away'],
)

In [363]:
# Preview the first five joined prediction/result rows
df_merged.head(n=5)

Unnamed: 0,timestamp,name,match_code,home,away,predicted_score,points_multiplier,actual_score,matchday,stage,stage_detail,match_time
0,27/05/2024 20:29:29,Corfe,M01,Germany,Scotland,1-0,2,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00
1,27/05/2024 20:29:29,Ed,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00
2,27/05/2024 20:29:29,Jay,M01,Germany,Scotland,1-1,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00
3,27/05/2024 20:29:29,Jonny,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00
4,27/05/2024 20:29:29,Larry,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00


## Functions to Calculate Points

- `get_predicted_outcome` function: creates new column `predicted_outcome` which states if the user predicted a Home, Away or Draw result
- `get_actual_outcome` function: creates new column `actual_outcome` which states if the actual result was Home, Away or Draw result
- `calculate_points` function: creates new column `base_points` where:
    + 1 point is awarded if `predicted_outcome` = `actual_outcome`
    + 3 points are awarded (instead of 1) if `predicted_score` = `actual_score`
- `total_points` is then created which is `base_points` * `points_multiplier` to award double points for user selected bonus matches
- `df_merged` contains one row per prediction with points

In [364]:
# Classify a predicted scoreline as a Home win, Away win or Draw
def get_predicted_outcome(predicted_score):
    """Return 'Home', 'Away' or 'Draw' for a 'home-away' score string such as '2-1'.

    Raises ValueError if the string does not split on '-' into exactly two
    integers.
    """
    home_goals, away_goals = (int(part) for part in predicted_score.split('-'))
    if home_goals == away_goals:
        return 'Draw'
    return 'Home' if home_goals > away_goals else 'Away'

In [365]:
# Derive the 'predicted_outcome' column (Home / Away / Draw) from each predicted score
predicted_scores = df_merged['predicted_score']
df_merged['predicted_outcome'] = predicted_scores.map(get_predicted_outcome)

In [367]:
# Create a function to determine the actual outcome
def get_actual_outcome(actual_score):
    """Return 'Home', 'Away' or 'Draw' for a final 'home-away' score string such as '1-3'.

    Raises ValueError if the string does not split on '-' into exactly two
    integers.
    """
    home_goals, away_goals = [int(goals) for goals in actual_score.split('-')]
    if home_goals > away_goals:
        return 'Home'
    if away_goals > home_goals:
        return 'Away'
    return 'Draw'

In [368]:
# Derive the 'actual_outcome' column (Home / Away / Draw) from each final score
actual_scores = df_merged['actual_score']
df_merged['actual_outcome'] = actual_scores.map(get_actual_outcome)

In [370]:
# Create a function to calculate points
def calculate_points(predicted_score, actual_score, predicted_outcome, actual_outcome):
    """Return the base points earned by a single prediction.

    Parameters
    ----------
    predicted_score, actual_score : str
        Scorelines in 'home-away' form, e.g. '2-1'.
    predicted_outcome, actual_outcome : str
        Outcome labels for the two scorelines (compared for equality only).

    Returns
    -------
    int
        3 if the predicted scoreline equals the actual scoreline,
        1 if only the outcomes match, otherwise 0.

    Raises
    ------
    ValueError
        If either scoreline does not split on '-' into exactly two integers.
    """
    home_pred, away_pred = map(int, predicted_score.split('-'))
    # Parse the *actual* score here (the prediction used to be parsed twice).
    home_res, away_res = map(int, actual_score.split('-'))

    # Compare parsed goals rather than raw strings so formatting differences
    # such as '1 - 0' vs '1-0' still count as an exact score.
    if (home_pred, away_pred) == (home_res, away_res):
        return 3
    if predicted_outcome == actual_outcome:
        return 1
    return 0

In [372]:
# Score every prediction: base_points comes from calculate_points, and
# total_points doubles it on rows where points_multiplier is 2
score_columns = ['predicted_score', 'actual_score', 'predicted_outcome', 'actual_outcome']
df_merged['base_points'] = df_merged[score_columns].apply(
    lambda row: calculate_points(*row),  # columns are passed positionally, in score_columns order
    axis=1,
)
df_merged['total_points'] = df_merged['base_points'].mul(df_merged['points_multiplier'])

# Persist the scored predictions
df_merged.to_csv('data/df_merged.csv')

In [373]:
# Show the scored predictions: one row per prediction, including the outcome and points columns
df_merged

Unnamed: 0,timestamp,name,match_code,home,away,predicted_score,points_multiplier,actual_score,matchday,stage,stage_detail,match_time,predicted_outcome,actual_outcome,base_points,total_points
0,27/05/2024 20:29:29,Corfe,M01,Germany,Scotland,1-0,2,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00,Home,Draw,0,0
1,27/05/2024 20:29:29,Ed,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00,Home,Draw,0,0
2,27/05/2024 20:29:29,Jay,M01,Germany,Scotland,1-1,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00,Draw,Draw,3,3
3,27/05/2024 20:29:29,Jonny,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00,Home,Draw,0,0
4,27/05/2024 20:29:29,Larry,M01,Germany,Scotland,1-0,1,1-1,1,Groups,01. Groups - Matchday 1,14/06/2024 20:00:00,Home,Draw,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,27/05/2024 20:29:29,Luke,M22,Belgium,Romania,0-0,1,1-2,2,Groups,02. Groups - Matchday 2,22/06/2024 20:00:00,Draw,Away,0,0
96,27/05/2024 20:29:29,Marc,M22,Belgium,Romania,1-1,1,1-2,2,Groups,02. Groups - Matchday 2,22/06/2024 20:00:00,Draw,Away,0,0
97,27/05/2024 20:29:29,Tom,M22,Belgium,Romania,1-1,1,1-2,2,Groups,02. Groups - Matchday 2,22/06/2024 20:00:00,Draw,Away,0,0
98,27/05/2024 20:29:29,Rando Randal,M22,Belgium,Romania,1-1,1,1-2,2,Groups,02. Groups - Matchday 2,22/06/2024 20:00:00,Draw,Away,0,0


## Final Dataframes for Streamlit

Items marked `**` are written with `to_csv` and used in the current Streamlit app.

- ** `points_by_name`: base and total points per name - used in the bar chart
- `points_by_name_stage`: base and total points per name by each stage - not yet used, but could be used for the bar chart where the user can filter on certain matchdays or stages
- `mode_predicted_scores`: most popular score prediction by each user - not used
- `mode_correct_predicted_scores`: each user's most popular predicted score among the predictions where the outcome (Home/Away/Draw) was correct - not used
- ** `vs_mean_df`: for each matchday, takes each user's cumulative total points up to that matchday, compares it with the mean cumulative total points across users and takes the delta - used in a trended line chart
- ** `vs_mean_match_code_df`: for each match_code (more granular), takes each user's cumulative total points up to that match, compares it with the mean cumulative total points across users and takes the delta - used in a trended line chart within the summary table near the top
- ** `overall_standings` (written to `data/overall_standings.csv`): overall points for each user, change in rank, trended line chart data - used as the summary table near the top

In [374]:
# Sum base and bonus-adjusted points per player and list the highest total first
points_by_name = (
    df_merged
    .groupby('name', as_index=False)[['base_points', 'total_points']]
    .sum()
    .sort_values(by='total_points', ascending=False)
)

In [375]:
# Plain-text view of the per-player totals (already sorted by total_points, descending)
print(points_by_name)

            name  base_points  total_points
2            Jay            9            10
1             Ed            6             9
0          Corfe            6             7
9            Tom            6             7
3          Jonny            3             5
4          Larry            3             5
7  Peter Popular            5             5
6           Marc            2             3
8   Rando Randal            2             3
5           Luke            2             2


In [376]:
# Base and total points per player for each matchday / stage
points_by_name_stage = (
    df_merged
    .groupby(['name', 'matchday', 'stage_detail'])
    .agg(
        base_points=('base_points', 'sum'),
        total_points=('total_points', 'sum'),
    )
    .reset_index()
)
points_by_name_stage

Unnamed: 0,name,matchday,stage_detail,base_points,total_points
0,Corfe,1,01. Groups - Matchday 1,2,2
1,Corfe,2,02. Groups - Matchday 2,4,5
2,Ed,1,01. Groups - Matchday 1,4,7
3,Ed,2,02. Groups - Matchday 2,2,2
4,Jay,1,01. Groups - Matchday 1,5,5
5,Jay,2,02. Groups - Matchday 2,4,5
6,Jonny,1,01. Groups - Matchday 1,1,2
7,Jonny,2,02. Groups - Matchday 2,2,3
8,Larry,1,01. Groups - Matchday 1,2,3
9,Larry,2,02. Groups - Matchday 2,1,2


In [377]:
# Persist both points summaries
for summary_frame, csv_path in (
    (points_by_name_stage, 'data/points_by_name_stage.csv'),
    (points_by_name, 'data/points_by_name.csv'),
):
    summary_frame.to_csv(csv_path)

In [378]:
# Bar colours: gold, silver, bronze for the first three rows of points_by_name
# (which is sorted by total_points, descending), purple for every other row.
# The purple tail is sized from the data so the sequence never runs out and
# wraps back to gold when there are many players.
podium_colors = ['#ffd700', '#C0C0C0', '#B87333']
other_color = '#8850be'
bar_colors = podium_colors + [other_color] * max(len(points_by_name) - len(podium_colors), 0)

# Create the bar chart (one bar, and therefore one colour, per name)
fig = px.bar(points_by_name, x='name', y='total_points', color='name',
             # label keys must match column names; 'total_points' is the plotted column
             labels={'name': 'Name', 'total_points': 'Total Points'},
             title='Euro 2024 Total Points by Name',
             color_discrete_sequence=bar_colors)

# Customize the layout
fig.update_layout(
    xaxis_title='Player Names',
    yaxis_title='Total Points',
    font=dict(family='Arial', size=12),
    title_font=dict(family='Arial', size=16),
    paper_bgcolor='rgba(0,0,0,0)',  # transparent background
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=False,  # Hide the legend; the settings below only apply if it is re-enabled
    legend_title_text='Legend',
    legend_font=dict(family='Arial', size=10),
    legend_traceorder='reversed'
)

# Show the plot
fig.show()

In [379]:
# Most frequently predicted score per player; when several scores tie, the
# first value returned by mode() is kept
predicted_by_player = df_merged.groupby('name')['predicted_score']
mode_predicted_scores = (
    predicted_by_player
    .agg(lambda scores: scores.mode().iloc[0])
    .reset_index()
)
mode_predicted_scores

Unnamed: 0,name,predicted_score
0,Corfe,1-0
1,Ed,0-4
2,Jay,0-3
3,Jonny,0-1
4,Larry,0-2
5,Luke,0-0
6,Marc,0-4
7,Peter Popular,2-0
8,Rando Randal,1-1
9,Tom,1-1


In [380]:
# Most frequently predicted score per player, restricted to predictions whose
# outcome (Home / Away / Draw) matched the actual outcome
outcome_is_correct = df_merged['predicted_outcome'].eq(df_merged['actual_outcome'])
correct_predictions_df = df_merged[outcome_is_correct]
mode_correct_predicted_scores = (
    correct_predictions_df
    .groupby('name')['predicted_score']
    .agg(lambda scores: scores.mode().iloc[0])
    .reset_index()
)
mode_correct_predicted_scores

Unnamed: 0,name,predicted_score
0,Corfe,1-0
1,Ed,2-0
2,Jay,1-1
3,Jonny,0-1
4,Larry,0-4
5,Luke,0-4
6,Marc,0-4
7,Peter Popular,0-3
8,Rando Randal,0-4
9,Tom,1-1


In [381]:
# Match codes ordered by their numeric part ("M03" -> 3)
match_code_order = sorted(df_merged['match_code'].unique(), key=lambda code: int(code[1:]))

# Independent copy holding only the columns the chart needs
plot_columns = ['timestamp', 'name', 'match_code', 'home', 'away', 'predicted_score', 'actual_score', 'total_points']
plot_df = df_merged[plot_columns].copy()

# An ordered categorical makes sorting (and the x axis) follow match order
plot_df['match_code'] = pd.Categorical(plot_df['match_code'], categories=match_code_order, ordered=True)
plot_df = plot_df.sort_values(by='match_code')

# Running total of points per player, in match order
plot_df['cumulative_total_points'] = plot_df.groupby('name')['total_points'].cumsum()


def _hover_text(row):
    """Build the hover text for a single prediction row."""
    return f"Player: {row['name']}<br>{row['home']} vs {row['away']}<br>Matchday Points: {row['total_points']}<br>Predicted Score: {row['predicted_score']}<br>Actual Score: {row['actual_score']}"


# One line+marker trace per player
traces = [
    go.Scatter(
        x=player_rows['match_code'],
        y=player_rows['cumulative_total_points'],
        mode='lines+markers',
        name=player_name,
        text=player_rows.apply(_hover_text, axis=1),
        hovertemplate='<b>%{text}</b><br>Match: %{x}<br>Cumulative Points: %{y}<extra></extra>',
    )
    for player_name, player_rows in plot_df.groupby('name')
]

# Titles, axis labels and a horizontal legend
layout = go.Layout(
    title='Cumulative Total Points Over Time',
    xaxis=dict(title=' '),
    yaxis=dict(title='Cumulative Total Points'),
    legend=dict(orientation='h'),
)

# Assemble and render the figure
fig = go.Figure(data=traces, layout=layout)
fig.show()

In [383]:
# Running total of points for each player across the rows of points_by_name_stage.
# NOTE(review): this relies on the rows being ordered by matchday within each
# name, which is presumably how the groupby that built the frame leaves them.
points_by_name_stage['cumulative_total_points'] = points_by_name_stage.groupby('name')['total_points'].cumsum()

# Mean of the players' running totals at each matchday
cumulative_avg_total_points = (
    points_by_name_stage
    .groupby('matchday')['cumulative_total_points']
    .mean()
    .reset_index()
    .rename(columns={'cumulative_total_points': 'cumulative_avg_total_points'})
)

# Attach the matchday mean to every player row. The columns are selected
# explicitly so that helper columns added to points_by_name_stage by other
# cells cannot leak into vs_mean_df (and its CSV) when cells are re-run out
# of order.
stage_columns = ['name', 'matchday', 'stage_detail', 'base_points', 'total_points', 'cumulative_total_points']
vs_mean_df = points_by_name_stage[stage_columns].merge(cumulative_avg_total_points, on='matchday')

# Gap between each player's running total and the matchday mean
vs_mean_df['difference'] = vs_mean_df['cumulative_total_points'] - vs_mean_df['cumulative_avg_total_points']

# write to csv
vs_mean_df.to_csv('data/vs_mean_df.csv')



In [391]:
# Inspect each player's cumulative points against the per-matchday mean
vs_mean_df

Unnamed: 0,name,matchday,stage_detail,base_points,total_points,cumulative_total_points,cumulative_avg_total_points,difference
0,Corfe,1,01. Groups - Matchday 1,2,2,2,2.8,-0.8
1,Corfe,2,02. Groups - Matchday 2,4,5,7,5.6,1.4
2,Ed,1,01. Groups - Matchday 1,4,7,7,2.8,4.2
3,Ed,2,02. Groups - Matchday 2,2,2,9,5.6,3.4
4,Jay,1,01. Groups - Matchday 1,5,5,5,2.8,2.2
5,Jay,2,02. Groups - Matchday 2,4,5,10,5.6,4.4
6,Jonny,1,01. Groups - Matchday 1,1,2,2,2.8,-0.8
7,Jonny,2,02. Groups - Matchday 2,2,3,5,5.6,-0.6
8,Larry,1,01. Groups - Matchday 1,2,3,3,2.8,0.2
9,Larry,2,02. Groups - Matchday 2,1,2,5,5.6,-0.6


In [392]:
# Line chart (one line per player) of the gap to the mean cumulative score, by matchday
fig = px.line(
    vs_mean_df,
    x='matchday',
    y='difference',
    color='name',
    title='Difference vs Mean Over Time',
    labels={'matchday': 'Matchday', 'difference': 'Difference vs Mean'},
)

# Same fonts and transparent backgrounds as the bar chart
fig.update_layout(
    xaxis_title='Matchday',
    yaxis_title='Difference vs Mean',
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    title_font=dict(family='Arial', size=16),
    font=dict(family='Arial', size=12),
)

fig.show()

In [387]:
# Base and total points per player for every individual match
points_by_match_code = (
    df_merged
    .groupby(['name', 'match_code', 'stage_detail'])
    .agg({'base_points': 'sum', 'total_points': 'sum'})
    .reset_index()
)

# Running total of points for each player, in the row order produced above
points_by_match_code['cumulative_total_points'] = points_by_match_code.groupby('name')['total_points'].cumsum()

# Mean of the players' running totals at each match (not each matchday)
cumulative_avg_total_points = (
    points_by_match_code
    .groupby('match_code')['cumulative_total_points']
    .mean()
    .reset_index()
    .rename(columns={'cumulative_total_points': 'cumulative_avg_total_points'})
)

# Attach the per-match mean to every player row
vs_mean_match_code_df = points_by_match_code.merge(cumulative_avg_total_points, on='match_code')

# Gap between each player's running total and the per-match mean
vs_mean_match_code_df['difference'] = vs_mean_match_code_df['cumulative_total_points'] - vs_mean_match_code_df['cumulative_avg_total_points']

# write to csv
vs_mean_match_code_df.to_csv('data/vs_mean_match_code_df.csv')

In [394]:
# Plot each player's gap to the mean cumulative score, match by match.
# The x column holds match codes, so it is labelled 'Match' (not 'Matchday')
# to agree with the axis title set below.
fig = px.line(vs_mean_match_code_df, x='match_code', y='difference', color='name',
              labels={'match_code': 'Match', 'difference': 'Difference vs Mean'},
              title='Difference vs Mean Over Time')

# Customize the layout to match the theme of the earlier bar plot
fig.update_layout(
    font=dict(family='Arial', size=12),
    title_font=dict(family='Arial', size=16),
    paper_bgcolor='rgba(0,0,0,0)',  # transparent background
    plot_bgcolor='rgba(0,0,0,0)',
    xaxis_title='Match',
    yaxis_title='Difference vs Mean'
)

fig.show()

In [390]:
# Group by name to get overall standings: the last cumulative total of each
# player plus the full list of per-match 'difference' values (trend-line data)
overall_standings = vs_mean_match_code_df.groupby('name').agg({
    'cumulative_total_points': 'last',
    'difference': lambda x: x.tolist(),  # Convert difference values to list
}).reset_index()

# Sort the overall standings by 'cumulative_total_points' in descending order to determine position
overall_standings = overall_standings.sort_values(by='cumulative_total_points', ascending=False).reset_index(drop=True)

# Position is the 1-based row number after sorting, so players tied on points
# still receive distinct consecutive positions
overall_standings['position'] = overall_standings.index + 1

# Ensure 'position' column is of integer type
overall_standings['position'] = overall_standings['position'].astype(int)

# Per player: points from the previous row (0 when there is none) and the running total
points_by_name_stage['prev_total_points'] = points_by_name_stage.groupby('name')['total_points'].shift(1).fillna(0)
points_by_name_stage['cumulative_total_points'] = points_by_name_stage.groupby('name')['total_points'].cumsum()

# Rank players within each matchday by running total (1 = best, ties share the lowest rank)
points_by_name_stage['rank'] = points_by_name_stage.groupby('matchday')['cumulative_total_points'].rank(ascending=False, method='min')

# Shift rank to get previous matchday rank. A player's first row has no
# previous rank; use the current rank there so rank_change is 0 ("no change")
# instead of prev_rank=0 producing a spurious drop of -rank.
previous_rank = points_by_name_stage.groupby('name')['rank'].shift(1)
points_by_name_stage['prev_rank'] = previous_rank.fillna(points_by_name_stage['rank'])

# Calculate rank change (positive = moved up the table)
points_by_name_stage['rank_change'] = points_by_name_stage['prev_rank'] - points_by_name_stage['rank']

# Get the latest matchday rank change for each player
latest_matchday = points_by_name_stage['matchday'].max()
latest_rank_change = points_by_name_stage[points_by_name_stage['matchday'] == latest_matchday][['name', 'rank_change']]

# Merge the rank change with overall standings (left join: players without a
# row for the latest matchday get NaN)
overall_standings = overall_standings.merge(latest_rank_change, on='name', how='left')

# Calculate the points change from the previous matchday for each player
latest_matchday_points = points_by_name_stage[points_by_name_stage['matchday'] == latest_matchday][['name', 'cumulative_total_points']]
previous_matchday = latest_matchday - 1  # assumes matchday numbers are consecutive integers
previous_matchday_points = points_by_name_stage[points_by_name_stage['matchday'] == previous_matchday][['name', 'cumulative_total_points']]
previous_matchday_points.columns = ['name', 'prev_cumulative_total_points']

# Players with no previous-matchday row count as having started from 0 points
points_change_df = latest_matchday_points.merge(previous_matchday_points, on='name', how='left')
points_change_df['points_change'] = points_change_df['cumulative_total_points'] - points_change_df['prev_cumulative_total_points'].fillna(0)

# Merge the points change with overall standings
overall_standings = overall_standings.merge(points_change_df[['name', 'points_change']], on='name', how='left')

# Map a numeric rank change onto the emoji shown in the standings table
def rank_change_symbol(change):
    """Return the emoji for a rank change (positive = moved up the table).

    Any value that is not >= 2, <= -2, exactly 1 or exactly -1 (including 0
    and NaN) falls through to the "no change" symbol.
    """
    if change >= 2:
        return '🔥'  # up two or more places
    if change <= -2:
        return '💀'  # down two or more places
    if change == 1:
        return '👍'  # up one place
    if change == -1:
        return '😵'  # down one place
    return '🥱'  # no change in rank

# Translate each rank change into its emoji symbol
rank_changes = overall_standings['rank_change']
overall_standings['rank_change_symbol'] = rank_changes.map(rank_change_symbol)

# Keep only the summary-table columns, in display order
summary_columns = ['name', 'position', 'rank_change', 'rank_change_symbol', 'cumulative_total_points', 'points_change', 'difference']
overall_standings = overall_standings.loc[:, summary_columns]

# Persist the standings table
overall_standings.to_csv('data/overall_standings.csv')