In [7]:
import altair as alt
import pandas as pd

# Results table; game_date is parsed as a datetime so it can be used as a
# join key against the goals table, which is parsed the same way.
res = pd.read_csv(
    'data/results.csv',
    parse_dates=['game_date'],
)
# Keep only the columns needed to attach season/competition to other tables.
res_with_comp = res.loc[:, ['game_date', 'season', 'competition']].copy()

goals = pd.read_csv(
    'data/goals.csv',
    parse_dates=['game_date'],
)

players = pd.read_csv('data/players.csv')
# Lookup of player_id -> player_name.
player_names = players.loc[:, ['player_id', 'player_name']].copy()

In [12]:
# Count goals per (season, player, competition).
# Player names are attached via player_id, season/competition via game_date;
# rows where own_goal equals 1 are dropped before counting.
df = (
    goals
    .merge(player_names, on='player_id', how='left')
    .merge(res_with_comp, on='game_date', how='left')
    .query('own_goal != 1')
    .groupby(['season', 'player_name', 'competition'])
    .size()
    .reset_index(name='goals')
)

df

Unnamed: 0,season,player_name,competition,goals
0,1921/22,Billy Rainford,Division Three (North),3
1,1921/22,Charles Milnes,Division Three (North),2
2,1921/22,Charlie Cunningham,Division Three (North),6
3,1921/22,David Fulton,Division Three (North),3
4,1921/22,Denis Bullough,Division Three (North),5
...,...,...,...,...
1823,2024/25,Harvey Saunders,Carabao Cup,1
1824,2024/25,Josh Davison,Bristol Street Motors Trophy,1
1825,2024/25,Josh Williams,Carabao Cup,1
1826,2024/25,Omari Patrick,Carabao Cup,1


In [17]:
# Function to create the chart
def create_stacked_bar_chart(df, season):
    """Build a stacked horizontal bar chart of goals per player for one season.

    Each player gets one bar, split into coloured segments by competition.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'season', 'player_name', 'competition'
        and 'goals'.
    season : str
        Value of the 'season' column to plot, e.g. '2023/24'.

    Returns
    -------
    alt.Chart
        The stacked bar chart, with players ordered by their total goals
        across all competitions (highest first).
    """
    # Filter the DataFrame for the given season
    season_df = df[df['season'] == season]

    # Order players by their TOTAL goals. Sorting the raw rows would rank
    # players by their best single competition and list a player once per
    # competition they scored in, so the totals are summed first.
    # A stable sort keeps tied players in a deterministic order.
    player_order = (
        season_df.groupby('player_name')['goals']
        .sum()
        .sort_values(ascending=False, kind='mergesort')
        .index
        .tolist()
    )

    # Create the chart
    chart = alt.Chart(season_df).mark_bar().encode(
        y=alt.Y('player_name:N', sort=player_order, title='player_name'),
        x=alt.X('goals:Q', title='Goals'),
        color=alt.Color('competition:N', title='Competition'),
        # Stack the segments in alphabetical competition order.
        order=alt.Order('competition:N', sort='ascending'),
        tooltip=['player_name', 'competition', 'goals']
    ).properties(
        title=f'Goal Distribution by Player and Competition ({season} Season)',
        width=600,
        height=400
    )

    return chart

# Build the chart for the 2023/24 season and display it as the cell output.
chart = create_stacked_bar_chart(df, season='2023/24')

chart

In [44]:
import pandas as pd
import altair as alt

def create_stacked_bar_chart(df, season, n_scorers=3, competition_order=None):
    """Build a stacked bar chart of the top scorers of one season.

    Each of the top ``n_scorers`` players gets one horizontal bar, split into
    coloured segments by competition, with the player's total goals written
    at the end of the bar.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'season', 'player_name', 'competition'
        and 'goals'.
    season : str
        Value of the 'season' column to plot, e.g. '2023/24'.
    n_scorers : int, default 3
        Number of players (ranked by total goals across competitions) to show.
    competition_order : sequence of str, optional
        Preferred stacking/legend order of competitions. Defaults to
        ['League Two', 'The Emirates FA Cup', 'Carabao Cup']. Competitions
        present in the plotted data but missing from this list are appended
        after it in alphabetical order.

    Returns
    -------
    alt.LayerChart
        Bars layered with the total-goals text labels.
    """
    # Filter the DataFrame for the given season
    season_df = df[df['season'] == season].copy()

    # Total goals per player across all competitions, highest first, limited
    # to the requested number of scorers. The sort is stable so tied players
    # come out in a deterministic order.
    player_totals = (
        season_df.groupby('player_name', as_index=False)['goals']
        .sum()
        .rename(columns={'goals': 'total_goals'})
        .sort_values('total_goals', ascending=False, kind='mergesort')
        .head(n_scorers)
        .reset_index(drop=True)
    )
    player_order = player_totals['player_name'].tolist()

    # Keep only the rows of the selected players and attach their totals
    season_df = (
        season_df[season_df['player_name'].isin(player_order)]
        .merge(player_totals, on='player_name')
    )

    # Preferred competition order first, then any other competition that
    # actually occurs in the plotted rows. Without the extras, a competition
    # outside the preferred list would get a NaN stacking order and would
    # fall outside the colour scale's explicit domain.
    if competition_order is None:
        preferred = ['League Two', 'The Emirates FA Cup', 'Carabao Cup']
    else:
        preferred = list(competition_order)
    extras = sorted(set(season_df['competition'].dropna()) - set(preferred))
    competition_domain = preferred + extras

    # Numeric rank of each competition, used to order the stacked segments
    competition_rank = {comp: i for i, comp in enumerate(competition_domain)}
    season_df = season_df.assign(
        competition_order=season_df['competition'].map(competition_rank)
    )

    # Calculate height based on number of players (50 pixels per player, minimum 200)
    chart_height = max(len(player_order) * 50, 200)

    # Stacked bars: one segment per (player, competition) row
    bars = alt.Chart(season_df).mark_bar().encode(
        y=alt.Y('player_name:N', sort=player_order, title=None),
        x=alt.X('goals:Q', title=None),
        color=alt.Color('competition:N',
                        scale=alt.Scale(domain=competition_domain),
                        title='Competition'),
        order=alt.Order('competition_order:Q', sort='ascending'),
        tooltip=['player_name', 'competition', 'goals']
    )

    # Total-goals labels, drawn from the one-row-per-player frame so each
    # label is rendered once instead of once per competition row.
    text = alt.Chart(player_totals).mark_text(align='left', dx=5).encode(
        y=alt.Y('player_name:N', sort=player_order, title=None),
        x=alt.X('total_goals:Q'),
        text=alt.Text('total_goals:Q', format='.0f')
    )

    # Combine the chart elements
    chart = (bars + text).properties(
        title=f'Top {n_scorers} Goal Scorers by Competition ({season} Season)',
        width=600,
        height=chart_height
    ).configure_legend(
        orient='bottom'  # Move legend to the bottom
    ).resolve_scale(
        x='independent'
    )

    return chart

# Build the top-five scorers chart for 2023/24 and display it as the cell output.
chart = create_stacked_bar_chart(df, season='2023/24', n_scorers=5)
chart

In [46]:
seasons = ['2024/25', '2023/24']

# Rows of df belonging to the listed seasons, copied into an independent frame.
d = df.loc[df['season'].isin(seasons)].copy()

# Largest goal count among the selected rows.
d['goals'].max()

np.int64(12)