In [88]:
import json
import os
import urllib
import urllib.request

import matplotlib.colors as mcolors
import matplotlib.font_manager as fm
import matplotlib.gridspec as gridspec
import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pandas as pd
from highlight_text import fig_text, ax_text
from matplotlib import cm
from matplotlib.colors import LinearSegmentedColormap, Normalize
from PIL import Image

In [97]:
#read data from fbref
def get_data(url, table_name):
    """Read one table from an fbref squad page and keep its matchweek rows.

    Parameters
    ----------
    url : str
        Address of the squad page. The last path segment with ``-Stats``
        removed is stored in the ``Team`` column.
    table_name : int
        Position of the wanted table in the list returned by
        ``pd.read_html(url)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Round, Team, GF, xG, GA, xGA``, restricted to rows whose
        ``Round`` contains ``'Matchweek'`` and that have no missing values.
    """
    tables = pd.read_html(url)

    # Team name is the last URL segment; tolerate a trailing slash so the
    # name does not come out empty.
    team_name = url.rstrip('/').split('/')[-1].replace('-Stats', '')

    # Copy the column subset so adding `Team` never writes into a view of
    # the parsed table.
    df = tables[table_name][['Round', 'GF', 'xG', 'GA', 'xGA']].copy()
    df.insert(1, 'Team', team_name)

    # na=False: a missing Round yields NaN from str.contains, and a mask
    # containing NaN cannot be used for boolean indexing.
    is_matchweek = df['Round'].str.contains('Matchweek', na=False)
    df = df[is_matchweek]

    df = df.dropna()
    return df
 
# fbref squad pages whose match tables are downloaded below
urls = [
    'https://fbref.com/en/squads/d48ad4ff/Napoli-Stats',
    'https://fbref.com/en/squads/922493f3/Atalanta-Stats',
    'https://fbref.com/en/squads/dc56fe14/Milan-Stats',
    'https://fbref.com/en/squads/d609edc0/Internazionale-Stats',
    'https://fbref.com/en/squads/cf74a709/Roma-Stats',
    'https://fbref.com/en/squads/7213da33/Lazio-Stats',
    'https://fbref.com/en/squads/04eea015/Udinese-Stats',
]

# Fetch every page in turn; 1 is the position of the wanted table on the page
lists = []
for url in urls:
    lists += [get_data(url, 1)]

# Stack the per-club frames and write them out as a single CSV file
pd.concat(lists).to_csv(r'team.csv', sep=',', encoding='utf-8-sig', index=False)

# Read the file back, rename the columns and add goals scored minus xG
df = pd.read_csv("team.csv", header=0)
df.columns = ['Round', 'team', 'GF', 'xG', 'GA', 'xGA']
df['diff'] = df['GF'] - df['xG']
df['team'] = df['team'].str.replace('Internazionale','Inter')

# Join the columns of the club lookup file onto each row by club name
df1 = pd.read_csv('Loghi_SerieA.csv')
df = df.merge(df1, on='team')

df.head()

Unnamed: 0,Round,team,GF,xG,GA,xGA,diff,team_id
0,Matchweek 1,Napoli,5.0,2.3,2.0,1.2,2.7,9875
1,Matchweek 2,Napoli,4.0,2.8,0.0,0.1,1.2,9875
2,Matchweek 3,Napoli,0.0,1.1,0.0,0.4,-1.1,9875
3,Matchweek 4,Napoli,1.0,2.1,1.0,1.0,-1.1,9875
4,Matchweek 5,Napoli,2.0,2.1,1.0,0.4,-0.1,9875


In [98]:
# Lookup table pairing each club name with its FotMob id, one (team, id) pair per row
team_map = pd.DataFrame(
    [
        ("Atalanta", 8524),
        ("Bologna", 9857),
        ("Cremonese", 7801),
        ("Empoli", 8534),
        ("Fiorentina", 8535),
        ("Inter", 8636),
        ("Juventus", 9885),
        ("Lazio", 8543),
        ("Lecce", 9888),
        ("Milan", 8564),
        ("Monza", 6504),
        ("Napoli", 9875),
        ("Roma", 8686),
        ("Salernitana", 6480),
        ("Sampdoria", 9882),
        ("Sassuolo", 7943),
        ("Spezia", 9881),
        ("Torino", 9804),
        ("Udinese", 8600),
        ("Verona", 9876),
    ],
    columns=['team', 'fotmob_id'],
)
team_map

Unnamed: 0,team,fotmob_id
0,Atalanta,8524
1,Bologna,9857
2,Cremonese,7801
3,Empoli,8534
4,Fiorentina,8535
5,Inter,8636
6,Juventus,9885
7,Lazio,8543
8,Lecce,9888
9,Milan,8564


In [99]:
# League page on fbref; the first table parsed from it supplies season totals
URL = 'https://fbref.com/en/comps/11/Serie-A-Stats'

# Keep the club name plus goal / xG totals, with the club column called `team`
df_top = pd.read_html(URL)[0][['Squad', 'GF', 'GA', 'xG', 'xGA']].rename(columns={'Squad': 'team'})

# Use the same short club name that team_map uses
df_top['team'] = df_top['team'].str.replace('Hellas Verona','Verona')

# Goals scored minus expected goals
df_top['diff'] = df_top['GF'] - df_top['xG']

# Join on the columns the two frames share
df_top = df_top.merge(team_map)
df_top

Unnamed: 0,team,GF,GA,xG,xGA,diff,fotmob_id
0,Napoli,25,9,21.4,9.1,3.6,9875
1,Atalanta,16,6,15.6,8.9,0.4,8524
2,Milan,20,10,18.0,9.4,2.0,8564
3,Roma,13,9,20.1,7.0,-7.1,8686
4,Lazio,21,5,13.7,13.0,7.3,8543
5,Udinese,19,10,13.6,10.4,5.4,8600
6,Inter,18,14,16.6,8.5,1.4,8636
7,Juventus,13,7,13.7,11.5,-0.7,9885
8,Sassuolo,12,12,13.4,12.8,-1.4,7943
9,Empoli,9,11,9.8,14.5,-0.8,8534


In [100]:
def get_cumGF_df(team, data=None):
    """Return one team's rows with running totals added.

    Parameters
    ----------
    team : str
        Value to match against the ``team`` column.
    data : pandas.DataFrame, optional
        Frame with ``team``, ``GF``, ``xG`` and ``diff`` columns. When
        omitted, the notebook-level ``df`` is looked up at call time, so a
        rebuilt ``df`` is picked up without re-running this cell.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding the matching rows in
        their original order and index, plus ``cum_GF`` (running sum of
        ``GF``), ``cum_xpoints`` (running sum of ``xG``, despite the name)
        and ``cum_diff`` (running sum of ``diff``). Empty when no row matches.
    """
    if data is None:
        data = df

    # Filter first, then copy: only the team's rows are duplicated, and the
    # column assignments below write to an independent frame rather than a
    # slice (which triggers SettingWithCopyWarning).
    team_df = data.loc[data['team'] == team].copy()
    team_df['cum_GF'] = team_df['GF'].cumsum()
    team_df['cum_xpoints'] = team_df['xG'].cumsum()
    team_df['cum_diff'] = team_df['diff'].cumsum()
    return team_df

# Sanity check on one club; no `data` argument is passed, so the function's default frame is used
get_cumGF_df('Inter')

Unnamed: 0,Round,team,GF,xG,GA,xGA,diff,team_id,cum_GF,cum_xpoints,cum_diff
30,Matchweek 1,Inter,2.0,2.5,1.0,0.5,-0.5,8636,2.0,2.5,-0.5
31,Matchweek 2,Inter,3.0,2.5,0.0,0.5,0.5,8636,5.0,5.0,0.0
32,Matchweek 3,Inter,1.0,1.6,3.0,0.8,-0.6,8636,6.0,6.6,-0.6
33,Matchweek 4,Inter,3.0,2.3,1.0,1.1,0.7,8636,9.0,8.9,0.1
34,Matchweek 5,Inter,2.0,1.6,3.0,1.5,0.4,8636,11.0,10.5,0.5
35,Matchweek 6,Inter,1.0,0.9,0.0,0.7,0.1,8636,12.0,11.4,0.6
36,Matchweek 7,Inter,1.0,0.8,3.0,1.3,0.2,8636,13.0,12.2,0.8
37,Matchweek 8,Inter,1.0,0.8,2.0,0.6,0.2,8636,14.0,13.0,1.0
38,Matchweek 9,Inter,2.0,2.1,1.0,1.1,-0.1,8636,16.0,15.1,0.9
39,Matchweek 10,Inter,2.0,1.6,0.0,0.3,0.4,8636,18.0,16.7,1.3


In [114]:
def plot_xG_gF(ax, team_name, label_x=True, label_y=False, data=df, df_top=df_top):
    """Draw one panel of cumulative goals minus xG with one team highlighted.

    Every team in ``data`` is drawn as a faint grey line; ``team_name`` is
    drawn on top in blue when its final cumulative difference is positive and
    in red otherwise, with the final value annotated next to the last point.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw into.
    team_name : str
        Team to highlight; must have at least one row in ``data``.
    label_x, label_y : bool
        Whether to show the axis label; when False the tick labels of that
        axis are blanked instead.
    data : pandas.DataFrame
        Per-match frame accepted by ``get_cumGF_df``.
    df_top : pandas.DataFrame
        Not used by this function; kept so existing calls keep working.

    Returns
    -------
    matplotlib.axes.Axes
        The same ``ax``.

    Raises
    ------
    ValueError
        If ``data`` holds no rows for ``team_name``.
    """
    ax.grid(ls='--', color='#efe9e6', zorder=2)

    test_df = get_cumGF_df(team_name, data)
    if test_df.empty:
        raise ValueError(f"no match rows for team {team_name!r} in data")

    final_diff = test_df['cum_diff'].iloc[-1]
    if final_diff > 0:
        color = '#336699'
        aux_text = '+'
    else:
        color = '#DA4167'
        aux_text = ''

    # Background: each other team's own cumulative series. The x values are
    # a 1-based match number (not the frame's index labels, which differ per
    # team) so that all lines share the same horizontal scale.
    for other_team in data['team'].unique():
        if other_team == team_name:
            continue
        other_df = get_cumGF_df(other_team, data)
        ax.plot(list(range(1, len(other_df) + 1)), other_df['cum_diff'],
                lw=.75, color='grey', alpha=0.25)

    # Foreground: the highlighted team, with a marker on its last point only.
    match_no = list(range(1, len(test_df) + 1))
    ax.plot(match_no, test_df['cum_diff'], lw=1.5, color=color, zorder=5,
            markevery=[-1], marker='o', ms=6, mfc='white')

    # Pin the limits chosen by autoscaling so the zero line and the shaded
    # bands drawn next cannot move them.
    x_min, x_max = ax.get_xlim()
    y_min, y_max = ax.get_ylim()
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.plot([x_min, x_max], [0,0], color='black', ls='dashed', lw=1)

    # -- Fancy fillbetween: blue band from zero up, red band from zero down
    ax.fill_between(x=[x_min, x_max],
                    y1=0, y2=y_max, color='#336699', alpha=0.05, ec='None', hatch='......', zorder=1)
    ax.fill_between(x=[x_min, x_max],
                    y1=0, y2=y_min, color='#DA4167', alpha=0.05, ec='None', hatch='......', zorder=1)

    # -- Highlighted team annotation ('+' prefix for positive values)
    text_ = ax.annotate(
        xy=(match_no[-1], final_diff),
        text=f"{aux_text}{final_diff:.1f}",
        xytext=(10,5),
        textcoords='offset points',
        weight='bold',
        ha='center',
        va='center',
        color=color,
        size=7,
        zorder=3
    )
    # White outline keeps the number readable on top of the lines.
    text_.set_path_effects(
        [path_effects.Stroke(linewidth=1.5, foreground='white'), path_effects.Normal()]
    )

    if label_x:
        ax.set_xlabel('Match index')
    else:
        ax.set_xticklabels([])
    if label_y:
        # The plotted quantity is the running sum of GF - xG.
        ax.set_ylabel('Cum. goals minus xG')
    else:
        ax.set_yticklabels([])
    return ax

In [2]:
fig = plt.figure(figsize=(22, 14), dpi = 200)
nrows = 8
ncols = 5
# Rows alternate: even rows are short header strips (logo + summary text),
# odd rows are the taller chart panels directly beneath them.
gspec = gridspec.GridSpec(
    ncols=ncols, nrows=nrows, figure=fig, 
    height_ratios = [(1/nrows)*2.6 if x % 2 != 0 else (1/nrows)/2.6 for x in range(nrows)], hspace=0.2
)

plt.rcParams['font.size'] = 7
plt.rcParams['xtick.labelsize'] = 7
plt.rcParams['ytick.labelsize'] = 7
plt.rcParams['hatch.linewidth'] = 0.5

fotmob_url = 'https://images.fotmob.com/image_resources/logo/teamlogo/'

# Only teams that have per-match rows in `df` can be charted; keeping the
# league-table order, any grid slot beyond the last such team is left empty.
chart_teams = df_top[df_top['team'].isin(df['team'])].reset_index(drop=True)
n_teams = len(chart_teams)

for row in range(nrows):
    for col in range(ncols):
        # A header row and the chart row below it address the same team.
        team_idx = (row // 2) * ncols + col
        if team_idx >= n_teams:
            continue

        if row % 2 != 0:
            ax = plt.subplot(
                gspec[row, col],
                facecolor = "#EFE9E6"
            )
            team_name = chart_teams['team'].iloc[team_idx]
            label_y = col == 0
            # Label the x axis on the lowest chart drawn in each column.
            label_x = team_idx + ncols >= n_teams

            plot_xG_gF(ax, team_name, label_x, label_y, df, df_top)
        else:
            teamId = chart_teams['fotmob_id'].iloc[team_idx]
            teamName = chart_teams['team'].iloc[team_idx]
            goalsFor = chart_teams['GF'].iloc[team_idx]
            xgFor = chart_teams['xG'].iloc[team_idx]
            diff = chart_teams['diff'].iloc[team_idx]
            logo_ax = plt.subplot(
                gspec[row,col],
                anchor = 'NW', facecolor = '#EFE9E6'
            )
            # Close the HTTP response once the image has been decoded;
            # convert() forces the pixel data to be read inside the block.
            with urllib.request.urlopen(f'{fotmob_url}{teamId:.0f}.png') as response:
                club_icon = Image.open(response).convert('LA')
            logo_ax.imshow(club_icon)
            logo_ax.axis('off')
            # -- Add the team name and season totals next to the logo
            ax_text(
                x = 1.2, 
                y = .9,
                s = f'<{teamName}>\nGoals: {goalsFor:.0f} | xG: {xgFor:.1f} | diff: {diff:.1f}',
                ax = logo_ax, 
                highlight_textprops=[{'weight':'bold', 'font':'DM Sans', 'size':'8'}],
                font = 'Karla', 
                ha = 'left', 
                size = 8, 
                annotationbbox_kw = {'xycoords':'axes fraction'}
            )

fig_text(
    x=0.12, y=.96, 
    s='Serie A: goals vs. expected goals',
    va='bottom', ha='left',
    fontsize=15, color='black', font='DM Sans', weight='bold'
)
fig_text(
    x=0.12, y=.91, 
    s='Which teams are <outscoring> or <undershooting> their xG? | Cumulative goals minus xG by matchweek\nData is via Opta | viz by @sonofacorner',
    highlight_textprops=[{'weight':'bold', 'color': '#336699'}, {'weight':'bold', 'color': '#DA4167'}],
    va='bottom', ha='left',
    fontsize=8, color='#4E616C', font='Karla'
)

NameError: name 'plt' is not defined