In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup

import bokeh
from bokeh.palettes import RdYlGn
from bokeh.models import ColumnDataSource, LinearColorMapper, ColorBar, ColumnDataSource
from bokeh.plotting import figure, output_file, show
from bokeh.transform import linear_cmap
from bokeh.models.tools import HoverTool

from selenium import webdriver
CHROMEDRIVER_PATH = "/Users/paulemiledugnat/Downloads/chromedriver_upd"

In [2]:
team_name = "Lyon"

driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
driver.get(f"https://understat.com/team/{team_name}/2020")

team_soup = BeautifulSoup(driver.page_source)
table_html = team_soup.find("div", {"id": "team-statistics"}).find("table")

driver.quit()

In [3]:
pd.read_html(str(table_html))[0]

Unnamed: 0,№,Situation,Sh,G,ShA,GA,xG,xGA,xGD,xG/Sh,xGA/Sh
0,1,Open play,319,37,223,19,47.19+10.19,17.36-1.64,29.83,0.15,0.08
1,2,From corner,68,2,45,1,6.01+4.01,3.09+2.09,2.92,0.09,0.07
2,3,Set piece,23,4,16,1,2.84-1.16,1.39+0.39,1.45,0.12,0.09
3,4,Direct Freekick,21,2,11,0,1.10-0.90,0.79+0.79,0.31,0.05,0.07
4,5,Penalty,9,9,5,3,6.84-2.16,3.80+0.80,3.04,0.76,0.76


In [4]:
def process_html_stat(html_table: str) -> pd.DataFrame:
    df_team = pd.read_html(html_table)[0].drop("№", axis=1)
    
    df_team["xG"] = df_team["xG"].str.split(r"\+|\-").apply(lambda x: float(x[0]))
    df_team["xGA"] = df_team["xGA"].str.split(r"\+|\-").apply(lambda x: float(x[0]))

    df_team["diff_xG"] = (df_team["G"] - df_team["xG"])
    df_team["diff_xGA"] = (df_team["GA"] - df_team["xGA"])
    
    return df_team

In [40]:
from bokeh.models import CategoricalTicker, NumeralTickFormatter
from bokeh.plotting import figure
from bokeh.models.tools import HoverTool


In [41]:
df = process_html_stat(str(table_html))
df.head(2)

Unnamed: 0,Situation,Sh,G,ShA,GA,xG,xGA,xGD,xG/Sh,xGA/Sh,diff_xG,diff_xGA
0,Open play,319,37,223,19,47.19,17.36,29.83,0.15,0.08,-10.19,1.64
1,From corner,68,2,45,1,6.01,3.09,2.92,0.09,0.07,-4.01,-2.09


In [46]:
from bokeh.layouts import row

In [65]:
x = df['Situation']
y = df['diff_xG']

amplitude = df['diff_xG'].abs().max() + 1
color_mapper = LinearColorMapper(palette=RdYlGn[9][::-1], low=-amplitude, high=amplitude)

h_barchart = figure(
    title='Visualisation de la diff xG pour Lyon, saison 2020-2021',
    y_range=x.values,
    plot_width=1500,
    plot_height=500,
    x_range=(- amplitude, amplitude))

h_barchart.yaxis.ticker = CategoricalTicker()
r = h_barchart.hbar(right=y, y=x, height=0.2,
    color={'field': "right", 'transform': color_mapper},
    )

glyph = r.glyph
glyph.fill_alpha = 1
glyph.line_color = "black"
glyph.line_width = 0.2

hover = HoverTool()
hover.tooltips = [
    ('diff xG', '@y'),
    ('Catégorie', '@right{0.2f}'),
]

h_barchart.add_tools(hover)
h_barchart.toolbar.logo = None
h_barchart.toolbar_location = None

color_bar = ColorBar(color_mapper=color_mapper, width=12)
color_bar_plot = figure(height=500, width=100, 
                        toolbar_location="right")

color_bar_plot.add_layout(color_bar, 'right')
color_bar_plot.toolbar.logo = None
color_bar_plot.toolbar_location = None

layout = row(h_barchart, color_bar_plot)

show(layout)



In [None]:
data = np.random.normal(0, 0.5, 1000)
hist, edges = np.histogram(data, density=True, bins=50)

p = figure()
p.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:], line_color="white")

output_file("hist.html")
show(p)

In [66]:
from bokeh.io import output_file, show
from bokeh.palettes import Category20c
from bokeh.plotting import figure
from bokeh.transform import cumsum

df['angle'] = df['xG'] / df['xG'].sum() * (2 * 3.14159)
df['color'] = Category20c[df.shape[0]]

In [72]:
p = figure(plot_height=350, title="Pie Chart", toolbar_location=None,
           tools="hover", tooltips="@Situation: @xG{0.1f} xG", x_range=(-0.5, 1.0))

p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='Situation', source=df)

p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None

show(p)

In [2]:
def get_xG_html_table(team_name: str) -> str:
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
    driver.get(f"https://understat.com/team/{team_name}/2020")
    
    team_soup = BeautifulSoup(driver.page_source)
    table_html = team_soup.find("div", {"id": "team-players"}).find("table")
    
    driver.quit()

    return str(table_html)

In [8]:
def process_html(html_table: str) -> pd.DataFrame:
    df_team = pd.read_html(html_table)[0].drop("№", axis=1).iloc[:15]
    
    df_team["xG"] = df_team["xG"].str.split(r"\+|\-").apply(lambda x: float(x[0]))
    df_team["xA"] = df_team["xA"].str.split(r"\+|\-").apply(lambda x: float(x[0]))

    df_team["diff_xG"] = (df_team["G"] - df_team["xG"])
    df_team["diff_xA"] = (df_team["A"] - df_team["xA"])

    df_team = df_team[df_team["xG"] > 0.5] # select only players that could score 
    df_team = df_team.round(2)
    
    return df_team

In [4]:
def plot_xG_df(df_xG_team: pd.DataFrame, team_name: str) -> None:
    plot_max = df_xG_team["xG"].max() + 1

    amplitude = max(abs(df_xG_team["diff_xG"].min()), abs(df_xG_team["diff_xG"].max()))
    
    color_mapper = LinearColorMapper(
        palette=RdYlGn[9][::-1], low=-amplitude, high=amplitude)

    p = figure(title=f"xG vs. vrais buts à {team_name} 2020-2021")

    p.line([0, plot_max], [0, plot_max], color="black",
           legend_label="Performance normale", line_width=2)

    p.line([0, plot_max], [0, 1.2 * plot_max], color="black",
           line_dash=[4, 4], legend_label="Surperf de 20 %", line_color='green', line_width=2)

    p.line([0, plot_max], [0, 0.8 * plot_max], color="black",
           line_dash=[4, 4], legend_label="Sousperf de 20 %", line_color='red', line_width=2)

    p.line([0, plot_max], [0, 1.4 * plot_max], color="black",
           line_dash=[4, 4], line_color='green', line_width=1)

    p.line([0, plot_max], [0, 0.6 * plot_max], color="black",
           line_dash=[4, 4], line_color='red', line_width=1)


    r = p.circle(x='xG',
                 y='G',
                 source=df_xG_team,
                 size=10,
                 color={'field': 'diff_xG', 'transform': color_mapper})

    glyph = r.glyph
    glyph.size = 15
    glyph.fill_alpha = 1
    glyph.line_color = "black"
    glyph.line_width = 1

    p.background_fill_color = "gray"
    p.background_fill_alpha = 0.05

    hover = HoverTool()
    hover.tooltips = [
        ('', '@Player'),
        ('xG', '@xG{0.2f}'),
        ('G', '@G{0.2f}'),
        ('diff_xG', '@diff_xG{0.2f}')
    ]

    color_bar = ColorBar(color_mapper=color_mapper, width=8)

    p.add_layout(color_bar, 'right')
    p.add_tools(hover)
    p.legend.location = "top_left"

    show(p)

In [5]:
html_team_table = get_xG_html_table("Paris_Saint_Germain")
df_team = process_html(html_team_table)
plot_xG_df(df_team, team_name="Paris_Saint_Germain")

In [21]:
CHOIX_TEAMS = ["<Choix d'un pays>"]

FRENCH_TEAMS = ["<Choix d'une équipe>", "Lille", "Lyon", "Paris_Saint_Germain",
                "Monaco", "Lens", "Metz", "Marseille",
                "Rennes", "Lorient", "Strasbourg",
                "Montpellier", "Bordeaux", "Nice", "Brest",
                "Angers", "Nantes", "Reims", "Nantes",
                "Dijon", "Nimes"]

GERMAN_TEAMS = ["<Choix d'une équipe>", 'Werder_Bremen', 'VfB_Stuttgart', 'Union_Berlin', 'Schalke_04',
                'RasenBallsport_Leipzig', 'Mainz_05', 'Hoffenheim',
                'Hertha_Berlin', 'Freiburg', 'FC_Cologne', 'Eintracht_Frankfurt',
                'Borussia_M.Gladbach', 'Borussia_Dortmund', 'Bayern_Munich',
                'Bayer_everkusen', 'Augsburg', 'Arminia_Bielefeld']

ENGLISH_TEAMS = ["<Choix d'une équipe>", 'Manchester_City', 'Manchester_United', 'Leicester', 'West_Ham', 'Chelsea',
                 'Liverpool', 'Everton', 'Aston_Villa', 'Tottenham', 'Leeds',
                 'Arsenal', 'Wolverhampton_Wanderers', 'Crystal_Palace',
                 'Southampton', 'Burnley', 'Brighton', 'Newcastle_United', 'Fulham',
                 'West_Bromwich_Albion', 'Sheffield_United']

SPANISH_TEAMS = ["<Choix d'une équipe>", 'Atletico_Madrid', 'Real_Madrid', 'Barcelona', 'Sevilla',
                 'Real_Sociedad', 'Villarreal', 'Real_Betis', 'Levante', 'Granada',
                 'Athletic_Club', 'Celta_Vigo', 'Valencia', 'Osasuna', 'Cadiz',
                 'Getafe', 'Alaves', 'Eibar', 'Real_Valladolid', 'Elche',
                 'SD_Huesca']

ITALIAN_TEAMS = ["<Choix d'une équipe>", 'Inter', 'AC_Milan', 'Juventus', 'Roma', 'Atalanta', 'Lazio',
                 'Napoli', 'Sassuolo', 'Verona', 'Sampdoria', 'Genoa', 'Bologna',
                 'Udinese', 'Fiorentina', 'Benevento', 'Spezia', 'Torino',
                 'Cagliari', 'Parma_Calcio_1913', 'Crotone']

COUNTRY_TEAMS = {"France": FRENCH_TEAMS,
                 "Espagne": SPANISH_TEAMS,
                 "Angleterre": ENGLISH_TEAMS,
                 "Allemagne": GERMAN_TEAMS,
                 "Italie": ITALIAN_TEAMS,
                 "<Choix d'un pays>": CHOIX_TEAMS}


In [22]:
import streamlit as st

In [115]:
option = st.selectbox(
    'Quelle équipe veux-tu analyser',
    list_of_teams)
st.write('You selected:', option)

In [25]:
teams = []
[teams.extend(COUNTRY_TEAMS[x]) for x in COUNTRY_TEAMS.keys()]
teams = [x for x in teams if x != "<Choix d'une équipe>"]

[None, None, None, None, None, None]

In [30]:
LIST_OF_YEARS = [2020, 2019, 2018, 2017, 2016, 2015, 2014]

In [52]:
def update_db(list_teams, list_years):
    import itertools
    for team, year in itertools.product(list_teams, list_years):
        try:
            get_xG_html_table(team, year, force_update=True)
        except:
            print(f'unable to update {team}-{year}')


In [53]:
update_db(["Lyon"], [2020])

Lyon 2020


In [28]:
for team, year in itertools.product(a, LIST_OF_YEARS):
    

['Lille',
 'Lyon',
 'Paris_Saint_Germain',
 'Monaco',
 'Lens',
 'Metz',
 'Marseille',
 'Rennes',
 'Lorient',
 'Strasbourg',
 'Montpellier',
 'Bordeaux',
 'Nice',
 'Brest',
 'Angers',
 'Nantes',
 'Reims',
 'Nantes',
 'Dijon',
 'Nimes',
 'Atletico_Madrid',
 'Real_Madrid',
 'Barcelona',
 'Sevilla',
 'Real_Sociedad',
 'Villarreal',
 'Real_Betis',
 'Levante',
 'Granada',
 'Athletic_Club',
 'Celta_Vigo',
 'Valencia',
 'Osasuna',
 'Cadiz',
 'Getafe',
 'Alaves',
 'Eibar',
 'Real_Valladolid',
 'Elche',
 'SD_Huesca',
 'Manchester_City',
 'Manchester_United',
 'Leicester',
 'West_Ham',
 'Chelsea',
 'Liverpool',
 'Everton',
 'Aston_Villa',
 'Tottenham',
 'Leeds',
 'Arsenal',
 'Wolverhampton_Wanderers',
 'Crystal_Palace',
 'Southampton',
 'Burnley',
 'Brighton',
 'Newcastle_United',
 'Fulham',
 'West_Bromwich_Albion',
 'Sheffield_United',
 'Werder_Bremen',
 'VfB_Stuttgart',
 'Union_Berlin',
 'Schalke_04',
 'RasenBallsport_Leipzig',
 'Mainz_05',
 'Hoffenheim',
 'Hertha_Berlin',
 'Freiburg',
 'FC_Col