In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup

import bokeh
from bokeh.palettes import RdYlGn
from bokeh.models import ColumnDataSource, LinearColorMapper, ColorBar, ColumnDataSource
from bokeh.plotting import figure, output_file, show
from bokeh.transform import linear_cmap
from bokeh.models.tools import HoverTool

from selenium import webdriver
import os  # stdlib; only needed to make the driver location overridable

# Location of the chromedriver binary handed to Selenium.
# It can be overridden with the CHROMEDRIVER_PATH environment variable so the
# notebook is not tied to one machine; the previous hard-coded path is kept as
# the fallback so existing setups keep working unchanged.
CHROMEDRIVER_PATH = os.environ.get(
    "CHROMEDRIVER_PATH", "/Users/paulemiledugnat/Downloads/chromedriver_upd"
)

In [2]:
def get_xG_html_table(team_name: str, season: int = 2020) -> str:
    """Fetch the players table of an understat.com team page as an HTML string.

    Opens a Chrome session through Selenium, loads
    ``https://understat.com/team/{team_name}/{season}`` and returns the
    ``<table>`` located inside the element whose id is ``team-players``.

    Args:
        team_name: Team identifier as it appears in the understat URL
            (e.g. ``"Paris_Saint_Germain"``).
        season: Season segment of the URL. Defaults to 2020, the value that
            used to be hard-coded.

    Returns:
        The table markup, serialised with ``str``.

    Raises:
        ValueError: If the loaded page does not contain the expected
            ``div#team-players`` element or a table inside it.
    """
    # NOTE(review): newer Selenium 4 releases no longer accept
    # `executable_path` (a `Service` object is expected instead) — confirm
    # against the installed Selenium version before upgrading.
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
    try:
        driver.get(f"https://understat.com/team/{team_name}/{season}")
        page_source = driver.page_source
    finally:
        # Always release the browser process, even when loading the page fails.
        driver.quit()

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and warns about the guess.
    team_soup = BeautifulSoup(page_source, "html.parser")

    players_div = team_soup.find("div", {"id": "team-players"})
    table_html = players_div.find("table") if players_div is not None else None
    if table_html is None:
        raise ValueError(
            f"No players table found on the understat page for "
            f"{team_name!r} (season {season})"
        )

    return str(table_html)

In [3]:
def _leading_number(column: pd.Series) -> pd.Series:
    """Return, as floats, the number that precedes the first ``+``/``-`` in each cell."""
    if pd.api.types.is_numeric_dtype(column):
        # read_html already parsed the column as numbers: nothing to strip.
        return column.astype(float)
    # Cells look like "<value>+<delta>" or "<value>-<delta>"; keep only <value>.
    return column.astype(str).str.split(r"\+|\-").str[0].astype(float)


def process_html(html_table: str, max_players: int = 15, min_xG: float = 0.5) -> pd.DataFrame:
    """Turn the scraped players table into a cleaned xG / xA DataFrame.

    Steps:
      1. Parse the first table found in ``html_table``.
      2. Drop the ``"№"`` column and keep the first ``max_players`` rows.
      3. Reduce the ``xG`` and ``xA`` cells to the number in front of the
         first ``+`` or ``-`` sign and convert them to float.
      4. Add ``diff_xG = G - xG`` and ``diff_xA = A - xA``.
      5. Keep only rows whose ``xG`` is strictly greater than ``min_xG`` and
         round the frame to 2 decimals.

    Args:
        html_table: HTML markup containing the table to parse.
        max_players: Number of leading rows to keep (default 15, the value
            that used to be hard-coded).
        min_xG: Rows with ``xG`` at or below this threshold are discarded
            (default 0.5, the value that used to be hard-coded).

    Returns:
        A new DataFrame with the original columns (minus ``"№"``) plus
        ``diff_xG`` and ``diff_xA``.

    Raises:
        ValueError: Propagated from ``pd.read_html`` when no table is found,
            or from the float conversion when a cell has no leading number.
    """
    from io import StringIO  # local import keeps this block self-contained

    # Recent pandas versions deprecate passing literal HTML to read_html;
    # a file-like wrapper is accepted by every version.
    df_team = (
        pd.read_html(StringIO(html_table))[0]
        .drop("№", axis=1)
        .iloc[:max_players]
        .copy()  # own the data before adding columns
    )

    df_team["xG"] = _leading_number(df_team["xG"])
    df_team["xA"] = _leading_number(df_team["xA"])

    df_team["diff_xG"] = df_team["G"] - df_team["xG"]
    df_team["diff_xA"] = df_team["A"] - df_team["xA"]

    # Select only players that had a realistic chance to score.
    df_team = df_team[df_team["xG"] > min_xG]
    df_team = df_team.round(2)

    return df_team

In [4]:
def plot_xG_df(df_xG_team: pd.DataFrame, team_name: str) -> None:
    """Show a Bokeh scatter plot of goals (``G``) against expected goals (``xG``).

    Each row of ``df_xG_team`` becomes one marker at ``(xG, G)``, coloured by
    its ``diff_xG`` value on a red-to-green scale centred on 0. Straight
    reference lines through the origin mark the ratios G/xG = 1 (solid),
    1.2 / 0.8 (dashed, in the legend) and 1.4 / 0.6 (thin dashed).

    Args:
        df_xG_team: Frame providing the columns ``xG``, ``G``, ``diff_xG``
            and ``Player`` (the latter is read by the hover tooltip).
        team_name: Name inserted in the figure title.

    Returns:
        None. The figure is displayed through ``bokeh.plotting.show``.
    """
    plot_max = df_xG_team["xG"].max() + 1

    # Symmetric colour range so that diff_xG == 0 sits in the middle of the palette.
    amplitude = df_xG_team["diff_xG"].abs().max()

    color_mapper = LinearColorMapper(
        palette=RdYlGn[9][::-1], low=-amplitude, high=amplitude)

    p = figure(title=f"xG vs. vrais buts à {team_name} 2020-2021")
    p.xaxis.axis_label = "xG"
    p.yaxis.axis_label = "G"

    # (slope, colour, dash pattern, width, legend label or None)
    reference_lines = [
        (1.0, "black", [], 2, "Performance normale"),
        (1.2, "green", [4, 4], 2, "Surperf de 20 %"),
        (0.8, "red", [4, 4], 2, "Sousperf de 20 %"),
        (1.4, "green", [4, 4], 1, None),
        (0.6, "red", [4, 4], 1, None),
    ]
    for slope, line_color, line_dash, line_width, label in reference_lines:
        # Only the labelled lines get a legend entry.
        legend_kwargs = {} if label is None else {"legend_label": label}
        p.line([0, plot_max], [0, slope * plot_max],
               line_color=line_color, line_dash=line_dash,
               line_width=line_width, **legend_kwargs)

    # `scatter` draws circle markers by default and replaces the
    # `circle(size=...)` spelling that recent Bokeh releases deprecate.
    players = p.scatter(x='xG',
                        y='G',
                        source=df_xG_team,
                        size=15,
                        fill_color={'field': 'diff_xG', 'transform': color_mapper},
                        fill_alpha=1,
                        line_color="black",
                        line_width=1)

    p.background_fill_color = "gray"
    p.background_fill_alpha = 0.05

    # Restrict the tooltip to the player markers; attached to every renderer
    # it would also fire on the reference lines, which have no such columns.
    hover = HoverTool(renderers=[players])
    hover.tooltips = [
        ('', '@Player'),
        ('xG', '@xG{0.2f}'),
        ('G', '@G{0.2f}'),
        ('diff_xG', '@diff_xG{0.2f}')
    ]

    color_bar = ColorBar(color_mapper=color_mapper, width=8)

    p.add_layout(color_bar, 'right')
    p.add_tools(hover)
    p.legend.location = "top_left"

    show(p)

In [5]:
# End-to-end run for a single club: scrape the players table, clean it,
# then display the xG-vs-goals chart.
html_team_table = get_xG_html_table(team_name="Paris_Saint_Germain")
df_team = process_html(html_table=html_team_table)
plot_xG_df(df_xG_team=df_team, team_name="Paris_Saint_Germain")

In [6]:
# Team identifiers offered in the team selector.
# One entry per line with a trailing comma: a missing comma between two
# adjacent string literals silently concatenates them (this previously
# produced the single bogus entry "StrasbourgMontpellier").
# NOTE(review): "Nantes" used to be listed twice; the duplicate was dropped.
# It may have stood in for another club — confirm the intended list.
list_of_teams = [
    "Lille",
    "Lyon",
    "Paris_Saint_Germain",
    "Monaco",
    "Lens",
    "Metz",
    "Marseille",
    "Rennes",
    "Lorient",
    "Strasbourg",
    "Montpellier",
    "Bordeaux",
    "Nice",
    "Brest",
    "Angers",
    "Nantes",
    "Reims",
    "Dijon",
    "Nimes",
]

In [7]:
import streamlit as st

In [115]:
# Let the user choose one team from the list, then echo the choice back.
option = st.selectbox(label='Quelle équipe veux-tu analyser', options=list_of_teams)
st.write('You selected:', option)