In [1]:
import os
import re

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html, Input, Output
from jupyter_dash import JupyterDash
px.defaults.template = "simple_white"
os.getcwd()

'/home/viet/OneDrive/Studying_Materials/Data_Visualization/FootballVizualization/oss'

In [12]:
file_name_dict = {
    "zone_against" : "stage-attempt-zones-against.csv",
    "zone_for" : "stage-attempt-zones-for.csv",
    "direction_against" : "stage-attempt-directions-against.csv",
    "direction_for" : "stage-attempt-directions-for.csv",
    "offensive" : "stage-team-stats-offensive.csv"
}
ATTEMPT = ["attempt-directions", "attempt-zones"]
GOAL_PASS = ["goals", "passes"]
FOR_AGAINST = [*ATTEMPT, *GOAL_PASS]
ALL_TYPE = [*FOR_AGAINST, "cards", "team", "touch"]
LEAGUE = ["Bundesliga", "EPL", "LaLiga", "Ligue1", "SerieA"]


def _add_suffix_columns(df, suffix):
    col_name_to_change = list(df.columns)
    col_name_to_change.remove("Team")

    df.rename(
            columns={
                col_name:f"{col_name}_{suffix}" for col_name in col_name_to_change
            },
            inplace=True
        )


def gather_all_league():
    df = pd.DataFrame()
    for league in LEAGUE:
        df = pd.concat([df, gather_all_seasons(league)], ignore_index=True)
    return df


def gather_one_season(league, season):
    first = True
    df_league = pd.DataFrame()
    season_path = os.path.join("../data", league)

    for type in FOR_AGAINST: # everything csv file that has for and against
        for mode in ["for", "against"]:
            df_temp = pd.read_csv(os.path.join(season_path, f"{season}/stage-{type}-{mode}.csv"), dtype=str)
            df_temp.drop(["R"], axis=1, inplace=True)
            df_temp.rename(
                columns = {
                    'Left Side':'left',
                    'Attempts from the middle':'middle',
                    'Right Side': 'right',
                }, 
            inplace = True
    )
            if first:
                df_league = df_temp
                first = False
            else:
                df_league = df_league.merge(df_temp, on="Team", suffixes=("_for","_against"))

    type = "cards" # handle cards file
    df_temp = pd.read_csv(os.path.join(season_path, f"{season}/stage-{type}.csv"), dtype=str)
    df_temp.drop(["R"], axis=1, inplace=True)
    df_league = df_league.merge(df_temp, on="Team")

    type = "team" # handle team files
    for mode in ["defensive", "offensive", "detailed", "summary"]:
        df_temp = pd.read_csv(os.path.join(season_path, f"{season}/stage-{type}-stats-{mode}.csv"), dtype=str)

        # format Team column in this fucker table
        df_temp["Team"] = df_temp["Team"].apply(lambda s: re.sub(re.compile(r"\d{1,2}\.\s"), "", s))

        _add_suffix_columns(df_temp, mode)

        df_league = df_league.merge(df_temp, on="Team")
    
    type = "touch" # handle touch files
    for mode in ["channels", "zones"]:
        df_temp = pd.read_csv(os.path.join(season_path, f"{season}/stage-{type}-{mode}.csv"), dtype=str)
        df_temp.drop(["R"], axis=1, inplace=True)
        if mode == "channels":
            _add_suffix_columns(df_temp, mode)
        
        df_league = df_league.merge(df_temp, on="Team")

    df_league = df_league.applymap(lambda x: int(x[:-1])/100 if x.__contains__("%") else x)

    season = season.split("_")[0]
    df_league["season"] = [int(season) for i in range(len(df_league))]
    df_league["league"] = [league for i in range(len(df_league))]

    df_league.rename(
            columns = {
                'Team':'team', 
            }, 
            inplace = True
    )
    return df_league


def gather_all_seasons(league):
    df = pd.DataFrame()

    season_path = os.path.join("../data", league)
    for season in os.listdir(season_path): # iterate through each season
        df = pd.concat([df, gather_one_season(league, season)], ignore_index=True)

    return df.sort_values(by=['season'])

In [14]:
test = gather_one_season("SerieA", "2009_2010")
print(list(test.columns))
test.tail()

['team', 'left_for', 'middle_for', 'right_for', 'left_against', 'middle_against', 'right_against', 'In 6 Yards Box_for', 'In 18 Yards Box_for', 'Outside of Box_for', 'In 6 Yards Box_against', 'In 18 Yards Box_against', 'Outside of Box_against', 'Open Play_for', 'Counter Attack_for', 'Set Piece_for', 'Penalty_for', 'Own Goal_for', 'Open Play_against', 'Counter Attack_against', 'Set Piece_against', 'Penalty_against', 'Own Goal_against', 'Cross pg_for', 'Through Ball pg_for', 'Long Balls pg_for', 'Short Passes pg_for', 'Cross pg_against', 'Through Ball pg_against', 'Long Balls pg_against', 'Short Passes pg_against', 'Fouls', 'Unprofessional', 'Dive', 'Other', 'Shots pg_defensive', 'Tackles pg_defensive', 'Interceptions pg_defensive', 'Fouls pg_defensive', 'Offsides pg_defensive', 'Rating_defensive', 'Shots pg_offensive', 'Shots OT pg_offensive', 'Dribbles pg_offensive', 'Fouled pg_offensive', 'Rating_offensive', 'Total_detailed', 'OutOfBox_detailed', 'SixYardBox_detailed', 'PenaltyArea_de

Unnamed: 0,team,left_for,middle_for,right_for,left_against,middle_against,right_against,In 6 Yards Box_for,In 18 Yards Box_for,Outside of Box_for,...,AerialsWon_summary,Rating_summary,Left Side_channels,Middle of the pitch_channels,Right Side_channels,Own Third,Middle Third,Opposition Third,season,league
15,Napoli,0.18,0.62,0.21,0.19,0.67,0.15,0.06,0.48,0.46,...,8.9,6.96,0.29,0.32,0.4,0.25,0.46,0.3,2009,SerieA
16,Catania,0.16,0.7,0.15,0.24,0.6,0.16,0.04,0.43,0.53,...,9.5,6.84,0.34,0.33,0.33,0.27,0.47,0.26,2009,SerieA
17,Bologna,0.15,0.65,0.2,0.18,0.66,0.16,0.05,0.42,0.53,...,9.1,6.78,0.34,0.33,0.32,0.3,0.45,0.26,2009,SerieA
18,Livorno,0.15,0.6,0.25,0.21,0.62,0.17,0.05,0.35,0.6,...,8.0,6.7,0.32,0.31,0.38,0.28,0.45,0.26,2009,SerieA
19,Chievo,0.13,0.73,0.14,0.21,0.6,0.19,0.1,0.48,0.42,...,10.4,6.84,0.28,0.33,0.38,0.27,0.45,0.28,2009,SerieA


In [4]:
gather_all_seasons("SerieA").head(30)

Unnamed: 0,team,left_for,middle_for,right_for,left_against,middle_against,right_against,In 6 Yards Box_for,In 18 Yards Box_for,Outside of Box_for,...,Pass%_summary,AerialsWon_summary,Rating_summary,Left Side_channels,Middle of the pitch_channels,Right Side_channels,Own Third,Middle Third,Opposition Third,season
219,Chievo,0.13,0.73,0.14,0.21,0.6,0.19,0.1,0.48,0.42,...,71.8,10.4,6.84,0.28,0.33,0.38,0.27,0.45,0.28,2009
200,Lazio,0.26,0.54,0.2,0.18,0.63,0.18,0.04,0.35,0.6,...,77.6,8.0,6.88,0.34,0.3,0.36,0.27,0.45,0.28,2009
201,AC Milan,0.22,0.64,0.14,0.25,0.55,0.2,0.07,0.49,0.44,...,84.8,7.9,7.04,0.38,0.31,0.31,0.25,0.45,0.3,2009
202,Roma,0.22,0.6,0.18,0.22,0.6,0.19,0.06,0.45,0.5,...,79.7,8.7,7.07,0.31,0.34,0.35,0.27,0.46,0.27,2009
203,Sampdoria,0.21,0.63,0.16,0.17,0.65,0.18,0.07,0.42,0.5,...,75.4,8.4,6.87,0.36,0.27,0.37,0.28,0.45,0.27,2009
204,Robur Siena,0.21,0.61,0.17,0.16,0.68,0.17,0.06,0.4,0.53,...,76.0,9.5,6.74,0.34,0.31,0.34,0.28,0.44,0.28,2009
205,Palermo FC,0.21,0.58,0.21,0.18,0.59,0.22,0.06,0.47,0.47,...,77.2,8.6,6.92,0.36,0.31,0.33,0.3,0.42,0.28,2009
206,Fiorentina,0.2,0.64,0.16,0.22,0.59,0.2,0.05,0.43,0.52,...,75.7,8.5,6.83,0.35,0.31,0.34,0.25,0.43,0.33,2009
207,Udinese,0.2,0.6,0.2,0.21,0.64,0.15,0.05,0.47,0.49,...,78.5,8.5,6.94,0.35,0.3,0.34,0.27,0.45,0.27,2009
208,Juventus,0.2,0.67,0.14,0.2,0.6,0.2,0.05,0.47,0.48,...,79.4,9.4,6.84,0.35,0.32,0.32,0.25,0.44,0.3,2009


In [15]:
gather_all_league().head()

Unnamed: 0,team,left_for,middle_for,right_for,left_against,middle_against,right_against,In 6 Yards Box_for,In 18 Yards Box_for,Outside of Box_for,...,AerialsWon_summary,Rating_summary,Left Side_channels,Middle of the pitch_channels,Right Side_channels,Own Third,Middle Third,Opposition Third,season,league
0,Borussia Dortmund,0.15,0.67,0.18,0.19,0.63,0.17,0.1,0.47,0.42,...,9.2,6.98,0.34,0.31,0.35,0.26,0.47,0.27,2009,Bundesliga
1,Hamburger SV,0.27,0.55,0.18,0.21,0.6,0.19,0.04,0.51,0.45,...,5.7,6.88,0.4,0.24,0.36,0.27,0.47,0.27,2009,Bundesliga
2,Nuernberg,0.27,0.52,0.22,0.23,0.62,0.15,0.05,0.48,0.47,...,5.8,6.74,0.37,0.26,0.37,0.28,0.45,0.27,2009,Bundesliga
3,Wolfsburg,0.26,0.59,0.15,0.21,0.58,0.21,0.08,0.58,0.34,...,5.6,6.95,0.37,0.26,0.37,0.28,0.44,0.28,2009,Bundesliga
4,Werder Bremen,0.25,0.6,0.15,0.24,0.55,0.21,0.08,0.53,0.39,...,7.2,7.06,0.39,0.26,0.34,0.24,0.45,0.31,2009,Bundesliga


In [16]:
df = gather_all_league()

In [17]:
df.head(20)

Unnamed: 0,team,left_for,middle_for,right_for,left_against,middle_against,right_against,In 6 Yards Box_for,In 18 Yards Box_for,Outside of Box_for,...,AerialsWon_summary,Rating_summary,Left Side_channels,Middle of the pitch_channels,Right Side_channels,Own Third,Middle Third,Opposition Third,season,league
0,Borussia Dortmund,0.15,0.67,0.18,0.19,0.63,0.17,0.1,0.47,0.42,...,9.2,6.98,0.34,0.31,0.35,0.26,0.47,0.27,2009,Bundesliga
1,Hamburger SV,0.27,0.55,0.18,0.21,0.6,0.19,0.04,0.51,0.45,...,5.7,6.88,0.4,0.24,0.36,0.27,0.47,0.27,2009,Bundesliga
2,Nuernberg,0.27,0.52,0.22,0.23,0.62,0.15,0.05,0.48,0.47,...,5.8,6.74,0.37,0.26,0.37,0.28,0.45,0.27,2009,Bundesliga
3,Wolfsburg,0.26,0.59,0.15,0.21,0.58,0.21,0.08,0.58,0.34,...,5.6,6.95,0.37,0.26,0.37,0.28,0.44,0.28,2009,Bundesliga
4,Werder Bremen,0.25,0.6,0.15,0.24,0.55,0.21,0.08,0.53,0.39,...,7.2,7.06,0.39,0.26,0.34,0.24,0.45,0.31,2009,Bundesliga
5,Borussia M.Gladbach,0.25,0.58,0.17,0.21,0.62,0.17,0.04,0.47,0.49,...,7.6,6.74,0.37,0.27,0.36,0.29,0.46,0.24,2009,Bundesliga
6,Bochum,0.24,0.57,0.19,0.21,0.61,0.18,0.05,0.39,0.56,...,6.2,6.65,0.35,0.26,0.4,0.28,0.46,0.26,2009,Bundesliga
7,Freiburg,0.23,0.58,0.2,0.22,0.64,0.14,0.08,0.45,0.47,...,7.3,6.68,0.38,0.27,0.34,0.28,0.44,0.28,2009,Bundesliga
8,Schalke 04,0.22,0.65,0.13,0.22,0.6,0.19,0.12,0.51,0.37,...,7.8,6.95,0.33,0.26,0.41,0.24,0.47,0.3,2009,Bundesliga
9,Eintracht Frankfurt,0.17,0.68,0.14,0.21,0.63,0.16,0.07,0.43,0.5,...,9.4,6.77,0.37,0.28,0.35,0.27,0.47,0.25,2009,Bundesliga


In [18]:
len(df)

1274

### Test direction plot

In [19]:
fig = px.line(df, x="season", y="right_for", color="team", title="right", markers=True)

fig.update_traces(
    opacity=0.4,
    line_color='rgb(189,189,189)'
)

mean = df.groupby("season")["right_for"].mean()
fig.add_trace(go.Scatter(
    x=mean.index,
    y=mean.values,
    line=dict(
        width=4,
        color="rgb(49,130,189)"
    ),
    name="Average"
))

fig.update_layout(
    yaxis = dict(
        tickformat=".0%",
        showgrid=True
    ), 
    xaxis = dict(
        tickvals = [i for i in range(2009, 2022)],
        ticktext = ["2009-2010", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015", "2015-2016", "2016-2017", "2017-2018", "2018-2019", "2019-2020", "2020-2021", "2021-2022"],
        showgrid=True
    ),
    showlegend=False
)

### Interactive percentage of attempts from each direction for the last 10 years

In [21]:
from dash import dcc, html, Input, Output, State, ctx
from jupyter_dash import JupyterDash
import plotly.express as px
from dash.exceptions import PreventUpdate

app = JupyterDash(__name__)

app.layout = html.Div([
    html.Div([
        html.Div([
            html.H3('League'),
            dcc.Checklist(
                options=[
                    {
                        "label": html.Div(
                            [
                                html.Img(
                                    src="./assets/flags/germany-flag-icon-16.png"),
                                " Bundesliga",
                            ], style={'display': 'inline-block', 'marginTop': '5px'}
                        ),
                        "value": "Bundesliga",
                    },
                    {
                        "label": html.Div(
                            [
                                html.Img(
                                    src="./assets/flags/england-flag-icon-16.png", alt='image'),
                                " EPL",
                            ], style={'display': 'inline-block', 'marginTop': '5px'}
                        ),
                        "value": "EPL",
                    },
                    {
                        "label": html.Div(
                            [
                                html.Img(
                                    src="./assets/flags/spain-flag-icon-16.png", alt='image'),
                                " LaLiga",
                            ], style={'display': 'inline-block', 'marginTop': '5px'}
                        ),
                        "value": "LaLiga",
                    },
                    {
                        "label": html.Div(
                            [
                                html.Img(
                                    src="./assets/flags/france-flag-icon-16.png", alt='image'),
                                " Ligue 1",
                            ], style={'display': 'inline-block', 'marginTop': '5px'}
                        ),
                        "value": "Ligue1",
                    },
                    {
                        "label": html.Div(
                            [
                                html.Img(
                                    src="./assets/flags/italy-flag-icon-16.png", alt='image'),
                                " Serie A",
                            ], style={'display': 'inline-block', 'marginTop': '5px'}
                        ),
                        "value": "SerieA",
                    },
                ],
                value=["Bundesliga"],
                id="league",
                labelStyle={'display': 'block'},
                style={"height":150, "width":200, "overflow":"auto"}
            )
        ], style={'padding': 10, 'flex': 1}),
        html.Div([
            html.H3('Direction'),
            dcc.RadioItems(
                {
                    "right": "Right Side",
                    "left": "Left Side",
                    "middle": "Middle"
                },
                "right",
                id="direction",
                labelStyle={'display': 'block'},
                style={"height":150, "width":200, "overflow":"auto"}
            )
        ], style={'padding': 10, 'flex': 1}),
        html.Div([
            html.H3('Attempts for or against'),
            dcc.RadioItems(
                {
                    "for": "Attempts for",
                    "against": "Attempts against"
                },
                "for",
                id="for_or_against",
                labelStyle={'display': 'block'},
                style={"height":150, "width":200, "overflow":"auto"}
            )
        ], style={'padding': 10, 'flex': 1}),
        html.Div([
            html.H3('Team'),
            dcc.Dropdown([], id='team_dropdown', multi=True),
            html.Div(id='dd-output-container')
        ], style={'padding': 10, 'flex': 1})
    ], style={'display': 'flex', 'flex-direction': 'row'}),

    
    dcc.Graph(
        id='graph',
    )
])


@app.callback(
    Output("team_dropdown", "options"),
    Input('league', 'value'))
def update_team_dropdown(selected_league):
    mask = None
    for i in range(len(selected_league)):
        if i == 0:
            mask = (df.league == selected_league[i])
        else:
            mask |= (df.league == selected_league[i])
    if mask is not None:
        filtered_df = df[mask]
        return sorted(filtered_df["team"].unique())
    return []


@app.callback(
    Output('graph', 'figure'),
    Input('team_dropdown', 'value'),
    Input('league', 'value'),
    Input('direction', 'value'),
    Input('for_or_against', 'value'))
def update_figure(selected_team, selected_league, selected_dir, selected_for_or_against):
    if len(selected_league) == 0:  # if no league is selected then return an empty plot
        fig = px.line()

        fig.update_layout(
            yaxis=dict(
                tickformat=".0%",
                showgrid=True
            ),
            xaxis=dict(
                tickvals=[i for i in range(2009, 2022)],
                ticktext=["2009-2010", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015",
                          "2015-2016", "2016-2017", "2017-2018", "2018-2019", "2019-2020", "2020-2021", "2021-2022"],
                showgrid=True
            ),
            showlegend=False
        )
        return fig

    # filtered_df = df[df["for_or_against"]
    #                           == selected_for_or_against]

    mask = None
    for i in range(len(selected_league)):
        if i == 0:
            mask = (df.league == selected_league[i])
        else:
            mask |= (df.league == selected_league[i])
    filtered_df_league = df[mask]

    mask_team = None
    if ctx.triggered_id == "team_dropdown":
        for i in range(len(selected_team)):
            if i == 0:
                mask_team = (df.team == selected_team[i])
            else:
                mask_team |= (df.team == selected_team[i])

    if mask_team is not None: 
        mask_team &= mask
        filtered_df_league_team = df[mask_team]

        league_str = ", ".join(selected_league)
        fig = px.line(filtered_df_league_team, x="season", y=f"{selected_dir}_{selected_for_or_against}", color="team",
                title=f"<b>Percentage of attempts from the {selected_dir} in {league_str} league</b>", markers=True,
                hover_name="team", hover_data=["season", f"{selected_dir}_{selected_for_or_against}", "league"])

        fig.update_layout(
            title_font_size=20,
            hoverlabel=dict(
                bgcolor="white",
                font_size=16,
            )
        )

        fig.update_traces(
            opacity=0.4,
        )
    else:
        league_str = ", ".join(selected_league)
        fig = px.line(filtered_df_league, x="season", y=f"{selected_dir}_{selected_for_or_against}", color="team",
                    title=f"<b>Percentage of attempts from the {selected_dir} in {league_str} league</b>", markers=True,
                    hover_name="team", hover_data=["season", f"{selected_dir}_{selected_for_or_against}", "league"])

        fig.update_layout(
            title_font_size=20,
            hoverlabel=dict(
                bgcolor="white",
                font_size=16,
            ),
            showlegend=False
        )

        fig.update_traces(
            opacity=0.4,
            line_color='rgb(189,189,189)'
        )
    mean = filtered_df_league.groupby("season")[f"{selected_dir}_{selected_for_or_against}"].mean()
    fig.add_trace(go.Scatter(
        x=mean.index,
        y=mean.values,
        line=dict(
            width=5,
            color="rgb(49,130,189)"
        ),
        name="Average",
        showlegend=False
    ))

    fig.update_layout(
        yaxis=dict(
            tickformat=".0%",
            showgrid=True
        ),
        xaxis=dict(
            tickvals=[i for i in range(2009, 2022)],
            ticktext=["2009-2010", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015",
                      "2015-2016", "2016-2017", "2017-2018", "2018-2019", "2019-2020", "2020-2021", "2021-2022"],
            showgrid=True
        ),
    )

    return fig

app.run_server(debug=True, mode="external")

Dash app running on http://127.0.0.1:8050/
