In [2]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import dcc, html, Input, Output
from jupyter_dash import JupyterDash
# Use the "simple_white" template as the default for every plotly express figure in this notebook.
px.defaults.template = "simple_white"
# Display the working directory; the relative "../data" path used by the loaders is resolved against it.
os.getcwd()

'/home/viet/OneDrive/Studying_Materials/Data_Visualization/FootballVizualization/oss'

In [3]:
# CSV file name for each statistic; the "for" and "against" variants are separate files.
file_name_dict = {
    "zone_against": "stage-attempt-zones-against.csv",
    "zone_for": "stage-attempt-zones-for.csv",
    "direction_against": "stage-attempt-directions-against.csv",
    "direction_for": "stage-attempt-directions-for.csv",
    "offensive": "stage-team-stats-offensive.csv",
}
# League names; each one is also the name of that league's folder under the data directory.
LEAGUE = ["Bundesliga", "EPL", "LaLiga", "Ligue1", "SerieA"]

def gather_season_all_league():
    """Load the attempt-direction data of every league in ``LEAGUE`` for both modes.

    Returns:
        One DataFrame holding the ``gather_season`` result of each
        (mode, league) pair, all "for" parts first and then all "against"
        parts. Each part keeps its own row labels, so index values repeat
        across parts.
    """
    frames = [
        gather_season(league, mode)
        for mode in ["for", "against"]
        for league in LEAGUE
    ]
    if not frames:
        # Nothing to combine (empty LEAGUE); pd.concat would raise on an empty list.
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated rows on every iteration
    # and avoids concatenating onto an empty placeholder frame.
    return pd.concat(frames, ignore_index=False)


def gather_season(league, mode, data_dir="../data"):
    """Load the attempt-direction CSVs of one league across all of its season folders.

    Each sub-folder of ``<data_dir>/<league>`` is treated as one season; the
    season number is the part of the folder name before the first "_".

    Args:
        league: League name; must be one of ``LEAGUE``.
        mode: "for" or "against"; selects ``stage-attempt-directions-<mode>.csv``.
        data_dir: Directory that contains one folder per league.

    Returns:
        DataFrame with columns ``team``, ``left``, ``middle``, ``right``,
        ``season``, ``league`` and ``for_or_against``, sorted by ``season``.
        Percentage strings such as "27%" are converted to fractions (0.27).

    Raises:
        AssertionError: If ``mode`` or ``league`` is not an accepted value.
        FileNotFoundError: If the league folder contains no season folders.
    """
    assert mode in ["for", "against"]
    assert isinstance(league, str), "wrong type mah dude"
    assert league in LEAGUE, "ayo wrong league name dude"

    def _percent_to_fraction(value):
        # Only strings ending in "%" are converted; numbers, NaN and plain
        # text (team names) pass through untouched.
        if isinstance(value, str) and value.strip().endswith("%"):
            return float(value.strip()[:-1]) / 100
        return value

    season_path = os.path.join(data_dir, league)
    frames = []
    for folder_name in os.listdir(season_path):
        folder_path = os.path.join(season_path, folder_name)
        if not os.path.isdir(folder_path):
            # Stray files next to the season folders are not seasons.
            continue
        csv_path = os.path.join(folder_path, f"stage-attempt-directions-{mode}.csv")
        season_df = pd.read_csv(csv_path).drop(columns=["R"])
        season_df["Season"] = int(folder_name.split("_")[0])
        frames.append(season_df)

    if not frames:
        raise FileNotFoundError(f"no season folders found in {season_path!r}")

    df_league = pd.concat(frames, ignore_index=True)
    # Element-wise conversion, column by column, so it works on every pandas version.
    df_league = df_league.apply(lambda column: column.map(_percent_to_fraction))
    df_league["League"] = league
    df_league["for_or_against"] = mode

    df_league = df_league.rename(
        columns={
            'Team': 'team',
            'Left Side': 'left',
            'Attempts from the middle': 'middle',
            'Right Side': 'right',
            'Season': 'season',
            'League': 'league',
        }
    )

    return df_league.sort_values(by=['season'])

In [4]:
# Load the attempt-direction data of every league, for both the "for" and "against" modes.
df = gather_season_all_league()

In [5]:
# Preview the first rows of the combined table.
df.head(20)

Unnamed: 0,team,left,middle,right,season,league,for_or_against
197,Borussia Dortmund,0.15,0.67,0.18,2009,Bundesliga,for
180,Hamburger SV,0.27,0.55,0.18,2009,Bundesliga,for
181,Nuernberg,0.27,0.52,0.22,2009,Bundesliga,for
182,Wolfsburg,0.26,0.59,0.15,2009,Bundesliga,for
183,Werder Bremen,0.25,0.6,0.15,2009,Bundesliga,for
184,Borussia M.Gladbach,0.25,0.58,0.17,2009,Bundesliga,for
185,Bochum,0.24,0.57,0.19,2009,Bundesliga,for
186,Freiburg,0.23,0.58,0.2,2009,Bundesliga,for
187,Schalke 04,0.22,0.65,0.13,2009,Bundesliga,for
196,Eintracht Frankfurt,0.17,0.68,0.14,2009,Bundesliga,for


In [6]:
# Keep only the rows that were loaded in "against" mode.
df[df["for_or_against"] == "against"]

Unnamed: 0,team,left,middle,right,season,league,for_or_against
197,FC Koln,0.18,0.67,0.15,2009,Bundesliga,against
180,Werder Bremen,0.24,0.55,0.21,2009,Bundesliga,against
181,Mainz 05,0.24,0.58,0.18,2009,Bundesliga,against
182,Bayern Munich,0.23,0.55,0.23,2009,Bundesliga,against
183,Hertha Berlin,0.23,0.62,0.16,2009,Bundesliga,against
...,...,...,...,...,...,...,...
236,Roma,0.17,0.66,0.17,2021,SerieA,against
237,Sassuolo,0.17,0.71,0.13,2021,SerieA,against
238,Genoa,0.17,0.68,0.15,2021,SerieA,against
239,Udinese,0.15,0.71,0.15,2021,SerieA,against


In [7]:
# Total number of rows across all leagues and both modes.
len(df)

2548

### Test direction plot

In [8]:
# One grey line per team for the "right" share of attempts ("for" rows only),
# with the per-season average of those same rows drawn on top.
df_for = df[df["for_or_against"] == "for"]

fig = px.line(df_for, x="season", y="right", color="team", title="right", markers=True)

fig.update_traces(
    opacity=0.4,
    line_color='rgb(189,189,189)'
)

# Average over the plotted rows only, so the overlay matches the team lines.
mean = df_for.groupby("season")["right"].mean()
fig.add_trace(go.Scatter(
    x=mean.index,
    y=mean.values,
    line=dict(
        width=4,
        color="rgb(49,130,189)"
    ),
    name="Average"
))

# Each season is stored as its starting year; label it as "YYYY-YYYY".
season_starts = list(range(2009, 2022))
fig.update_layout(
    yaxis=dict(
        tickformat=".0%",
        showgrid=True
    ),
    xaxis=dict(
        tickvals=season_starts,
        ticktext=[f"{year}-{year + 1}" for year in season_starts],
        showgrid=True
    ),
    showlegend=False
)

### Interactive percentage of attempts from each direction, seasons 2009-2010 to 2021-2022

In [9]:
from dash import dcc, html, Input, Output, State, ctx
from jupyter_dash import JupyterDash
import plotly.express as px
from dash.exceptions import PreventUpdate

app = JupyterDash(__name__)

# One row per league: (checklist value, visible label, flag icon file, image alt text).
_LEAGUE_CHOICES = [
    ("Bundesliga", " Bundesliga", "germany-flag-icon-16.png", "Germany flag"),
    ("EPL", " EPL", "england-flag-icon-16.png", "England flag"),
    ("LaLiga", " LaLiga", "spain-flag-icon-16.png", "Spain flag"),
    ("Ligue1", " Ligue 1", "france-flag-icon-16.png", "France flag"),
    ("SerieA", " Serie A", "italy-flag-icon-16.png", "Italy flag"),
]

# Styles shared by the control panels of the top row.
_PANEL_STYLE = {'padding': 10, 'flex': 1}
_SELECTOR_STYLE = {"height": 150, "width": 200, "overflow": "auto"}
_STACKED_LABEL_STYLE = {'display': 'block'}


def _league_option(value, text, icon_file, alt_text):
    """Return one checklist option whose label is a flag icon followed by the league name."""
    return {
        "label": html.Div(
            [
                html.Img(src=f"./assets/flags/{icon_file}", alt=alt_text),
                text,
            ],
            style={'display': 'inline-block', 'marginTop': '5px'},
        ),
        "value": value,
    }


# Page layout: a row of four control panels (league, direction, for/against,
# team) above the line chart that the callbacks fill in.
app.layout = html.Div([
    html.Div([
        html.Div([
            html.H3('League'),
            dcc.Checklist(
                options=[_league_option(*choice) for choice in _LEAGUE_CHOICES],
                value=["Bundesliga"],
                id="league",
                labelStyle=_STACKED_LABEL_STYLE,
                style=_SELECTOR_STYLE
            )
        ], style=_PANEL_STYLE),
        html.Div([
            html.H3('Direction'),
            dcc.RadioItems(
                {
                    "right": "Right Side",
                    "left": "Left Side",
                    "middle": "Middle"
                },
                "right",
                id="direction",
                labelStyle=_STACKED_LABEL_STYLE,
                style=_SELECTOR_STYLE
            )
        ], style=_PANEL_STYLE),
        html.Div([
            html.H3('Attempts for or against'),
            dcc.RadioItems(
                {
                    "for": "Attempts for",
                    "against": "Attempts against"
                },
                "for",
                id="for_or_against",
                labelStyle=_STACKED_LABEL_STYLE,
                style=_SELECTOR_STYLE
            )
        ], style=_PANEL_STYLE),
        html.Div([
            html.H3('Team'),
            dcc.Dropdown([], id='team_dropdown', multi=True),
            html.Div(id='dd-output-container')
        ], style=_PANEL_STYLE)
    # Style keys are camelCase, as the underlying React components expect.
    ], style={'display': 'flex', 'flexDirection': 'row'}),

    dcc.Graph(
        id='graph',
    )
])


@app.callback(
    Output("team_dropdown", "options"),
    Input('league', 'value'))
def update_team_dropdown(selected_league):
    """Return the sorted team names that appear in any of the selected leagues.

    Args:
        selected_league: List of league names ticked in the checklist
            (may be empty or None).

    Returns:
        Sorted list of unique team names, or an empty list when no league
        is selected.
    """
    if not selected_league:
        return []
    teams_in_leagues = df.loc[df["league"].isin(selected_league), "team"]
    return sorted(teams_in_leagues.unique())


def _apply_season_axes(fig):
    """Format the y axis as whole percentages and label each season start year as "YYYY-YYYY"."""
    season_starts = list(range(2009, 2022))
    fig.update_layout(
        yaxis=dict(
            tickformat=".0%",
            showgrid=True
        ),
        xaxis=dict(
            tickvals=season_starts,
            ticktext=[f"{year}-{year + 1}" for year in season_starts],
            showgrid=True
        ),
    )
    return fig


@app.callback(
    Output('graph', 'figure'),
    Input('team_dropdown', 'value'),
    Input('league', 'value'),
    Input('direction', 'value'),
    Input('for_or_against', 'value'))
def update_figure(selected_team, selected_league, selected_dir, selected_for_or_against):
    """Build the line chart of one direction's share of attempts per season.

    Args:
        selected_team: Team names chosen in the dropdown (may be empty or None).
        selected_league: League names ticked in the checklist (may be empty or None).
        selected_dir: Column to plot: "right", "left" or "middle".
        selected_for_or_against: "for" or "against".

    Returns:
        A plotly figure. With no league selected it is an empty chart that
        only carries the axis formatting. Otherwise it shows one line per
        team plus a thick "Average" line computed over all teams of the
        selected leagues; chosen teams are coloured and listed in the
        legend, while the all-teams view is drawn in grey without a legend.
    """
    if not selected_league:  # no league selected: return an empty, formatted plot
        fig = _apply_season_axes(px.line())
        fig.update_layout(showlegend=False)
        return fig

    league_rows = df[
        (df["for_or_against"] == selected_for_or_against)
        & df["league"].isin(selected_league)
    ]

    # Honour the team selection whichever control triggered the callback, so
    # switching direction or for/against keeps the chosen teams on screen.
    # If none of the chosen teams belongs to the selected leagues, fall back
    # to the all-teams view instead of drawing an empty chart.
    team_rows = league_rows[league_rows["team"].isin(selected_team or [])]
    highlight_teams = not team_rows.empty

    league_str = ", ".join(selected_league)
    fig = px.line(
        team_rows if highlight_teams else league_rows,
        x="season", y=selected_dir, color="team",
        title=f"Percentage of attempts from the {selected_dir} in {league_str}",
        markers=True,
        hover_name="team", hover_data=["season", selected_dir, "league"])

    fig.update_layout(
        title_font_size=20,
        hoverlabel=dict(
            bgcolor="white",
            font_size=16,
        )
    )

    if highlight_teams:
        # Keep the default colours and the legend so teams can be told apart.
        fig.update_traces(opacity=0.4)
    else:
        # Too many teams to distinguish: grey them out and hide the legend.
        fig.update_layout(showlegend=False)
        fig.update_traces(
            opacity=0.4,
            line_color='rgb(189,189,189)'
        )

    # The average always covers every team of the selected leagues; it is
    # added after update_traces so it keeps its own colour and opacity.
    mean = league_rows.groupby("season")[selected_dir].mean()
    fig.add_trace(go.Scatter(
        x=mean.index,
        y=mean.values,
        line=dict(
            width=5,
            color="rgb(49,130,189)"
        ),
        name="Average",
        showlegend=False
    ))

    return _apply_season_axes(fig)

# Start the Dash server with debug enabled; the app's address is printed below the cell.
app.run_server(debug=True, mode="external")

Dash app running on http://127.0.0.1:8050/
