**Topic:** Data Visualization Dashboard

**Domain:** EPL Football Analytics 22/23

**Tools used:**


1.   Python.
2.   Pandas.
3.   SciPy.
4.   Matplotlib and Plotly to create visualizations.
5.   Dash for the dashboard.



**Dataset:**

The data comes from https://fbref.com/en/comps/9/Premier-League-Stats. The `datasets` directory contains one folder per team, 20 teams in total.


In [None]:
# Print the conda version available in this runtime.
!conda --version

conda 22.11.1


In [None]:
# Install the condacolab helper and use it to set up conda in this runtime.
# NOTE(review): condacolab is documented to restart the kernel after install() —
# confirm, and re-run the cells below once it has finished.
!pip install -q condacolab
import condacolab
condacolab.install()

[0m✨🍰✨ Everything looks OK!


In [None]:
# Re-check the conda version after the condacolab install cell above.
!conda --version

conda 22.11.1


In [None]:
!pip install jupyter-dash

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0m

In [None]:
!pip install pycountry

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
[0m

In [None]:
# Mount Google Drive at /content/drive; the dataset path used further down
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import base64
import os
from io import BytesIO, StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pycountry
import scipy.stats as ss
import seaborn as sns
from dash import Dash, dcc, html, Input, Output
from dash.exceptions import PreventUpdate
from jupyter_dash import JupyterDash
from matplotlib.patches import Arc

In [None]:
def fig_to_uri(in_fig, close_all=True, **save_args):
    """
    Render a figure to PNG and return it as a base64 ``data:`` URI.

    :param in_fig: figure-like object exposing ``savefig`` (and ``clf`` when
        ``close_all`` is true).
    :param close_all: when true, clear ``in_fig`` and close every pyplot figure
        after rendering.
    :param save_args: extra keyword arguments forwarded to ``savefig``.
    :return: ``"data:image/png;base64,<payload>"`` string.
    """
    buffer = BytesIO()
    in_fig.savefig(buffer, format='png', **save_args)

    if close_all:
        in_fig.clf()
        plt.close('all')

    # getvalue() returns the whole buffer regardless of the write position.
    payload = base64.b64encode(buffer.getvalue()).decode("ascii").replace("\n", "")
    return "data:image/png;base64," + payload

In [None]:
![image](https://user-images.githubusercontent.com/41315903/168644061-b3373db2-b2ca-49fa-87ff-7349bbba6eff.png)

/bin/bash: -c: line 0: syntax error near unexpected token `https://user-images.githubusercontent.com/41315903/168644061-b3373db2-b2ca-49fa-87ff-7349bbba6eff.png'
/bin/bash: -c: line 0: `[image](https://user-images.githubusercontent.com/41315903/168644061-b3373db2-b2ca-49fa-87ff-7349bbba6eff.png)'


In [None]:
![image](https://user-images.githubusercontent.com/41315903/168644319-760e9b6d-2bc2-4843-ace6-65ccd4eda62c.png)

/bin/bash: -c: line 0: syntax error near unexpected token `https://user-images.githubusercontent.com/41315903/168644319-760e9b6d-2bc2-4843-ace6-65ccd4eda62c.png'
/bin/bash: -c: line 0: `[image](https://user-images.githubusercontent.com/41315903/168644319-760e9b6d-2bc2-4843-ace6-65ccd4eda62c.png)'


In [None]:
# PLEASE CHANGE BELOW PATH AS NECESSARY
path = '/content/drive/MyDrive/Colab Notebooks/DAV_Group_Assignment_2023/datasets'
dir_list = os.listdir(path)

# Team name -> list of player names; feeds the two dashboard dropdowns.
all_options = {}
# fbref position codes -> display names.
position_dict = {'MF': 'Midfielder', 'DF': 'Defender', 'GK': 'Goalkeeper', 'FW': 'Forward'}

for team_name in sorted(dir_list):
    # On-disk layout is <path>/<team>/<team>.csv. The previous code called
    # .format() on a string without placeholders and always read Arsenal's
    # file, so every team ended up with Arsenal's roster.
    team_csv = os.path.join(path, team_name, '{}.csv'.format(team_name))
    if not os.path.isfile(team_csv):
        # Skip stray entries in the datasets directory that are not team folders.
        continue
    # skipfooter drops the last two rows of each file (needs the python engine).
    team_df = pd.read_csv(team_csv, skipfooter=2, engine='python')
    team_df = team_df[team_df['Player'].notna()]
    all_options[team_name] = team_df['Player'].tolist()
    if team_name == 'Arsenal':
        # Later cells inspect the module-level `df`, which has always held the
        # Arsenal roster; keep that binding explicit instead of relying on
        # whichever team the loop happened to read last.
        df = team_df

In [None]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)


def _section_divider():
    """Return the faded horizontal rule placed between dashboard rows."""
    return html.Hr(style={
        'border': '0',
        'height': '2px',
        'width': '75%',
        'background-image': 'linear-gradient(to right, rgba(0, 0, 0, 0), rgba(0, 0, 0, 0.75), rgba(0, 0, 0, 0))',
    })


def _inline_top(width, extra=None):
    """Build the style dict for a top-aligned inline-block panel of the given width."""
    style = {'width': width, 'display': 'inline-block', 'vertical-align': 'top'}
    if extra:
        style.update(extra)
    return style


def _dropdown_style():
    """Return the rounded-corner style shared by both dropdowns."""
    return {'border-radius': '10px', 'box-shadow': '0 0 2px rgb(204, 204, 204)'}


def _info_line(component_id):
    """Return an empty text slot that the player callback fills in."""
    return html.Div(id=component_id, style={'padding-left': '10px'})


# --- Header -----------------------------------------------------------------
_header = [
    html.H1(
        "English Premier League Football Analytics 22/23",
        style={
            'text-align': 'center',
            'padding-top': '25px',
            'background-color': 'red',
            'padding-bottom': '25px',
            'color': 'white',
        },
    ),
    html.P(
        children="An Analysis of EPL Football 2022-23 Season",
        style={'text-align': 'center', 'font-size': '20px'},
    ),
    _section_divider(),
    html.Br(),
]

# --- Row 1: team/player selectors, player card, playing-time charts ----------
_selectors = html.Div(
    [
        html.P(children="Teams:", style={'text-align': 'left'}),
        dcc.Dropdown(
            options=list(all_options.keys()),
            value='Arsenal',
            id='teams-dropdown',
            style=_dropdown_style(),
        ),
        html.Hr(),
        html.P(children="Players:", style={'text-align': 'left'}),
        dcc.Dropdown(id='players-dropdown', style=_dropdown_style()),
        html.Hr(),
    ],
    style=_inline_top('15%', {'position': 'relative', 'padding': '25px 100px 0px 100px'}),
)

_player_card = html.Div(
    children=[
        _info_line('display-player-name'),
        html.Br(),
        _info_line('display-player-age'),
        html.Br(),
        _info_line('display-player-country'),
        html.Br(),
        _info_line('display-player-position'),
        html.Br(),
        dcc.Graph(id='stats-table', figure={}, style={'width': '50vh', 'height': '36vh'}),
    ],
    style=_inline_top('20%'),
)

_matches_panel = html.Div([dcc.Graph(id='matches-played-graph', figure={})], style=_inline_top('20%'))
_minutes_panel = html.Div([dcc.Graph(id='minutes-played-graph', figure={})], style=_inline_top('28%'))

# --- Row 2: passing bar chart and xG/xA lollipop ------------------------------
_passing_panel = html.Div(
    [dcc.Graph(id='pass-percent-graph', figure={})],
    style=_inline_top('38%', {'padding-left': '100px'}),
)
_lollipop_panel = html.Div([dcc.Graph(id='lollipop-graph', figure={})], style=_inline_top('50%'))

# --- Row 3: matplotlib heatmaps delivered as data URIs -----------------------
_tackles_img = html.Img(id='def-actions-graph', src='', style=_inline_top('38%', {'padding-left': '100px'}))
_touches_img = html.Img(id='touches-graph', src='', style=_inline_top('38%', {'padding-left': '100px'}))

# --- Footer -------------------------------------------------------------------
_footer = html.Footer(
    children=[
        html.P(
            children="",
            style={
                'text-align': 'center',
                'font-size': '20px',
                'color': 'white',
                'padding-top': '20px',
            },
        ),
    ],
    style={'height': '80px', 'background-color': 'black'},
)

app.layout = html.Div(children=_header + [
    _selectors,
    _player_card,
    _matches_panel,
    _minutes_panel,
    _section_divider(),
    html.Br(),
    _passing_panel,
    _lollipop_panel,
    _section_divider(),
    html.Br(),
    _tackles_img,
    _touches_img,
    _footer,
])


@app.callback(
    Output('players-dropdown', 'options'),
    Input('teams-dropdown', 'value'))
def set_players_options(selected_team):
    """
    Populate the players dropdown with the roster of the selected team.

    :param selected_team: value of the teams dropdown; ``None`` when the
        dropdown has been cleared.
    :return: list of ``{'label', 'value'}`` dicts, empty for an unknown or
        cleared team.
    """
    # .get() instead of [] so clearing the team dropdown does not raise KeyError.
    roster = all_options.get(selected_team, [])
    return [{'label': player, 'value': player} for player in roster]


@app.callback(
    Output('players-dropdown', 'value'),
    Input('players-dropdown', 'options'))
def set_players_value(available_options):
    """
    Select the first player whenever the list of options changes.

    :param available_options: list of ``{'label', 'value'}`` dicts, possibly
        empty or ``None``.
    :return: the first option's value, or ``None`` when there are no options.
    """
    if not available_options:
        # Nothing to select; indexing [0] here used to raise.
        return None
    return available_options[0]['value']


# Team match count assumed when the CSV has no usable 'Squad Total' row
# (same fallback value the callback has always used).
_FALLBACK_TEAM_MATCHES = 30
# Fill colour for pitch zones whose value is 0.
_NO_DATA_COLOR = '#FFFFFF'
# Light-to-dark palettes, indexed by (rank - 1) of a zone's value.
_TACKLE_PALETTE = ['#Fddbdb', '#B35050', '#770505']
_TOUCH_PALETTE = ['#Fff5f5', '#Fdc5c5', '#Ce6565', '#D61b1b', '#810606']
# Three-letter codes in the Nation column that differ from ISO 3166-1 alpha-3.
_FIFA_TO_ISO3 = {'GER': 'DEU', 'DEN': 'DNK', 'POR': 'PRT'}
_HOME_NATIONS = ('ENG', 'WAL', 'NIR', 'SCO')

# Zone specs: (index into values, 'third' | 'box', low x, high x, label x), in draw order.
_TACKLE_ZONES = [
    (0, 'third', 0, 43.33, 23),
    (1, 'third', 43.33, 86.66, 65),
    (2, 'third', 86.66, 130, 107),
]
# The defensive third is painted before the penalty box so the box stays visible.
_TOUCH_ZONES = [
    (1, 'third', 0, 43.33, 25),
    (0, 'box', 0, 16.5, 6.5),
    (2, 'third', 43.33, 86.66, 65),
    (3, 'third', 86.66, 130, 98),
    (4, 'box', 113.5, 130, 114.5),
]


def _team_csv(team):
    """Return the CSV path of ``team``: ``<path>/<team>/<team>.csv``."""
    return os.path.join(path, team, '{}.csv'.format(team))


def _first_value(frame, column, default=0):
    """Return the first value of ``frame[column]``; ``default`` if the column is
    absent, the frame has no rows, or the value is missing (NaN)."""
    try:
        value = frame[column].values[0]
    except (KeyError, IndexError):
        return default
    return default if pd.isna(value) else value


def _to_int(value, default=0):
    """Parse a count that may be numeric or a string with thousands separators."""
    try:
        return int(float(str(value).replace(',', '')))
    except (TypeError, ValueError, OverflowError):
        return default


def _country_name(nation):
    """Translate a Nation cell such as ``'ch SUI'`` into a country name."""
    if not isinstance(nation, str) or not nation.split():
        return 'Not Specified'
    parts = nation.split()
    code = parts[-1].upper()
    if code in _HOME_NATIONS:
        return 'United Kingdom'
    queries = [{'alpha_3': _FIFA_TO_ISO3.get(code, code)}]
    if len(parts) > 1:
        # Fallback for FIFA-style codes that are not ISO alpha-3 (e.g. SUI).
        # Assumes the lowercase prefix is an ISO alpha-2 code — TODO confirm
        # against the full dataset.
        queries.append({'alpha_2': parts[0].upper()})
    for query in queries:
        try:
            country = pycountry.countries.get(**query)
        except LookupError:
            country = None
        if country is not None:
            return country.name
    return 'Not Specified'


def _position_names(pos):
    """Expand a comma-separated position string ('FW,MF') to 'Forward, Midfielder'."""
    try:
        return ', '.join(position_dict[code] for code in pos.split(','))
    except (AttributeError, KeyError):
        return 'Unknown'


def _draw_pitch():
    """Draw an empty 130 x 90 pitch outline and return ``(figure, axes)``."""
    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(1, 1, 1)

    segments = [
        # Touchlines, goal lines and halfway line
        ([0, 0], [0, 90]), ([0, 130], [90, 90]), ([130, 130], [90, 0]),
        ([130, 0], [0, 0]), ([65, 65], [0, 90]),
        # Left penalty area
        ([16.5, 16.5], [65, 25]), ([0, 16.5], [65, 65]), ([16.5, 0], [25, 25]),
        # Right penalty area
        ([130, 113.5], [65, 65]), ([113.5, 113.5], [65, 25]), ([113.5, 130], [25, 25]),
        # Left six-yard box (bottom edge now reaches the goal line; it stopped at x=0.5)
        ([0, 5.5], [54, 54]), ([5.5, 5.5], [54, 36]), ([5.5, 0], [36, 36]),
        # Right six-yard box
        ([130, 124.5], [54, 54]), ([124.5, 124.5], [54, 36]), ([124.5, 130], [36, 36]),
    ]
    for xs, ys in segments:
        ax.plot(xs, ys, color="black")

    ax.add_patch(plt.Circle((65, 45), 9.15, color="black", fill=False))  # centre circle
    for spot_x in (65, 11, 119):  # centre spot and both penalty spots
        ax.add_patch(plt.Circle((spot_x, 45), 0.8, color="black"))
    ax.add_patch(Arc((11, 45), height=18.3, width=18.3, angle=0, theta1=310, theta2=50, color="black"))
    ax.add_patch(Arc((119, 45), height=18.3, width=18.3, angle=0, theta1=130, theta2=230, color="black"))

    ax.axis('off')
    return fig, ax


def _zone_heatmap_uri(title, values, palette, zones):
    """Shade pitch zones by the rank of ``values`` and return the PNG as a data URI."""
    ranks = ss.rankdata(values, method='max')
    fig, ax = _draw_pitch()
    try:
        full_height = np.arange(0, 90, 0.1)
        for idx, kind, low, high, label_x in zones:
            if values[idx] == 0:
                colour = _NO_DATA_COLOR
            else:
                colour = palette[int(ranks[idx]) - 1]
            if kind == 'third':
                ax.fill_betweenx(full_height, low, high, alpha=.9, color=colour)
            else:
                # Penalty box: shade only between the box's y-limits.
                ax.fill_between(np.arange(low, high, 0.1), 25, 65, alpha=.9, color=colour)
            ax.text(label_x, 45, str(values[idx]), fontsize=15, fontstyle='italic')
        ax.set_ylim(0, 90)
        ax.set_xlim(0, 130)
        ax.set_title(title, fontdict={'fontsize': 18})
        return fig_to_uri(fig, close_all=False)
    finally:
        # Close only this figure; closing every pyplot figure from inside a
        # server callback could discard figures owned by other code.
        plt.close(fig)


@app.callback(
    Output('display-player-name', 'children'),
    Output('display-player-age', 'children'),
    Output('display-player-country', 'children'),
    Output('display-player-position', 'children'),
    Output('stats-table', 'figure'),
    Output('matches-played-graph', 'figure'),
    Output('minutes-played-graph', 'figure'),
    Output('pass-percent-graph', 'figure'),
    Output('lollipop-graph', 'figure'),
    Output('def-actions-graph', 'src'),
    Output('touches-graph', 'src'),
    Input('teams-dropdown', 'value'),
    Input('players-dropdown', 'value'))
def set_display_children(selected_team, selected_player):
    """
    Build every player-specific element of the dashboard.

    :param selected_team: team chosen in the teams dropdown.
    :param selected_player: player chosen in the players dropdown.
    :return: 11-tuple matching the callback outputs — name, age, country and
        position strings; goals/assists table; playing-time bar chart;
        minutes pie chart; passing bar chart; xG/xA lollipop chart; tackles
        heatmap URI; touches heatmap URI.
    :raises PreventUpdate: when no team/player is selected or the team has no
        CSV, so the dashboard keeps its previous content instead of erroring.
    """
    if not selected_team or not selected_player:
        raise PreventUpdate
    csv_path = _team_csv(selected_team)
    if not os.path.isfile(csv_path):
        raise PreventUpdate

    # Roster view: the two footer rows and the total rows are excluded so team
    # means are computed over players only.
    df = pd.read_csv(csv_path, skipfooter=2, engine='python')
    df = df[df['Player'].notna()]
    df = df[~df['Player'].isin(['Squad Total', 'Opponent Total'])]
    player_row = df[df['Player'] == selected_player]

    # --- Player card --------------------------------------------------------
    age_raw = _first_value(player_row, 'Age', default=None)
    # Age is stored as 'years-days'; show the years part only.
    age = age_raw.split('-')[0] if isinstance(age_raw, str) else 0
    country_name = _country_name(_first_value(player_row, 'Nation', default=None))
    player_position = _position_names(_first_value(player_row, 'Pos', default=None))

    # Full view (totals included) used for the per-player and squad figures.
    new_df = pd.read_csv(csv_path)
    new_df = new_df[new_df['Player'].notna()]
    selected_player_df = new_df[new_df['Player'] == selected_player]
    squad_total_df = new_df[new_df['Player'] == 'Squad Total']

    # --- Playing time (stacked bar) ----------------------------------------
    matches_played = _to_int(_first_value(selected_player_df, 'Matches_Played'))
    starts = _to_int(_first_value(selected_player_df, 'Starts'))
    squad_matches = _to_int(
        _first_value(squad_total_df, 'Matches_Played', default=None),
        default=_FALLBACK_TEAM_MATCHES,
    )
    # The old fallback subtracted matches_unplayed from itself before it was
    # assigned; the intended operand is matches_played.
    matches_unplayed = squad_matches - matches_played
    games = {
        'Player': [selected_player] * 3,
        'Type': ['Starts', 'Substitute', 'Unplayed Matches'],
        'Number of Games': [starts, matches_played - starts, matches_unplayed],
    }
    sbc = px.bar(pd.DataFrame(data=games), x="Player", y="Number of Games", color="Type",
                 title="Playing Time", template='simple_white')
    sbc.update_layout(title={'x': 0.5, 'xanchor': 'center'})

    # --- Minutes (pie) -------------------------------------------------------
    # _to_int accepts both '2,520' strings and plain numbers; calling .replace
    # on a numeric value used to fail and silently report 0 minutes.
    total_minutes = _to_int(_first_value(squad_total_df, 'Minutes_Played'))
    minutes_played = _to_int(_first_value(selected_player_df, 'Minutes_Played'))
    minutes_games = {
        'Player': [selected_player] * 2,
        'Type': ['Minutes Benched', 'Minutes Played'],
        'Minutes': [total_minutes - minutes_played, minutes_played],
    }
    pc = px.pie(pd.DataFrame(data=minutes_games), values='Minutes', names='Type',
                title='Minutes Played', template='simple_white')
    pc.update_layout(title={'x': 0.5, 'xanchor': 'center'})

    # --- Goals / assists table ----------------------------------------------
    goals = _first_value(selected_player_df, 'Gls')
    assists = _first_value(selected_player_df, 'Ast')
    tab = go.Figure(
        data=[go.Table(header=dict(values=['Statistic', 'Value']),
                       cells=dict(values=[['Goals', 'Assists'], [goals, assists]]))],
        layout={'title': 'Player Performance'},
    )

    # --- Passing completion (bar, best range highlighted) ---------------------
    passing_dict = {
        'Player': [selected_player] * 3,
        'Passing Type': ['Short', 'Medium', 'Long'],
        'Passing Percentage': [
            _first_value(selected_player_df, 'Short_Cmp%'),
            _first_value(selected_player_df, 'Medium_Cmp%'),
            _first_value(selected_player_df, 'Long_Cmp%'),
        ],
    }
    passing_df = pd.DataFrame(data=passing_dict)
    passing_df["Color"] = np.where(
        passing_df["Passing Percentage"] < passing_df["Passing Percentage"].max(), 'grey', 'red')
    p_bc = go.Figure()
    p_bc.add_trace(
        go.Bar(name='Passing Percentage',
               x=passing_df['Passing Type'],
               y=passing_df['Passing Percentage'],
               marker_color=passing_df['Color']))
    p_bc.update_layout(template='simple_white', title={
        'text': "Prominent Passing Style",
        'x': 0.5,
        'xanchor': 'center'
    })
    p_bc.update_xaxes(title_text="Passing Type")
    p_bc.update_yaxes(title_text="Percentage")

    # --- xG / xA vs. team mean (lollipop) -------------------------------------
    expected_values = [_first_value(selected_player_df, 'xG'), _first_value(selected_player_df, 'xA')]
    mean_values = [df['xG'].mean(), df['xA'].mean()]
    metric = ['xG', 'xA']
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=expected_values,
        y=metric,
        name='Actual Values',
        marker=dict(
            color='LightSkyBlue',
            line_color='rgba(156, 165, 196, 1.0)',
        )
    ))
    fig.add_trace(go.Scatter(
        x=mean_values, y=metric,
        name='Team Mean',
        marker=dict(
            color='rgba(204, 204, 204, 0.95)',
            line_color='rgba(217, 217, 217, 1.0)'
        )
    ))
    # Connect each player marker to the team-mean marker on the same row.
    for i in range(len(metric)):
        fig.add_shape(type='line',
                      x0=mean_values[i], y0=i,
                      x1=expected_values[i], y1=i,
                      line=dict(color='grey', width=3))
    fig.update_traces(mode='markers', marker=dict(line_width=1, symbol='circle', size=16))
    fig.update_layout(
        title={'text': 'Expected Goals', 'x': 0.5, 'xanchor': 'center'},
        xaxis=dict(
            linecolor='rgb(102, 102, 102)',
            tickfont_color='rgb(102, 102, 102)',
            ticks='outside',
            tickcolor='rgb(102, 102, 102)',
        ),
        yaxis=dict(
            ticks='outside',
            tickcolor='rgb(102, 102, 102)',
        ),
        margin=dict(l=140, r=40, b=50, t=80),
        legend=dict(
            font_size=10,
        ),
        paper_bgcolor='white',
        plot_bgcolor='white',
        hovermode='closest',
    )
    fig.update_xaxes(title='Expected')
    fig.update_yaxes(title='Metric')

    # --- Pitch heatmaps -------------------------------------------------------
    tackles_list = [
        _first_value(selected_player_df, 'Tackles_Def_3rd'),
        _first_value(selected_player_df, 'Tackles_Mid 3rd'),  # column name contains a space
        _first_value(selected_player_df, 'Tackles_Att_3rd'),
    ]
    out_url = _zone_heatmap_uri("Tackles Heatmap", tackles_list, _TACKLE_PALETTE, _TACKLE_ZONES)

    touches_list = [
        _first_value(selected_player_df, 'Touches_Def_Pen'),
        _first_value(selected_player_df, 'Touches_Def_3rd'),
        _first_value(selected_player_df, 'Touches_Mid_3rd'),
        _first_value(selected_player_df, 'Touches_Att_3rd'),
        _first_value(selected_player_df, 'Touches_Att_Pen'),
    ]
    touches_url = _zone_heatmap_uri("Touches Heatmap", touches_list, _TOUCH_PALETTE, _TOUCH_ZONES)

    return (
        u'Player Name : {}'.format(selected_player),
        'Age : {}'.format(age),
        'Country : {}'.format(country_name),
        'Position : {}'.format(player_position),
        tab, sbc, pc, p_bc, fig, out_url, touches_url,
    )


# Start the Dash server for `app` with debug mode switched on.
if __name__ == '__main__':
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash app running on:


<IPython.core.display.Javascript object>

**Predict 2023 EPL**

In [None]:
# NOTE(review): `df` is not defined in this section; it is the module-level
# frame left behind by the team-loading loop earlier in the notebook. This
# displays the whole frame, and the next cell shows df.head() again.
df

Unnamed: 0,Player,Nation,Pos,Age,Tackles_Def_3rd,Tackles_Mid 3rd,Tackles_Att_3rd,Pressures_Def_3rd,Pressure_Mid_3rd,Pressure_Att_3rd,...,Touches_Mid_3rd,Touches_Att_3rd,Touches_Att_Pen,Matches_Played,Starts,Minutes_Played,Gls,Ast,xG,xA
0,Ben White,eng ENG,DF,24-189,21,9,4,105,77,19,...,993,111,20,28,28,2520,0,0,1.0,0.5
1,Bukayo Saka,eng ENG,"FW,MF",20-222,9,13,5,76,151,153,...,491,781,184,30,28,2316,9,5,6.5,6.2
2,Gabriel Dos Santos,br BRA,DF,24-117,27,11,3,99,62,8,...,880,82,34,27,27,2376,3,0,2.3,0.8
3,Aaron Ramsdale,eng ENG,GK,23-336,0,0,0,2,0,0,...,9,0,0,26,26,2340,0,0,0.0,0.0
4,Martin Ødegaard,no NOR,MF,23-119,4,16,12,82,195,163,...,711,591,53,28,24,2066,6,3,3.8,4.8
5,Thomas Partey,gh GHA,MF,28-306,25,23,7,107,149,36,...,845,284,23,24,23,2028,2,1,2.5,1.3
6,Kieran Tierney,sct SCO,DF,24-314,11,7,3,60,58,45,...,613,472,44,22,22,1915,1,3,0.7,1.9
7,Alexandre Lacazette,fr FRA,"FW,MF",30-322,7,13,5,46,116,183,...,294,314,105,25,20,1709,4,7,7.8,1.9
8,Granit Xhaka,ch SUI,MF,29-200,15,14,5,72,100,59,...,694,317,35,19,19,1635,0,2,0.9,1.6
9,Emile Smith Rowe,eng ENG,"MF,FW",21-261,5,10,0,49,101,68,...,427,439,59,26,18,1657,9,2,5.1,2.1


In [None]:
df.head()

Unnamed: 0,Player,Nation,Pos,Age,Tackles_Def_3rd,Tackles_Mid 3rd,Tackles_Att_3rd,Pressures_Def_3rd,Pressure_Mid_3rd,Pressure_Att_3rd,...,Touches_Mid_3rd,Touches_Att_3rd,Touches_Att_Pen,Matches_Played,Starts,Minutes_Played,Gls,Ast,xG,xA
0,Ben White,eng ENG,DF,24-189,21,9,4,105,77,19,...,993,111,20,28,28,2520,0,0,1.0,0.5
1,Bukayo Saka,eng ENG,"FW,MF",20-222,9,13,5,76,151,153,...,491,781,184,30,28,2316,9,5,6.5,6.2
2,Gabriel Dos Santos,br BRA,DF,24-117,27,11,3,99,62,8,...,880,82,34,27,27,2376,3,0,2.3,0.8
3,Aaron Ramsdale,eng ENG,GK,23-336,0,0,0,2,0,0,...,9,0,0,26,26,2340,0,0,0.0,0.0
4,Martin Ødegaard,no NOR,MF,23-119,4,16,12,82,195,163,...,711,591,53,28,24,2066,6,3,3.8,4.8


In [None]:
df.shape

(27, 26)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Random forest with 50 trees, a minimum of 10 samples to split a node, and a
# fixed seed so repeated runs build the same forest.
# NOTE(review): no rf.fit(...) call appears in the visible cells.
rf = RandomForestClassifier(n_estimators=50, min_samples_split=10, random_state=1)

In [None]:
# NOTE(review): `train` is the same object as `df` (no copy, no row filter).
train = df

In [None]:
# NOTE(review): `test` is also `df`, i.e. identical to `train` — there is no
# held-out split in the visible cells.
test = df

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
from sklearn.metrics import precision_score

In [None]:
# NOTE(review): this binds the `predict` method itself; it is not called, so
# `preds` holds a bound method rather than predictions.
preds = rf.predict

In [None]:
18/31
# ~58% correct predictions (18 of 31).
# NOTE(review): both counts are typed in by hand; nothing in the visible cells
# computes them from `preds`.

0.5806451612903226

**Scraping**

In [None]:
import requests

In [None]:
# Premier League overview page on fbref; the first stats table on it is used
# below to collect the squad links.
standings_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

In [None]:
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting later
# cells parse an error page.
data = requests.get(standings_url, timeout=30)
data.raise_for_status()

In [None]:
from bs4 import BeautifulSoup

In [None]:
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and no parser-guessing warning is emitted).
soup = BeautifulSoup(data.text, "html.parser")
standings_table = soup.select('table.stats_table')[0]
links = [anchor.get("href") for anchor in standings_table.find_all('a')]
# Anchors without an href yield None; skip them before the substring test.
links = [l for l in links if l and '/squads/' in l]

In [None]:
# The collected hrefs are site-relative; prefix the host to get absolute URLs.
team_urls = [f"https://fbref.com{l}" for l in links]

In [None]:
# First team only. Fail fast on a stalled connection or an HTTP error.
data = requests.get(team_urls[0], timeout=30)
data.raise_for_status()

In [None]:
# read_html expects a path, URL or file-like object; passing raw HTML text is
# deprecated in recent pandas, so wrap it in StringIO.
matches = pd.read_html(StringIO(data.text), match="Scores & Fixtures")[0]

In [None]:
# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(data.text, "html.parser")
links = [anchor.get("href") for anchor in soup.find_all('a')]
# Keep only the link(s) to the all-competitions shooting page; hrefs may be None.
links = [l for l in links if l and 'all_comps/shooting/' in l]

In [None]:
# Fail fast on a stalled connection or an HTTP error.
data = requests.get(f"https://fbref.com{links[0]}", timeout=30)
data.raise_for_status()

In [None]:
# Wrap the HTML text in StringIO; passing literal HTML to read_html is
# deprecated in recent pandas.
shooting = pd.read_html(StringIO(data.text), match="Shooting")[0]

In [None]:
shooting.head()

Unnamed: 0_level_0,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,For Arsenal,...,Standard,Standard,Standard,Standard,Expected,Expected,Expected,Expected,Expected,Unnamed: 25_level_0
Unnamed: 0_level_1,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Match Report
0,2022-08-05,20:00,Premier League,Matchweek 1,Fri,Away,W,2,0,Crystal Palace,...,14.6,1.0,0,0,1.0,1.0,0.1,0.0,0.0,Match Report
1,2022-08-13,15:00,Premier League,Matchweek 2,Sat,Home,W,4,2,Leicester City,...,13.0,0.0,0,0,2.7,2.7,0.16,1.3,1.3,Match Report
2,2022-08-20,17:30,Premier League,Matchweek 3,Sat,Away,W,3,0,Bournemouth,...,14.8,0.0,0,0,1.3,1.3,0.1,1.7,1.7,Match Report
3,2022-08-27,17:30,Premier League,Matchweek 4,Sat,Home,W,2,1,Fulham,...,15.5,1.0,0,0,2.6,2.6,0.12,-0.6,-0.6,Match Report
4,2022-08-31,19:30,Premier League,Matchweek 5,Wed,Home,W,2,1,Aston Villa,...,16.3,1.0,0,0,2.4,2.4,0.12,-0.4,-0.4,Match Report
