In [25]:
import streamlit as st

In [26]:
st.__version__

'1.35.0'

In [27]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# from snowflake.connector.pandas_tools import write_pandas
# from sqlalchemy import create_engine
import snowflake.connector

from mplsoccer import Pitch, VerticalPitch
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import math

from snowflake.connector.pandas_tools import write_pandas

In [28]:
import snowflake.connector

In [29]:
## Set options to view all columns
pd.set_option('display.max_columns', None)

In [30]:
SNOWFLAKE_USER = 'karan14'
SNOWFLAKE_PASSWORD = 'Snowfl@key0014'
SNOWFLAKE_ACCOUNT = 'lv65293.ca-central-1.aws'
SNOWFLAKE_WAREHOUSE = 'COMPUTE_WH'
SNOWFLAKE_DATABASE = 'GEGENSTATS'
SNOWFLAKE_SCHEMA = 'TABLES'

In [31]:
conn = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA
)

In [32]:
cursor = conn.cursor()

In [33]:
def fetch_data(cursor, query):
    cursor.execute(query)
    rows = cursor.fetchall()
    column_names = [desc[0] for desc in cursor.description]
    return pd.DataFrame(rows, columns=column_names)

In [34]:
team_names = fetch_data(cursor, 'SELECT * FROM TEAMS')

In [35]:
team_misc = fetch_data(cursor, 'SELECT * FROM TEAM_MISC_STATS')

In [36]:
team_standard = fetch_data(cursor, 'SELECT * FROM TEAM_STANDARD_STATS')

In [37]:
team_attacking = fetch_data(cursor, 'SELECT * FROM TEAM_ATTACKING_STATS')

In [38]:
team_defending = fetch_data(cursor, 'SELECT * FROM TEAM_DEFENDING_STATS')

In [39]:
df_competitions = fetch_data(cursor, 'SELECT COMPETITION, COMPETITION_ACRONYM, SEASON FROM COMPETITIONS')

In [40]:
df_matches = fetch_data(cursor, 'SELECT * FROM MATCHES')

In [41]:
df_shots = fetch_data(cursor, 'SELECT * FROM SHOT_EVENTS')

In [42]:
df_players = fetch_data(cursor, 'SELECT * FROM PLAYERS')

In [43]:
team_attacking = team_attacking.merge(df_competitions, on=['COMPETITION','SEASON'], how='left')

team_attacking = team_attacking.merge(team_names, on='TEAM_FBREF_ID', how='left')

team_attacking = team_attacking.merge(team_standard, on=['TEAM_FBREF_ID', 'SEASON', 'COMPETITION'], how='left')

In [44]:
team_attacking['SHOTS PER GAME'] = team_attacking['SHOTS']/team_attacking['MATCHES_PLAYED']

In [45]:
team_attacking['CONVERSION RATE (%)'] = team_attacking['GOALS_SCORED']/team_attacking['SHOTS']

In [46]:
team_attacking.head()

Unnamed: 0,TEAM_FBREF_ID,SEASON,COMPETITION,GOALS_SCORED,NPXG,XG,SHOTS,SHOTS_ON_TARGET,PASS_COMPLETED,PASS_ATTEMPTED,TAKEONS_ATTEMPTED,TAKEONS_COMPLETED,CROSSES_INTO_PA,FOULS_AGAINST,COMPETITION_ACRONYM,TEAM_NAME,TEAM_LOGO_URL,TEAM_WS_ID,MATCHES_PLAYED,TEAM_WINS,TEAM_DRAWS,TEAM_LOSSES,TEAM_PTS,TEAM_XPTS,SHOTS PER GAME,CONVERSION RATE (%)
0,7a54bb4f,2223,FRA-Ligue 1,23,24.22,31.06,311.0,81.0,11216.0,15286.0,630.0,283.0,71.0,495.0,Ligue 1,Ajaccio,https://i.imgur.com/FSm4pRb.png,610,38.0,7,5,26,26,33.12,8.184211,0.073955
1,e2d8892c,2223,FRA-Ligue 1,89,81.59,86.15,562.0,242.0,23694.0,26476.0,976.0,454.0,46.0,416.0,Ligue 1,Paris S-G,https://i.imgur.com/kuN3QSp.png,304,38.0,27,4,7,85,74.01,14.789474,0.158363
2,d2c87802,2223,FRA-Ligue 1,52,42.79,47.35,397.0,149.0,14837.0,18087.0,916.0,406.0,38.0,501.0,Ligue 1,Lorient,https://i.imgur.com/9k1Kgp6.png,146,38.0,15,10,13,55,42.43,10.447368,0.130982
3,fb08dbb3,2223,FRA-Ligue 1,44,35.32,41.4,417.0,132.0,12486.0,16452.0,655.0,270.0,94.0,462.0,Ligue 1,Brest,https://i.imgur.com/klKUp8n.png,2332,38.0,11,11,16,44,43.52,10.973684,0.105516
4,7fdd64e0,2223,FRA-Ligue 1,45,53.34,59.52,516.0,165.0,13670.0,17565.0,800.0,347.0,87.0,423.0,Ligue 1,Reims,https://i.imgur.com/xULypbI.png,950,38.0,12,15,11,51,55.35,13.578947,0.087209


In [47]:
attacking_efficiency_chart = team_attacking[['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME', 'TEAM_LOGO_URL',
                                            'SHOTS PER GAME', 'CONVERSION RATE (%)']]

In [48]:
def upsert_to_snowflake(table_name, dataframe, primary_keys):
    if isinstance(primary_keys, str):
        primary_keys = [primary_keys]

    temp_table_name = f"{table_name}_TEMP"
    success, nchunks, nrows, _ = write_pandas(conn, dataframe, temp_table_name, auto_create_table=True)

    on_condition = ' AND '.join([f"{table_name}.{pk} = {temp_table_name}.{pk}" for pk in primary_keys])

    update_columns = [col for col in dataframe.columns if col not in primary_keys]
    update_sql = ', '.join([f"{table_name}.{col} = {temp_table_name}.{col}" for col in update_columns])

    merge_sql = f"""
    MERGE INTO {table_name} USING {temp_table_name}
    ON {on_condition}
    """

    if update_columns:
        merge_sql += f"""
        WHEN MATCHED THEN
            UPDATE SET
                {update_sql}
        """

    merge_sql += f"""
    WHEN NOT MATCHED THEN
        INSERT ({', '.join(dataframe.columns)})
        VALUES ({', '.join([f"{temp_table_name}.{col}" for col in dataframe.columns])})
    """

    cur.execute(merge_sql)
    cur.execute(f"DROP TABLE IF EXISTS {temp_table_name}")

    print(f"Upsert completed. {nrows} rows processed.")

In [49]:
SNOWFLAKE_SCHEMA = 'RADAR_CHARTS'

conn = snowflake.connector.connect(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    schema=SNOWFLAKE_SCHEMA
    )

cur = conn.cursor()
cur.execute(f"USE WAREHOUSE {SNOWFLAKE_WAREHOUSE}")

create_schema_sql = f"CREATE SCHEMA IF NOT EXISTS {SNOWFLAKE_SCHEMA}"
cur.execute(create_schema_sql)

<snowflake.connector.cursor.SnowflakeCursor at 0x143e0865be0>

In [50]:
create_table_query = """
CREATE TABLE IF NOT EXISTS TEAM_ATTACKING_EFFICIENCY_CHART (
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    TEAM_NAME VARCHAR(255),
    TEAM_LOGO_URL VARCHAR(255),
    SHOTS_PER_GAME FLOAT,
    CONVERSION_RATE FLOAT,
    PRIMARY KEY (SEASON, COMPETITION_ACRONYM, TEAM_NAME)
);
"""

In [51]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x143e0865be0>

In [52]:
attacking_efficiency_chart.columns = attacking_efficiency_chart.columns.str.replace(" ", "_")

In [53]:
attacking_efficiency_chart = attacking_efficiency_chart.rename({"CONVERSION_RATE_(%)":"CONVERSION_RATE"}, axis=1)

In [54]:
attacking_efficiency_chart["CONVERSION_RATE"] = attacking_efficiency_chart["CONVERSION_RATE"] * 100

In [55]:
upsert_to_snowflake("TEAM_ATTACKING_EFFICIENCY_CHART", attacking_efficiency_chart,
                    ['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME'])

Upsert completed. 292 rows processed.


In [56]:
attacking_efficiency_chart = fetch_data(cursor, 'SELECT * FROM GEGENSTATS.RADAR_CHARTS.TEAM_ATTACKING_EFFICIENCY_CHART')

In [57]:
attacking_efficiency_chart.rename(columns={'CONVERSION_RATE':'CONVERSION RATE (%)',
                                'SHOTS_PER_GAME':'SHOTS PER GAME'}, inplace=True)

In [58]:
def create_FM_team_scatter_chart(df, chart_name, team_name, x_axis_label, y_axis_label, img_size, x_min, x_max, y_min, y_max, bottom_left_label, 
                                 bottom_right_label, top_left_label, top_right_label, bl_color, br_color, tl_color, tr_color):
    fig = go.Figure()
    x_axis_mean_val = df[x_axis_label].mean()
    y_axis_mean_val = df[y_axis_label].mean()

    # Add the scatter plot points
    for index, row in df.iterrows():
        fig.add_trace(go.Scatter(
            x=[row[x_axis_label]],
            y=[row[y_axis_label]],
            mode='markers',
            text=row["TEAM_NAME"],
            marker=dict(
                opacity=0
            ),
            hoverinfo='text',
        ))

        # Add team logo as a layout_image
        fig.add_layout_image(
            dict(
                source=row["TEAM_LOGO_URL"],
                x=row[x_axis_label],
                y=row[y_axis_label],
                xref="x",
                yref="y",
                xanchor="center",
                yanchor="middle"
            )
        )


    # Update axes and layout as necessary
    # ...
        
    fig.update_xaxes(range=[x_min, x_max], title=x_axis_label)
    fig.update_yaxes(range=[y_min, y_max], title=y_axis_label)

    fig.add_shape(
        type='line',
        x0=x_axis_mean_val, y0=fig.layout.yaxis.range[0],  # start of the line
        x1=x_axis_mean_val, y1=fig.layout.yaxis.range[1],  # end of the line
        line=dict(color='White', width=3),
        layer='below'
    )

    # Add a horizontal line at the mean aerials attempted per game
    fig.add_shape(
        type='line',
        x0=fig.layout.xaxis.range[0], y0=y_axis_mean_val,  # start of the line
        x1=fig.layout.xaxis.range[1], y1=y_axis_mean_val,  # end of the line
        line=dict(color='White', width=3),
        layer='below'
    )

    fig.update_layout(
        width=625,
        height=625,
        showlegend=False,
        paper_bgcolor='rgb(70, 70, 70)',
        plot_bgcolor='rgb(70, 70, 70)',
        font=dict(
                family="Roboto, sans-serif",  # Specify the font family
                size=25,                     # Specify the font size
                color="white"                # Specify the font color
            ),
        hoverlabel=dict(
                bgcolor="rgba(20, 20, 20, 0.8)",
                font_family="Roboto, sans-serif"),
        title={
            'text': f'{chart_name}',
            'y':0.98,  # Sets the y position of the title (1 is the top of the figure)
            'x':0.5,  # Centers the title horizontally (0.5 is the center of the figure)
            'xanchor': 'center',  # Ensures the title is centered at the x position
            'yanchor': 'top',  # Ensures the title is at the top of the y position
            'font': dict(
                family="Roboto, sans-serif",  # Specify the font family
                size=23,                     # Specify the font size
                color="white"                # Specify the font color
            )
        },
        margin=dict(l=10, r=30, t=50, b=10),
        images= [dict(
            source= row["TEAM_LOGO_URL"],
            xref="x",
            yref="y",
            x= row[x_axis_label],
            y= row[y_axis_label],
            sizex=img_size*1.8 if row['TEAM_NAME'] == team_name else img_size,  # The size of the image in x axis units
            sizey=img_size*1.8 if row['TEAM_NAME'] == team_name else img_size,  # The size of the image in y axis units
            sizing="contain",
            opacity=1 if row['TEAM_NAME'] == team_name else 0.35,
            layer="above") for index, row in df.iterrows()]
    )

    fig.update_xaxes(
        title=dict(font=dict(size=25)),
        showline=True,  # Show the axis line
        linewidth=2,  # Width of the axis line
        linecolor='white',  # Color of the axis line
        gridcolor='rgba(0,0,0,0)',  # Set grid line color to transparent
        tickfont=dict(color='white', size=15),  # Set the color of the axis ticks (numbers)
    )

    fig.update_yaxes(
        title=dict(font=dict(size=25)),
        showline=True,
        linewidth=2,
        linecolor='white',
        gridcolor='rgba(0,0,0,0)',
        tickfont=dict(color='white', size=15),
    )

    fig.add_annotation(text=bottom_left_label,
                    xref="paper", yref="paper",
                    x=0, y=0,  # Bottom left corner
                    showarrow=False,
                    font=dict(size=15, color=bl_color, family="Roboto, sans-serif"),
                    align="left")

    fig.add_annotation(text=top_left_label,
                    xref="paper", yref="paper",
                    x=0, y=1,  # Top left corner
                    showarrow=False,
                    font=dict(size=15, color=tl_color, family="Roboto, sans-serif"),
                    align="left")

    fig.add_annotation(text=top_right_label,
                    xref="paper", yref="paper",
                    x=1, y=1,  # Top right corner
                    showarrow=False,
                    font=dict(size=15, color=tr_color, family="Roboto, sans-serif"),
                    align="right")

    fig.add_annotation(text=bottom_right_label,
                    xref="paper", yref="paper",
                    x=1, y=0,  # Bottom right corner
                    showarrow=False,
                    font=dict(size=15, color=br_color, family="Roboto, sans-serif"),
                    align="right")

    return fig

In [59]:
attacking_efficiency_chart.head()

Unnamed: 0,SEASON,COMPETITION_ACRONYM,TEAM_NAME,TEAM_LOGO_URL,SHOTS PER GAME,CONVERSION RATE (%)
0,2223,Ligue 1,Ajaccio,https://i.imgur.com/FSm4pRb.png,8.184211,7.395498
1,2223,Ligue 1,Paris S-G,https://i.imgur.com/kuN3QSp.png,14.789474,15.836299
2,2223,Ligue 1,Lorient,https://i.imgur.com/9k1Kgp6.png,10.447368,13.098237
3,2223,Ligue 1,Brest,https://i.imgur.com/klKUp8n.png,10.973684,10.551559
4,2223,Ligue 1,Reims,https://i.imgur.com/xULypbI.png,13.578947,8.72093


In [60]:
season_selected = 2324
league_selected = "EPL"

In [61]:
filt_attacking_efficiency_chart = attacking_efficiency_chart[attacking_efficiency_chart['SEASON'] == season_selected]
filt_attacking_efficiency_chart = filt_attacking_efficiency_chart[filt_attacking_efficiency_chart['COMPETITION_ACRONYM'] == league_selected]

In [62]:
create_FM_team_scatter_chart(filt_attacking_efficiency_chart, 'ATTACKING EFFICIENCY', "AC Milan", 
                             'CONVERSION RATE (%)', 'SHOTS PER GAME', 0.7, 5, 20, 7, 21, 
                                                        "Passive Shooting<br>Wasteful Shooting", 
                                                        "Passive Shooting<br>Clinical Shooting",
                                                        "Aggressive Shooting<br>Wasteful Shooting",
                                                        "Aggressive Shooting<br>Clinical Shooting",
                                                        "red", "orange", "orange", "green")


In [63]:
team_attacking['GOALS PER GAME'] = team_attacking['GOALS_SCORED']/team_attacking['MATCHES_PLAYED']
team_attacking['NPXG PER GAME'] = team_attacking['NPXG']/team_attacking['MATCHES_PLAYED']

In [64]:
scoring_chart = team_attacking[['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME', 'TEAM_LOGO_URL',
                                            'GOALS PER GAME', 'NPXG PER GAME']]

In [65]:
create_table_query = """
CREATE TABLE IF NOT EXISTS TEAM_SCORING_CHART (
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    TEAM_NAME VARCHAR(255),
    TEAM_LOGO_URL VARCHAR(255),
    GOALS_PER_GAME FLOAT,
    NPXG_PER_GAME FLOAT,
    PRIMARY KEY (SEASON, COMPETITION_ACRONYM, TEAM_NAME)
);
"""

In [66]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x143e0865be0>

In [67]:
scoring_chart.columns = scoring_chart.columns.str.replace(" ", "_")

In [68]:
upsert_to_snowflake("TEAM_SCORING_CHART", scoring_chart, ['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME'])

Upsert completed. 292 rows processed.


In [69]:
scoring_chart = fetch_data(cursor, 'SELECT * FROM GEGENSTATS.RADAR_CHARTS.TEAM_SCORING_CHART')

In [70]:
scoring_chart.rename(columns={'GOALS_PER_GAME':'GOALS PER GAME','NPXG_PER_GAME':'NPXG PER GAME'}, inplace=True)

In [71]:
filt_scoring_chart = scoring_chart[scoring_chart['SEASON'] == season_selected]
filt_scoring_chart = filt_scoring_chart[filt_scoring_chart['COMPETITION_ACRONYM'] == league_selected]

In [72]:
create_FM_team_scatter_chart(filt_scoring_chart, 'SCORING', "Arsenal", 
                             'NPXG PER GAME', 'GOALS PER GAME', 0.125, 0.5, 3.1, 0.5, 3.1, 
                                                        "Low Scoring<br>Low NPxG", 
                                                        "Low Scoring<br>High NPxG",
                                                        "High Scoring<br>Low NPxG",
                                                        "High Scoring<br>High NPxG",
                                                        "red", "orange", "orange", "green")


In [73]:
team_attacking.head()

Unnamed: 0,TEAM_FBREF_ID,SEASON,COMPETITION,GOALS_SCORED,NPXG,XG,SHOTS,SHOTS_ON_TARGET,PASS_COMPLETED,PASS_ATTEMPTED,TAKEONS_ATTEMPTED,TAKEONS_COMPLETED,CROSSES_INTO_PA,FOULS_AGAINST,COMPETITION_ACRONYM,TEAM_NAME,TEAM_LOGO_URL,TEAM_WS_ID,MATCHES_PLAYED,TEAM_WINS,TEAM_DRAWS,TEAM_LOSSES,TEAM_PTS,TEAM_XPTS,SHOTS PER GAME,CONVERSION RATE (%),GOALS PER GAME,NPXG PER GAME
0,7a54bb4f,2223,FRA-Ligue 1,23,24.22,31.06,311.0,81.0,11216.0,15286.0,630.0,283.0,71.0,495.0,Ligue 1,Ajaccio,https://i.imgur.com/FSm4pRb.png,610,38.0,7,5,26,26,33.12,8.184211,0.073955,0.605263,0.637368
1,e2d8892c,2223,FRA-Ligue 1,89,81.59,86.15,562.0,242.0,23694.0,26476.0,976.0,454.0,46.0,416.0,Ligue 1,Paris S-G,https://i.imgur.com/kuN3QSp.png,304,38.0,27,4,7,85,74.01,14.789474,0.158363,2.342105,2.147105
2,d2c87802,2223,FRA-Ligue 1,52,42.79,47.35,397.0,149.0,14837.0,18087.0,916.0,406.0,38.0,501.0,Ligue 1,Lorient,https://i.imgur.com/9k1Kgp6.png,146,38.0,15,10,13,55,42.43,10.447368,0.130982,1.368421,1.126053
3,fb08dbb3,2223,FRA-Ligue 1,44,35.32,41.4,417.0,132.0,12486.0,16452.0,655.0,270.0,94.0,462.0,Ligue 1,Brest,https://i.imgur.com/klKUp8n.png,2332,38.0,11,11,16,44,43.52,10.973684,0.105516,1.157895,0.929474
4,7fdd64e0,2223,FRA-Ligue 1,45,53.34,59.52,516.0,165.0,13670.0,17565.0,800.0,347.0,87.0,423.0,Ligue 1,Reims,https://i.imgur.com/xULypbI.png,950,38.0,12,15,11,51,55.35,13.578947,0.087209,1.184211,1.403684


In [74]:
team_attacking['SHOTS ON TARGET PER GAME'] = team_attacking['SHOTS_ON_TARGET']/team_attacking['MATCHES_PLAYED']
team_attacking['XG PER SHOT'] = team_attacking['XG']/team_attacking['SHOTS']

In [75]:
shooting_chart = team_attacking[['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME', 'TEAM_LOGO_URL',
                                            'SHOTS ON TARGET PER GAME', 'XG PER SHOT']]

In [76]:
create_table_query = """
CREATE TABLE IF NOT EXISTS TEAM_SHOOTING_CHART (
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    TEAM_NAME VARCHAR(255),
    TEAM_LOGO_URL VARCHAR(255),
    SHOTS_ON_TARGET_PER_GAME FLOAT,
    XG_PER_SHOT FLOAT,
    PRIMARY KEY (SEASON, COMPETITION_ACRONYM, TEAM_NAME)
);
"""

In [77]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x143e0865be0>

In [78]:
shooting_chart.columns = shooting_chart.columns.str.replace(" ", "_")

In [79]:
upsert_to_snowflake("TEAM_SHOOTING_CHART", shooting_chart, ['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME'])

Upsert completed. 292 rows processed.


In [80]:
shooting_chart = fetch_data(cursor, 'SELECT * FROM GEGENSTATS.RADAR_CHARTS.TEAM_SHOOTING_CHART')

In [81]:
shooting_chart.rename(columns={'SHOTS_ON_TARGET_PER_GAME':'SHOTS ON TARGET PER GAME',
                                'XG_PER_SHOT':'XG PER SHOT'}, inplace=True)

In [82]:
filt_shooting_chart = shooting_chart[shooting_chart['SEASON'] == season_selected]
filt_shooting_chart = filt_shooting_chart[filt_shooting_chart['COMPETITION_ACRONYM'] == league_selected]

In [83]:
create_FM_team_scatter_chart(filt_shooting_chart, 'SHOOTING', "AC Milan", 
                             'XG PER SHOT', 'SHOTS ON TARGET PER GAME', 0.25, 0.05, 0.2, 2, 8, 
                                                        "Passive Shooting<br>Wasteful Shooting", 
                                                        "Passive Shooting<br>Clinical Shooting",
                                                        "Aggressive Shooting<br>Wasteful Shooting",
                                                        "Aggressive Shooting<br>Clinical Shooting",
                                                        "red", "orange", "orange", "green")


### Time to look at set-pieces again

In [84]:
df_events = fetch_data(cursor, 'SELECT * FROM EVENTS_SPADL')

In [85]:
df_events = df_events.merge(df_matches[['MATCH_ID','COMPETITION','SEASON']], on='MATCH_ID', how='left')

In [86]:
set_piece_cross_events = df_events[df_events['TYPE_NAME'].isin(['freekick_crossed', 'corner_crossed'])]

In [87]:
set_piece_cross_events.loc[:, 'OPPO_TEAM_ID'] = "NaN"



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [88]:
def swap_team_id(row, home_id, away_id):
    if row['TEAM_FBREF_ID'] == home_id:
        return away_id
    elif row['TEAM_FBREF_ID'] == away_id:
        return home_id
    else:
        return "PROBLEM"

In [89]:
def specify_oppo_team_id_to_events(df_events, filtered_matches):
    for ind, row in filtered_matches.iterrows():
        home_team_id = row['HOME_TEAM_ID']
        away_team_id = row['AWAY_TEAM_ID']

        inds_to_replace = df_events[df_events['MATCH_ID'] == row['MATCH_ID']].index
        df_events.loc[inds_to_replace, 'OPPO_TEAM_ID'] = df_events.loc[inds_to_replace].apply(swap_team_id, 
                                                                                             axis=1, args=(home_team_id, away_team_id))
    return df_events

In [90]:
set_piece_cross_events = specify_oppo_team_id_to_events(set_piece_cross_events, df_matches)

In [91]:
team_names_2 = team_names[['TEAM_NAME', 'TEAM_FBREF_ID']].copy()
team_names_2.rename({'TEAM_FBREF_ID':'OPPO_TEAM_ID', 'TEAM_NAME':'OPPO_TEAM_NAME'}, axis=1, inplace=True)

In [92]:
set_piece_cross_events = set_piece_cross_events.merge(team_names_2, on="OPPO_TEAM_ID")

In [93]:
set_piece_cross_events = set_piece_cross_events.merge(team_names[['TEAM_NAME', 'TEAM_FBREF_ID']], on="TEAM_FBREF_ID")

In [94]:
set_piece_cross_events_grouped = set_piece_cross_events[['OPPO_TEAM_NAME','COMPETITION',
                                'SEASON','MATCH_ID']].groupby(['OPPO_TEAM_NAME','COMPETITION','SEASON']).count()

In [95]:
own_set_piece_cross_events_grouped = set_piece_cross_events[['TEAM_NAME','COMPETITION','SEASON',
                                    'MATCH_ID']].groupby(['TEAM_NAME','COMPETITION','SEASON']).count()

In [96]:
team_name_matches_played = team_attacking[['TEAM_NAME', 'COMPETITION','SEASON','MATCHES_PLAYED']].copy()

In [97]:
set_piece_cross_events_grouped.rename({'MATCH_ID':'OPPOSITION CROSSES FROM SET PIECE'}, axis=1, inplace=True)

In [98]:
own_set_piece_cross_events_grouped.rename({'MATCH_ID':'CROSSES FROM SET PIECES'}, axis=1, inplace=True)

In [99]:
set_piece_cross_events_grouped = team_name_matches_played.merge(set_piece_cross_events_grouped.reset_index().rename(
    {'OPPO_TEAM_NAME':'TEAM_NAME'}, axis=1),on=['TEAM_NAME','COMPETITION','SEASON'])

In [100]:
own_set_piece_cross_events_grouped = team_name_matches_played.merge(own_set_piece_cross_events_grouped.reset_index(),
                                                                    on=['TEAM_NAME','COMPETITION','SEASON'])

In [101]:
set_piece_box_deliveries = set_piece_cross_events.copy()

In [102]:
set_piece_box_deliveries.reset_index(drop=True, inplace=True)

In [103]:
## Only interested in crosses that end up in the box
set_piece_box_deliveries = set_piece_box_deliveries[set_piece_box_deliveries['END_X'] >= 88.5]
set_piece_box_deliveries = set_piece_box_deliveries[set_piece_box_deliveries['END_Y'] <= 54.16]
set_piece_box_deliveries = set_piece_box_deliveries[set_piece_box_deliveries['END_Y'] >= 13.84]

In [104]:
## Now, we want to cateogrize each cross as a "near post", "central", or "far post" cross
""" 
If START_Y < 30.34, then:
    - Near post is where END_Y is < 30.34
    - Central is where END_Y is between 30.34 and 37.66
    - Far post is where END_Y is > 37.66

If START_Y > 37.66, then:
    - Near post is where END_Y is > 37.66
    - Central is where END_Y is between 30.34 and 37.66
    - Far post is where END_Y is < 30.34
"""

def classify_cross(row):
    if row['START_Y'] < 34.00:
        if row['END_Y'] < 30.34:
            return 'Near post'
        elif row['END_Y'] > 37.66:
            return 'Far post'
        elif row['END_Y'] >= 30.34 and row['END_Y'] <= 37.66:
            return 'Central'
    elif row['START_Y'] >= 34.0:
        if row['END_Y'] > 37.66:
            return 'Near post'
        elif row['END_Y'] < 30.34:
            return 'Far post'
        elif row['END_Y'] >= 30.34 and row['END_Y'] <= 37.66:
            return 'Central'
    else:
        return 'Not Classified'

In [105]:
set_piece_box_deliveries['Cross_end_location'] = "NaN"
set_piece_box_deliveries['Cross_end_location'] = set_piece_box_deliveries.apply(classify_cross, axis=1)

In [106]:
set_piece_box_deliveries['Cross_end_location'].value_counts()

Central      27936
Near post    19043
Far post     13546
Name: Cross_end_location, dtype: int64

In [107]:
filter_1st_contacts = set_piece_box_deliveries[['MATCH_ID', 'ACTION_ID']].copy()
filter_1st_contacts['ACTION_ID'] += 1

In [108]:
set_piece_cross_1st_contacts = df_events.merge(filter_1st_contacts, on=['MATCH_ID', 'ACTION_ID'],how='right')

In [109]:
match_ids_to_remove = set_piece_cross_1st_contacts[set_piece_cross_1st_contacts['TEAM_FBREF_ID'].isna()]['MATCH_ID'].values 
action_ids_to_remove = set_piece_cross_1st_contacts[set_piece_cross_1st_contacts['TEAM_FBREF_ID'].isna()]['ACTION_ID'].values

In [110]:
## Remove missing rows!
for i in range(len(match_ids_to_remove)):
    set_piece_cross_1st_contacts = set_piece_cross_1st_contacts[(set_piece_cross_1st_contacts['MATCH_ID'] != match_ids_to_remove[i]) |
                            (set_piece_cross_1st_contacts['ACTION_ID'] != action_ids_to_remove[i])]
    
    set_piece_box_deliveries = set_piece_box_deliveries[(set_piece_box_deliveries['MATCH_ID'] != match_ids_to_remove[i]) |
                            (set_piece_box_deliveries['ACTION_ID'] != action_ids_to_remove[i]-1)]

In [111]:
set_piece_cross_1st_contacts = specify_oppo_team_id_to_events(set_piece_cross_1st_contacts, df_matches)

In [112]:
set_piece_box_deliveries = specify_oppo_team_id_to_events(set_piece_box_deliveries, df_matches)

In [113]:
set_piece_cross_1st_contacts = set_piece_cross_1st_contacts.merge(team_names[['TEAM_NAME', 'TEAM_FBREF_ID']], on="TEAM_FBREF_ID")
set_piece_cross_1st_contacts = set_piece_cross_1st_contacts.merge(team_names_2, on="OPPO_TEAM_ID")

In [114]:
# set_piece_box_deliveries = set_piece_box_deliveries.merge(team_names[['TEAM_NAME', 'TEAM_FBREF_ID']], on="TEAM_FBREF_ID")

In [115]:
set_piece_cross_1st_contacts.sort_values(by=['MATCH_ID', 'ACTION_ID'], inplace=True)
set_piece_cross_1st_contacts.reset_index(drop=True, inplace=True)
set_piece_box_deliveries.sort_values(by=['MATCH_ID', 'ACTION_ID'], inplace=True)
set_piece_box_deliveries.reset_index(drop=True, inplace=True)

In [116]:
set_piece_cross_1st_contacts['Cross_end_location'] = set_piece_box_deliveries['Cross_end_location']

In [117]:
set_piece_same_1st_contacts = set_piece_cross_1st_contacts[set_piece_box_deliveries['TEAM_FBREF_ID'] == set_piece_cross_1st_contacts['TEAM_FBREF_ID']]
set_piece_other_1st_contacts = set_piece_cross_1st_contacts[set_piece_box_deliveries['TEAM_FBREF_ID'] != set_piece_cross_1st_contacts['TEAM_FBREF_ID']]

In [118]:
att_team_won_1st_contact = set_piece_same_1st_contacts.groupby(['COMPETITION', 'SEASON', 'TEAM_NAME', 
                                                                 'Cross_end_location'])['Cross_end_location'].count()

In [119]:
att_team_lost_1st_contact = set_piece_other_1st_contacts.groupby(['COMPETITION', 'SEASON', 'OPPO_TEAM_NAME', 
                                                                 'Cross_end_location'])['Cross_end_location'].count()

In [120]:
att_team_won_1st_contact.shape

(876,)

In [121]:
att_team_lost_1st_contact.shape

(876,)

In [122]:
att_set_piece_final = (att_team_won_1st_contact/(att_team_lost_1st_contact+att_team_won_1st_contact))*100

In [123]:
att_set_piece_final = pd.DataFrame(att_set_piece_final).rename({'Cross_end_location':'PERC_1ST_CONTACT'}, axis=1).reset_index()

In [124]:
att_set_piece_final.rename(columns={'Cross_end_location':'CROSS_END_LOCATION'}, inplace=True)

In [125]:
att_set_piece_final = att_set_piece_final.merge(df_competitions, on=['COMPETITION','SEASON'])

In [126]:
att_set_piece_final.drop(['COMPETITION'], axis=1, inplace=True)

In [127]:
create_table_query = """
CREATE TABLE IF NOT EXISTS TEAM_ATT_SET_PIECE_FIRST_CONTACTS (
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    TEAM_NAME VARCHAR(255),
    PERC_1ST_CONTACT FLOAT,
    CROSS_END_LOCATION VARCHAR(255),
    PRIMARY KEY (SEASON, COMPETITION_ACRONYM, TEAM_NAME, CROSS_END_LOCATION)
);
"""

In [128]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x143e0865be0>

In [129]:
upsert_to_snowflake("TEAM_ATT_SET_PIECE_FIRST_CONTACTS", att_set_piece_final, ['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME',
                                                                            'CROSS_END_LOCATION'])

Upsert completed. 876 rows processed.


In [130]:
att_set_piece_final = fetch_data(cursor, 'SELECT * FROM  GEGENSTATS.RADAR_CHARTS.TEAM_ATT_SET_PIECE_FIRST_CONTACTS')

In [131]:
att_set_piece_final

Unnamed: 0,SEASON,COMPETITION_ACRONYM,TEAM_NAME,PERC_1ST_CONTACT,CROSS_END_LOCATION
0,2122,EPL,Arsenal,25.892857,Central
1,2122,EPL,Arsenal,44.000000,Far post
2,2122,EPL,Arsenal,18.181818,Near post
3,2122,EPL,Aston Villa,17.948718,Central
4,2122,EPL,Aston Villa,52.000000,Far post
...,...,...,...,...,...
871,2324,Serie A,Torino,35.000000,Far post
872,2324,Serie A,Torino,34.939759,Near post
873,2324,Serie A,Udinese,44.897959,Central
874,2324,Serie A,Udinese,43.478261,Far post


In [132]:
att_set_piece_final_filt = att_set_piece_final[att_set_piece_final['SEASON'] == season_selected]
att_set_piece_final_filt = att_set_piece_final_filt[att_set_piece_final_filt['COMPETITION_ACRONYM'] == league_selected]
att_set_piece_final_filt = att_set_piece_final_filt[att_set_piece_final_filt['TEAM_NAME'] == 'Arsenal']

In [133]:
att_set_piece_chart = att_set_piece_final_filt[['CROSS_END_LOCATION', 'PERC_1ST_CONTACT']].set_index('CROSS_END_LOCATION')

In [134]:
att_set_piece_chart.loc['Near post'].values[0]

28.57142857142857

In [135]:
def create_set_piece_first_contacts_plot(def_set_piece_chart):
    pitch = VerticalPitch(pitch_color='#2B2B2B', line_color='white', goal_type='box', pitch_type='uefa', linewidth=1, half=True)
    fig, ax = pitch.draw(figsize=(8, 12))

    # Define the coordinates for the rectangles
    rect_coords = [
        [(13.84, 105), (13.84, 88.5), (30.09, 88.5), (30.09, 105)],  # Rect1
        [(30.59, 105), (30.59, 88.5), (37.41, 88.5), (37.41, 105)],  # Rect2
        [(37.91, 105), (37.91, 88.5), (54.16, 88.5), (54.16, 105)]  # Rect3
    ]

    # Example percentages for each rectangle (use your actual values here)
    percentages = [def_set_piece_chart.loc['Near post'].values[0], def_set_piece_chart.loc['Central'].values[0], 
                   def_set_piece_chart.loc['Far post'].values[0]]

    # Function to calculate green color based on percentage
    def get_green_color(percentage, max_percentage=100):  # Assuming 100 is the max percentage
        green_intensity = int((percentage / max_percentage) * 255)
        return f'#00{green_intensity:02x}00'

    # Create the rectangular patches
    for i, coords in enumerate(rect_coords):
        polygon = patches.Polygon(coords, closed=True, color="#00b200", zorder=2,  alpha=0.75)
        ax.add_patch(polygon)

        # Add text label in the center of each rectangle
        rect_center_x = (coords[0][0] + coords[2][0]) / 2 
        rect_center_y = (coords[0][1] + coords[2][1]) / 2
        ax.text(rect_center_x, rect_center_y, f'{int(percentages[i])}%',fontproperties='Roboto',
                va='center', ha='center', color='white', fontsize=18, zorder=3)

    # Set figure and axis background color
    fig.patch.set_facecolor('#2B2B2B')
    ax.patch.set_facecolor('#2B2B2B')

    arrow_start = (0, 107)  # Adjust these values as needed for your plot
    arrow_end = (18.84, 107)    # Adjust these values as needed for your plot

    # Draw the arrow
    ax.add_patch(patches.FancyArrow(
        arrow_start[0], arrow_start[1],  # x, y start point
        arrow_end[0] - arrow_start[0], arrow_end[1] - arrow_start[1],  # dx, dy length
        width=0.3,  # Width of the full arrow tail
        length_includes_head=False,  # The head is included in the calculation of the arrow's length
        head_width=1,  # Width of the arrow head
        head_length=1.5,  # Length of the arrow head
        color='lightgrey'  # Light grey color
    ))

    plt.title('SET PIECE FIRST CONTACTS - OPPOSITION BOX', color='gold', fontsize=20, fontname='Roboto', loc='left')

    return fig

In [136]:
create_set_piece_first_contacts_plot(att_set_piece_chart)

<Figure size 800x1200 with 1 Axes>

Shot data extraction

In [137]:
df_shots = fetch_data(cursor, 'SELECT * FROM SHOT_EVENTS')

In [138]:
df_events.head()

Unnamed: 0,MATCH_ID,ORIGINAL_EVENT_ID,PERIOD_ID,TIME_SECONDS,NEW_TIME_SECONDS,TEAM_FBREF_ID,PLAYER_WS_ID,START_X,END_X,START_Y,END_Y,RESULT_ID,ACTION_ID,TYPE_NAME,BODYPART_NAME,COMPETITION,SEASON
0,9cd87b54,,2,775.5,595.5,922493f3,299451,54.495,76.965,57.324,55.896,1,1048,dribble,foot,ITA-Serie A,2223
1,8e214fe9,2519912000.0,1,1459.0,1459.0,dc56fe14,259102,60.165,58.8,15.708,36.312,1,434,pass,foot,ITA-Serie A,2223
2,e5be6a0a,2451175000.0,2,909.0,789.0,d609edc0,329665,28.245,29.82,42.84,24.752,1,1073,pass,foot,ITA-Serie A,2223
3,e4822dc4,2554614000.0,1,72.0,72.0,e2befd26,261474,7.35,9.66,18.156,5.44,1,12,pass,foot,ITA-Serie A,2223
4,20916b65,,2,1343.0,1223.0,9aad3a77,322584,69.93,72.24,1.768,4.012,1,1082,dribble,foot,ITA-Serie A,2223


In [140]:
# df_shots = df_shots.merge(df_events[['MATCH_ID', 'ACTION_ID', 'TEAM_FBREF_ID']], on=['MATCH_ID', 'ACTION_ID'])
df_shots = df_shots.merge(df_events[['MATCH_ID', 'ACTION_ID', 'TEAM_FBREF_ID', 'PERIOD_ID','NEW_TIME_SECONDS','START_X','END_X',
                                     'START_Y', 'END_Y', 'PLAYER_WS_ID', 'RESULT_ID']], on=['MATCH_ID', 'ACTION_ID'])

In [141]:
df_shots = df_shots.merge(team_names[['TEAM_NAME', 'TEAM_FBREF_ID']], on="TEAM_FBREF_ID")

In [142]:
df_shots = df_shots.merge(df_matches[['MATCH_ID','HOME_TEAM_ID', 'AWAY_TEAM_ID', 'GAMEWEEK','COMPETITION',
                                      'SEASON']], on='MATCH_ID', how='left')

In [143]:
df_shots = df_shots.merge(df_competitions, on=['COMPETITION', 'SEASON'])

In [144]:
df_shots['OPPO_TEAM_ID'] = df_shots.apply(lambda row: row['AWAY_TEAM_ID'] if 
                                          row['TEAM_FBREF_ID'] == row['HOME_TEAM_ID'] else row['HOME_TEAM_ID'], axis=1)

In [145]:
df_shots.drop(['HOME_TEAM_ID', 'AWAY_TEAM_ID'], axis=1, inplace=True)

In [146]:
df_shots = df_shots.merge(team_names_2, on='OPPO_TEAM_ID')

In [147]:
df_shots = df_shots.merge(df_players[['PLAYER_WS_ID', 'PLAYER_FBREF_NAME']], on="PLAYER_WS_ID")

In [182]:
df_last_5_matches = df_matches.copy()
df_last_5_matches['DATE_TIME'] = pd.to_datetime(df_last_5_matches['DATE_TIME'])

In [183]:
team_names[team_names['TEAM_NAME'] == "Atalanta"]

Unnamed: 0,TEAM_NAME,TEAM_FBREF_ID,TEAM_LOGO_URL,TEAM_WS_ID
87,Atalanta,922493f3,https://i.imgur.com/SDA0nMH.png,300


In [184]:
df_last_5_matches['COMPETITION'].value_counts()

ESP-La Liga           1140
ENG-Premier League    1140
ITA-Serie A           1139
FRA-Ligue 1           1066
GER-Bundesliga         917
Name: COMPETITION, dtype: int64

In [185]:
df_last_5_matches[(df_last_5_matches['HOME_TEAM_ID'] == '922493f3') | 
                  (df_last_5_matches['AWAY_TEAM_ID'] == '922493f3')].sort_values(by='DATE_TIME')

Unnamed: 0,MATCH_ID,DATE_TIME,HOME_TEAM_ID,AWAY_TEAM_ID,COMPETITION,SEASON,STADIUM,GAMEWEEK,DAY,HOME_TEAM_SCORE,AWAY_TEAM_SCORE,HOME_TEAM_XG,AWAY_TEAM_XG,ATTENDANCE,REFEREE,WS_MATCH_ID
2200,0465e44e,2021-08-21 20:45:00,105360fe,922493f3,ITA-Serie A,2122,Stadio Olimpico di Torino,1,Sat,1,2,1.3,0.7,3475.0,Daniele Chiffi,1575789
2183,b8799a66,2021-08-28 18:30:00,922493f3,1d8099f8,ITA-Serie A,2122,Stadio Atleti Azzurri d'Italia,2,Sat,0,0,0.9,0.3,7912.0,Daniele Orsato,1575791
2168,7559e08b,2021-09-11 20:45:00,922493f3,421387cf,ITA-Serie A,2122,Stadio Atleti Azzurri d'Italia,3,Sat,1,2,2.2,1.8,9162.0,Valerio Marini,1575801
2074,d54b884f,2021-09-18 20:45:00,c5577084,922493f3,ITA-Serie A,2122,Stadio Arechi,4,Sat,0,1,0.7,1.6,11949.0,Paolo Valeri,1575817
2222,6f32a770,2021-09-21 20:45:00,922493f3,e2befd26,ITA-Serie A,2122,Stadio Atleti Azzurri d'Italia,5,Tue,2,1,1.9,0.9,7353.0,Davide Massa,1575821
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,526fed3e,2024-04-28 18:00:00,922493f3,a3d88bd8,ITA-Serie A,2324,Gewiss Stadium,34,Sun,2,0,1.9,0.4,14614.0,Michael Fabbri,1746292
148,4d799b02,2024-05-06 18:00:00,c5577084,922493f3,ITA-Serie A,2324,Stadio Arechi,35,Mon,1,2,0.5,1.9,12980.0,Ermanno Feliciani,1746382
155,9eba4ff1,2024-05-12 20:45:00,922493f3,cf74a709,ITA-Serie A,2324,Gewiss Stadium,36,Sun,2,1,2.7,1.2,14895.0,Marco Guida,1746390
48,d63c561d,2024-05-18 18:00:00,ffcbe334,922493f3,ITA-Serie A,2324,Stadio Comunale Via Del Mare,37,Sat,0,2,1.3,1.6,26354.0,Antonio Rapuano,1746403


In [159]:
df_last_5_matches = df_last_5_matches.merge(df_competitions, on=['COMPETITION', 'SEASON'])

In [160]:
home_teams = team_names.copy()
away_teams = team_names.copy()

In [161]:
home_teams.rename({'TEAM_FBREF_ID':'HOME_TEAM_ID', 'TEAM_NAME':'HOME_TEAM_NAME'}, axis=1, inplace=True)
away_teams.rename({'TEAM_FBREF_ID':'AWAY_TEAM_ID', 'TEAM_NAME':'AWAY_TEAM_NAME'}, axis=1, inplace=True)

In [162]:
df_last_5_matches = df_last_5_matches.merge(home_teams[['HOME_TEAM_NAME', 'HOME_TEAM_ID']], on=['HOME_TEAM_ID'])
df_last_5_matches = df_last_5_matches.merge(away_teams[['AWAY_TEAM_NAME', 'AWAY_TEAM_ID']], on=['AWAY_TEAM_ID'])

In [163]:
def get_last_5_matches(group):
    return group.nlargest(5, 'DATE_TIME')

# Splitting the original DataFrame into home and away games
home_games = df_last_5_matches[['COMPETITION', 'SEASON', 'HOME_TEAM_ID', 'DATE_TIME']].rename(columns={'HOME_TEAM_ID': 'TEAM_ID'})
away_games = df_last_5_matches[['COMPETITION', 'SEASON', 'AWAY_TEAM_ID', 'DATE_TIME']].rename(columns={'AWAY_TEAM_ID': 'TEAM_ID'})

# Concatenating both DataFrames to consider home and away games together
all_games = pd.concat([home_games, away_games])

# Group by 'COMPETITION', 'SEASON', and 'TEAM_ID' and apply the function to get the last 5 games
last_5_matches = all_games.groupby(['COMPETITION', 'SEASON', 'TEAM_ID']).apply(get_last_5_matches)

In [164]:
game_weeks = [index[-1] for index in last_5_matches.index]
print(game_weeks)

[4598, 4522, 4500, 5258, 4537, 4266, 4263, 5006, 4344, 4275, 5130, 5300, 5303, 4769, 5290, 5202, 5245, 4637, 5194, 5076, 5339, 4925, 5317, 4697, 5307, 5130, 5048, 4500, 5098, 5218, 4266, 4684, 4646, 5194, 4697, 5260, 5398, 4372, 5258, 4486, 4719, 4480, 4769, 4537, 4747, 5158, 4925, 4877, 5098, 4902, 4405, 5046, 4646, 4877, 4393, 5158, 5300, 4468, 5165, 5380, 4795, 5046, 5048, 4393, 5076, 5339, 5229, 5245, 5303, 4634, 4795, 4522, 4807, 4902, 4826, 4405, 4480, 4468, 4807, 4486, 5260, 5026, 5317, 5006, 5092, 5202, 5229, 4372, 5165, 4361, 4719, 5398, 4263, 5380, 5307, 4598, 4684, 5026, 4637, 4634, 5150, 4528, 5356, 4523, 4333, 4944, 4345, 4307, 5162, 4310, 4926, 5308, 4964, 5332, 5329, 5133, 4995, 5087, 4650, 5099, 4550, 4682, 4835, 4650, 4723, 4864, 4307, 4835, 4364, 5279, 5133, 5281, 4829, 5270, 5279, 4768, 5281, 4735, 4310, 4751, 4926, 4396, 4920, 5004, 4942, 5350, 4396, 5087, 4412, 4399, 5150, 4624, 5162, 4412, 5138, 4768, 5234, 4920, 5238, 4635, 4805, 5234, 5378, 4829, 4523, 4450, 443

In [165]:
df_last_5_matches = df_last_5_matches.loc[game_weeks]

In [166]:
last_5_matches = list(df_last_5_matches['MATCH_ID'])
df_shots_last_5_matches = df_shots[df_shots['MATCH_ID'].isin(last_5_matches)]

In [None]:
create_table_query = """
CREATE TABLE IF NOT EXISTS SHOT_MAP (
    MATCH_ID VARCHAR(255),
    ACTION_ID INT,
    XG DOUBLE,
    PSXG DOUBLE,
    OUTCOME VARCHAR(255),
    DISTANCE INT,
    BODY_PART VARCHAR(255),
    NOTES VARCHAR(255),
    SCA_1_PLAYER_WS_ID INT,
    SCA_1_PLAYER_FBREF_EVENT VARCHAR(255),
    SCA_2_PLAYER_WS_ID INT,
    SCA_2_PLAYER_FBREF_EVENT VARCHAR(255),
    TEAM_FBREF_ID VARCHAR(255),
    PERIOD_ID INT,
    NEW_TIME_SECONDS DOUBLE,
    START_X DOUBLE, 
    END_X DOUBLE,
    START_Y DOUBLE,
    END_Y DOUBLE,
    PLAYER_WS_ID INT,
    RESULT_ID INT,
    TEAM_NAME VARCHAR(255),
    GAMEWEEK INT,
    COMPETITION VARCHAR(255),
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    OPPO_TEAM_ID VARCHAR(255),
    OPPO_TEAM_NAME VARCHAR(255),
    PLAYER_FBREF_NAME VARCHAR(255),
    PRIMARY KEY (MATCH_ID, ACTION_ID),
    FOREIGN KEY (MATCH_ID) REFERENCES GEGENSTATS.TABLES.MATCHES(MATCH_ID),
    FOREIGN KEY (MATCH_ID, ACTION_ID) REFERENCES GEGENSTATS.TABLES.SHOT_EVENTS(MATCH_ID, ACTION_ID),
    FOREIGN KEY (SCA_1_PLAYER_WS_ID) REFERENCES GEGENSTATS.TABLES.PLAYERS(PLAYER_WS_ID),
    FOREIGN KEY (SCA_2_PLAYER_WS_ID) REFERENCES GEGENSTATS.TABLES.PLAYERS(PLAYER_WS_ID)
);
"""

In [None]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x206f16622a0>

In [None]:
df_shots_last_5_matches

Unnamed: 0,MATCH_ID,ACTION_ID,XG,PSXG,OUTCOME,DISTANCE,BODY_PART,NOTES,SCA_1_PLAYER_WS_ID,SCA_1_PLAYER_FBREF_EVENT,SCA_2_PLAYER_WS_ID,SCA_2_PLAYER_FBREF_EVENT,TEAM_FBREF_ID,PERIOD_ID,NEW_TIME_SECONDS,START_X,END_X,START_Y,END_Y,PLAYER_WS_ID,RESULT_ID,TEAM_NAME,GAMEWEEK,COMPETITION,SEASON,COMPETITION_ACRONYM,OPPO_TEAM_ID,OPPO_TEAM_NAME,PLAYER_FBREF_NAME
21,9bb3a778,34,0.10,,Woodwork,16,Left Foot,,342563.0,Pass (Live),299254.0,Pass (Live),b2b47a98,1,69.0,90.300,105.000,27.608,37.536,141486,0,Newcastle Utd,35,ENG-Premier League,2223,EPL,18bb7c10,Arsenal,Jacob Murphy
22,a3b3a0d5,943,0.13,0.64,Saved,18,Right Foot,,93160.0,Pass (Live),141486.0,Pass (Live),b2b47a98,2,2041.0,88.515,99.855,24.752,31.076,141486,0,Newcastle Utd,37,ENG-Premier League,2122,EPL,18bb7c10,Arsenal,Jacob Murphy
47,e92d40be,216,0.01,,Off Target,25,Left Foot,,415174.0,Pass (Live),338780.0,Pass (Live),b2b47a98,1,809.0,83.580,105.000,18.088,42.772,141486,0,Newcastle Utd,35,ENG-Premier League,2324,EPL,1df6b87e,Sheffield Utd,Jacob Murphy
48,546e1a3d,405,0.02,0.03,Saved,27,Left Foot,,338780.0,Pass (Live),100599.0,Pass (Dead),b2b47a98,1,1441.0,80.010,101.955,29.036,33.660,141486,0,Newcastle Utd,38,ENG-Premier League,2324,EPL,cd051869,Brentford,Jacob Murphy
49,546e1a3d,400,0.05,,Blocked,16,Right Foot,,338780.0,Pass (Live),100599.0,Pass (Live),b2b47a98,1,1423.0,90.090,92.925,24.752,26.248,141486,0,Newcastle Utd,38,ENG-Premier League,2324,EPL,cd051869,Brentford,Jacob Murphy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136880,df360311,1584,0.06,,Blocked,18,Right Foot,,399014.0,Pass (Live),469984.0,Pass (Live),132ebc33,2,2341.0,88.620,92.925,42.160,39.032,469984,0,Nice,34,FRA-Ligue 1,2324,Ligue 1,cb188c0c,Lille,Daouda Traoré
136892,1b4970df,1682,0.10,,Blocked,8,Right Foot,,114169.0,Pass (Dead),202185.0,Shot,d7a486cd,2,3052.0,97.650,98.385,29.444,31.416,373848,0,Nantes,37,FRA-Ligue 1,2223,Ligue 1,cb188c0c,Lille,João Victor
136894,12fd3a4c,1762,0.03,,Blocked,20,Left Foot,,300713.0,Pass (Live),300713.0,Take-On,e2d8892c,2,2573.0,89.040,92.400,45.084,42.704,468859,0,Paris S-G,29,FRA-Ligue 1,2324,Ligue 1,d2c87802,Lorient,Ethan Mbappé
136895,12fd3a4c,1676,0.11,0.03,Saved,11,Left Foot,,300713.0,Pass (Live),408962.0,Pass (Live),e2d8892c,2,2291.0,94.500,102.165,44.880,37.196,468859,0,Paris S-G,29,FRA-Ligue 1,2324,Ligue 1,d2c87802,Lorient,Ethan Mbappé


In [None]:
df_shots_last_5_matches = df_shots_last_5_matches.drop_duplicates()

In [None]:
upsert_to_snowflake("SHOT_MAP", df_shots_last_5_matches, ['MATCH_ID', 'ACTION_ID'])

Upsert completed. 19574 rows processed.


In [167]:
df_shots_last_5_matches = fetch_data(cursor, 'SELECT * FROM  GEGENSTATS.RADAR_CHARTS.SHOT_MAP')

In [168]:
# df_shots = df_shots.merge(df_events[['MATCH_ID', 'ACTION_ID', 'PERIOD_ID','NEW_TIME_SECONDS','START_X','END_X','START_Y', 
#                          'END_Y', 'PLAYER_WS_ID', 'RESULT_ID']], on=['MATCH_ID', 'ACTION_ID'])

In [169]:
# df_shots = df_shots.merge(df_matches[['MATCH_ID', 'HOME_TEAM_ID', 'AWAY_TEAM_ID', 'GAMEWEEK']], on='MATCH_ID', how='left')

In [170]:
# df_shots['OPPO_TEAM_ID'] = df_shots.apply(lambda row: row['AWAY_TEAM_ID'] if row['TEAM_FBREF_ID'] == row['HOME_TEAM_ID'] else row['HOME_TEAM_ID'], axis=1)

In [171]:
# df_shots.drop(['HOME_TEAM_ID', 'AWAY_TEAM_ID'], axis=1, inplace=True)

In [172]:
# df_shots = df_shots.merge(team_names_2, on='OPPO_TEAM_ID')

In [173]:
# df_shots = df_shots.merge(df_players[['PLAYER_WS_ID', 'PLAYER_FBREF_NAME']], on="PLAYER_WS_ID")

In [175]:
team_selected = 'Atalanta'

In [None]:
# df_last_5_matches_filt = df_last_5_matches_filt[(df_last_5_matches_filt['HOME_TEAM_NAME'] == team_selected) | 
#                                                 (df_last_5_matches_filt['AWAY_TEAM_NAME'] == team_selected)]
# df_last_5_matches_filt = df_last_5_matches_filt.sort_values(by='GAMEWEEK')[-5:]
# last_5_matches = list(df_last_5_matches_filt['MATCH_ID'])
# df_shots_last_5_matches = df_shots[df_shots['MATCH_ID'].isin(last_5_matches)]

In [174]:
df_shots_last_5_matches = df_shots_last_5_matches[df_shots_last_5_matches['SEASON'] == 2324]
df_shots_last_5_matches = df_shots_last_5_matches[df_shots_last_5_matches['COMPETITION_ACRONYM'] == 'Serie A']

In [176]:
df_shots_last_5_matches = df_shots_last_5_matches[df_shots_last_5_matches['TEAM_NAME'] == team_selected]

In [177]:
df_shots_last_5_matches['OPPO_TEAM_NAME'].unique()

array(['Torino', 'Empoli', 'Salernitana', 'Roma', 'Lecce'], dtype=object)

In [179]:
df_shots_last_5_matches['GAMEWEEK'].value_counts()

36    24
37    19
38    18
34    17
35    17
Name: GAMEWEEK, dtype: int64

In [None]:
last5_GWs = df_shots_last_5_matches[['GAMEWEEK']].drop_duplicates().sort_values(by="GAMEWEEK")[-5:]
df_shots_last_5_matches = df_shots_last_5_matches.merge(last5_GWs, on="GAMEWEEK")

In [None]:
last5_GWs

Unnamed: 0,GAMEWEEK
11365,34
2665,35
2533,36
2512,37
44,38


In [None]:
df_shots_last_5_matches

Unnamed: 0,MATCH_ID,ACTION_ID,XG,PSXG,OUTCOME,DISTANCE,BODY_PART,NOTES,SCA_1_PLAYER_WS_ID,SCA_1_PLAYER_FBREF_EVENT,SCA_2_PLAYER_WS_ID,SCA_2_PLAYER_FBREF_EVENT,TEAM_FBREF_ID,PERIOD_ID,NEW_TIME_SECONDS,START_X,END_X,START_Y,END_Y,PLAYER_WS_ID,RESULT_ID,TEAM_NAME,GAMEWEEK,COMPETITION,SEASON,COMPETITION_ACRONYM,OPPO_TEAM_ID,OPPO_TEAM_NAME,PLAYER_FBREF_NAME,norm_start_x,norm_start_y,xG_size,hover_text
0,3e33bd98,797,0.05,,Off Target,23,Right Foot,,247454.0,Pass (Live),326413.0,Shot,18bb7c10,1,3027.0,84.315,105.000,29.036,31.076,238940,0,Arsenal,38,ENG-Premier League,2324,EPL,d3fd31cc,Everton,Thomas Partey,0.702625,0.36295,2.153846,"<span style=""font-size: 20px; line-height: 30p..."
1,3e33bd98,1554,0.04,,Blocked,14,Right Foot,,332325.0,Pass (Live),273257.0,Pass (Live),18bb7c10,2,2455.0,93.975,100.590,45.696,37.196,279379,0,Arsenal,38,ENG-Premier League,2324,EPL,d3fd31cc,Everton,Gabriel Jesus,0.783125,0.57120,2.115385,"<span style=""font-size: 20px; line-height: 30p..."
2,3e33bd98,572,0.03,,Blocked,24,Left Foot,,326413.0,Shot,,,18bb7c10,1,1933.0,82.635,85.050,34.340,34.476,334087,0,Arsenal,38,ENG-Premier League,2324,EPL,d3fd31cc,Everton,Gabriel Magalhães,0.688625,0.42925,2.076923,"<span style=""font-size: 20px; line-height: 30p..."
3,3e33bd98,1322,0.03,,Blocked,29,Right Foot,,247454.0,Pass (Live),326413.0,Pass (Live),18bb7c10,2,1692.0,78.750,82.740,37.400,37.060,113994,0,Arsenal,38,ENG-Premier League,2324,EPL,d3fd31cc,Everton,Leandro Trossard,0.656250,0.46750,2.076923,"<span style=""font-size: 20px; line-height: 30p..."
4,3e33bd98,1595,0.10,,Off Target,12,Left Foot,,279379.0,Pass (Live),279379.0,Interception,18bb7c10,2,2591.0,94.080,100.275,29.376,35.360,247454,0,Arsenal,38,ENG-Premier League,2324,EPL,d3fd31cc,Everton,Martin Ødegaard,0.784000,0.36720,2.346154,"<span style=""font-size: 20px; line-height: 30p..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90,22881ea2,328,0.05,0.29,Saved,22,Right Foot,,332325.0,Pass (Live),367185.0,Pass (Live),18bb7c10,1,851.0,84.630,103.740,37.264,34.476,332325,0,Arsenal,34,ENG-Premier League,2324,EPL,8cec06e1,Wolves,Declan Rice,0.705250,0.46580,2.153846,"<span style=""font-size: 20px; line-height: 30p..."
91,22881ea2,1639,0.04,0.12,Goal,8,Left Foot,,326413.0,Pass (Live),332325.0,Pass (Live),18bb7c10,2,2955.0,103.530,105.000,23.664,35.020,247454,1,Arsenal,34,ENG-Premier League,2324,EPL,8cec06e1,Wolves,Martin Ødegaard,0.862750,0.29580,2.115385,"<span style=""font-size: 20px; line-height: 30p..."
92,22881ea2,413,0.08,,Off Target,22,Right Foot,,113994.0,Pass (Live),326413.0,Pass (Live),18bb7c10,1,1150.0,84.840,105.000,35.020,39.440,332325,0,Arsenal,34,ENG-Premier League,2324,EPL,8cec06e1,Wolves,Declan Rice,0.707000,0.43775,2.269231,"<span style=""font-size: 20px; line-height: 30p..."
93,22881ea2,1583,0.03,0.06,Saved,19,Right Foot,,367185.0,Pass (Live),247454.0,Pass (Live),18bb7c10,2,2840.0,88.410,104.055,21.624,32.504,332325,0,Arsenal,34,ENG-Premier League,2324,EPL,8cec06e1,Wolves,Declan Rice,0.736750,0.27030,2.076923,"<span style=""font-size: 20px; line-height: 30p..."


In [None]:
last_GW = df_shots_last_5_matches['GAMEWEEK'].max()

In [None]:
last_match = fetch_data(cursor, "SELECT * FROM MATCHES WHERE GAMEWEEK = ")

38

In [None]:
df_matches

Unnamed: 0,MATCH_ID,DATE_TIME,HOME_TEAM_ID,AWAY_TEAM_ID,COMPETITION,SEASON,STADIUM,GAMEWEEK,DAY,HOME_TEAM_SCORE,AWAY_TEAM_SCORE,HOME_TEAM_XG,AWAY_TEAM_XG,ATTENDANCE,REFEREE,WS_MATCH_ID
0,f60ad969,2024-05-11 15:30:00,32f3ee20,f0ac8ee6,GER-Bundesliga,2324,Stadion im Borussia-Park,33,Sat,1,1,0.4,1.5,54042.0,Robert Schröder,1743683
1,186f2bd2,2024-04-28 15:30:00,32f3ee20,7a41008f,GER-Bundesliga,2324,Stadion im Borussia-Park,31,Sun,0,0,0.9,1.0,53723.0,Sascha Stegemann,1743666
2,4e276e61,2024-05-04 15:30:00,62add3bf,32f3ee20,GER-Bundesliga,2324,Wohninvest Weserstadion,32,Sat,2,2,1.4,1.9,42100.0,Timo Gerach,1743676
3,b0db8bc6,2024-04-20 15:30:00,033ea6b8,32f3ee20,GER-Bundesliga,2324,PreZero Arena,30,Sat,4,3,2.3,1.0,26078.0,Sören Storks,1743658
4,37ed39d6,2024-04-13 15:30:00,32f3ee20,add600ae,GER-Bundesliga,2324,Stadion im Borussia-Park,29,Sat,1,2,1.1,1.3,54042.0,Florian Badstübner,1743647
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5397,006fb5b5,2024-01-20 12:30:00,18bb7c10,47c64c55,ENG-Premier League,2324,Emirates Stadium,21,Sat,5,0,3.3,0.3,60284.0,Paul Tierney,1729533
5398,9a03e25c,2024-01-21 16:30:00,4ba7cbea,822bd0ba,ENG-Premier League,2324,Vitality Stadium,21,Sun,0,4,1.4,1.5,11228.0,Andy Madley,1729532
5399,17d24e01,2024-01-30 19:45:00,fd962109,d3fd31cc,ENG-Premier League,2324,Craven Cottage,22,Tue,0,0,1.6,1.9,24376.0,Thomas Bramall,1729452
5400,381f12be,2024-01-30 19:45:00,e297cd13,d07537b9,ENG-Premier League,2324,Kenilworth Road Stadium,22,Tue,4,0,3.1,0.9,10994.0,Robert Jones,1729453


In [None]:
last5_GWs

Unnamed: 0,GAMEWEEK
11365,34
2665,35
2533,36
2512,37
44,38


In [None]:
df_shots_last_5_matches['OUTCOME'].value_counts()

Blocked       35
Off Target    27
Saved         21
Goal          10
Woodwork       2
Name: OUTCOME, dtype: int64

In [None]:
df_shots_last_5_matches['norm_start_x'] = df_shots_last_5_matches['START_X'] / 120
df_shots_last_5_matches['norm_start_y'] = df_shots_last_5_matches['START_Y'] / 80

In [None]:
max_size = 5
min_size = 2

In [None]:
xg_scaled = (df_shots_last_5_matches['XG'] - df_shots_last_5_matches['XG'].min()) / (df_shots_last_5_matches['XG'].max() - df_shots_last_5_matches['XG'].min())
df_shots_last_5_matches['xG_size'] = xg_scaled * (max_size - min_size) + min_size

In [None]:
summary_data = df_shots_last_5_matches['OUTCOME'].value_counts().reset_index()
summary_data.columns = ['Outcome', 'Count']

In [None]:
total_xg = df_shots_last_5_matches['XG'].sum()
Goals = df_shots_last_5_matches['OUTCOME'].value_counts()['Goal']
Attempts = df_shots_last_5_matches.shape[0]
with_feet = df_shots_last_5_matches['BODY_PART'].str.contains('Foot', na=False).sum()
with_head = df_shots_last_5_matches['BODY_PART'].value_counts()['Head']
direct_set_pieces = df_shots_last_5_matches['NOTES'].str.contains('Free kick', na=False).sum()

In [None]:
total_xg, Goals, Attempts, with_feet, with_head, direct_set_pieces

(10.13, 10, 95, 78, 17, 1)

In [None]:
import plotly.graph_objects as go


df_shots_last_5_matches['hover_text'] = (
    '<span style="font-size: 20px; line-height: 30px;"><b>' + df_shots_last_5_matches['PLAYER_FBREF_NAME'].astype(str) + '</b></span>' +
    '<span style="font-size: 12px; line-height: 22px;">  vs ' + df_shots_last_5_matches['OPPO_TEAM_NAME'].astype(str) + '</span>'
    '<br><span style="font-size: 15px; line-height: 18px;">Shot - ' + df_shots_last_5_matches['OUTCOME'].astype(str) + '</span>' +
    '<br><span style="font-size: 15px; line-height: 18px;">Shot xG - ' + df_shots_last_5_matches['XG'].astype(str) + '</span>' +
    '<br><span style="font-size: 15px; line-height: 18px;">Game time - ' + ((df_shots_last_5_matches['NEW_TIME_SECONDS']//60 + 1).astype(int)).astype(str) + "'"+ 
    '<span style="font-size: 12px; line-height: 22px;">  Period/Half: ' + df_shots_last_5_matches['PERIOD_ID'].astype(str) + '</span>'
)

outcome_colors = {'Goal': 'forestgreen', 'Saved': 'red', 'Blocked': 'orange', 'Off Target': 'red', 
                  'Woodwork': 'purple', 'Saved off Target': 'red'}
outcome_markers = {'Goal': 'o', 'Saved': 'o', 'Blocked': 'X', 'Off Target': 'X',  'Woodwork': 'h',
                   'Saved off Target': 'X'}
outcome_alpha = {'Goal': .8, 'Saved': 0.6, 'Blocked': 0.6, 'Off Target': 0.5,  'Woodwork': 0.7,
                 'Saved off Target': 0.5}

# Create a Plotly figure
fig = go.Figure()

# Add the pitch as a background image
fig.add_layout_image(
    dict(
        source="https://i.imgur.com/L0GXGh5.png",  # Path to your pitch background image
        xref="x",
        yref="y",
        x=0.,
        y=110,
        sizex=60,
        sizey=95,
        # sizing="stretch",
        opacity=1.0,
        layer="below")
)

# Define marker symbols in Plotly's format
plotly_symbols = {
    'Goal': 'circle',
    'Saved': 'circle',
    'Blocked': 'x',
    'Off Target': 'x',
    'Woodwork': 'hexagon',
    'Saved off Target': 'x'
}

# header_values = []  # No header values, since we're incorporating the title directly in the cell
header_values = [
    ['<span style="font-size: 10px; color: grey;">EXPECTED GOALS</span>', 
     '<span style="font-size: 10px; color: grey;">GOALS</span>',
     '<span style="font-size: 10px; color: grey;">ATTEMPTS</span>',
     '<span style="font-size: 10px; color: grey;">WITH FEET</span>', 
     '<span style="font-size: 10px; color: grey;">WITH HEAD</span>',
     '<span style="font-size: 10px; color: grey;">DIRECT SET PIECES</span>',
     # ... add other outcomes as needed
    '<span style="font-size: 22px; color: white;"><b>total_xg</b></span>',
     '<span style="font-size: 22px; color: white;"><b>Goals</b></span>',
     '<span style="font-size: 22px; color: white;"><b>Attempts</b></span>',
     '<span style="font-size: 22px; color: white;"><b>with_feet</b></span>',
     '<span style="font-size: 22px; color: white;"><b>with_head</b></span>',
     '<span style="font-size: 22px; color: white;"><b>direct_set_pieces</b></span>',
    ]
]

# Add scatter plots for each outcome type
for outcome, df_group in df_shots_last_5_matches.groupby('OUTCOME'):
    fig.add_trace(go.Scatter(
        x=(df_group['START_Y'] - 68)*-0.8825,  # Assuming START_Y is the horizontal axis in your pitch image
        y=df_group['START_X']+0.25,  # Assuming START_X is the vertical axis in your pitch image
        mode='markers',
        marker=dict(
            color=outcome_colors[outcome],
            symbol=plotly_symbols[outcome],
            size=df_group['xG_size']*5,  # Adjust size scaling factor as needed
            opacity=df_group['OUTCOME'].map(outcome_alpha)
        ),
        name=outcome,
        text=df_group['hover_text'],  # This will be displayed on hover
        hoverinfo='text',
        # hovertemplate='<b>%{text}</b>'  # Custom hover template for cleaner text display
    ))

# Set axes to match the background image
fig.update_xaxes(showgrid=False, zeroline=False, visible=False, range=[0, 68])
fig.update_yaxes(showgrid=False, zeroline=False, visible=False, range=[52.5, 110])

# Remove the plot background color
fig.update_layout(plot_bgcolor='rgba(0,0,0,0)',
                    font=dict(
                            family="Calibri, sans-serif",  # Set the font to Calibri, with sans-serif as a fallback
                            size=12,  # You can adjust the base size as needed
                            color="black"  # And also set a global font color if you wish
                        ), width=1150,height=700)

fig.add_trace(go.Table(
    header=dict(
        values=[
            ["" ,"<b>TOTAL xG</b>", "", "<b>GOALS</b>", "", "<b>ATTEMPTS</b>", "", "<b>W/ FEET</b>", "", "<b>W/ HEAD</b>", "","<b>DIRECT FKs</b>"],
            ["", f"{round(total_xg, 1)}", "", f"{Goals}", "", f"{Attempts}", "", f"{with_feet}", "", f"{with_head}", "", f"{direct_set_pieces}"]
        ],
        line_color='#2B2B2B',
        fill_color='#2B2B2B',
        align=['right', 'left'],  # Align each column differently if needed
        font=dict(color=['grey', 'white'], size=[18, 28])  # Specify different styles for each column
    ),
    domain=dict(x=[0.835, 1], y=[0, 1])
))

fig.update_layout(
    paper_bgcolor="#2B2B2B",
    plot_bgcolor="#2B2B2B",
    margin=dict(l=0, r=22, t=0, b=0),
    legend=dict(
        orientation="h",  # Set the legend orientation to horizontal
        yanchor="bottom",
        y=-0.03,  # Negative value to move the legend below the plot
        xanchor="center",
        x=0.45,
        font=dict(  # Update the font size
            size=25,  # Example size, adjust as needed
            color="white"  # Set the text color to white for better contrast on a dark background
        ),
        bgcolor="#2B2B2B",  # Set the background color of the legend
        itemsizing='constant'  # Use the same size for all legend markers
    ),
    title={
            'text': 'SHOT MAP',
            'y':0.97,  # Sets the y position of the title (1 is the top of the figure)
            'x':0.1,  # Centers the title horizontally (0.5 is the center of the figure)
            'xanchor': 'center',  # Ensures the title is centered at the x position
            'yanchor': 'top',  # Ensures the title is at the top of the y position
            'font': dict(
                family="Roboto",  # Specify the font family
                size=23,                     # Specify the font size
                color="gold"                # Specify the font color
            )
        }
)

fig.show()

In [None]:
df_last_5_matches_filt = df_last_5_matches_filt.sort_values(by='GAMEWEEK')[-5:]

NameError: name 'df_last_5_matches_filt' is not defined

In [None]:
df_shots['BODY_PART'].value_counts()

BODY_PART
Right Foot    67883
Left Foot     44304
Head          24200
Other           581
Name: count, dtype: int64

In [None]:
df_shots['NOTES'].value_counts()

NOTES
Volley                  16621
Free kick                4456
Deflected                3126
Deflected, Volley         287
Free kick, Deflected       95
nan                         6
Free kick, Volley           1
Name: count, dtype: int64

In [None]:
df_shots['OUTCOME'].value_counts()

OUTCOME
Off Target          50859
Blocked             36732
Saved               31854
Goal                14728
Woodwork             2658
Saved off Target      137
Name: count, dtype: int64

In [None]:
df_shots = fetch_data(cursor, 'SELECT * FROM SHOT_EVENTS')
df_shots = df_shots.merge(df_events[['MATCH_ID', 'ACTION_ID', 'TEAM_FBREF_ID']], on=['MATCH_ID', 'ACTION_ID'])
df_shots = df_shots.merge(team_names[['TEAM_NAME', 'TEAM_FBREF_ID']], on="TEAM_FBREF_ID")

inds1 = df_shots[df_shots['SCA_1_PLAYER_FBREF_EVENT'].isin(['Pass (Dead)'])].index
inds2 = df_shots[df_shots['SCA_2_PLAYER_FBREF_EVENT'].isin(['Pass (Dead)'])].index

inds_union = np.union1d(inds1, inds2)
df_shots_set_piece = df_shots.loc[inds_union]

df_shots_set_piece = df_events[['MATCH_ID','ACTION_ID']].merge(df_shots_set_piece, on=['MATCH_ID', 'ACTION_ID'])
df_shots_set_piece.loc[:, 'OPPO_TEAM_ID'] = "NaN"
df_shots_set_piece = specify_oppo_team_id_to_events(df_shots_set_piece, df_matches)
df_shots_set_piece = df_shots_set_piece.merge(team_names_2, on="OPPO_TEAM_ID")
df_shots_set_piece = df_shots_set_piece.merge(df_matches[['MATCH_ID','COMPETITION','SEASON']], on='MATCH_ID', how='left')

In [None]:
shots_xg_conceded_from_set_pieces_grouped = df_shots_set_piece[['OPPO_TEAM_NAME','COMPETITION','SEASON', 
                                                                'XG']].groupby(['OPPO_TEAM_NAME','COMPETITION','SEASON']).sum()

In [None]:
shots_xg_from_set_pieces_grouped = df_shots_set_piece[['TEAM_NAME','COMPETITION','SEASON', 
                                                    'XG']].groupby(['TEAM_NAME','COMPETITION','SEASON']).sum()

In [None]:
shots_xg_conceded_from_set_pieces_grouped = team_name_matches_played.merge(shots_xg_conceded_from_set_pieces_grouped.reset_index().rename(
    {'OPPO_TEAM_NAME':'TEAM_NAME'}, axis=1),on=['TEAM_NAME','COMPETITION','SEASON'])

In [None]:
shots_xg_from_set_pieces_grouped = team_name_matches_played.merge(shots_xg_from_set_pieces_grouped.reset_index(),
                                                                  on=['TEAM_NAME','COMPETITION','SEASON'])

In [None]:
shots_xg_conceded_from_set_pieces_grouped['OPPOSITION XG FROM SET PIECE CROSSES PER GAME'] = shots_xg_conceded_from_set_pieces_grouped['XG'] / \
                                                                        shots_xg_conceded_from_set_pieces_grouped['MATCHES_PLAYED']

In [None]:
shots_xg_from_set_pieces_grouped['XG FROM SET PIECE CROSSES PER GAME'] = shots_xg_from_set_pieces_grouped['XG'] / \
                                                                        shots_xg_from_set_pieces_grouped['MATCHES_PLAYED']

In [None]:
# shots_xg_from_set_pieces_grouped
# shots_xg_conceded_from_set_pieces_grouped

In [None]:
df_goals_set_piece = df_shots_set_piece[df_shots_set_piece['OUTCOME'] == 'Goal']

In [None]:
df_goals_set_piece_chart = df_goals_set_piece.groupby(['TEAM_NAME','COMPETITION','SEASON']).count()['MATCH_ID']

In [None]:
df_goals_set_piece_chart = df_goals_set_piece_chart.reset_index()
df_goals_set_piece_chart = df_goals_set_piece_chart.rename({'MATCH_ID':'SET PIECE GOALS SCORED'}, axis= 1)

In [None]:
df_goals_set_piece_conc = df_goals_set_piece.groupby(['OPPO_TEAM_NAME','COMPETITION','SEASON']).count()['MATCH_ID']
df_goals_set_piece_conc = df_goals_set_piece_conc.reset_index()
df_goals_set_piece_conc = df_goals_set_piece_conc.rename({'MATCH_ID':'SET PIECE GOALS CONCEDED',
                                                            'OPPO_TEAM_NAME':'TEAM_NAME'}, axis= 1)

In [None]:
df_goals_set_piece_chart = df_goals_set_piece_chart.merge(df_goals_set_piece_conc, on=['TEAM_NAME', 'COMPETITION', 'SEASON'], how='outer')

In [None]:
df_goals_set_piece_chart = df_goals_set_piece_chart.fillna(0)

In [None]:
df_goals_set_piece_chart = df_goals_set_piece_chart.merge(team_names[['TEAM_NAME', 'TEAM_LOGO_URL']], on='TEAM_NAME')
df_goals_set_piece_chart = df_goals_set_piece_chart.merge(df_competitions, on=['COMPETITION', 'SEASON'])

In [None]:
create_table_query = """
CREATE TABLE IF NOT EXISTS GOALS_FROM_SET_PIECES (
    SEASON INT,
    COMPETITION_ACRONYM VARCHAR(255),
    TEAM_NAME VARCHAR(255),
    TEAM_LOGO_URL VARCHAR(255),
    SET_PIECE_GOALS_SCORED FLOAT,
    SET_PIECE_GOALS_CONCEDED FLOAT,
    PRIMARY KEY (SEASON, COMPETITION_ACRONYM, TEAM_NAME)
);
"""

In [None]:
cur.execute(create_table_query)

<snowflake.connector.cursor.SnowflakeCursor at 0x196ad0e9290>

In [None]:
df_goals_set_piece_chart.columns = df_goals_set_piece_chart.columns.str.replace(" ", "_")

In [None]:
df_goals_set_piece_chart = df_goals_set_piece_chart.drop('COMPETITION', axis=1)

In [None]:
upsert_to_snowflake("GOALS_FROM_SET_PIECES", df_goals_set_piece_chart, ['SEASON', 'COMPETITION_ACRONYM', 'TEAM_NAME'])

Upsert completed. 292 rows processed.


In [None]:
df_goals_set_piece_chart = fetch_data(cursor, 'SELECT * FROM GEGENSTATS.RADAR_CHARTS.GOALS_FROM_SET_PIECES')

In [None]:
df_goals_set_piece_chart.rename(columns={'SET_PIECE_GOALS_SCORED':'SET PIECE GOALS SCORED',
                                'SET_PIECE_GOALS_CONCEDED':'SET PIECE GOALS CONCEDED'}, inplace=True)

In [None]:
filt_df_goals_set_piece_chart = df_goals_set_piece_chart[df_goals_set_piece_chart['SEASON'] == season_selected]
filt_df_goals_set_piece_chart = filt_df_goals_set_piece_chart[filt_df_goals_set_piece_chart['COMPETITION_ACRONYM'] == league_selected]

In [None]:
create_FM_team_scatter_chart(filt_df_goals_set_piece_chart, 'GOALS FROM SET PIECES', "AC Milan", 
                             'SET PIECE GOALS CONCEDED', 'SET PIECE GOALS SCORED', 0.9, 0, 16, 0, 17, 
                                                        "Low no. of set piece goals<br>Low no. of set piece goals conceded", 
                                                        "Low no. of set piece goals<br>High no. of set piece goals conceded",
                                                        "High no. of set piece goals<br>Low no. of set piece goals conceded",
                                                        "High no. of set piece goals<br>High no. of set piece goals conceded",
                                                        "orange", "red", "green", "orange")
