In [None]:
import json
import re
import pandas as pd
import os
import matplotlib.font_manager as font_manager
from mplsoccer import VerticalPitch

In [None]:
# Custom typeface loaded from local .ttf files (point these at any font on your machine).
# Paths to the regular, medium and semi-bold font files.
font_path_regular = '/Users/alfonsomarino/Desktop/Teko/static/Teko-Regular.ttf'
font_path_med = "/Users/alfonsomarino/Desktop/Teko/static/Teko-Medium.ttf"
font_path_semi = "/Users/alfonsomarino/Desktop/Teko/static/Teko-SemiBold.ttf"

# One FontProperties object per weight, each built from its font file.
font_normal = font_manager.FontProperties(fname=font_path_regular)
font_med = font_manager.FontProperties(fname=font_path_med)
font_semi = font_manager.FontProperties(fname=font_path_semi)

In [None]:
# Folder holding the saved match pages (presumably one HTML file per match).
cartella = '/Users/alfonsomarino/Desktop/partite/partite arias'

# Collect the regular files found directly inside that folder.
# - os.listdir() returns entries in arbitrary order, so sort for a reproducible run.
# - Skip hidden dotfiles (e.g. macOS ".DS_Store"): they are not match pages and
#   would make the HTML parsing step fail.
lista_file = sorted(
    os.path.join(cartella, f)
    for f in os.listdir(cartella)
    if not f.startswith('.') and os.path.isfile(os.path.join(cartella, f))
)

## Scraping the HTML files

In [None]:
def extract_json_from_html(html_path, save_output=False):
    """Extract the embedded match payload from a saved HTML page as JSON text.

    The function looks for the text assigned to ``require.config.params["args"]``
    in the page, wraps four known key names in double quotes so a JSON parser
    accepts them, and turns the trailing ``};`` into ``}``.

    Parameters
    ----------
    html_path : str
        Path of the saved HTML file.
    save_output : bool, default False
        When True, the extracted text is also written to ``f"{html_path}.txt"``.

    Returns
    -------
    str
        The extracted text, intended to be passed to ``json.loads``.

    Raises
    ------
    ValueError
        If the ``require.config.params["args"]`` assignment is not found
        (previously surfaced as an opaque ``IndexError``).
    """
    # The context manager closes the file even if reading fails; the explicit
    # encoding avoids depending on the platform's default.
    with open(html_path, 'r', encoding='utf-8') as html_file:
        html = html_file.read()

    # Non-greedy: the capture stops at the first ';' after the assignment.
    regex_pattern = r'(?<=require\.config\.params\["args"\].=.)[\s\S]*?;'
    match = re.search(regex_pattern, html)
    if match is None:
        raise ValueError(
            f'No require.config.params["args"] payload found in {html_path!r}'
        )
    data_txt = match.group(0)

    # add quotations for json parser
    # (plain text replacement: every occurrence of each name is quoted)
    for key in ('matchId', 'matchCentreData',
                'matchCentreEventTypeJson', 'formationIdNameMappings'):
        data_txt = data_txt.replace(key, f'"{key}"')
    data_txt = data_txt.replace('};', '}')

    if save_output:
        # save json data to txt
        with open(f"{html_path}.txt", "wt", encoding='utf-8') as output_file:
            output_file.write(data_txt)

    return data_txt

In [None]:
def extract_data_from_dict(data):
    """Split one parsed match payload into events, players and team names.

    Parameters
    ----------
    data : dict
        Parsed payload; only ``data["matchCentreData"]`` is read, specifically
        its ``"events"``, ``"home"`` and ``"away"`` entries.

    Returns
    -------
    tuple
        ``(events_dict, players_df, teams_dict)`` where

        * ``events_dict`` is ``data["matchCentreData"]["events"]`` unchanged
          (presumably a list of event dicts; the caller extends a list with it),
        * ``players_df`` is the home and away ``players`` tables stacked, with
          a ``teamId`` column added; the row index is not reset,
        * ``teams_dict`` maps each side's ``teamId`` to its ``name``.
    """
    # Look the nested sections up once instead of re-indexing `data` each time.
    match_centre = data["matchCentreData"]
    home = match_centre['home']
    away = match_centre['away']

    events_dict = match_centre["events"]
    teams_dict = {home['teamId']: home['name'],
                  away['teamId']: away['name']}

    # create players dataframe, tagging every player with the team they belong to
    players_home_df = pd.DataFrame(home['players'])
    players_home_df["teamId"] = home['teamId']
    players_away_df = pd.DataFrame(away['players'])
    players_away_df["teamId"] = away['teamId']
    players_df = pd.concat([players_home_df, players_away_df])

    return events_dict, players_df, teams_dict

In [None]:
def process_multiple_files(html_file_paths):
    """Parse several saved match pages and merge their contents.

    Parameters
    ----------
    html_file_paths : iterable of str
        Paths of the HTML files to process, one after the other.

    Returns
    -------
    tuple
        ``(all_events, combined_players_df, all_teams)`` where

        * ``all_events`` is one flat list with the events of every file,
        * ``combined_players_df`` stacks the per-file player tables with a
          fresh 0..n-1 index (rows are not de-duplicated across files),
        * ``all_teams`` is the union of the per-file ``teamId -> name`` dicts.

        When no paths are given, the result is ``([], <empty DataFrame>, {})``.
    """
    all_events = []
    all_players = []
    all_teams = {}

    for match_html_path in html_file_paths:
        # Extract the JSON text from the HTML file and parse it
        json_data_txt = extract_json_from_html(match_html_path)
        data = json.loads(json_data_txt)

        # Pull events, players and teams out of the parsed payload
        events_dict, players_df, teams_dict = extract_data_from_dict(data)

        # Fold this file's data into the running totals
        all_events.extend(events_dict)  # one flat list of events
        all_players.append(players_df)  # per-file player DataFrames
        all_teams.update(teams_dict)    # merged teamId -> name mapping

    # pd.concat raises ValueError on an empty list, so handle "no files" explicitly
    if all_players:
        combined_players_df = pd.concat(all_players, ignore_index=True)
    else:
        combined_players_df = pd.DataFrame()

    return all_events, combined_players_df, all_teams

# Parse every file listed in lista_file and merge events, players and teams across them.
all_events, combined_players_df, all_teams = process_multiple_files(lista_file)

In [None]:
def get_passes_df(events_dict):
    """Build a flat events table from raw event records.

    Despite the name, no filtering by event type happens here: every record
    passed in is kept.

    Parameters
    ----------
    events_dict : list of dict
        Event records. Each must carry ``type`` and ``outcomeType`` entries
        that are dicts with a ``displayName`` key, plus the columns selected
        below.

    Returns
    -------
    pandas.DataFrame
        An independent copy with the columns ``id, minute, x, y, endX, endY,
        teamId, playerId, isTouch, eventType, outcomeType``; the last two hold
        the ``displayName`` strings.
    """
    df = pd.DataFrame(events_dict)

    # Both columns hold dicts; keep only their 'displayName'. Series.map walks
    # just the one column instead of building a row object per event as
    # DataFrame.apply(axis=1) does.
    df['eventType'] = df['type'].map(lambda d: d['displayName'])
    df['outcomeType'] = df['outcomeType'].map(lambda d: d['displayName'])

    columns = ["id", "minute", "x", "y", "endX", "endY", "teamId", "playerId",
               "isTouch", "eventType", "outcomeType"]
    # .copy() so later edits to the result cannot trigger SettingWithCopyWarning
    df_touch = df[columns].copy()

    return df_touch

In [None]:
# Flatten all events, then keep only the rows of the selected player.
df_touch = get_passes_df(all_events)
df = df_touch.loc[df_touch["playerId"].eq(424039)]  # whoscored ID
df

In [None]:
# Show the distinct event types present in the selected player's rows.
df["eventType"].unique()

In [None]:
# Keep only the events flagged as touches (isTouch equal to True).
df = df.loc[df["isTouch"].eq(True)]

## Visualization

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Two-colour gradient from 'blanchedalmond' to 'darkred', used for the positional heatmap.
# NOTE(review): the colormap name says "100 colors" but N=20 is passed — confirm which is intended.
almond_cmap = LinearSegmentedColormap.from_list("Almond - 100 colors",
                                                  ['blanchedalmond', 'darkred'], N =20)

In [None]:
from matplotlib.patches import RegularPolygon
from highlight_text import fig_text, ax_text
import matplotlib.patheffects as path_effects
import os
import matplotlib.image as image
import matplotlib.pyplot as plt

# Path effects for the heatmap labels: a 2-wide black stroke followed by the normal rendering.
path_eff = [path_effects.Stroke(linewidth=2, foreground='black'),
            path_effects.Normal()]

def getImage(path):
    """Read the image at ``path`` and wrap it in an ``OffsetImage`` at 0.7 zoom.

    Parameters
    ----------
    path : str
        Location of the image file, passed to ``plt.imread``.

    Returns
    -------
    matplotlib.offsetbox.OffsetImage
        The loaded image wrapped with ``zoom=0.7``.
    """
    # OffsetImage is not imported anywhere in the visible notebook, so calling
    # this helper raised NameError; import it here to keep the function self-contained.
    from matplotlib.offsetbox import OffsetImage

    return OffsetImage(plt.imread(path), zoom=0.7)

# --- Touch heatmap figure -------------------------------------------------
# Vertical pitch using the 'opta' coordinate type, almond background, white lines.
pitch = VerticalPitch(pitch_type='opta', corner_arcs=True, pitch_color = "blanchedalmond",
                      line_color="white", line_alpha=1, line_zorder = 2)

# pitch.grid gives the figure plus the axes used below under the keys
# 'title', 'pitch' and 'endnote'.
fig, axs = pitch.grid(figheight=13, title_height=0.08, endnote_space=0, title_space=0,
                      axis=False,
                      grid_height=0.82, endnote_height=0.03)
fig.patch.set_facecolor('blanchedalmond')
#ax.set_facecolor('blanchedalmond')

# Count the touches in each zone of the 'full' positional layout.
# normalize=True presumably converts counts into shares of the total
# (they are printed with a percent format below) — TODO confirm.
bin_statistic = pitch.bin_statistic_positional(df["x"], df["y"], statistic='count',
                                               positional='full', normalize=True)

# Colour each zone with the almond -> dark red colormap.
pitch.heatmap_positional(bin_statistic, ax=axs['pitch'],
                         cmap=almond_cmap, edgecolors='None')

# Overlay every touch as a faint green dot.
pitch.scatter(df["x"], df["y"], s= 80, ax = axs["pitch"], c= "darkgreen", alpha=.2, zorder = 2)

# Write each zone's value as a whole-number percentage, outlined via path_eff.
labels = pitch.label_heatmap(bin_statistic, color='#f4edf0', fontsize=20,
                             ax=axs['pitch'], ha='center', va='center',
                             str_format='{:.0%}', path_effects=path_eff, fontproperties=font_semi)


# Credits, right-aligned in the endnote axes.
#1, 0.5
axs['endnote'].text(1, -0.8, 'Dati WhoScored  |  @AlfoMarino0975', va='center', ha='right', fontsize=15, 
                    color = "grey", fontproperties = font_normal)

# Legend-style note explaining the scatter dots.
axs['endnote'].text(0.225, -0.8, "Each dot represents a touch", color='#003153',
                  va='center', ha='center', fontsize=18, fontproperties = font_normal)

# Add the subtitle just above the pitch (axes-fraction coordinates via transAxes).
# NOTE(review): the figures in this string are typed by hand, not computed from df —
# update them whenever the input matches change.
axs['pitch'].text(0.5, 1.03, "Touches (p90): 68.8  |  Fouls won: 13  |  Pass accuracy: 89.2%", va='top', ha='center', fontsize=25, 
                     fontproperties = font_normal, color='#003153', transform=axs['pitch'].transAxes)

# Add arrow (currently disabled)
#axs['pitch'].annotate('', xy=(10, 25), xytext=(10, 5),
#                         arrowprops=dict(edgecolor='grey', arrowstyle='->', lw=2.5))


# Main title, centred in the title axes.
axs['title'].text(0.5, 0.8, "Arias: the heartbeat of Flu’s game", color='darkred',
                  va='center', ha='center', fontsize=42, fontproperties = font_semi)


# Logo image drawn in its own axes near the bottom-right of the figure, with the axis hidden.
logo = plt.imread("/Users/alfonsomarino/Desktop/Progetti/IMG_2978.PNG")
logo_ax = fig.add_axes([0.65, -0.01, 0.4, 0.12])
logo_ax.imshow(logo)
logo_ax.axis('off')

# Export at 300 dpi with a tight bounding box, keeping the figure's background colour.
output_path = "/Users/alfonsomarino/Desktop/heatmap_touch2.png"
fig.savefig(output_path, dpi=300, bbox_inches='tight', facecolor=fig.get_facecolor())
