### Following dependencies

In [23]:
import pandas as pd
import numpy as np
import json
import os
import shutil
from tqdm.auto import tqdm


In [30]:
class MatchStatsCalculator:
    """Compute per-team statistics for one match from an event JSON file.

    The file at ``path`` must contain a top-level ``"events"`` list. Each
    event is expected to carry ``type.primary`` / ``type.secondary`` and,
    for team statistics, ``team.name``. Every per-team statistic is a
    two-element list ordered ``[home_team, opponent_team]``.

    Attributes set by ``__init__``:
        events: raw list of event dicts.
        event_types / secondary_event_types: distinct primary / secondary
            type labels, in order of first appearance.
        shots, passes, fouls: DataFrames of the ``shot``, ``pass`` and
            ``infraction`` events.
        matchstats: result of :meth:`calculate_match_stats`.
    """

    def __init__(self, path, match_id, home_team, opponent_team):
        self.path = path
        self.home_team = home_team
        self.opponent_team = opponent_team
        self.match_id = match_id

        self.events = self.load_events()
        self.event_types = pd.unique(
            pd.Series([event['type']['primary'] for event in self.events])
        )
        self.secondary_event_types = pd.unique(
            pd.Series([sec for event in self.events for sec in event['type']['secondary']])
        )

        self.shots = self.get_events_by_type('shot')
        self.passes = self.get_events_by_type('pass')
        self.fouls = self.get_events_by_type('infraction')

        self.matchstats = self.calculate_match_stats()

    def load_events(self):
        """Read ``self.path`` and return its ``"events"`` list."""
        # Context manager so the file handle is closed deterministically.
        with open(self.path, encoding='utf-8') as handle:
            return json.load(handle)['events']

    def get_events_by_type(self, event_type: str):
        """Return a flattened DataFrame of events whose primary type matches.

        The frame has no columns at all when no event matches.
        """
        return pd.json_normalize(
            [event for event in self.events if event['type']['primary'] == event_type]
        )

    def _last_timestamp(self, period: str):
        """Return the ``matchTimestamp`` of the last event in ``period``, or None."""
        for event in reversed(self.events):
            if event['matchPeriod'] == period:
                return event['matchTimestamp']
        return None

    def get_timestamps_periods(self):
        """Return start/end timestamps of the match periods.

        End timestamps are taken from the last event of each period and are
        ``None`` when the period has no events.
        """
        periods = {'start_1H': '00:00:00.000'}
        # Belgium-Sweden (match_id: 5414276) was stopped on half-time score: https://www.rbfa.be/en/news/final-result-belgium-sweden-1-1
        if self.match_id == "5414276":
            periods['end_1H'] = self._last_timestamp('1H')
            return periods
        periods['start_2H'] = '00:45:00.000'
        periods['end_1H'] = self._last_timestamp('1H')
        periods['end_2H'] = self._last_timestamp('2H')
        return periods

    @staticmethod
    def _all_false(frame):
        """Return an all-False boolean Series aligned with ``frame``."""
        return pd.Series(False, index=frame.index, dtype=bool)

    @classmethod
    def _team_mask(cls, frame, team):
        """Boolean mask of rows belonging to ``team`` (all False if no team column)."""
        if 'team.name' not in frame.columns:
            return cls._all_false(frame)
        return frame['team.name'] == team

    @classmethod
    def _flag(cls, frame, column):
        """Boolean Series for ``column``; missing column or missing values count as False."""
        if column not in frame.columns:
            return cls._all_false(frame)
        return frame[column].astype('boolean').fillna(False).astype(bool)

    @classmethod
    def _has_secondary(cls, frame, tag):
        """Mask of rows whose stringified ``type.secondary`` contains ``tag``."""
        if 'type.secondary' not in frame.columns:
            return cls._all_false(frame)
        return frame['type.secondary'].astype(str).str.contains(tag)

    def _count(self, frame, *conditions):
        """Return ``[home, opponent]`` counts of rows satisfying every condition."""
        counts = []
        for team in (self.home_team, self.opponent_team):
            mask = self._team_mask(frame, team)
            for condition in conditions:
                mask = mask & condition
            # Plain ints keep the resulting stats dict JSON-serialisable.
            counts.append(int(mask.sum()))
        return counts

    def _possession_share(self):
        """Return the truncated possession percentage per team.

        Each possession id is counted once, using the first event that
        references it. Returns ``[0, 0]`` when no possession time is known.
        """
        possessions = {}
        for event in self.events:
            possession = event.get('possession')
            if possession is None:
                continue
            possession_id = possession['id']
            if possession_id not in possessions:
                possessions[possession_id] = (possession['team']['name'], possession['duration'])

        totals = [0, 0]
        for team_name, duration in possessions.values():
            if team_name == self.home_team:
                totals[0] += float(duration)
            elif team_name == self.opponent_team:
                totals[1] += float(duration)

        overall = sum(totals)
        if not overall:
            return [0, 0]
        return [int(totals[0] / overall * 100), int(totals[1] / overall * 100)]

    def calculate_match_stats(self):
        """Return the nested statistics dict.

        Top-level keys are ``Performance``, ``Attacking``, ``Defending`` and
        ``Disciplinary``; each maps a label to ``[home, opponent]`` ints.
        Event types that never occur in the match yield zero counts.
        """
        stats = {'Performance': {}, 'Attacking': {}, 'Defending': {}, 'Disciplinary': {}}

        # Performance
        passes = self.passes
        pass_attempts = self._count(passes)
        pass_completes = self._count(passes, self._flag(passes, 'pass.accurate'))
        stats['Performance']['Possession (%)'] = self._possession_share()
        stats['Performance']['Passing accuracy (%)'] = [
            int(done / tried * 100) if tried else 0
            for done, tried in zip(pass_completes, pass_attempts)
        ]
        stats['Performance']['Passes attempted'] = pass_attempts
        stats['Performance']['Passes completed'] = pass_completes

        # Attacking
        shots = self.shots
        penalty = self.get_events_by_type('penalty')
        interceptions = self.get_events_by_type('interception')
        blocks = self._count(interceptions, self._has_secondary(interceptions, 'shot_block'))
        # A shot blocked by one side is an attempt of the other side.
        blocked = [blocks[1], blocks[0]]

        def shots_and_penalties(column=None, negate=False):
            """Per-team count over shots plus penalties, optionally filtered by a flag."""
            totals = [0, 0]
            for frame in (shots, penalty):
                if column is None:
                    counts = self._count(frame)
                else:
                    flag = self._flag(frame, column)
                    counts = self._count(frame, ~flag if negate else flag)
                totals = [total + count for total, count in zip(totals, counts)]
            return totals

        stats['Attacking']['Goals'] = shots_and_penalties('shot.isGoal')
        stats['Attacking']['Total attempts'] = [
            attempts + stopped for attempts, stopped in zip(shots_and_penalties(), blocked)
        ]
        stats['Attacking']['On target'] = shots_and_penalties('shot.onTarget')
        stats['Attacking']['Off target'] = shots_and_penalties('shot.onTarget', negate=True)
        stats['Attacking']['Blocked'] = blocked
        stats['Attacking']['Corners taken'] = self._count(self.get_events_by_type('corner'))
        stats['Attacking']['Offsides'] = self._count(self.get_events_by_type('offside'))

        # Defending
        duels = self.get_events_by_type('duel')
        stats['Defending']['Balls recovered'] = self._count(duels, self._has_secondary(duels, 'recovery'))
        stats['Defending']['Tackles'] = self._count(duels, self._has_secondary(duels, 'sliding_tackle'))
        stats['Defending']['Blocks'] = blocks
        stats['Defending']['Clearances completed'] = self._count(self.get_events_by_type('clearance'))

        # Disciplinary (self.fouls already holds the 'infraction' events)
        infractions = self.fouls
        stats['Disciplinary']['Yellow cards'] = self._count(infractions, self._has_secondary(infractions, 'yellow_card'))
        stats['Disciplinary']['Red cards'] = self._count(infractions, self._has_secondary(infractions, 'red_card'))
        stats['Disciplinary']['Fouls committed'] = self._count(infractions)

        return stats

    def xgoals_by_team(self):
        """Return ``(home_xg, opponent_xg)``: summed ``shot.xg`` over shots and penalties.

        Returns ``(0.0, 0.0)`` when the match has neither shots nor penalties.
        """
        teams = [self.home_team, self.opponent_team]
        frames = [
            frame for frame in (self.shots, self.get_events_by_type('penalty'))
            if not frame.empty
        ]
        if not frames:
            return (0.0, 0.0)
        all_shots = pd.concat(frames).sort_values(['matchPeriod', 'minute', 'second'])
        if 'shot.xg' not in all_shots.columns:
            return (0.0, 0.0)
        return tuple(
            float(np.sum(all_shots['shot.xg'][all_shots['team.name'] == team]))
            for team in teams
        )

    def get_events_by_player(self, player_name: str):
        """Return a flattened DataFrame of the events attributed to ``player_name``.

        Events with a missing or null ``player`` entry are skipped.
        """
        return pd.json_normalize([
            event for event in self.events
            if (event.get('player') or {}).get('name') == player_name
        ])



class OverallMatchStatsCalculator:
    """Load one match's event JSON file and expose its shot and pass events.

    The file at ``path`` must contain a top-level ``"events"`` list whose
    items carry a ``type.primary`` label.

    Attributes set by ``__init__``:
        events: raw list of event dicts.
        event_types: distinct primary type labels, in order of first appearance.
        shots, passes: DataFrames of the ``shot`` and ``pass`` events.
    """

    provider = 'wyscout'
    pitch_dims = (100, 100)

    def __init__(self, path, match_id, home_team, opponent_team):
        self.path = path
        self.match_id = match_id
        self.home_team = home_team
        self.opponent_team = opponent_team
        self.events = self.load_events()
        # pd.unique on a plain list is deprecated; wrap the labels in a Series.
        self.event_types = pd.unique(
            pd.Series([event['type']['primary'] for event in self.events])
        )
        self.shots = self.get_event_df('shot')
        self.passes = self.get_event_df('pass')

    def load_events(self):
        """Read ``self.path`` and return its ``"events"`` list."""
        # Context manager so the file handle is closed deterministically.
        with open(self.path, encoding='utf-8') as handle:
            return json.load(handle)['events']

    def get_event_df(self, event_type: str):
        """Return a flattened DataFrame of events whose primary type matches."""
        return pd.json_normalize(
            [event for event in self.events if event['type']['primary'] == event_type]
        )



#from match_stats_engines import MatchStatsCalculator, OverallMatchStatsCalculator


# The match id must match the id the event file is named after (the loaded
# events report matchId 5414302); the previous value had lost its leading "5".
match_info = {
    'path': "./input/wyscout/5414302.json",
    'match_id': "5414302",
    'home_team': "Denmark",
    'opponent_team': "Slovenia"
}


match_stats = MatchStatsCalculator(**match_info)
overall_stats = OverallMatchStatsCalculator(**match_info)

# Both values were already computed in the constructors; reuse them instead of
# re-running the calculation and re-filtering the event list.
print(match_stats.matchstats)
print(overall_stats.shots)

{'Performance': {'Possession (%)': [61, 38], 'Passing accuracy (%)': [87, 75], 'Passes attempted': [646, 318], 'Passes completed': [564, 241]}, 'Attacking': {'Goals': [2, 0], 'Total attempts': [12, 1], 'On target': [8, 0], 'Off target': [4, 1], 'Blocked': [0, 0], 'Corners taken': [8, 2], 'Offsides': [2, 3]}, 'Defending': {'Balls recovered': [28, 36], 'Tackles': [2, 6], 'Blocks': [0, 0], 'Clearances completed': [5, 7]}, 'Disciplinary': {'Yellow cards': [1, 1], 'Red cards': [0, 0], 'Fouls committed': [13, 13]}}
            id  matchId matchPeriod  minute  second matchTimestamp  \
0   1854206155  5414302          1H       3      39   00:03:39.691   
1   1854206266  5414302          1H       8      18   00:08:18.877   
2   1854206338  5414302          1H      11      26   00:11:26.286   
3   1854206354  5414302          1H      11      53   00:11:53.835   
4   1854206398  5414302          1H      14      27   00:14:27.553   
5   1854206690  5414302          1H      25      32   00:25:32.31

  self.event_types = pd.unique([event['type']['primary'] for event in self.events])


: 

# Wyscout match 5414302 (Denmark vs Slovenia). Both the file name and the
# match id use the full seven-digit id; the leading "5" had been dropped.
match_info = {
    'path': "./input/wyscout/5414302.json",
    'match_id': "5414302",
    'home_team': "Denmark",
    'opponent_team': "Slovenia"
}