In [2]:
import os
from datetime import datetime

import pandas as pd
import requests

In [3]:
def get_match_json(match_id, timeout=30):
    """Download the raw event feed of one match from elgrafico.com.ar.

    Parameters
    ----------
    match_id
        Value interpolated into the feed URL (the notebook passes integers).
    timeout : float, optional
        Seconds to wait for the server (default 30). ``requests`` applies no
        timeout unless one is given, so a stalled connection would otherwise
        hang the whole export.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.RequestException
        For connection problems, timeouts or an undecodable JSON body.
    """
    url = f'https://www.elgrafico.com.ar/data/html/v3/htmlCenter/data/deportes/futbol/primeraa/events/{match_id}.json?t=28604035'
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors (e.g. an unknown match id) here instead of failing
    # later with a confusing JSON decode error or a KeyError downstream.
    response.raise_for_status()
    return response.json()

In [4]:
def get_match_details(match_json):
    """Build a one-row DataFrame describing the match itself.

    Reads ``match_json['match']`` and the stadium entry found at
    ``match_json['venueInformation']['venue']['stadium']``. The match date is
    parsed as ``YYYYMMDD`` and written back as ``DD-MM-YYYY``.

    Returns a DataFrame with the columns date, scheduled_start, home_team,
    away_team, competition and stadium_name.
    """
    match = match_json['match']
    stadium = match_json['venueInformation']['venue']['stadium']

    parsed_date = datetime.strptime(match.get('date'), "%Y%m%d")

    row = {
        'date': parsed_date.strftime("%d-%m-%Y"),
        'scheduled_start': match.get('scheduledStart'),
        'home_team': match.get('homeTeamName'),
        'away_team': match.get('awayTeamName'),
        'competition': match.get('competition'),
        'stadium_name': stadium.get('stadiumName'),
    }
    return pd.DataFrame([row])

In [5]:
def get_players(match_json):
    """Return one row per entry of ``match_json['players']``.

    The mapping key becomes ``player_id``; the remaining columns (team_id,
    pos_id, no, name, substitute) are read from each player's record, with
    ``name`` taken from the nested ``name.shortName`` field.
    """
    def as_row(player_id, info):
        # Flatten one player record into the output column layout.
        return {
            'player_id': player_id,
            'team_id': info.get('teamId'),
            'pos_id': info.get('posnId'),
            'no': info.get('squadNo'),
            'name': info.get('name', {}).get('shortName'),
            'substitute': info.get('substitute'),
        }

    rows = [as_row(player_id, info) for player_id, info in match_json['players'].items()]
    return pd.DataFrame(rows)

In [6]:
def get_teams(match_json):
    """Return a two-row DataFrame (home first, then away) of team ids and names.

    Values come from the ``homeTeamId``/``homeTeamName`` and
    ``awayTeamId``/``awayTeamName`` fields of ``match_json['match']``.
    """
    home_id = match_json['match']['homeTeamId']
    home_name = match_json['match']['homeTeamName']
    away_id = match_json['match']['awayTeamId']
    away_name = match_json['match']['awayTeamName']

    columns = {
        'team_id': [home_id, away_id],
        'team_name': [home_name, away_name],
    }
    return pd.DataFrame(columns)

In [7]:
def get_goals(match_json):
    """Return one row per goal listed under ``match_json['incidences']['goals']``.

    Columns: type, half, minute, second, team, player_id, receiver_id and the
    x/y/z values of the feed's coordinate points ``'1'`` and ``'2'``.
    ``receiver_id`` carries the feed's ``assBy`` field (presumably the
    assisting player; confirm against the feed).

    Coordinates are read defensively, the same way the pass, steal and foul
    extractors do: a goal without a ``coord`` entry (or without one of its
    points) yields missing values instead of raising ``KeyError``.
    """
    goals_list = []

    for goal_info in match_json['incidences']['goals'].values():
        clock = goal_info['t']
        coords = goal_info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        goals_list.append({
            'type': 'goal',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': goal_info.get('team'),
            'player_id': goal_info.get('plyrId'),
            'receiver_id': goal_info.get('assBy'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        })

    return pd.DataFrame(goals_list)

In [8]:
def get_subs(match_json):
    """Return one row per substitution under ``incidences.substitutions``.

    Columns: type, half, minute, second, team, receiver_id, player_id.
    """
    rows = []
    for info in match_json['incidences']['substitutions'].values():
        clock = info['t']
        rows.append({
            'type': 'substitution',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            # To keep the column set shared with the other event tables, the
            # player coming off (offId) is stored in receiver_id and the
            # player coming on (inId) in player_id.
            'receiver_id': info.get('offId'),
            'player_id': info.get('inId'),
        })
    return pd.DataFrame(rows)

In [9]:
def get_clearances(match_json):
    """Return one row per clearance listed under ``incidences.clearances``.

    Columns: type, half, minute, second, team, player_id.
    """
    def as_row(info):
        # Flatten one clearance record; 't' holds the half/minute/second.
        clock = info['t']
        return {
            'type': 'clearance',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'player_id': info.get('plyrId'),
        }

    records = match_json['incidences']['clearances']
    return pd.DataFrame([as_row(info) for info in records.values()])

In [10]:
def get_corners(match_json):
    """Return one row per corner kick under ``incidences.cornerKicks``.

    Columns: type, half, minute, second, team, player_id and the x/y/z values
    of the feed's coordinate points ``'1'`` and ``'2'``.

    Coordinates are read defensively, the same way the pass, steal and foul
    extractors do: a corner without a ``coord`` entry (or without one of its
    points) yields missing values instead of raising ``KeyError``.
    """
    corner_list = []

    for corner_info in match_json['incidences']['cornerKicks'].values():
        clock = corner_info['t']
        coords = corner_info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        corner_list.append({
            'type': 'corner',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': corner_info.get('team'),
            'player_id': corner_info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        })

    return pd.DataFrame(corner_list)

In [11]:
def get_correct_passes(match_json):
    """Return one row per completed pass under ``incidences.correctPasses``.

    Columns: type, half, minute, second, team, receiver_id, player_id and the
    x/y/z values of the feed's coordinate points ``'1'`` and ``'2'``. A pass
    without a ``coord`` entry, or without one of its points, gets missing
    values for the corresponding columns.
    """
    rows = []
    for info in match_json['incidences']['correctPasses'].values():
        clock = info['t']
        coords = info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        rows.append({
            'type': 'completed_pass',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'receiver_id': info.get('recvId'),
            'player_id': info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        })
    return pd.DataFrame(rows)

In [12]:
def get_incorrect_passes(match_json):
    """Return one row per failed pass under ``incidences.incorrectPasses``.

    Columns: type, half, minute, second, team, player_id and the x/y/z values
    of the feed's coordinate points ``'1'`` and ``'2'``. A pass without a
    ``coord`` entry, or without one of its points, gets missing values for
    the corresponding columns.
    """
    def as_row(info):
        # Flatten one pass record into the shared event column layout.
        clock = info['t']
        coords = info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        return {
            'type': 'incomplete_pass',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'player_id': info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        }

    passes = match_json['incidences']['incorrectPasses']
    return pd.DataFrame([as_row(info) for info in passes.values()])

In [13]:
def get_offsides(match_json):
    """Return one row per offside listed under ``incidences.offsides``.

    Columns: type, half, minute, second, team, player_id and the x/y/z values
    of the feed's coordinate point ``'1'``.

    Coordinates are read defensively, the same way the pass, steal and foul
    extractors do: an offside without a ``coord`` entry (or without point
    ``'1'``) yields missing values instead of raising ``KeyError``.
    """
    offsides_list = []

    for offside_info in match_json['incidences']['offsides'].values():
        clock = offside_info['t']
        first = offside_info.get('coord', {}).get('1', {})
        offsides_list.append({
            'type': 'offside',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': offside_info.get('team'),
            'player_id': offside_info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
        })

    return pd.DataFrame(offsides_list)

In [14]:
def get_shots(match_json):
    """Return one row per shot listed under ``match_json['incidences']['shots']``.

    Columns: type, half, minute, second, team, receiver_id, player_id and the
    x/y/z values of the feed's coordinate points ``'1'`` and ``'2'``.

    Coordinates are read defensively, the same way the pass, steal and foul
    extractors do: a shot without a ``coord`` entry (or without one of its
    points) yields missing values instead of raising ``KeyError``.
    """
    shots_list = []

    for shot_info in match_json['incidences']['shots'].values():
        clock = shot_info['t']
        coords = shot_info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        shots_list.append({
            'type': 'shot',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': shot_info.get('team'),
            # receiver_id is the goalkeeper who makes the save (ctchBy); the
            # generic column name keeps all event tables aligned.
            'receiver_id': shot_info.get('ctchBy'),
            'player_id': shot_info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        })

    return pd.DataFrame(shots_list)

In [15]:
def get_steals(match_json):
    """Return one row per steal listed under ``incidences.stealings``.

    Columns: type, half, minute, second, team, player_id and the x/y/z values
    of the feed's coordinate point ``'1'`` (missing values when the record
    has no ``coord`` entry or no point ``'1'``).
    """
    rows = []
    for info in match_json['incidences']['stealings'].values():
        clock = info['t']
        first = info.get('coord', {}).get('1', {})
        rows.append({
            'type': 'steal',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'player_id': info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
        })
    return pd.DataFrame(rows)

In [16]:
def get_cards(match_json):
    """Return one row per booking, yellow cards first and red cards after.

    Yellow cards are read from ``incidences.yellowCards`` and red cards from
    ``incidences.redCards``; the ``type`` column is ``'yellow'`` or ``'red'``.
    Columns: type, half, minute, second, team, reason, player_id.
    """
    rows = []
    # Both card kinds share one record layout, so a single loop handles them.
    for colour, feed_key in (('yellow', 'yellowCards'), ('red', 'redCards')):
        for info in match_json['incidences'][feed_key].values():
            clock = info['t']
            rows.append({
                'type': colour,
                'half': clock.get('half'),
                'minute': clock.get('m'),
                'second': clock.get('s'),
                'team': info.get('team'),
                'reason': info.get('reason'),
                'player_id': info.get('plyrId'),
            })
    return pd.DataFrame(rows)

In [17]:
def get_throws(match_json):
    """Return one row per throw-in listed under ``incidences.throwIn``.

    Columns: type, half, minute, second, team, player_id and the x/y/z values
    of the feed's coordinate points ``'1'`` and ``'2'``.

    Coordinates are read defensively, the same way the pass, steal and foul
    extractors do: a throw-in without a ``coord`` entry (or without one of
    its points) yields missing values instead of raising ``KeyError``.
    """
    throw_ins_list = []

    for throw_in_info in match_json['incidences']['throwIn'].values():
        clock = throw_in_info['t']
        coords = throw_in_info.get('coord', {})
        first = coords.get('1', {})
        second = coords.get('2', {})
        throw_ins_list.append({
            'type': 'throw_in',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': throw_in_info.get('team'),
            'player_id': throw_in_info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
            'coord_2_x': second.get('x'),
            'coord_2_y': second.get('y'),
            'coord_2_z': second.get('z'),
        })

    return pd.DataFrame(throw_ins_list)


In [18]:
def get_goal_kicks(match_json):
    """Return one row per goal kick listed under ``incidences.goalkick``.

    Columns: type, half, minute, second, team, player_id.
    """
    rows = []
    for info in match_json['incidences']['goalkick'].values():
        clock = info['t']
        rows.append({
            'type': 'goal_kick',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'player_id': info.get('plyrId'),
        })
    return pd.DataFrame(rows)

In [19]:
def get_var(match_json):
    """Return one row per VAR incident listed under ``incidences.var``.

    Columns: type, half, minute, second, team, reason.
    """
    def as_row(info):
        # Flatten one VAR record; 't' holds the half/minute/second.
        clock = info['t']
        return {
            'type': 'var',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'reason': info.get('reason'),
        }

    reviews = match_json['incidences']['var']
    return pd.DataFrame([as_row(info) for info in reviews.values()])

In [20]:
def get_fouls(match_json):
    """Return one row per foul listed under ``match_json['incidences']['fouls']``.

    Columns: type, half, minute, second, team, receiver_id, player_id and the
    x/y/z values of the feed's coordinate point ``'1'`` (missing values when
    the record has no ``coord`` entry or no point ``'1'``).
    """
    rows = []
    for info in match_json['incidences']['fouls'].values():
        clock = info['t']
        first = info.get('coord', {}).get('1', {})
        rows.append({
            'type': 'foul',
            'half': clock.get('half'),
            'minute': clock.get('m'),
            'second': clock.get('s'),
            'team': info.get('team'),
            'receiver_id': info.get('recvId'),
            'player_id': info.get('plyrId'),
            'coord_1_x': first.get('x'),
            'coord_1_y': first.get('y'),
            'coord_1_z': first.get('z'),
        })
    return pd.DataFrame(rows)

In [21]:
def get_match_events(match_id):
    """Download one match and return all of its events as a single DataFrame.

    The feed is fetched with ``get_match_json``; every event extractor is run
    on it and the results are stacked in a fixed order (steals, fouls, shots,
    VAR, goal kicks, throw-ins, cards, offsides, completed passes, incomplete
    passes, corners, clearances, goals, substitutions). The match details
    from ``get_match_details`` are repeated on every event row.

    Player and team ids are replaced by names: ``player_id``/``receiver_id``
    are mapped through the players table and renamed to ``player``/
    ``receiver``; ``team`` is mapped through the teams table. Ids with no
    match in those tables become missing values.

    Parameters
    ----------
    match_id
        Match identifier passed on to ``get_match_json``.

    Returns
    -------
    pandas.DataFrame
        Match detail columns followed by the event columns.
    """
    match_json = get_match_json(match_id)

    # Order matters: it defines the row order of the exported table.
    extractors = (
        get_steals, get_fouls, get_shots, get_var, get_goal_kicks, get_throws,
        get_cards, get_offsides, get_correct_passes, get_incorrect_passes,
        get_corners, get_clearances, get_goals, get_subs,
    )
    all_events_df = pd.concat([extract(match_json) for extract in extractors],
                              ignore_index=True)

    df_match_details = get_match_details(match_json)
    # Repeat the single details row once per event. Repeating the index is a
    # single vectorised take, whereas concatenating len(all_events_df) copies
    # of a one-row frame builds thousands of intermediate DataFrames.
    repeated_index = df_match_details.index.repeat(len(all_events_df))
    df_details = df_match_details.loc[repeated_index].reset_index(drop=True)
    df_match_events = pd.concat([df_details, all_events_df], axis=1)

    df_players = get_players(match_json)
    df_teams = get_teams(match_json)
    # Keys of the players table are cast to int so they compare equal to the
    # ids stored on the events (assumes event ids are numeric).
    player_id_to_name = dict(zip(df_players['player_id'].astype(int), df_players['name']))
    team_id_to_name = dict(zip(df_teams['team_id'], df_teams['team_name']))

    df_match_events['player_id'] = df_match_events['player_id'].map(player_id_to_name)
    df_match_events['receiver_id'] = df_match_events['receiver_id'].map(player_id_to_name)
    df_match_events['team'] = df_match_events['team'].map(team_id_to_name)

    return df_match_events.rename(columns={'player_id': 'player', 'receiver_id': 'receiver'})

In [22]:
def get_match_csv(match_id, fixture,
                  output_dir='C:/Users/tpros/PycharmProjects/Soccer/match_files/liga_argentina_2024'):
    """Export the events of one match to ``<output_dir>/fecha_<fixture>/``.

    The file is named ``<DD-MM-YYYY>_<home team>_<away team>.csv``. Those
    three values are taken from the first row of the events table, where
    ``get_match_events`` has already stored them in the ``date``,
    ``home_team`` and ``away_team`` columns, so the feed is downloaded only
    once per match rather than a second time just for the file name.

    Parameters
    ----------
    match_id
        Match identifier passed on to ``get_match_events``.
    fixture
        Fixture (round) label used for the ``fecha_<fixture>`` sub-folder.
    output_dir : str, optional
        Root folder of the export. Defaults to the location this notebook
        has always written to, so existing calls are unaffected.
    """
    df_events = get_match_events(match_id)

    # Every row carries the same match details; the first one is enough.
    details = df_events.iloc[0]
    file_name = f"{details['date']}_{details['home_team']}_{details['away_team']}.csv"

    fixture_dir = f'{output_dir}/fecha_{fixture}'
    # Create the fixture folder on first use instead of failing on a new round.
    os.makedirs(fixture_dir, exist_ok=True)
    df_events.to_csv(f'{fixture_dir}/{file_name}', index=False)

In [23]:
# Export the events of match 2643781 as a CSV into the "fecha_1" folder.
# Running this cell performs live HTTP requests and writes a file to disk.
get_match_csv(match_id=2643781, fixture=1)

In [24]:
def get_fixture_data(fixture_start, fixture, n_matches=14):
    """Export a run of consecutive match ids belonging to one fixture.

    Calls ``get_match_csv`` for ``fixture_start``, ``fixture_start + 1``, ...
    up to ``n_matches`` matches in total.

    Parameters
    ----------
    fixture_start : int
        Id of the first match of the fixture.
    fixture
        Fixture (round) label forwarded to ``get_match_csv``.
    n_matches : int, optional
        How many consecutive ids to export. Defaults to 14, the value that
        was previously hard-coded.
    """
    for offset in range(n_matches):
        get_match_csv(fixture_start + offset, fixture)

In [27]:
# Export a whole fixture: get_fixture_data walks the consecutive match ids
# beginning at fixture_start and writes one CSV per match.
# Running this cell performs live HTTP requests and writes files to disk.
fixture_start = 2643795  # id of the first match in the batch
fixture = 3  # selects the "fecha_<fixture>" output folder
get_fixture_data(fixture_start, fixture)