In [None]:
# pip install mysql-connector-python

In [11]:
import pandas as pd
import numpy as np
import json

# Configuración
import warnings  # Importa el módulo warnings, que maneja los mensajes de advertencia en Python.
warnings.filterwarnings("ignore")  # Configura para ignorar todas las advertencias.
warnings.simplefilter(action='ignore', category=FutureWarning)  # Configura para ignorar las advertencias de tipo FutureWarning, que se utilizan para notificar sobre cambios que pueden ocurrir en versiones futuras de las librerías.
pd.set_option('display.max_columns', None)  # Configura pandas para mostrar todas las columnas de un DataFrame al imprimirlo, sin truncar la visualización.
pd.set_option('display.max_rows', None)  # Configura pandas para mostrar todas las filas de un DataFrame al imprimirlo, sin truncar la visualización.

import mysql.connector

In [2]:
with open('partidos/20241001_Borussia Dortmund_Celtic_1866131.json') as file:
    data = json.load(file)


In [3]:
players_df = pd.DataFrame(data['matchCentreData']['playerIdNameDictionary'].items(), columns=['playerId', 'playerName'])
players_df.head()


Unnamed: 0,playerId,playerName
0,288892,Ramy Bensebaini
1,92051,Marcel Sabitzer
2,438951,Julien Duranville
3,421975,Yan Couto
4,71824,Pascal Groß


In [4]:
# Extrae los datos del árbitro
referee_data = data['matchCentreData']['referee']

# Convierte los datos a un DataFrame
referee_df = pd.DataFrame([referee_data])
referee_df.head()

Unnamed: 0,officialId,firstName,lastName,hasParticipatedMatches,name
0,233,José María,Sánchez Martínez,False,José María Sánchez Martínez


In [5]:
match_centre_data = data['matchCentreData']
# Filtrar solo los elementos que no son diccionarios
filtered_data = {k: v for k, v in match_centre_data.items() if not isinstance(v, dict)}

# Convertir a DataFrame
match_centre_df = pd.DataFrame([filtered_data])
match_centre_df.head()


Unnamed: 0,timeStamp,attendance,venueName,weatherCode,elapsed,startTime,startDate,score,htScore,ftScore,...,pkScore,statusCode,periodCode,maxMinute,minuteExpanded,maxPeriod,expandedMaxMinute,commonEvents,events,timeoutInSeconds
0,2024-10-02 09:31:58,81365,SIGNAL IDUNA PARK,,F,2024-10-01T21:00:00,2024-10-01T00:00:00,7 : 1,5 : 1,7 : 1,...,,6,7,93,95,2,95,[],"[{'id': 2729186281.0, 'eventId': 2, 'minute': ...",0


In [6]:
home_data = data['matchCentreData']['home']
away_data = data['matchCentreData']['away']
# Añadir una columna para identificar si es el equipo local o visitante
home_data['teamType'] = 'home'
away_data['teamType'] = 'away'
# Crear DataFrames individuales y luego combinarlos
home_df = pd.DataFrame([home_data])
away_df = pd.DataFrame([away_data])

# Concatenar los DataFrames
teams_df = pd.concat([home_df, away_df], ignore_index=True)
teams_df.head()


Unnamed: 0,teamId,formations,stats,incidentEvents,shotZones,name,countryName,players,managerName,scores,field,averageAge,teamType
0,44,"[{'formationId': 8, 'formationName': '4231', '...","{'minutesWithStats': [0, 2, 3, 4, 5, 6, 7, 8, ...","[{'id': 2729198333.0, 'eventId': 61, 'minute':...","{'missHighLeft': {'stats': {}}, 'missHighCentr...",Borussia Dortmund,Alemania,"[{'playerId': 336028, 'shirtNo': 1, 'name': 'G...",Nuri Sahin,"{'halftime': 5, 'fulltime': 7, 'running': 7}",home,25.8,home
1,103,"[{'formationId': 4, 'formationName': '433', 'c...","{'minutesWithStats': [0, 2, 3, 4, 5, 6, 7, 8, ...","[{'id': 2729195601.0, 'eventId': 40, 'minute':...","{'missHighLeft': {'stats': {}}, 'missHighCentr...",Celtic,Escocia,"[{'playerId': 19545, 'shirtNo': 1, 'name': 'Ka...",Brendan Rodgers,"{'halftime': 1, 'fulltime': 1, 'running': 1}",away,25.8,away


In [None]:
match_id = data['matchId']
home_team_id = data['matchCentreData']['home']['teamId']
home_stats = data['matchCentreData']['home']['stats']

# Crear una lista para almacenar las filas
stats_rows = []

# Recorrer cada estadística en home_stats
for stat_name, stat_data in home_stats.items():
    # Verificar si el valor es un diccionario (con minutos) o una lista
    if isinstance(stat_data, dict):
        # Si es un diccionario, extraer minuto y valor
        for minute, value in stat_data.items():
            stats_rows.append({
                'matchId': match_id,
                'teamId': home_team_id,
                'statsName': stat_name,
                'statsValue': value,
                'minutes': minute
            })
    elif isinstance(stat_data, list):
        # Si es una lista, asignar un valor 'N/A' o similar para 'minutes' si no tiene minutos asociados
        for value in stat_data:
            stats_rows.append({
                'matchId': match_id,
                'teamId': home_team_id,
                'statsName': stat_name,
                'statsValue': value,
                'minutes': 'N/A'
            })

# Convertir a DataFrame
home_stats_df = pd.DataFrame(stats_rows)

home_stats_df.head(25)


Unnamed: 0,matchId,teamId,statsName,statsValue,minutes
0,1866131,44,minutesWithStats,0.0,
1,1866131,44,minutesWithStats,2.0,
2,1866131,44,minutesWithStats,3.0,
3,1866131,44,minutesWithStats,4.0,
4,1866131,44,minutesWithStats,5.0,


In [None]:
home_stats_df['statsName'].value_counts()

In [None]:
match_id = data['matchId']
away_team_id = data['matchCentreData']['away']['teamId']
away_stats = data['matchCentreData']['away']['stats']

# Crear una lista para almacenar las filas
stats_rows = []

# Recorrer cada estadística en away_stats
for stat_name, stat_data in away_stats.items():
    # Verificar si el valor es un diccionario (con minutos) o una lista
    if isinstance(stat_data, dict):
        # Si es un diccionario, extraer minuto y valor
        for minute, value in stat_data.items():
            stats_rows.append({
                'matchId': match_id,
                'teamId': away_team_id,
                'statsName': stat_name,
                'statsValue': value,
                'minutes': minute
            })
    elif isinstance(stat_data, list):
        # Si es una lista, asignar un valor 'N/A' o similar para 'minutes' si no tiene minutos asociados
        for value in stat_data:
            stats_rows.append({
                'matchId': match_id,
                'teamId': away_team_id,
                'statsName': stat_name,
                'statsValue': value,
                'minutes': 'N/A'
            })

# Convertir a DataFrame
away_stats_df = pd.DataFrame(stats_rows)

away_stats_df.head(5)


In [25]:
home_incidents = data['matchCentreData']['home']['incidentEvents']
# Crear una lista para almacenar cada evento
incidents_rows = []

# Recorrer cada evento en incidentEvents
for event in home_incidents:
    incidents_rows.append({
        'id': str(event.get('id', None)),
        'matchId': match_id,
        'teamId': home_team_id,
        'eventId': event.get('eventId', None),
        'minute': event.get('minute', None),
        'second': event.get('second', None),
        'playerId': event.get('playerId', None),
        'x': event.get('x', None),
        'y': event.get('y', None),
        'expandedMinute': event.get('expandedMinute', None),
        'eventType': event.get('type', {}).get('displayName', None),
        'outcomeType': event.get('outcomeType', {}).get('displayName', None),
        'isGoal': event.get('isGoal', False),
        'isShot': event.get('isShot', False)
    })

qualifiers_rows = []

# Recorrer cada evento en incidentEvents
for event in home_incidents:
    incident_id = str(event.get('id', None))  # ID del incidente
    qualifiers = event.get('qualifiers', [])

    # Recorrer cada qualifier dentro de un incidente
    for qualifier in qualifiers:
        qualifiers_rows.append({
            'id': incident_id,
            'qualifierType': qualifier.get('type', {}).get('displayName', None),
            'qualifierTypeValue': qualifier.get('type', {}).get('value', None),
            'qualifierValue': qualifier.get('value', None)
        })
# Convertir a DataFrame
home_incidents_df = pd.DataFrame(incidents_rows)
home_qualifiers_df = pd.DataFrame(qualifiers_rows)


In [None]:
home_incidents_df.head()


Unnamed: 0,id,matchId,teamId,eventId,minute,second,playerId,x,y,expandedMinute,eventType,outcomeType,isGoal,isShot
0,2729198333.0,1866131,44,61,6,46,111212.0,88.5,50.0,6,Goal,Successful,True,True
1,2729207951.0,1866131,44,97,8,38,111212.0,7.6,50.1,8,Error,Successful,False,False
2,2729203995.0,1866131,44,78,10,9,141556.0,55.2,56.3,10,Pass,Successful,False,False
3,2729204001.0,1866131,44,79,10,13,392646.0,87.9,63.2,10,Goal,Successful,True,True
4,2729234717.0,1866131,44,256,28,27,392646.0,87.2,77.7,28,Goal,Successful,True,True


In [27]:
home_qualifiers_df.head()

Unnamed: 0,id,qualifierType,qualifierTypeValue,qualifierValue
0,2729198333.0,BigChance,214,
1,2729198333.0,GoalMouthZ,103,26.6
2,2729198333.0,HighCentre,79,
3,2729198333.0,Zone,56,Center
4,2729198333.0,GoalMouthY,102,49.9


In [28]:
away_incidents = data['matchCentreData']['away']['incidentEvents']
# Crear una lista para almacenar cada evento
incidents_rows = []

# Recorrer cada evento en incidentEvents
for event in away_incidents:
    incidents_rows.append({
        'id': str(event.get('id', None)),
        'matchId': match_id,
        'teamId': away_team_id,
        'eventId': event.get('eventId', None),
        'minute': event.get('minute', None),
        'second': event.get('second', None),
        'playerId': event.get('playerId', None),
        'x': event.get('x', None),
        'y': event.get('y', None),
        'expandedMinute': event.get('expandedMinute', None),
        'eventType': event.get('type', {}).get('displayName', None),
        'outcomeType': event.get('outcomeType', {}).get('displayName', None),
        'isGoal': event.get('isGoal', False),
        'isShot': event.get('isShot', False)
    })

qualifiers_rows = []

# Recorrer cada evento en incidentEvents
for event in away_incidents:
    incident_id = str(event.get('id', None))  # ID del incidente
    qualifiers = event.get('qualifiers', [])

    # Recorrer cada qualifier dentro de un incidente
    for qualifier in qualifiers:
        qualifiers_rows.append({
            'id': incident_id,
            'qualifierType': qualifier.get('type', {}).get('displayName', None),
            'qualifierTypeValue': qualifier.get('type', {}).get('value', None),
            'qualifierValue': qualifier.get('value', None)
        })
# Convertir a DataFrame
away_incidents_df = pd.DataFrame(incidents_rows)
away_qualifiers_df = pd.DataFrame(qualifiers_rows)


In [30]:
away_incidents_df.head()

Unnamed: 0,id,matchId,teamId,eventId,minute,second,playerId,x,y,expandedMinute,eventType,outcomeType,isGoal,isShot
0,2729195601.0,1866131,103,40,5,14,19545.0,0.0,0.0,5,Card,Successful,False,False
1,2729201511.0,1866131,103,59,8,36,410506.0,85.2,10.3,8,Pass,Unsuccessful,False,False
2,2729201515.0,1866131,103,60,8,39,377287.0,96.4,51.9,8,Goal,Successful,True,True
3,2729223017.0,1866131,103,179,21,25,397859.0,27.8,50.0,21,Tackle,Unsuccessful,False,False
4,2729248587.0,1866131,103,304,41,10,377287.0,21.2,81.5,41,Error,Successful,False,False


In [31]:
away_qualifiers_df.head()

Unnamed: 0,id,qualifierType,qualifierTypeValue,qualifierValue
0,2729195601.0,RelatedEventId,55,39
1,2729195601.0,Foul,13,243
2,2729195601.0,Yellow,31,
3,2729201511.0,Zone,56,Center
4,2729201511.0,PassEndY,141,51.8


In [32]:
home_players = data['matchCentreData']['home']['players']
# Lista para almacenar los datos de cada jugador
players_rows = []

# Recorrer cada jugador en el equipo home
for player in home_players:
    players_rows.append({
        'matchId': match_id,
        'teamId': home_team_id,
        'playerId': player.get('playerId', None),
        'shirtNo': player.get('shirtNo', None),
        'name': player.get('name', None),
        'position': player.get('position', None),
        'height': player.get('height', None),
        'weight': player.get('weight', None),
        'age': player.get('age', None),
        'isFirstEleven': player.get('isFirstEleven', None),
        'isManOfTheMatch': player.get('isManOfTheMatch', None),
        'field': player.get('field', None)
    })
home_players_df = pd.DataFrame(players_rows)


In [34]:
home_players_df


Unnamed: 0,matchId,teamId,playerId,shirtNo,name,position,height,weight,age,isFirstEleven,isManOfTheMatch,field
0,1866131,44,336028,1,Gregor Kobel,GK,195,88,26,True,False,home
1,1866131,44,421975,2,Yan Couto,DR,168,60,22,True,False,home
2,1866131,44,371275,4,Nico Schlotterbeck,DC,191,86,24,True,False,home
3,1866131,44,296768,3,Waldemar Anton,DC,189,86,28,True,False,home
4,1866131,44,320436,26,Julian Ryerson,DL,183,86,26,True,False,home
5,1866131,44,71824,13,Pascal Groß,DMC,181,78,33,True,False,home
6,1866131,44,111212,23,Emre Can,DMC,186,86,30,True,False,home
7,1866131,44,392646,27,Karim Adeyemi,AMR,180,75,22,True,True,home
8,1866131,44,141556,10,Julian Brandt,AMC,185,83,28,True,False,home
9,1866131,44,436425,43,Jamie Gittens,AML,175,70,20,True,False,home


In [35]:
away_players = data['matchCentreData']['away']['players']
# Lista para almacenar los datos de cada jugador
players_rows = []

# Recorrer cada jugador en el equipo away
for player in away_players:
    players_rows.append({
        'matchId': match_id,
        'teamId': away_team_id,
        'playerId': player.get('playerId', None),
        'shirtNo': player.get('shirtNo', None),
        'name': player.get('name', None),
        'position': player.get('position', None),
        'height': player.get('height', None),
        'weight': player.get('weight', None),
        'age': player.get('age', None),
        'isFirstEleven': player.get('isFirstEleven', None),
        'isManOfTheMatch': player.get('isManOfTheMatch', None),
        'field': player.get('field', None)
    })
away_players_df = pd.DataFrame(players_rows)


In [36]:
away_players_df.head()

Unnamed: 0,matchId,teamId,playerId,shirtNo,name,position,height,weight,age,isFirstEleven,isManOfTheMatch,field
0,1866131,103,19545,1,Kasper Schmeichel,GK,189,88,37,True,False,away
1,1866131,103,397859,2,Alistair Johnston,DR,180,77,25,True,False,away
2,1866131,103,384863,5,Liam Scales,DC,187,0,26,True,False,away
3,1866131,103,298687,6,Auston Trusty,DC,191,84,26,True,False,away
4,1866131,103,376888,3,Greg Taylor,DL,170,62,26,True,False,away


In [54]:
home_formations = data['matchCentreData']['home']['formations']
home_formation_headers = []

for formation in home_formations:
    home_formation_headers.append({
        'matchId': match_id,
        'teamId': home_team_id,
        'formationId': formation.get('formationId', None),
        'formationName': formation.get('formationName', None),
        'captainPlayerId': formation.get('captainPlayerId', None),
        'period': formation.get('period', None),
        'startMinuteExpanded': formation.get('startMinuteExpanded', None),
        'endMinuteExpanded': formation.get('endMinuteExpanded', None)
    })

# Convertir a DataFrame
home_formations_header_df = pd.DataFrame(home_formation_headers)

home_player_details = []

for formation in home_formations:
    # Obtener los valores de cada lista en la formación
    formation_name = formation.get('formationName', None)
    jersey_numbers = formation.get('jerseyNumbers', [])
    formation_slots = formation.get('formationSlots', [])
    player_ids = formation.get('playerIds', [])
    formation_positions = formation.get('formationPositions', [])

    # Iterar sobre las listas para crear una fila para cada jugador
    for i in range(len(player_ids)):
        home_player_details.append({
            'matchId': match_id,
            'teamId': home_team_id,
            'formationName': formation_name,
            'jerseyNumbers': jersey_numbers[i] if i < len(jersey_numbers) else None,
            'formationSlots': formation_slots[i] if i < len(formation_slots) else None,
            'playerIds': player_ids[i] if i < len(player_ids) else None,
            'formationPosition_x': formation_positions[i].get('horizontal', None) if i < len(formation_positions) else None,
            'formationPosition_y': formation_positions[i].get('vertical', None) if i < len(formation_positions) else None
        })
home_players_details_df = pd.DataFrame(home_player_details)

home_players = data['matchCentreData']['home']['players']
# Lista para almacenar las estadísticas de cada jugador
home_player_stats_rows = []

# Iterar sobre cada jugador
for player in home_players:
    player_id = player.get('playerId', None)
    home_player_stats = player.get('stats', {})
    
    # Recorrer cada tipo de estadística en player_stats
    for stat_name, stat_data in home_player_stats.items():
        # Verificar si los datos de estadísticas son un diccionario (con minutos) o un solo valor
        if isinstance(stat_data, dict):
            # Si es un diccionario, recorrer por minuto y valor
            for minute, value in stat_data.items():
                home_player_stats_rows.append({
                    'matchId': match_id,
                    'teamId': home_team_id,
                    'playerId': player_id,
                    'statsName': stat_name,
                    'statsMinute': minute,
                    'statsValue': value
                })
        else:
            # Si es un valor único sin minutos, usar 'N/A' para statsMinute
            home_player_stats_rows.append({
                'matchId': match_id,
                'teamId': home_team_id,
                'playerId': player_id,
                'statsName': stat_name,
                'statsMinute': 'N/A',
                'statsValue': stat_data
            })
home_player_stats_df = pd.DataFrame(home_player_stats_rows)

# Asegurarse de que `statsValue` es numérico para poder sumar
home_player_stats_df['statsValue'] = pd.to_numeric(home_player_stats_df['statsValue'], errors='coerce').fillna(0)

# Agrupar por jugador y estadística y sumar los valores
home_summary_df = home_player_stats_df.groupby(['matchId', 'teamId', 'playerId', 'statsName'])['statsValue'].sum().reset_index()

# Pivotear el DataFrame para que cada estadística esté en una columna
home_player_summary_df = home_summary_df.pivot_table(
    index=['matchId', 'teamId', 'playerId'],
    columns='statsName',
    values='statsValue',
    fill_value=0
).reset_index()



In [None]:
home_formations_header_df.head()


Unnamed: 0,matchId,teamId,formationId,formationName,captainPlayerId,period,startMinuteExpanded,endMinuteExpanded
0,1866131,44,8,4231,111212,16,0,49
1,1866131,44,8,4231,111212,16,49,62
2,1866131,44,8,4231,111212,16,62,63
3,1866131,44,8,4231,111212,2,63,71
4,1866131,44,8,4231,111212,2,71,93


In [None]:
home_players_details_df.head()


Unnamed: 0,matchId,teamId,formationName,jerseyNumbers,formationSlots,playerIds,formationPosition_x,formationPosition_y
0,1866131,44,4231,1,1,336028,5.0,0.0
1,1866131,44,4231,2,2,421975,1.0,2.5
2,1866131,44,4231,26,3,320436,9.0,2.5
3,1866131,44,4231,13,4,71824,6.5,5.0
4,1866131,44,4231,3,5,296768,3.5,2.5


In [57]:
home_player_stats_df.head()


Unnamed: 0,matchId,teamId,playerId,statsName,statsMinute,statsValue
0,1866131,44,336028,totalSaves,79,2.0
1,1866131,44,336028,collected,79,1.0
2,1866131,44,336028,parriedDanger,79,1.0
3,1866131,44,336028,possession,3,1.0
4,1866131,44,336028,possession,4,1.0


In [49]:
away_formations = data['matchCentreData']['home']['formations']
away_formation_headers = []

for formation in away_formations:
    away_formation_headers.append({
        'matchId': match_id,
        'teamId': away_team_id,
        'formationId': formation.get('formationId', None),
        'formationName': formation.get('formationName', None),
        'captainPlayerId': formation.get('captainPlayerId', None),
        'period': formation.get('period', None),
        'startMinuteExpanded': formation.get('startMinuteExpanded', None),
        'endMinuteExpanded': formation.get('endMinuteExpanded', None)
    })

# Convertir a DataFrame
away_formations_header_df = pd.DataFrame(away_formation_headers)

away_player_details = []

for formation in away_formations:
    # Obtener los valores de cada lista en la formación
    formation_name = formation.get('formationName', None)
    jersey_numbers = formation.get('jerseyNumbers', [])
    formation_slots = formation.get('formationSlots', [])
    player_ids = formation.get('playerIds', [])
    formation_positions = formation.get('formationPositions', [])

    # Iterar sobre las listas para crear una fila para cada jugador
    for i in range(len(player_ids)):
        away_player_details.append({
            'matchId': match_id,
            'teamId': away_team_id,
            'formationName': formation_name,
            'jerseyNumbers': jersey_numbers[i] if i < len(jersey_numbers) else None,
            'formationSlots': formation_slots[i] if i < len(formation_slots) else None,
            'playerIds': player_ids[i] if i < len(player_ids) else None,
            'formationPosition_x': formation_positions[i].get('horizontal', None) if i < len(formation_positions) else None,
            'formationPosition_y': formation_positions[i].get('vertical', None) if i < len(formation_positions) else None
        })
away_players_details_df = pd.DataFrame(away_player_details)

away_players = data['matchCentreData']['home']['players']
# Lista para almacenar las estadísticas de cada jugador
away_player_stats_rows = []

# Iterar sobre cada jugador
for player in away_players:
    player_id = player.get('playerId', None)
    away_player_stats = player.get('stats', {})
    
    # Recorrer cada tipo de estadística en player_stats
    for stat_name, stat_data in away_player_stats.items():
        # Verificar si los datos de estadísticas son un diccionario (con minutos) o un solo valor
        if isinstance(stat_data, dict):
            # Si es un diccionario, recorrer por minuto y valor
            for minute, value in stat_data.items():
                away_player_stats_rows.append({
                    'matchId': match_id,
                    'teamId': away_team_id,
                    'playerId': player_id,
                    'statsName': stat_name,
                    'statsMinute': minute,
                    'statsValue': value
                })
        else:
            # Si es un valor único sin minutos, usar 'N/A' para statsMinute
            away_player_stats_rows.append({
                'matchId': match_id,
                'teamId': away_team_id,
                'playerId': player_id,
                'statsName': stat_name,
                'statsMinute': 'N/A',
                'statsValue': stat_data
            })
away_player_stats_df = pd.DataFrame(away_player_stats_rows)

# Asegurarse de que `statsValue` es numérico para poder sumar
away_player_stats_df['statsValue'] = pd.to_numeric(away_player_stats_df['statsValue'], errors='coerce').fillna(0)

# Agrupar por jugador y estadística y sumar los valores
away_summary_df = away_player_stats_df.groupby(['matchId', 'teamId', 'playerId', 'statsName'])['statsValue'].sum().reset_index()

# Pivotear el DataFrame para que cada estadística esté en una columna
away_player_summary_df = away_summary_df.pivot_table(
    index=['matchId', 'teamId', 'playerId'],
    columns='statsName',
    values='statsValue',
    fill_value=0
).reset_index()



In [None]:
away_formations_header_df.head()


Unnamed: 0,matchId,teamId,formationId,formationName,captainPlayerId,period,startMinuteExpanded,endMinuteExpanded
0,1866131,103,8,4231,111212,16,0,49
1,1866131,103,8,4231,111212,16,49,62
2,1866131,103,8,4231,111212,16,62,63
3,1866131,103,8,4231,111212,2,63,71
4,1866131,103,8,4231,111212,2,71,93


In [None]:
away_players_details_df.head()


Unnamed: 0,matchId,teamId,formationName,jerseyNumbers,formationSlots,playerIds,formationPosition_x,formationPosition_y
0,1866131,103,4231,1,1,336028,5.0,0.0
1,1866131,103,4231,2,2,421975,1.0,2.5
2,1866131,103,4231,26,3,320436,9.0,2.5
3,1866131,103,4231,13,4,71824,6.5,5.0
4,1866131,103,4231,3,5,296768,3.5,2.5


In [53]:
away_player_stats_df.head()

Unnamed: 0,matchId,teamId,playerId,statsName,statsMinute,statsValue
0,1866131,103,336028,totalSaves,79,2.0
1,1866131,103,336028,collected,79,1.0
2,1866131,103,336028,parriedDanger,79,1.0
3,1866131,103,336028,possession,3,1.0
4,1866131,103,336028,possession,4,1.0


In [60]:
events = data['matchCentreData']['events']

events_rows = []

for event in events:
    events_rows.append({
        'matchId': match_id,
        'teamId': event.get('teamId', None),
        'eventId': str(event.get('id', None)),
        'minute': event.get('minute', None),
        'second': event.get('second', None),
        'playerId': event.get('playerId', None),
        'x': event.get('x', None),
        'y': event.get('y', None),
        'expandedMinute': event.get('expandedMinute', None),
        'eventType': event.get('type', {}).get('displayName', None),
        'outcomeType': event.get('outcomeType', {}).get('displayName', None),
        'isGoal': event.get('isGoal', False),
        'isShot': event.get('isShot', False)
    })

# Convertir a DataFrame
events_df = pd.DataFrame(events_rows)


In [61]:
events_df.head()


Unnamed: 0,matchId,teamId,eventId,minute,second,playerId,x,y,expandedMinute,eventType,outcomeType,isGoal,isShot
0,1866131,44,2729186281.0,0,0.0,,0.0,0.0,0,Start,Successful,False,False
1,1866131,103,2729186171.0,0,0.0,,0.0,0.0,0,Start,Successful,False,False
2,1866131,103,2729186323.0,0,0.0,410506.0,50.1,49.3,0,Pass,Successful,False,False
3,1866131,103,2729186357.0,0,1.0,36471.0,44.2,50.9,0,Pass,Successful,False,False
4,1866131,103,2729186497.0,0,3.0,376888.0,42.9,78.1,0,Pass,Successful,False,False


In [64]:
qualifiers_rows = []

for event in events:
    event_id = str(event.get('id', None))  # ID del evento
    qualifiers = event.get('qualifiers', [])

    for qualifier in qualifiers:
        qualifiers_rows.append({
            'eventId': event_id,
            'qualifierType': qualifier.get('type', {}).get('displayName', None),
            'qualifierTypeValue': qualifier.get('type', {}).get('value', None),
            'qualifierValue': qualifier.get('value', None)
        })
qualifiers_df = pd.DataFrame(qualifiers_rows)


In [None]:
qualifiers_df.head(10)


Unnamed: 0,eventId,qualifierType,qualifierTypeValue,qualifierValue
0,2729186323.0,Length,212,8.8
1,2729186323.0,PassEndY,141,48.5
2,2729186323.0,Zone,56,Back
3,2729186323.0,Angle,213,3.20
4,2729186323.0,PassEndX,140,41.7
5,2729186323.0,StandingSave,178,
6,2729186357.0,StandingSave,178,
7,2729186357.0,Length,212,17.0
8,2729186357.0,Angle,213,1.78
9,2729186357.0,PassEndY,141,75.3


In [69]:
events_rows = []

for event in events:
    events_rows.append({
        'matchId': match_id,
        'teamId': event.get('teamId', None),
        'eventId': str(event.get('id', None)),
        'minute': event.get('minute', None),
        'second': event.get('second', None),
        'playerId': event.get('playerId', None),
        'x': event.get('x', None),
        'y': event.get('y', None),
        'expandedMinute': event.get('expandedMinute', None),
        'eventType': event.get('type', {}).get('displayName', None),
        'outcomeType': event.get('outcomeType', {}).get('displayName', None),
        'isGoal': event.get('isGoal', False),
        'isShot': event.get('isShot', False)
    })

# Convertir a DataFrame
events_df = pd.DataFrame(events_rows)

qualifiers_rows = []

for event in events:
    event_id = str(event.get('id', None))  # ID del evento
    qualifiers = event.get('qualifiers', [])

    for qualifier in qualifiers:
        qualifiers_rows.append({
            'eventId': event_id,
            'qualifierType': qualifier.get('type', {}).get('displayName', None),
            'qualifierValue': qualifier.get('value', None)
        })

qualifiers_df = pd.DataFrame(qualifiers_rows)

# Pivotear el DataFrame para que cada `qualifierType` esté en una columna
qualifiers_pivot_df = qualifiers_df.pivot_table(
    index='eventId',
    columns='qualifierType',
    values='qualifierValue',
    aggfunc='first'  # Usamos 'first' en caso de duplicados
).reset_index()

# Combinar ambos DataFrames en uno solo
events_with_qualifiers_df = pd.merge(events_df, qualifiers_pivot_df, on='eventId', how='left')


In [70]:
events_with_qualifiers_df.head()


Unnamed: 0,matchId,teamId,eventId,minute,second,playerId,x,y,expandedMinute,eventType,outcomeType,isGoal,isShot,Angle,BlockedX,BlockedY,CaptainPlayerId,FormationSlot,Foul,GoalMouthY,GoalMouthZ,InvolvedPlayers,JerseyNumber,LeadingToAttempt,LeadingToGoal,Length,OppositeRelatedEvent,PassEndX,PassEndY,PlayerCaughtOffside,PlayerPosition,RelatedEventId,ShotAssist,TeamFormation,TeamPlayerFormation,Zone
0,1866131,44,2729186281.0,0,0.0,,0.0,0.0,0,Start,Successful,False,False,,,,,,,,,,,,,,,,,,,,,,,
1,1866131,103,2729186171.0,0,0.0,,0.0,0.0,0,Start,Successful,False,False,,,,,,,,,,,,,,,,,,,,,,,
2,1866131,103,2729186323.0,0,0.0,410506.0,50.1,49.3,0,Pass,Successful,False,False,3.2,,,,,,,,,,,,8.8,,41.7,48.5,,,,,,,Back
3,1866131,103,2729186357.0,0,1.0,36471.0,44.2,50.9,0,Pass,Successful,False,False,1.78,,,,,,,,,,,,17.0,,40.8,75.3,,,,,,,Back
4,1866131,103,2729186497.0,0,3.0,376888.0,42.9,78.1,0,Pass,Successful,False,False,0.6,,,,,,,,,,,,13.8,,53.7,89.6,,,,,,,Left


In [None]:
players_df.head() # los jugadores que participaron en el partido
referee_df.head()
match_centre_df.head() # datos base del match
teams_df.head() #por team la formacion(dicc) las stats(dicc) los players(dicc) y datos base
home_stats_df.head(25) # stats por minuto del local
away_stats_df.head(5) # stats por minuto del visitante
home_incidents_df.head() # cambios sustituciones goles tarjetas del local
home_qualifiers_df.head() # caracteristicas de los eventos del local
away_incidents_df.head() # cambios sustituciones goles tarjetas del visitante
away_qualifiers_df.head() # caracteristicas de los eventos del visitante
home_players_df.head() # jugadores presentados en el partido del local
away_players_df.head() # jugadores presentados en el partido del visitante
home_formations_header_df.head() # las formaciones utilizadas del local
home_players_details_df.head() # los jugadores por posicion en cada formacion del local
home_player_stats_df.head() # stats por jugador de los locales por minuto
away_formations_header_df.head() # las formaciones utilizadas del visitante
away_players_details_df.head() # los jugadores por posicion en cada formacion del visitante
away_player_stats_df.head() # stats por jugador de los visitantes por minuto
qualifiers_df.head(10) # las caracteristicas de los eventos del partido
events_df.head() # cada evento del partido
events_with_qualifiers_df.head() # cada evento del partido con sus caracteristicas

# CONTINUAR CON LOS INSERT EN LA BASE DE DATOS
