In [130]:
from google.colab import drive

# Monte o Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


# Reading F24 files

In [138]:
import pandas as pd
import xml.etree.ElementTree as ET
import os

def dict_to_dataframe(input_dict):
    df = pd.DataFrame.from_dict([input_dict])
    return df

path = '/content/drive/MyDrive/F24_LaLiga1920'

df = pd.DataFrame()

for xml_file in os.listdir(path):
    if xml_file.endswith('.xml'):
        xml_file_path = os.path.join(path, xml_file)

        tree = ET.parse(xml_file_path)
        root = tree.getroot()

        data = []

        for event_elem in root.findall('.//Event'):
            event_data = {}

            event_data['id'] = event_elem.get('id')
            event_data['event_id'] = event_elem.get('event_id')
            event_data['type_id'] = event_elem.get('type_id')
            event_data['period_id'] = event_elem.get('period_id')
            event_data['min'] = event_elem.get('min')
            event_data['sec'] = event_elem.get('sec')
            event_data['team_id'] = event_elem.get('team_id')
            event_data['outcome'] = event_elem.get('outcome')
            event_data['x'] = event_elem.get('x')
            event_data['y'] = event_elem.get('y')
            event_data['timestamp'] = event_elem.get('timestamp')
            event_data['last_modified'] = event_elem.get('last_modified')
            event_data['version'] = event_elem.get('version')

            for q_elem in event_elem.findall('Q'):
                q_data = {}

                q_data['id'] = q_elem.get('id')
                q_data['qualifier_id'] = q_elem.get('qualifier_id')
                q_data['value'] = q_elem.get('value')

                event_data.setdefault('q', []).append(q_data)

            data.append(event_data)

        for game_elem in root.findall('.//Game'):
            game_data = {}

            game_data['game_id'] = game_elem.get('id')
            game_data['away_score'] = game_elem.get('away_score')
            game_data['away_team_id'] = game_elem.get('away_team_id')
            game_data['away_team_name'] = game_elem.get('away_team_name')
            game_data['competition_id'] = game_elem.get('competition_id')
            game_data['competition_name'] = game_elem.get('competition_name')
            game_data['game_date'] = game_elem.get('game_date')
            game_data['home_score'] = game_elem.get('home_score')
            game_data['home_team_id'] = game_elem.get('home_team_id')
            game_data['home_team_name'] = game_elem.get('home_team_name')
            game_data['matchday'] = game_elem.get('matchday')
            game_data['season_id'] = game_elem.get('season_id')
            game_data['season_name'] = game_elem.get('season_name')

        game_details = dict_to_dataframe(game_data)

        event_data = pd.DataFrame(data)

        expanded_df = pd.concat([game_details]*len(event_data), ignore_index=True)
        result_df = pd.concat([event_data, expanded_df], axis=1)
        result_df = result_df[['id', 'event_id', 'type_id', 'period_id', 'min',
                            'team_id','outcome', 'x', 'y', 'version', 'game_id',
                            'away_score', 'away_team_id', 'away_team_name',
                            'competition_id', 'competition_name', 'home_score',
                            'home_team_id', 'home_team_name', 'matchday',
                            'season_name']]

    df = pd.concat([df, result_df], ignore_index=True)


In [None]:
df.head()

# Reading F30 files for Player data

In [133]:
import pandas as pd
import xml.etree.ElementTree as ET
import os

xml_directory = '/content/drive/MyDrive/F30_LaLiga1920'

final_df = pd.DataFrame()

for xml_file in os.listdir(xml_directory):
    if xml_file.endswith('.xml'):
        xml_file_path = os.path.join(xml_directory, xml_file)

        tree = ET.parse(xml_file_path)
        root = tree.getroot()

        data = []

        team_name = root.find('.//Team').get('name')

        for player_elem in root.findall('.//Player'):
            player_data = {}
            player_data['position'] = player_elem.get('position')
            player_data['player_id'] = player_elem.get('player_id')
            player_data['shirtNumber'] = player_elem.get('shirtNumber')
            player_data['last_name'] = player_elem.get('last_name')
            player_data['first_name'] = player_elem.get('first_name')

            for stat_elem in player_elem.findall('Stat'):
                stat_name = stat_elem.get('name')
                stat_value = stat_elem.text
                player_data[stat_name] = stat_value
                player_data['Team name'] = team_name

            data.append(player_data)

        df = pd.DataFrame(data)

        unique_value = df['Team name'].dropna().unique()[0]
        df['Team name'].fillna(unique_value, inplace=True)

        final_df = pd.concat([final_df, df], ignore_index=True)


In [134]:
final_df.head()

Unnamed: 0,position,player_id,shirtNumber,last_name,first_name,Unsuccessful Crosses open play,Team name,Time Played,Throw Ins to Own Player,Substitute On,...,Penalties Saved,Goalkeeper Smother,Drops,Catches,Saves from Penalty,Set Pieces Goals,Straight Red Cards,Clearances Off the Line,Times Tackled,Winning Goal
0,Defender,246477,3,Agbenyenu,Lumor,68.0,Mallorca,1807,179.0,3.0,...,,,,,,,,,,
1,Goalkeeper,40559,13,Agosto Ramírez,Fabricio,,Mallorca,90,,,...,,,,,,,,,,
2,Forward,91953,10,Alegría Moreno,Alexánder,1.0,Mallorca,103,,6.0,...,,,,,,,,,,
3,Defender,118335,18,Baba,Abdul Rahman,5.0,Mallorca,109,14.0,,...,,,,,,,,,,
4,Midfielder,463170,12,Baba,Iddrisu,2.0,Mallorca,2912,1.0,1.0,...,,,,,,,,,,


# Reading F30 for Squads data

In [136]:
import pandas as pd
import xml.etree.ElementTree as ET
import os

xml_directory = '/content/drive/MyDrive/F30_LaLiga1920'

final_df = pd.DataFrame()

for xml_file in os.listdir(xml_directory):
    if xml_file.endswith('.xml'):
        xml_file_path = os.path.join(xml_directory, xml_file)

        tree = ET.parse(xml_file_path)
        root = tree.getroot()

        team_name = root.find('.//Team').get('name')
        team_id = root.find('.//Team').get('id')

        data = {'name': team_name, 'id': team_id}
        stats_data = {}

        for stat_elem in root.findall('.//Stat'):
            name = stat_elem.get('name')
            value = stat_elem.text
            stats_data[name] = [value]

        data.update(stats_data)

        df = pd.DataFrame(data)

        final_df = pd.concat([final_df, df], ignore_index=True)


In [137]:
final_df.head()

Unnamed: 0,name,id,Total Shots,Possession Percentage,Total Fouls Won,Unsuccessful Dribbles,PutThrough/Blocked Distribution,Penalty Goals,Goals,Headed Goals,...,Own Goal Scored,Through balls,Assists (Intentional),Second Goal Assists,Saves made - parried,Punches,Goalkeeper Smother,Saves from Penalty,Times Tackled,Winning Goal
0,Mallorca,181,14,44,9,4,18,2,5,1,...,1.0,1,3,1.0,43,7,1,1,,
1,Sevilla,179,51,58,37,8,21,5,6,2,...,1.0,5,1,1.0,18,14,1,1,,
2,Getafe,1450,15,44,14,15,9,1,3,3,...,1.0,2,4,,22,9,6,1,,
3,Granada CF,5683,9,43,16,10,20,1,2,2,...,,1,1,1.0,3,13,5,1,,
4,Valencia CF,191,7,48,25,8,17,5,1,1,...,1.0,3,1,1.0,18,5,1,2,,
