In [1]:
import numpy as np
import pandas as pd
import json

In [2]:
data = pd.read_csv('laliga26.csv', usecols=['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', ])

In [3]:
data.describe()

Unnamed: 0,FTHG,FTAG
count,21.0,21.0
mean,1.380952,1.285714
std,1.116969,1.101946
min,0.0,0.0
25%,1.0,0.0
50%,1.0,1.0
75%,2.0,2.0
max,5.0,3.0


In [4]:
data.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,Girona,Vallecano,1,3,A
1,Villarreal,Oviedo,2,0,H
2,Mallorca,Barcelona,0,3,A
3,Alaves,Levante,2,1,H
4,Valencia,Sociedad,1,1,D


In [5]:
teams = pd.concat([data['HomeTeam'], data['AwayTeam']]).unique()
teams = pd.DataFrame(teams, columns=['Team'])
teams_dict = {team: i+1 for i, team in enumerate(teams['Team'])}
teams

Unnamed: 0,Team
0,Girona
1,Villarreal
2,Mallorca
3,Alaves
4,Valencia
5,Celta
6,Ath Bilbao
7,Espanol
8,Elche
9,Real Madrid


In [6]:
data['HomeTeam'] = data['HomeTeam'].map(teams_dict)
data['AwayTeam'] = data['AwayTeam'].map(teams_dict)

In [7]:
for index, row in data.iterrows():
    if row['FTR'] == 'H':
        data.at[index, 'FTR'] = row['HomeTeam']
    elif row['FTR'] == 'D':
        data.at[index, 'FTR'] = 0
    elif row['FTR'] == 'A':
        data.at[index, 'FTR'] = row['AwayTeam']

In [8]:
data.rename(columns={"FTHG": "HomeGoals", "FTAG": "AwayGoals", "FTR": "Winner"}, inplace=True)

In [9]:
team_stats_list = []

for team_name in teams['Team']:
    team_id = teams_dict[team_name]  # Get TeamID from the mapping
    gf_home, ga_home, gf_away, ga_away, wins_home, draws_home, losses_home, wins_away, draws_away, losses_away = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

    gf, ga, wins, draws, losses = 0, 0, 0, 0, 0

    for _, match in data.iterrows():
    # Check home matches
        if match['HomeTeam'] == team_id:
            gf_home += match['HomeGoals']
            ga_home += match['AwayGoals']
            
            if match['Winner'] == team_id:
                wins_home += 1
            elif match['Winner'] == 0:
                draws_home += 1
            else:
                losses_home += 1
                
        # Check away matches
        elif match['AwayTeam'] == team_id:
            gf_away += match['AwayGoals']
            ga_away += match['HomeGoals']
            
            if match['Winner'] == team_id:
                wins_away += 1
            elif match['Winner'] == 0:
                draws_away += 1
            else:
                losses_away += 1

    
    # Append team stats as a dictionary
    team_stats_list.append({
        "Team": team_name,
        "TeamID": team_id,
        "GoalsForHome": gf_home,
        "GoalsAgainstHome": ga_home,
        "GoalsForAway": gf_away,
        "GoalsAgainstAway": ga_away,
        "WinsHome": wins_home,
        "DrawsHome": draws_home,
        "LossesHome": losses_home,
        "WinsAway": wins_away,
        "DrawsAway": draws_away,
        "LossesAway": losses_away,
        "GoalsFor": gf_home + gf_away,
        "GoalsAgainst": ga_home + ga_away,
        "GoalsDifference": gf_home + gf_away - ga_home - ga_away,
        "Wins": wins_home + wins_away,
        "Draws": draws_home + draws_away,
        "Losses": losses_home + losses_away
    })

In [10]:
team_stats = pd.DataFrame(team_stats_list)

In [18]:
standings = pd.read_csv('laliga26-table.csv')
standings

Unnamed: 0,Rk,Squad,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Last 5
0,1,Real Madrid,9,3.0,6.1,1.6,4.6,1.52,W W W
1,2,Athletic Club,9,3.0,5.4,1.9,3.5,1.17,W W W
2,3,Villarreal,7,2.33,5.8,1.7,4.0,1.34,W W D
3,4,Barcelona,7,2.33,5.8,3.9,1.9,0.64,W W D
4,5,Espanyol,7,2.33,4.1,3.5,0.6,0.19,W D W
5,6,Getafe,6,2.0,2.9,2.3,0.5,0.18,W W L
6,7,Elche,5,1.67,2.3,4.3,-2.0,-0.66,D D W
7,8,Betis,5,1.25,4.7,3.7,1.0,0.25,D W D L
8,9,Valencia,4,1.33,3.5,2.4,1.1,0.38,D L W
9,10,Rayo Vallecano,4,1.33,5.5,3.9,1.7,0.55,W L D


In [19]:
standings = standings.drop(columns=['Last 5'])

In [20]:
teams_dict

{'Girona': 1,
 'Villarreal': 2,
 'Mallorca': 3,
 'Alaves': 4,
 'Valencia': 5,
 'Celta': 6,
 'Ath Bilbao': 7,
 'Espanol': 8,
 'Elche': 9,
 'Real Madrid': 10,
 'Betis': 11,
 'Ath Madrid': 12,
 'Levante': 13,
 'Osasuna': 14,
 'Sociedad': 15,
 'Oviedo': 16,
 'Sevilla': 17,
 'Vallecano': 18,
 'Barcelona': 19,
 'Getafe': 20}

In [21]:
squad_dict = {
    'Real Madrid': 'Real Madrid',
    'Barcelona': 'Barcelona',
    'Atlético Madrid': 'Ath Madrid',
    'Sevilla': 'Sevilla',
    'Real Sociedad': 'Sociedad',
    'Real Betis': 'Betis',
    'Villarreal': 'Villarreal',
    'Athletic Club': 'Ath Bilbao',
    'Valencia': 'Valencia',
    'Celta Vigo': 'Celta',
    'Getafe': 'Getafe',
    'Osasuna': 'Osasuna',
    'Granada': 'Granada',
    'Levante': 'Levante',
    'Alavés': 'Alaves',
    'Elche': 'Elche',
    'Rayo Vallecano': 'Vallecano',
    'Espanyol': 'Espanol',
    'Mallorca': 'Mallorca',
    'Oviedo': 'Oviedo',
}

for index, row in standings.iterrows():
    if row['Squad'] in squad_dict:
        standings.at[index, 'Squad'] = squad_dict[row['Squad']]

In [22]:
standings

Unnamed: 0,Rk,Squad,Pts,Pts/MP,xG,xGA,xGD,xGD/90
0,1,Real Madrid,9,3.0,6.1,1.6,4.6,1.52
1,2,Ath Bilbao,9,3.0,5.4,1.9,3.5,1.17
2,3,Villarreal,7,2.33,5.8,1.7,4.0,1.34
3,4,Barcelona,7,2.33,5.8,3.9,1.9,0.64
4,5,Espanol,7,2.33,4.1,3.5,0.6,0.19
5,6,Getafe,6,2.0,2.9,2.3,0.5,0.18
6,7,Elche,5,1.67,2.3,4.3,-2.0,-0.66
7,8,Betis,5,1.25,4.7,3.7,1.0,0.25
8,9,Valencia,4,1.33,3.5,2.4,1.1,0.38
9,10,Vallecano,4,1.33,5.5,3.9,1.7,0.55


In [23]:
for idx, team in team_stats.iterrows():
    for i,row in standings.iterrows():
        if team['Team'] == row['Squad']:
            team_stats.at[idx, "Pts"] = row["Pts"]
            team_stats.at[idx, "Pts/MP"] = row["Pts/MP"]
            team_stats.at[idx, "xG"] = row["xG"]
            team_stats.at[idx, "xGA"] = row["xGA"]
            team_stats.at[idx, "xGD"] = row["xGD"]
            team_stats.at[idx, "xGD/90"] = row["xGD/90"]
            team_stats.at[idx, "Rank"] = row["Rk"]
            

team_stats["Pts"] = team_stats["Pts"].astype('Int64')
team_stats["Rank"] = team_stats["Rank"].astype('Int64')

In [24]:
with open('data/performances.json', 'r', encoding='utf-8') as f:
    sofascore_json = json.load(f)
teams_rates = {}

for team in sofascore_json["topTeams"]["avgRating"]:
    team_name = team['team']["name"]
    teams_rates[team_name] = round(team['statistics']['avgRating'],2)


teams_rates

{'Villarreal': 7.17,
 'Real Madrid': 7.16,
 'Barcelona': 6.96,
 'Elche': 6.89,
 'Osasuna': 6.87,
 'Sevilla': 6.86,
 'Espanyol': 6.85,
 'Real Betis': 6.83,
 'Celta Vigo': 6.81,
 'Deportivo Alavés': 6.79,
 'Valencia': 6.78,
 'Athletic Club': 6.78,
 'Getafe': 6.78,
 'Real Oviedo': 6.77,
 'Real Sociedad': 6.73,
 'Atlético Madrid': 6.73,
 'Rayo Vallecano': 6.72,
 'Levante UD': 6.58,
 'Mallorca': 6.56,
 'Girona FC': 6.47}

In [25]:
team_stats['Rating'] = team_stats['Team'].map(teams_rates)

In [26]:
team_stats

Unnamed: 0,Team,TeamID,GoalsForHome,GoalsAgainstHome,GoalsForAway,GoalsAgainstAway,WinsHome,DrawsHome,LossesHome,WinsAway,...,Draws,Losses,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Rank,Rating
0,Girona,1,1,3,0,5,0,0,1,0,...,0,2,0,0.0,2.7,7.0,-4.3,-1.44,20,
1,Villarreal,2,7,0,0,0,2,0,0,0,...,0,0,7,2.33,5.8,1.7,4.0,1.34,3,7.17
2,Mallorca,3,1,4,0,0,0,1,1,0,...,1,1,1,0.33,1.6,4.6,-3.1,-1.02,18,6.56
3,Alaves,4,2,1,0,1,1,0,0,0,...,0,1,4,1.33,3.3,3.8,-0.5,-0.17,11,
4,Valencia,5,1,1,0,1,0,1,0,0,...,1,1,4,1.33,3.5,2.4,1.1,0.38,9,6.78
5,Celta,6,1,3,1,1,0,1,1,0,...,2,1,3,0.75,3.0,3.6,-0.6,-0.15,14,
6,Ath Bilbao,7,4,2,0,0,2,0,0,0,...,0,0,9,3.0,5.4,1.9,3.5,1.17,2,
7,Espanol,8,2,1,2,2,1,0,0,0,...,1,0,7,2.33,4.1,3.5,0.6,0.19,5,
8,Elche,9,1,1,1,1,0,1,0,0,...,2,0,5,1.67,2.3,4.3,-2.0,-0.66,7,6.89
9,Real Madrid,10,1,0,3,0,1,0,0,1,...,0,0,9,3.0,6.1,1.6,4.6,1.52,1,7.16


In [27]:
team_stats.columns

Index(['Team', 'TeamID', 'GoalsForHome', 'GoalsAgainstHome', 'GoalsForAway',
       'GoalsAgainstAway', 'WinsHome', 'DrawsHome', 'LossesHome', 'WinsAway',
       'DrawsAway', 'LossesAway', 'GoalsFor', 'GoalsAgainst',
       'GoalsDifference', 'Wins', 'Draws', 'Losses', 'Pts', 'Pts/MP', 'xG',
       'xGA', 'xGD', 'xGD/90', 'Rank', 'Rating'],
      dtype='object')

In [28]:
team_stats.to_csv("data/laliga25-26.csv", index=False)