In [30]:
import numpy as np
import pandas as pd
import json

In [31]:
# Load only the team, goal and result columns that the later cells use.
match_columns = ['HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR']
data = pd.read_csv('laliga22.csv', usecols=match_columns)

In [32]:
# Summary statistics of the numeric columns (the two goal counts).
data.describe()

Unnamed: 0,FTHG,FTAG
count,380.0,380.0
mean,1.421053,1.081579
std,1.250469,1.12124
min,0.0,0.0
25%,0.75,0.0
50%,1.0,1.0
75%,2.0,2.0
max,6.0,6.0


In [33]:
# Preview the first rows of the loaded match table.
data.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR
0,Valencia,Getafe,1,0,H
1,Cadiz,Levante,1,1,D
2,Mallorca,Betis,1,1,D
3,Alaves,Real Madrid,1,4,A
4,Osasuna,Espanol,0,0,D


In [34]:
# Pool the home and away columns and keep each distinct name once,
# in order of first appearance.
club_names = pd.concat([data['HomeTeam'], data['AwayTeam']]).unique()
teams = pd.DataFrame(club_names, columns=['Team'])

# Team name -> numeric ID. IDs start at 1; 0 is used further down to mark a draw.
teams_dict = {club: position for position, club in enumerate(teams['Team'], start=1)}
teams

Unnamed: 0,Team
0,Valencia
1,Cadiz
2,Mallorca
3,Alaves
4,Osasuna
5,Celta
6,Barcelona
7,Sevilla
8,Villarreal
9,Elche


In [35]:
# Replace team names with their numeric IDs from teams_dict.
# Values that are not keys of teams_dict are passed through unchanged, so
# re-running this cell on already-converted IDs is a no-op instead of
# turning both columns into NaN (which a plain dict .map would do).
for side in ('HomeTeam', 'AwayTeam'):
    data[side] = data[side].map(lambda club: teams_dict.get(club, club))

In [36]:
# Turn the result code in FTR into the ID of the winning team:
#   'H' -> the home team's ID, 'A' -> the away team's ID, 'D' -> 0.
# Any other value (including one that was already converted) is left as is.
# The masks are computed once up front, so the whole column is handled without
# iterating row by row.
result_code = data['FTR']
home_won = result_code == 'H'
away_won = result_code == 'A'
drawn = result_code == 'D'

# Work on an object-dtype copy so integer IDs can coexist with any
# leftover labels, then swap the finished column in with one assignment.
winner = result_code.astype(object)
winner.loc[home_won] = data.loc[home_won, 'HomeTeam']
winner.loc[away_won] = data.loc[away_won, 'AwayTeam']
winner.loc[drawn] = 0
data['FTR'] = winner

In [37]:
# Give the goal and result columns descriptive names.
readable_names = {"FTHG": "HomeGoals", "FTAG": "AwayGoals", "FTR": "Winner"}
data = data.rename(columns=readable_names)

In [38]:
def _side_record(matches, goals_for_col, goals_against_col, team_id):
    """Summarise one team's matches played on a single side (home or away).

    Parameters
    ----------
    matches : pandas.DataFrame
        Only the rows in which the team played on the side being summarised.
        Must contain the two goal columns and a 'Winner' column.
    goals_for_col, goals_against_col : str
        Names of the columns holding the goals scored / conceded by the team.
    team_id : int
        ID of the team. A 'Winner' equal to it counts as a win, a 'Winner'
        of 0 as a draw, and anything else as a loss.

    Returns
    -------
    tuple
        (goals_for, goals_against, wins, draws, losses)
    """
    wins = int((matches['Winner'] == team_id).sum())
    draws = int((matches['Winner'] == 0).sum())
    # Everything that is neither a win nor a draw is a loss.
    losses = len(matches) - wins - draws
    return (
        matches[goals_for_col].sum(),
        matches[goals_against_col].sum(),
        wins,
        draws,
        losses,
    )


team_stats_list = []

for team_name in teams['Team']:
    team_id = teams_dict[team_name]  # Get TeamID from the mapping

    is_home = data['HomeTeam'] == team_id
    # Home takes precedence: a row counted as a home match is never also
    # counted as an away match.
    is_away = (data['AwayTeam'] == team_id) & ~is_home

    gf_home, ga_home, wins_home, draws_home, losses_home = _side_record(
        data[is_home], 'HomeGoals', 'AwayGoals', team_id
    )
    gf_away, ga_away, wins_away, draws_away, losses_away = _side_record(
        data[is_away], 'AwayGoals', 'HomeGoals', team_id
    )

    # Append team stats as a dictionary
    team_stats_list.append({
        "Team": team_name,
        "TeamID": team_id,
        "GoalsForHome": gf_home,
        "GoalsAgainstHome": ga_home,
        "GoalsForAway": gf_away,
        "GoalsAgainstAway": ga_away,
        "WinsHome": wins_home,
        "DrawsHome": draws_home,
        "LossesHome": losses_home,
        "WinsAway": wins_away,
        "DrawsAway": draws_away,
        "LossesAway": losses_away,
        "GoalsFor": gf_home + gf_away,
        "GoalsAgainst": ga_home + ga_away,
        "GoalsDifference": gf_home + gf_away - ga_home - ga_away,
        "Wins": wins_home + wins_away,
        "Draws": draws_home + draws_away,
        "Losses": losses_home + losses_away
    })

In [39]:
# One row per team; the dictionary keys become the columns.
team_stats = pd.DataFrame(data=team_stats_list)

In [40]:
# Load the league table (one row per squad) and display it.
standings_path = 'laliga22-table.csv'
standings = pd.read_csv(standings_path)
standings

Unnamed: 0,Rk,Squad,Pts,Pts/MP,xG,xGA,xGD,xGD/90
0,1,Real Madrid,86,2.26,71.7,43.9,27.8,0.73
1,2,Barcelona,73,1.92,63.4,34.4,29.0,0.76
2,3,Atlético Madrid,71,1.87,54.4,33.4,21.1,0.56
3,4,Sevilla,70,1.84,42.4,36.9,5.4,0.14
4,5,Betis,65,1.71,53.5,43.4,10.1,0.27
5,6,Real Sociedad,62,1.63,51.5,38.7,12.8,0.34
6,7,Villarreal,59,1.55,64.2,44.8,19.4,0.51
7,8,Athletic Club,55,1.45,49.6,38.4,11.3,0.3
8,9,Valencia,48,1.26,39.0,47.7,-8.7,-0.23
9,10,Osasuna,47,1.24,36.7,46.3,-9.6,-0.25


In [41]:
# Display the team-name -> ID mapping.
teams_dict

{'Valencia': 1,
 'Cadiz': 2,
 'Mallorca': 3,
 'Alaves': 4,
 'Osasuna': 5,
 'Celta': 6,
 'Barcelona': 7,
 'Sevilla': 8,
 'Villarreal': 9,
 'Elche': 10,
 'Betis': 11,
 'Espanol': 12,
 'Granada': 13,
 'Ath Bilbao': 14,
 'Sociedad': 15,
 'Ath Madrid': 16,
 'Levante': 17,
 'Getafe': 18,
 'Vallecano': 19,
 'Real Madrid': 20}

In [42]:
# Squad name as written in the standings table -> team name as used in the
# match data (the keys of teams_dict).
squad_dict = {
    'Real Madrid': 'Real Madrid',
    'Barcelona': 'Barcelona',
    'Atlético Madrid': 'Ath Madrid',
    'Sevilla': 'Sevilla',
    'Real Sociedad': 'Sociedad',
    'Real Betis': 'Betis',
    'Villarreal': 'Villarreal',
    'Athletic Club': 'Ath Bilbao',
    'Valencia': 'Valencia',
    'Celta Vigo': 'Celta',
    'Getafe': 'Getafe',
    'Osasuna': 'Osasuna',
    'Granada': 'Granada',
    'Levante': 'Levante',
    'Alavés': 'Alaves',
    'Elche': 'Elche',
    'Rayo Vallecano': 'Vallecano',
    'Espanyol': 'Espanol',
    'Mallorca': 'Mallorca',
    'Cádiz': 'Cadiz',
}

# Translate the squad names that have an entry; leave every other name untouched.
standings['Squad'] = standings['Squad'].map(
    lambda squad: squad_dict[squad] if squad in squad_dict else squad
)

In [43]:
# Display the standings again to check the translated Squad names.
standings

Unnamed: 0,Rk,Squad,Pts,Pts/MP,xG,xGA,xGD,xGD/90
0,1,Real Madrid,86,2.26,71.7,43.9,27.8,0.73
1,2,Barcelona,73,1.92,63.4,34.4,29.0,0.76
2,3,Ath Madrid,71,1.87,54.4,33.4,21.1,0.56
3,4,Sevilla,70,1.84,42.4,36.9,5.4,0.14
4,5,Betis,65,1.71,53.5,43.4,10.1,0.27
5,6,Sociedad,62,1.63,51.5,38.7,12.8,0.34
6,7,Villarreal,59,1.55,64.2,44.8,19.4,0.51
7,8,Ath Bilbao,55,1.45,49.6,38.4,11.3,0.3
8,9,Valencia,48,1.26,39.0,47.7,-8.7,-0.23
9,10,Osasuna,47,1.24,36.7,46.3,-9.6,-0.25


In [44]:
# Copy the league-table figures onto team_stats, matching Team against Squad.
# Keys are the standings columns, values the names they take in team_stats;
# the order below is the order in which the columns are added.
standings_columns = {
    "Pts": "Pts",
    "Pts/MP": "Pts/MP",
    "xG": "xG",
    "xGA": "xGA",
    "xGD": "xGD",
    "xGD/90": "xGD/90",
    "Rk": "Rank",
}

# Index the table by squad for a direct lookup. If a squad is listed more than
# once, the last row wins, which is what a row-by-row overwrite would produce.
standings_by_squad = (
    standings
    .drop_duplicates(subset="Squad", keep="last")
    .set_index("Squad")
)

# Teams without a standings row get a missing value; the columns always exist,
# so the dtype conversion below cannot fail with a KeyError.
for source_col, target_col in standings_columns.items():
    team_stats[target_col] = team_stats["Team"].map(standings_by_squad[source_col])

# Nullable integers keep whole numbers whole even when some teams are unmatched.
team_stats["Pts"] = team_stats["Pts"].astype('Int64')
team_stats["Rank"] = team_stats["Rank"].astype('Int64')

In [45]:
with open('data/performances2022.json', 'r', encoding='utf-8') as f:
    sofascore_json = json.load(f)

# Team name -> average rating, rounded to two decimals.
teams_rates = {
    entry['team']["name"]: round(entry['statistics']['avgRating'], 2)
    for entry in sofascore_json["topTeams"]["avgRating"]
}

teams_rates

{'Real Madrid': 7.1,
 'Barcelona': 6.99,
 'Villarreal': 6.92,
 'Betis': 6.9,
 'Ath Madrid': 6.89,
 'Sevilla': 6.89,
 'Celta': 6.84,
 'Sociedad': 6.83,
 'Ath Bilbao': 6.82,
 'Vallecano': 6.78,
 'Osasuna': 6.78,
 'Getafe': 6.78,
 'Cadiz': 6.77,
 'Elche': 6.76,
 'Espanol': 6.76,
 'Valencia': 6.75,
 'Granada': 6.74,
 'Mallorca': 6.72,
 'Alaves': 6.7,
 'Levante': 6.69}

In [46]:
# Look up each team's average rating by name; teams without an entry get NaN.
rating_by_team = team_stats['Team'].map(teams_rates)
team_stats['Rating'] = rating_by_team

In [47]:
# Display the combined per-team table.
team_stats

Unnamed: 0,Team,TeamID,GoalsForHome,GoalsAgainstHome,GoalsForAway,GoalsAgainstAway,WinsHome,DrawsHome,LossesHome,WinsAway,...,Draws,Losses,Pts,Pts/MP,xG,xGA,xGD,xGD/90,Rank,Rating
0,Valencia,1,26,24,22,29,6,8,5,5,...,15,12,48,1.26,39.0,47.7,-8.7,-0.23,9,6.75
1,Cadiz,2,19,24,16,27,3,9,7,5,...,15,15,39,1.03,39.5,48.3,-8.7,-0.23,17,6.77
2,Mallorca,3,20,25,16,38,7,6,6,3,...,9,19,39,1.03,38.8,49.3,-10.5,-0.28,16,6.72
3,Alaves,4,17,21,14,44,7,4,8,1,...,7,23,31,0.82,37.3,57.0,-19.7,-0.52,20,6.7
4,Osasuna,5,17,26,20,25,5,7,7,7,...,11,15,47,1.24,36.7,46.3,-9.6,-0.25,10,6.78
5,Celta,6,26,23,17,20,7,4,8,5,...,10,16,46,1.21,43.1,45.7,-2.6,-0.07,11,6.84
6,Barcelona,7,37,19,31,19,12,2,5,9,...,10,7,73,1.92,63.4,34.4,29.0,0.76,2,6.99
7,Sevilla,8,36,17,17,13,12,6,1,6,...,16,4,70,1.84,42.4,36.9,5.4,0.14,4,6.89
8,Villarreal,9,40,18,23,19,10,6,3,6,...,11,11,59,1.55,64.2,44.8,19.4,0.51,7,6.92
9,Elche,10,24,21,16,31,7,6,6,4,...,9,18,42,1.11,33.7,59.1,-25.4,-0.67,13,6.76


In [48]:
# List every column name, since the table preview elides the middle columns.
team_stats.columns

Index(['Team', 'TeamID', 'GoalsForHome', 'GoalsAgainstHome', 'GoalsForAway',
       'GoalsAgainstAway', 'WinsHome', 'DrawsHome', 'LossesHome', 'WinsAway',
       'DrawsAway', 'LossesAway', 'GoalsFor', 'GoalsAgainst',
       'GoalsDifference', 'Wins', 'Draws', 'Losses', 'Pts', 'Pts/MP', 'xG',
       'xGA', 'xGD', 'xGD/90', 'Rank', 'Rating'],
      dtype='object')

In [51]:
# Write the per-team table to disk without the row index.
output_csv = "data/laliga21-22.csv"
team_stats.to_csv(output_csv, index=False)