# Season tables reconstruction

In [1]:
import pandas as pd

df = pd.read_parquet('data/serie_a_matches_processed.parquet')

In [2]:
df.tail()

Unnamed: 0,date,time,comp,round,day,venue,result,gf,ga,opponent,...,pkatt,season,team,opp captain,opp sh,opp sot,opp dist,opp fk,opp pk,opp pkatt
1946,2020-09-20,18:00,Serie A,1,Sun,Home,D,1,1,Cagliari,...,0,2020,Sassuolo,João Pedro,8,4,15.9,1,0,0
1947,2020-09-20,15:00,Serie A,1,Sun,Home,W,4,1,Crotone,...,0,2020,Genoa,Alex Cordaz,13,3,18.8,0,0,0
1948,2020-09-20,12:30,Serie A,1,Sun,Home,L,0,2,Napoli,...,0,2020,Parma,Lorenzo Insigne,17,6,19.4,0,0,0
1949,2020-09-19,20:45,Serie A,1,Sat,Home,W,3,0,Roma,...,0,2020,Hellas Verona,Lorenzo Pellegrini,21,4,,0,0,0
1950,2020-09-19,18:00,Serie A,1,Sat,Home,W,1,0,Torino,...,0,2020,Fiorentina,Andrea Belotti,6,3,18.7,1,0,0


In [3]:
# get unique seasons
seasons = df['season'].unique()
seasons

array([2025, 2024, 2023, 2022, 2021, 2020])

In [4]:
df[df['season'] == 2022]['round'].unique().size

39

In 2022 round with value '0' was a relegation play-off between Spezia and Verona

In [None]:
!pip3 install great_tables

## Calculate points

In [5]:
from great_tables import GT

season = 2020

df_2020 = df[df['season'] == season]

# create empty dataframe for points
points_df = pd.DataFrame(columns=['team', 'points', 'w', 'd', 'l'])
points_df['team'] = pd.unique(df_2020[['team']].values.ravel('K'))
points_df[['points', 'w', 'd', 'l']] = 0
# points_df

## calculate points for each match
def calculate_points(row) -> tuple[int, int]:
  if row['result'] == 'L': #home team lost - 0 pts
    points_df.loc[points_df['team'] == row['opponent'], 'points'] += 3
    points_df.loc[points_df['team'] == row['opponent'], 'w'] += 1
    points_df.loc[points_df['team'] == row['team'], 'l'] += 1
  elif row['result'] == 'W': #home team won - 3 pts
    points_df.loc[points_df['team'] == row['team'], 'points'] += 3
    points_df.loc[points_df['team'] == row['team'], 'w'] += 1
    points_df.loc[points_df['team'] == row['opponent'], 'l'] += 1
  elif row['result'] == 'D': #draw - 1 pt each
    points_df.loc[points_df['team'] == row['team'], 'points'] += 1
    points_df.loc[points_df['team'] == row['opponent'], 'points'] += 1
    points_df.loc[points_df['team'] == row['team'], 'd'] += 1
    points_df.loc[points_df['team'] == row['opponent'], 'd'] += 1

# apply function to each row
df_2020.apply(calculate_points, axis=1)

points_df.sort_values(by='points', ascending=False, inplace=True)
points_df

Unnamed: 0,team,points,w,d,l
6,Internazionale,91,28,7,3
12,Milan,79,24,7,7
0,Atalanta,78,23,9,6
18,Juventus,78,23,9,6
1,Napoli,77,24,5,9
10,Lazio,68,21,5,12
17,Roma,62,18,8,12
2,Sassuolo,62,17,11,10
7,Sampdoria,52,15,7,16
11,Hellas Verona,45,11,12,15


### Additional rules to determine order in table:
1. points scored
2. points scored in direct games
3. goals difference in direct games
4. goals difference in a season
5. bigger goals difference
6. more goals scored
7. more goals conceeded
8. higher place in fair play standings


## Calculate goals scored, conceeded and goals difference

In [6]:
goals_df = pd.DataFrame(columns=['team', 'goals_scored', 'goals_conceded', 'goals_difference', 'xg_for', 'xg_against'])
goals_df['team'] = pd.unique(df_2020[['team']].values.ravel('K'))
goals_df[['goals_scored', 'goals_conceded', 'goals_difference']] = 0
goals_df[['xg_for', 'xg_against']] = 0.0

def calulate_goals(row):
	if pd.isna(row['xg']):
		row['xg'] = 0.0
	if pd.isna(row['xga']):
		row['xga'] = 0.0
	goals_df.loc[goals_df['team'] == row['team'], 'goals_scored'] += row['gf']
	goals_df.loc[goals_df['team'] == row['team'], 'goals_conceded'] += row['ga']
	goals_df.loc[goals_df['team'] == row['team'], 'xg_for'] += row['xg']
	goals_df.loc[goals_df['team'] == row['team'], 'xg_against'] += row['xga']
	
	goals_df.loc[goals_df['team'] == row['opponent'], 'goals_scored'] += row['ga']
	goals_df.loc[goals_df['team'] == row['opponent'], 'goals_conceded'] += row['gf']
	goals_df.loc[goals_df['team'] == row['opponent'], 'xg_for'] += row['xga']
	goals_df.loc[goals_df['team'] == row['opponent'], 'xg_against'] += row['xg']

	goals_df.loc[goals_df['team'] == row['team'], 'goals_difference'] = goals_df['goals_scored'] - goals_df['goals_conceded']

# goals_df['goals_difference'] = goals_df['goals_scored'] - goals_df['goals_conceded']

df_2020.apply(calulate_goals, axis=1)

goals_df.sort_values(by='goals_difference', ascending=False, inplace=True)
goals_df


Unnamed: 0,team,goals_scored,goals_conceded,goals_difference,xg_for,xg_against
6,Internazionale,89,35,54,74.2,40.0
1,Napoli,86,41,43,67.8,42.6
18,Juventus,77,38,39,74.3,39.2
0,Atalanta,90,47,38,74.5,41.0
12,Milan,74,41,33,70.7,47.1
17,Roma,68,58,13,65.4,45.7
2,Sassuolo,64,56,8,58.3,55.6
10,Lazio,61,55,4,58.3,47.2
7,Sampdoria,52,54,1,45.5,56.2
11,Hellas Verona,46,48,-2,45.6,49.3


## Join points and goals dataframes

In [7]:
def join_points_and_goals(df_points: pd.DataFrame, df_goals: pd.DataFrame) -> pd.DataFrame:
  joined_df = df_points.merge(df_goals, on='team')
  joined_df.sort_values(by=['points', 'goals_difference'], ascending=False, inplace=True)
  joined_df.reset_index(drop=True, inplace=True)
  joined_df.index += 1
  return joined_df


join_points_and_goals(points_df, goals_df)

Unnamed: 0,team,points,w,d,l,goals_scored,goals_conceded,goals_difference,xg_for,xg_against
1,Internazionale,91,28,7,3,89,35,54,74.2,40.0
2,Milan,79,24,7,7,74,41,33,70.7,47.1
3,Juventus,78,23,9,6,77,38,39,74.3,39.2
4,Atalanta,78,23,9,6,90,47,38,74.5,41.0
5,Napoli,77,24,5,9,86,41,43,67.8,42.6
6,Lazio,68,21,5,12,61,55,4,58.3,47.2
7,Roma,62,18,8,12,68,58,13,65.4,45.7
8,Sassuolo,62,17,11,10,64,56,8,58.3,55.6
9,Sampdoria,52,15,7,16,52,54,1,45.5,56.2
10,Hellas Verona,45,11,12,15,46,48,-2,45.6,49.3
