In [158]:
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [159]:
with sqlite3.connect("../laliga.sqlite") as conn:
    df = pd.read_sql("SELECT * FROM Matches", con = conn)

df = df.loc[-df.score.isnull()]

In [160]:
def parse_score(score_as_string):
        if score_as_string is None: 
            return None
        home_goals, away_goals = score_as_string.split(":")
        home_goals = int(home_goals)
        away_goals = int(away_goals)
        if home_goals > away_goals: 
            return "1"
        if away_goals > home_goals: 
            return "2" 
        else: 
            return "X"

In [161]:
df['result'] = df['score'].apply(parse_score)
df[['home_goals','away_goals']] = df['score'].str.split(":",expand=True)
df = df.astype({"home_goals": int, "away_goals": int})
df['Difference_goals'] = abs(df['home_goals'] - df['away_goals'])


In [162]:
index_match = []
for i in range(len(df)):
    index_match.append(i)

df["id_match"] = index_match

In [164]:
def identity_col(col):
    return col

def id_int(col):
    return col.astype(int)

In [165]:
def result(row): 
    if row["W"] == 1 : 
        return "W"
    if row["L"] == 1 :
        return "L"
    if row["T"] == 1: 
        return "T"
    else: 
        return "NaN" 


In [166]:
df4 = df[['season','division','home_team','result','matchday','home_goals','away_team','away_goals','id_match']]
MatchdayStanding = df4.groupby(['season','division','matchday','home_team']).agg(
    GF = ('home_goals', sum),
    Away_team = ('away_team',identity_col),
    index_match = ('id_match', identity_col),
    GA = ('away_goals', sum),
    W = ('result', lambda x: x.eq('1').sum()),
    L = ('result', lambda x: x.eq('2').sum()),
    T = ('result', lambda x: x.eq('X').sum()),
).reset_index()
MatchdayStanding.rename({'home_team':'team'}, axis=1, inplace=True)
MatchdayStanding.sort_values(['season','division','team','matchday'])

df4_1 = df[["season","division","away_team","result",'matchday',"home_goals",'home_team',"away_goals",'id_match']]
df4_1 = df4_1.groupby(["season",'division','matchday',"away_team"], as_index=False).agg(
    GF = ("away_goals", sum),
    GA = ("home_goals", sum),
    index_match = ('id_match', identity_col),
    Away_team = ('home_team',identity_col),
    W_away = ('result', lambda x: x.eq('2').sum()),
    L_away = ('result', lambda x: x.eq('1').sum()),
    T_away = ('result', lambda x: x.eq('X').sum())
)
df4_1.sort_values(['season','division','away_team','matchday'])
df4_1.rename({'away_team': 'team', 'W_away': 'W', 'L_away': 'L', 'T_away': 'T'}, axis=1, inplace=True) 

MatchdayStanding = MatchdayStanding.append(df4_1).sort_values(['season','division','matchday'])
MatchdayStanding.reset_index()
MatchdayStanding.sort_values(['season','division','matchday'])
MatchdayStanding.reset_index()
MatchdayStanding['result'] = MatchdayStanding.apply(lambda row: result(row), axis=1)


MatchdayStanding['GD'] = MatchdayStanding['GF'] - MatchdayStanding['GA']

# Cummulative results
MatchdayStanding['GF'] = MatchdayStanding.groupby(["season",'division','team'], as_index=False)['GF'].cumsum()
MatchdayStanding['GA'] = MatchdayStanding.groupby(["season",'division','team'], as_index=False)['GA'].cumsum()
MatchdayStanding['W'] = MatchdayStanding.groupby(["season",'division','team'], as_index=False)['W'].cumsum()
MatchdayStanding['L'] = MatchdayStanding.groupby(["season",'division','team'], as_index=False)['L'].cumsum()
MatchdayStanding['T'] = MatchdayStanding.groupby(["season",'division','team'], as_index=False)['T'].cumsum()


MatchdayStanding['GD_cum'] = MatchdayStanding['GF'] - MatchdayStanding['GA']
MatchdayStanding['Pts'] = 3*MatchdayStanding['W'] + MatchdayStanding['T']

# Raking by matchday
team_count = MatchdayStanding.groupby(['season','division','matchday'])['team'].count().tolist()
rank_column = []
for i in team_count :
    j = list(range(1,i+1,1))
    rank_column += j

MatchdayStanding = MatchdayStanding.sort_values(['season','division','matchday', 'Pts', 'GD', 'GF'], ascending=[True,True, True, False, False, False])
MatchdayStanding['rank'] = rank_column  

In [167]:
MatchdayStanding["GD_home"] = MatchdayStanding["GD_cum"]
MatchdayStanding["GD_away"] = - MatchdayStanding["GD_cum"]
MatchdayStanding = MatchdayStanding.drop(['GA','GF','GD'], axis=1)

In [168]:
MatchdayStanding['GD_home'] = MatchdayStanding.groupby(['season','division','team']).GD_home.shift()

In [169]:
MatchdayStanding['GD_away'] = MatchdayStanding.groupby(['season','division','team']).GD_away.shift()


Unnamed: 0,season,division,matchday,team,Away_team,index_match,W,L,T,result,GD_cum,Pts,rank,GD_home,GD_away
4,1928-1929,1,1,Real Madrid,Catalunya,2,1,0,0,W,5,3,1,,
2,1928-1929,1,1,Barcelona,Racing,4,1,0,0,W,2,3,2,,
2,1928-1929,1,1,Espanyol,Real Unión,1,1,0,0,W,1,3,3,,
1,1928-1929,1,1,Athletic Madrid,Arenas Club,0,1,0,0,W,1,3,4,,
1,1928-1929,1,1,Donostia,Athletic,3,0,0,1,T,0,1,5,,
0,1928-1929,1,1,Athletic,Donostia,3,0,0,1,T,0,1,6,,
0,1928-1929,1,1,Arenas Club,Athletic Madrid,0,0,1,0,L,-1,0,7,,
4,1928-1929,1,1,Real Unión,Espanyol,1,0,1,0,L,-1,0,8,,
3,1928-1929,1,1,Racing,Barcelona,4,0,1,0,L,-2,0,9,,
3,1928-1929,1,1,Catalunya,Real Madrid,2,0,1,0,L,-5,0,10,,


In [170]:
MatchdayStanding['rank'] = MatchdayStanding.groupby(['season','division','team'])['rank'].shift()


In [171]:
MatchdayStanding['W'] = MatchdayStanding.groupby(['season','division','team'])['W'].shift()
MatchdayStanding['T'] = MatchdayStanding.groupby(['season','division','team'])['T'].shift()
MatchdayStanding['L'] = MatchdayStanding.groupby(['season','division','team'])['L'].shift()

In [172]:
MatchdayStanding = MatchdayStanding.drop(['GD_cum','Pts'], axis=1)

In [173]:
MatchdayStanding

Unnamed: 0,season,division,matchday,team,Away_team,index_match,W,L,T,result,rank,GD_home,GD_away
4,1928-1929,1,1,Real Madrid,Catalunya,2,,,,W,,,
2,1928-1929,1,1,Barcelona,Racing,4,,,,W,,,
2,1928-1929,1,1,Espanyol,Real Unión,1,,,,W,,,
1,1928-1929,1,1,Athletic Madrid,Arenas Club,0,,,,W,,,
1,1928-1929,1,1,Donostia,Athletic,3,,,,T,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,2021-2022,2,3,Real Oviedo,CD Tenerife,48234,0.0,1.0,1.0,T,17.0,-1.0,1.0
47992,2021-2022,2,3,CD Lugo,Real Valladolid,48237,0.0,0.0,2.0,L,11.0,0.0,0.0
47991,2021-2022,2,3,CD Leganés,UD Ibiza,48233,0.0,1.0,1.0,L,15.0,-1.0,1.0
47990,2021-2022,2,3,Burgos CF,SD Eibar,48239,0.0,1.0,1.0,L,16.0,-1.0,1.0


In [175]:
MatchdayStanding = MatchdayStanding.sort_values(['index_match'], ascending=[True])
MatchdayStanding_1 = MatchdayStanding.iloc[::2]
MatchdayStanding_2 = MatchdayStanding.iloc[1::2]


In [176]:
MatchdayStanding_1.head(10)

Unnamed: 0,season,division,matchday,team,Away_team,index_match,W,L,T,result,rank,GD_home,GD_away
1,1928-1929,1,1,Athletic Madrid,Arenas Club,0,,,,W,,,
2,1928-1929,1,1,Espanyol,Real Unión,1,,,,W,,,
4,1928-1929,1,1,Real Madrid,Catalunya,2,,,,W,,,
1,1928-1929,1,1,Donostia,Athletic,3,,,,T,,,
2,1928-1929,1,1,Barcelona,Racing,4,,,,W,,,
9,1928-1929,1,2,Real Madrid,Barcelona,5,1.0,0.0,0.0,W,1.0,5.0,-5.0
5,1928-1929,1,2,Athletic,Espanyol,6,0.0,0.0,1.0,W,6.0,0.0,0.0
6,1928-1929,1,2,Athletic Madrid,Donostia,7,1.0,0.0,0.0,L,4.0,1.0,-1.0
8,1928-1929,1,2,Racing,Real Unión,8,0.0,1.0,0.0,L,9.0,-2.0,2.0
8,1928-1929,1,2,Catalunya,Arenas Club,9,0.0,1.0,0.0,W,10.0,-5.0,5.0


In [177]:
MatchdayStanding_2.head(10)

Unnamed: 0,season,division,matchday,team,Away_team,index_match,W,L,T,result,rank,GD_home,GD_away
0,1928-1929,1,1,Arenas Club,Athletic Madrid,0,,,,L,,,
4,1928-1929,1,1,Real Unión,Espanyol,1,,,,L,,,
3,1928-1929,1,1,Catalunya,Real Madrid,2,,,,L,,,
0,1928-1929,1,1,Athletic,Donostia,3,,,,T,,,
3,1928-1929,1,1,Racing,Barcelona,4,,,,L,,,
7,1928-1929,1,2,Barcelona,Real Madrid,5,1.0,0.0,0.0,L,2.0,2.0,-2.0
7,1928-1929,1,2,Espanyol,Athletic,6,1.0,0.0,0.0,L,3.0,1.0,-1.0
6,1928-1929,1,2,Donostia,Athletic Madrid,7,0.0,0.0,1.0,W,5.0,0.0,0.0
9,1928-1929,1,2,Real Unión,Racing,8,0.0,1.0,0.0,W,8.0,-1.0,1.0
5,1928-1929,1,2,Arenas Club,Catalunya,9,0.0,1.0,0.0,L,7.0,-1.0,1.0
