Take the input goals file (one row per goal) and create a table of results, one row per tie.

In [1]:
import pandas as pd

In [2]:
# Load the tab-separated goals sheet. Every column is kept as text because
# `minute` can hold non-numeric markers such as 'pk'.
goles = pd.read_csv(
    'raw/goles - liga de campeones - Sheet1.tsv', sep='\t', dtype=str)

In [3]:
# Preview the raw rows: season/round/tie/game appear only on the first row
# shown and are blank on the rows beneath it.
goles.head()

Unnamed: 0,season,round,tie,game,minute,away,extra,shootout,note
0,2017.0,first,city-monaco,1.0,26,,,,
1,,,,,32,a,,,
2,,,,,40,a,,,
3,,,,,58,,,,
4,,,,,61,a,,,


In [4]:
# season/round/tie/game are left blank on the rows that follow the first one
# of a group (see the preview above), so carry the last seen value downward.
#
# The result is assigned back instead of calling
# `goles[col].fillna(method='pad', inplace=True)`: that form is chained
# assignment, which does not update `goles` under pandas Copy-on-Write, and
# the `method=` argument of fillna is deprecated in favour of ffill().
fill_cols = ['season', 'round', 'tie', 'game']
goles[fill_cols] = goles[fill_cols].ffill()

In [5]:
# A tie is written as "<t1>-<t2>"; split it once and keep both halves.
tie_sides = goles.tie.map(lambda tie: tie.split('-'))
goles['t1'] = tie_sides.map(lambda sides: sides[0])
goles['t2'] = tie_sides.map(lambda sides: sides[1])

In [6]:
# Check that season/round/tie/game are now populated on every row and that
# the t1/t2 columns were added.
goles.head()

Unnamed: 0,season,round,tie,game,minute,away,extra,shootout,note,t1,t2
0,2017,first,city-monaco,1,26,,,,,city,monaco
1,2017,first,city-monaco,1,32,a,,,,city,monaco
2,2017,first,city-monaco,1,40,a,,,,city,monaco
3,2017,first,city-monaco,1,58,,,,,city,monaco
4,2017,first,city-monaco,1,61,a,,,,city,monaco


In [7]:
with open('raw/teamcrosswalk.psv') as readfile:
    # Drop the header row ([1:]) and ignore blank lines. A trailing newline
    # would otherwise leave a [''] entry that raises IndexError when the
    # second field is read below.
    lines = [
        s.split('|')
        for s in readfile.read().split('\n')[1:]
        if s.strip()
    ]

# Map the short team key (second field) to its display name (first field).
lookup = {fields[1]: fields[0] for fields in lines}

In [8]:
def sum_goals(part):
    '''
    Count goals and away goals for one part of a tie.

    A "part" is the set of goal rows from either regular time or extra time.
    Game '1' rows without an `away` marker and game '2' rows with one are
    credited to t1; the remaining combinations are credited to t2.

    Returns a tuple (t1 goals, t2 goals, t1 away goals, t2 away goals).
    '''
    first_leg = part.game == '1'
    second_leg = part.game == '2'
    by_visitor = part.away.notnull()
    by_host = part.away.isnull()

    def count(mask):
        return len(part[mask])

    t1_home = count(first_leg & by_host)
    t2_away = count(first_leg & by_visitor)
    t2_home = count(second_leg & by_host)
    t1_away = count(second_leg & by_visitor)

    return t1_home + t1_away, t2_away + t2_home, t1_away, t2_away

In [9]:
def get_tie_result(goals):
    '''
    Decide who won a tie and build a one-line summary of it.

    `goals` holds the rows of a single tie. The columns read are t1, t2,
    game, minute, away and extra. A row whose minute is 'pk' records the
    shootout rather than a goal; rows with a non-null `extra` are extra-time
    goals.

    Returns a pd.Series indexed by:
      winner - t1 or t2, whichever advanced
      pk     - True when the tie was decided by the shootout row
      agr    - True when the tie was decided on away goals
      aet    - True when regulation goals and away goals were both level
      result - summary string built from the `lookup` display names
    '''
    t1 = goals.t1.unique()[0]
    t2 = goals.t2.unique()[0]

    # Rows that are real goals: a minute is present and it is not the
    # shootout marker.
    is_goal = (goals.minute != 'pk') & (goals.minute.notnull())

    reg = goals[is_goal & goals.extra.isnull()]
    et = goals[is_goal & goals.extra.notnull()]
    pkg = goals[goals.minute == 'pk']

    # goals and away goals in regulation
    t1g, t2g, t1ag, t2ag = sum_goals(reg)

    # goals and away goals in extra time
    t1etg, t2etg, t1aetg, t2aetg = sum_goals(et)

    # pk shootout result: t1 is taken as the shootout winner when a 'pk' row
    # carries the 'a' marker.
    t1pkwin = len(pkg[pkg.away == 'a'])

    # Totals over regulation plus extra time. The winner must be picked from
    # the same totals that select the branch; comparing regulation-only
    # counts would hand every tie settled in extra time to t2.
    t1_total = t1g + t1etg
    t2_total = t2g + t2etg
    t1_away_total = t1ag + t1aetg
    t2_away_total = t2ag + t2aetg

    pk = agr = False
    if t1_total != t2_total:
        # outright win
        winner = t1 if t1_total > t2_total else t2
    elif t1_away_total != t2_away_total:
        # away goals win
        winner = t1 if t1_away_total > t2_away_total else t2
        agr = True
    else:
        # pk shootout
        # NOTE(review): if no 'pk' row exists this falls through to t2;
        # confirm the input always records a shootout for fully level ties.
        winner = t1 if t1pkwin else t2
        pk = True

    # if goals in regulation are tied
    # and away goals in regulation are tied
    # this went to extra time
    aet = (t1g == t2g) and (t1ag == t2ag)

    # a string summarizing the result
    result = "{} ({}-{}{}{}{}".format(
        lookup[t1],
        t1_total,
        t2_total,
        " aet) " if aet else ") ",
        lookup[t2],
        ", {} won on penalties".format(lookup[winner]) if pk else (
            ", {} won on away goals".format(lookup[winner]) if agr else ""
        )
    )

    return pd.Series(
        [winner, pk, agr, aet, result],
        index=['winner', 'pk', 'agr', 'aet', 'result']
    )

In [10]:
# Each (season, round, tie) group holds the rows of one tie; reduce every
# group to a single result row.
ties = goles.groupby(['season', 'round', 'tie'])
results = ties.apply(get_tie_result)

In [11]:
# Show the computed table: one row per (season, round, tie) with the winner,
# the pk/agr/aet flags and the summary string.
results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,winner,pk,agr,aet,result
season,round,tie,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2008,first,arsenal-milan,arsenal,False,False,False,Arsenal (2-0) AC Milan
2008,first,celtic-barcelona,barcelona,False,False,False,Celtic (2-4) Barcelona
2008,first,fenerbahce-sevilla,fenerbahce,True,False,True,"Fenerbahce (5-5 aet) Sevilla, Fenerbahce won o..."
2008,first,liverpool-inter,liverpool,False,False,False,Liverpool (3-0) Inter
2008,first,lyon-united,united,False,False,False,Lyon (1-2) Manchester Utd
2008,first,olympiacos-chelsea,chelsea,False,False,False,Olympiakos Piraeus (0-3) Chelsea
2008,first,roma-madrid,roma,False,False,False,AS Roma (4-2) Real Madrid
2008,first,schalke-porto,schalke,True,False,True,"Schalke (1-1 aet) FC Porto, Schalke won on pen..."
2008,qtr,arsenal-liverpool,liverpool,False,False,False,Arsenal (3-5) Liverpool
2008,qtr,fenerbahce-chelsea,chelsea,False,False,False,Fenerbahce (2-3) Chelsea


In [12]:
# Save the results; the (season, round, tie) index is written out as the
# leading columns of the file.
results.to_csv('processed/results.csv', index=True)