In [1]:
import re
import numpy as np
import pandas as pd

In [2]:
def get_string(data):
    data = str(data*100)
    data = data[0:4]
    return data

def get_long_name(string,team_ref):
    for short in team_ref['short']:
        row = team_ref[team_ref['short'] == short]
        if string == short:
            string = row.iloc[0]['team']
    return string

def get_shortest_name(string,team_ref):
    for team in team_ref['team']:
        row = team_ref[team_ref['team'] == team]
        if string == team:
            string = str(row.iloc[0]['short'])
    return string

def get_schedule(data):
    db = data.copy()
    db = db[db['s'] <= 1]
    #db = db.tail(4)
    db = db[['game','home','away']]
    db = index_reset(db)
    db = db.sort_values(by=['game'])
    return db

def fix_db_na(data):
    db = data.copy()
    if db['team'].isnull().values.any():
        for row in range(db.shape[0]):
            if pd.isna(db.iloc[row]['team']) == True:
                print(True)
                db.iloc[row]['team'] = get_long_name(db.iloc[row]['team'],data)
    return db

def index_reset(data):
    data = data.reset_index()
    data.pop('index')
    return data

def get_team_results(data,query):
    db = data[data['home'] == query]
    da = data[data['away'] == query]
    db = pd.concat([db,da])
    db = index_reset(db)
    return db

def get_team_brief(data,query,df):
    db = get_team_results(data,query)
    cols = ['game','s','csh','csa','combined','venue','links']
    for col in cols:
        db.pop(col)
    db = db.sort_values(by=['m','d'])
    db = index_reset(db)
    db['summary'] = '0'
    for i in range(0,db.shape[0]):
        if db.iloc[i]['home'] == query:
            x = df[df['team'] == db.iloc[i]['away']]#.reset_index()
            opponent = x.iloc[0]['short']
            outcome = db.iloc[i]['hr'] + ' A'
        else:
            x = df[df['team'] == db.iloc[i]['home']]#.reset_index() NOT SURE WHY THIS BROKE SUDDENLY
            opponent = x.iloc[0]['short']
            outcome = db.iloc[i]['ar'] + ' A'
        score = str(db.iloc[i]['hs']) + ' - ' + str(db.iloc[i]['as'])
        db.loc[i,'summary'] = outcome + ' ' + score +  ' ' + opponent
    db['team'] = query
    return db

def get_results_brief(data,dc):
    db = pd.DataFrame()
    for team in dc['team']:
        df = get_team_brief(data,team,dc)
        db = pd.concat([db,df])
    db = index_reset(db)
    return db

def get_club_statistics(data,query):
    df = data
    a = []
    cols = df.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                vals = [1,3,3,1,0,0,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
            if df.iloc[row]['hr'] == 'L':
                vals = [1,0,3,0,1,0,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
            if df.iloc[row]['hr'] == 'D':
                vals = [1,1,3,0,0,1,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                vals = [1,3,3,1,0,0,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
            if df.iloc[row]['ar'] == 'L':
                vals = [1,0,3,0,1,0,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
            if df.iloc[row]['ar'] == 'D':
                vals = [1,1,3,0,0,1,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
    db= pd.DataFrame(a,columns=['gp','pts','tpp','w','l','d','gf','ga','gfh','gah','gfa','gaa'])
    db = pd.DataFrame(db.sum())
    db = db.T
    return db

def get_standings(data,season,ref):
    db = pd.DataFrame()
    if season == 1:
        data = data[data['s'] <= 1]
    if season == 2:
        data = data[data['s'] > 1]
    teams = ref['team']
    #teams = np.sort(teams,axis=-1)
    for team in teams:
        df = get_team_brief(data,team,ref)
        df = get_club_statistics(df,team)
        ppg = round(df['pts']/df['gp'],2)
        gd = df['gf'] - df['ga']
        df.insert(0,'team',team)
        df.insert(4,'ppg',ppg)
        df.insert(8,'gd',gd)
        db = pd.concat([db,df])
    db = db.sort_values(by=['pts','w','gf'],ascending=False)
    db = index_reset(db)
    db = db.reset_index()
    db = db.rename(columns={'index':'rank'})
    db['rank'] = db['rank'] + 1
    db = db.fillna(0)
    return db

def compare_standings(db,df,dc):
    a = []
    for team in dc['team']:
        rank1 = df[df['team'] == team]
        rank2 = db[db['team'] == team]
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']
        a.append([team,change])
    db = pd.DataFrame(a)
    db = pd.DataFrame({'team': db.iloc[:][0], 'change': db.iloc[:][1]})
    db = db.sort_values(by=['change'],ascending=False)
    db = index_reset(db)
    return db

def clean_team_game(data,db,check): # Fix this section for teams that haven't played yet
    if check == 0:
        df = data.iloc[0]['team'] # Getting the name of the top team
    else:
        df = data.iloc[-1]['team'] # Getting the name of the bottom placed team
    if data.iloc[-1]['gp'] == 0 and check == 1:
        db = pd.DataFrame([(df,0,df,0)],columns=['home','hs','away','as']) # make an empty set if the game is empty
    else:
        df = db[(db['home'] == df) | (db['away'] == df)] # get appropirate game results for specified team
        db = index_reset(df)
        db = db.iloc[0][['home','hs','away','as']]
        db = pd.DataFrame(db)
        db = db.T
    return db

def get_longest_name(da,db,dc,team_ref):
    def get_long(data,dd):
        db = data.copy()
        for team in db['home']:
            row = dd[dd['short'] == team]
            db.at[0,'home'] = row.iloc[0]['team']
        for team in db['away']:
            row = dd[dd['short'] == team]
            db.at[0,'away'] = row.iloc[0]['team']
        return db
    da = get_long(da,team_ref)
    db = get_long(db,team_ref)
    dc = get_long(dc,team_ref)
    teams_in = pd.DataFrame([da.iloc[0]['home'],da.iloc[0]['away'],db.iloc[0]['home'],db.iloc[0]['away'],dc.iloc[0]['home'],dc.iloc[0]['away']],columns=['teams'])
    teams_in = teams_in.teams.unique()
    return teams_in
    
def get_short_name(data,dc):
    for team in data['home']:
        row = dc[dc['team'] == team]
        data.at[0,'home'] = row.iloc[0]['short']
    for team in data['away']:
        row = dc[dc['team'] == team]
        data.at[0,'away'] = row.iloc[0]['short']
    return data

def get_weeks_results(data,standings,team_ref):
    if data.iloc[0]['hr'] == 'E':
        db = pd.DataFrame([('NA',0,'NA',0)],columns=['home','hs','away','as'])
        big_win, top_team, low_team,other_team = db,db,db,db
        goals = 0
        return db,goals,big_win,top_team,low_team,other_team
    df = data
    month = df.iloc[-1]['m']
    week = df.iloc[-1]['d']
    db = df[df['m'] == month]
    db = db[db['d'] >= week - 6]
    db = db.sort_values(by=['game'],ascending=False)
    goals = db['hs'].sum() + db['as'].sum()
    max_home = db[db['hs'] == db['hs'].max()]
    max_away = db[db['as'] == db['as'].max()]
    if max_home.iloc[0]['hs'] > max_away.iloc[0]['as']:
        max_home_win = max_home
    else:
        max_home_win = max_away
    big_win = max_home_win[['home','hs','away','as']]
    big_win = index_reset(big_win)
    big_win = get_short_name(big_win,team_ref)
    big_win = pd.DataFrame(big_win.loc[0])
    big_win = big_win.T
    top_team = clean_team_game(standings,db,0)
    top_team = get_short_name(top_team,team_ref)
    low_team = clean_team_game(standings,db,1)
    low_team = get_short_name(low_team,team_ref)
    teams_in = get_longest_name(big_win,top_team,low_team,team_ref)
    other_team = db[(~db['home'].isin(teams_in)) | (~db['away'].isin(teams_in))]
    other_team = index_reset(other_team)
    other_team = pd.DataFrame(other_team.loc[0][['home','hs','away','as']])
    other_team = other_team.T
    other_team = get_short_name(other_team,team_ref)
    return db,goals,big_win,top_team,low_team,other_team

def get_team_stats(data,query):
    db = data[data['team'] == query]
    names = db['name'].unique()
    information = data.copy()
    db.pop('number')
    db = db.groupby(['name']).sum()
    db.insert(0,'last','empty')
    db.insert(0,'first','empty')
    db.insert(0,'position','empty')
    db.insert(0,'number',0)
    #db.insert(0,'team',team)
    i = 0
    for name in names:
        player = information[information['name'] == name].head(1)
        db.at[name,'first'] = player.iloc[0]['first']
        db.at[name,'last'] = player.iloc[0]['last']
        db.at[name,'number'] = int(player.iloc[0]['number'])
        db.at[name,'position'] = player.iloc[0]['position']
        db.at[name,'pass-acc'] = player.iloc[0]['pass-acc'].mean()
        db.at[name,'cross-acc'] = player.iloc[0]['cross-acc'].mean()
    db = db.reset_index()
    return db

def get_stats_all(data,dc):
    db = pd.DataFrame()
    for team in dc['team']:
        df = get_team_stats(data,team)
        df.insert(0,'team',team)
        db = pd.concat([db,df])
    db = index_reset(db)
    return db

# get associated information for players league wide and calculate an overall score for each position
def get_evaluation(condensed_player_info,full_player_info):
    names = condensed_player_info.name.unique() # grab the list of names at the specified position
    eval_ = condensed_player_info.describe().T # get the evalution scores
    checks = condensed_player_info.columns[4:] # slice away the first three columns (name,number,postion) not needed
    condensed_player_info['overall'] = 0.0 # create the final column overall
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the player name to search for a specific player
    for name in names: # iterate through the names in the lisst
        player = full_player_info[full_player_info['name'] == name].head(1) # get the players details
        a = [] # create an empty array to store the scores
        for check in checks: # iterate through the columns of remaining data
            result = player.iloc[0][check] / eval_['max'][check] # calculate the score for the value found value/max
            a.append(result) # append the result into the list
            score = sum(a) / len(checks) #calculate the final score sum(list) / num of checks
            overall = str(score)
            overall = overall[0:4]
            condensed_player_info.at[name,'overall'] = overall # assign the value as the overall score
    condensed_player_info = condensed_player_info.reset_index() # reset the index, making the name column a column again
    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False) # sort using overall, descending
    return condensed_player_info

def top_tracked(team_stats,tracked):
    if team_stats.minutes.sum() == 0:
        tracked_player_stat = pd.DataFrame([('NA',0,0,0,0)],columns=['team','name','number','minutes','goals'])
        return tracked_player_stat
    df = team_stats.copy()
    cols = ['team','name','position','number','minutes',tracked]
    tracked_player_stat = df[cols]
    #tracked_player_stat = get_evaluation(tracked_player_stat,df)
    tracked_player_stat = tracked_player_stat.sort_values(by=[tracked],ascending=False)
    tracked_player_stat = tracked_player_stat.reset_index()
    tracked_player_stat.pop('index')
    team = tracked_player_stat.pop('team')
    tracked_player_stat.insert(0,'team',team)
    tracked_player_stat = tracked_player_stat[tracked_player_stat[tracked] >= 1]
    rank = tracked_player_stat.index + 1
    tracked_player_stat.insert(0,'rank',rank)
    return tracked_player_stat

def top_position(team_stats,position): # get the forwards in the league
    if team_stats.minutes.sum() == 0:
        if position == 'f':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','chances','assists','shots','s-target','passes','crosses','duels','tackles','overall'])
        if position == 'm':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','assists','touches','passes','pass-acc','crosses','cross-acc','chances','duels','tackles','overall'])
        if position == 'd':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','tackles','t-won','clearances','interceptions','duels','d-won','overall'])
        if position == 'g':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','cs','saves','shots faced','claimed crosses','overall'])
        condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','chances','assists','shots','shots on target','passes','crosses','duels','tackles','overall'])
        return condensed_player_info
    player_information = team_stats.copy() # load player information
    if position == 'f':
        cols = ['team','name','number','position','minutes','goals','chances','assists','shots','s-target','passes','crosses','duels','tackles']
    if position == 'm':
        cols = ['team','name','number','position','minutes','goals','assists','touches','passes','pass-acc','crosses','cross-acc','chances','duels','tackles']
    if position == 'd':
        cols = ['team','name','number','position','minutes','tackles','t-won','clearances','interceptions','duels','d-won']
    if position == 'g':
        cols = ['team','name','number','position','minutes','cs','saves','shots faced','claimed crosses']
    full_player_info = player_information[player_information['position'] == position] # get the forwards where position = f
    condensed_player_info = full_player_info[cols] # select specific columns associated with the evaluation
    condensed_player_info = get_evaluation(condensed_player_info,full_player_info) # condensed Dataframe and full Dataframe being passes
    condensed_player_info = index_reset(condensed_player_info)
    names = condensed_player_info.name.unique() # get the names of the players who fit the criteria
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the name column to make the search possible
    if position == 'f':
        for name in names:
            player = full_player_info[full_player_info['name'] == name].head(1) # forwards main purpose is to score goals
            if (player.iloc[0]['goals'] <= 2.0 and player.iloc[0]['minutes'] >= 1000.0): # if player scores less than 2 & has minutes greater than 1000
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] - 0.1
            if player.iloc[0]['goals'] >= 8.0: # reward forwards scoring greater than 8 goals
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.15
    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False)
    condensed_player_info = condensed_player_info.reset_index()
    team = condensed_player_info.pop('team')
    condensed_player_info.insert(0,'team',team)
    return condensed_player_info

def top_offenders(data):  # get the offences handed out in the league
    if data.minutes.sum() == 0:
        db = pd.DataFrame([('NA',0,0,0,0,0,0)],columns=['team','name','number','minutes','yellow','red','f-conceded'])
        return db
    player_information = data.copy()
    cols = ['team','name','position','number','minutes','yellow','red','f-conceded']
    df = player_information
    db = df[cols]
    db = get_evaluation(db,df)
    db = db.sort_values(by=['red','yellow'],ascending=False)
    db = db.reset_index()
    db.pop('index')
    team = db.pop('team')
    db.insert(0,'team',team)
    return db

def get_team_form(data,query):
    db = data[data['team'] == query]
    db = pd.DataFrame(db['summary'])
    return db

def get_form_results(data,dc):
    db = pd.DataFrame()
    form = get_results_brief(data[data['s'] <= 1],dc)
    teams = data.home.unique()
    teams = np.sort(teams,axis=-1)
    for team in teams:
        df = get_team_form(form,team)
        #print(team,'\n',df)
        db[team] = pd.Series(df['summary'].values)
    db = db.T
    db = db.reset_index()
    db = db.fillna('E')
    return db

def get_roster(query,stats,team_ref): # use team stats to get the player information
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query]
    roster = roster[['name','number','position']]
    roster.insert(3,'overall',0)
    roster = index_reset(roster)
    return roster

def get_home_away_comparison(stats,game,team):
    db = stats[stats['game'] == game].copy()
    db = db[db['team'] == team]
    db = db.sort_values(by=['minutes'],ascending=False)
    db = db#[0:11]
    db = db['name']
    return db

def get_compare_roster(results,query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers):
    # going through the rated players to get the best players for each position
    # using game_h,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,results,team_stats
    def get_player_info(data,player,position,num):
        player_info = data[data['name'] == name]
        if player_info.empty:
            player_row = [name,player.iloc[0][1],position,0,num]
            
        else:
            player_row = player_info.iloc[0].copy()
            player_row['asc'] = num
        return player_row
    roster = get_roster(query,stats,team_ref)
    keepers = [] # lists for each position
    forwards = []
    midfields = []
    defenders = []
    for name in roster['name']:
        player = roster[roster['name'] == name]
        if player.iloc[0][2] == 'f':
            player_info = get_player_info(rated_forwards,player,'f',0)
            forwards.append(player_info)
        if player.iloc[0][2] == 'm':
            player_info = get_player_info(rated_midfielders,player,'m',1)
            midfields.append(player_info)
        if player.iloc[0][2] == 'd':
            player_info = get_player_info(rated_defenders,player,'d',2)
            defenders.append(player_info)
        if player.iloc[0][2] == 'g':
            player_info = get_player_info(rated_keepers,player,'g',3)
            keepers.append(player_info)
    db = pd.DataFrame(keepers,columns=['name','number','position','overall','asc'])
    db = db.sort_values(by=['asc','overall'],ascending=False)
    dd = pd.DataFrame(defenders,columns=['name','number','position','overall','asc'])
    dd = dd.sort_values(by=['asc','overall'],ascending=False)
    dm = pd.DataFrame(midfields,columns=['name','number','position','overall','asc'])
    dm = dm.sort_values(by=['asc','overall'],ascending=False)
    df = pd.DataFrame(forwards,columns=['name','number','position','overall','asc'])
    df = df.sort_values(by=['asc','overall'],ascending=False)
    db = pd.concat([db[0:1],dd[0:4],dm[0:4],df[0:2]])
    db = db[['name','number','position','overall','asc']]
    if results.iloc[0]['hr'] == 'E': # check if games haven't been played
        db = db.sort_values(by=['asc'],ascending=False)
    else:
        db = db.sort_values(by=['asc','overall'],ascending=False)
    db = index_reset(db)
    db.pop('asc')
    return db

def get_roster_overall(query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info): # use team stats to get the player information
    def get_score(data,name):
        db = data[data['name'] == name]
        if db.empty:
            db = 0
        else:
            db = db['overall'].values
            db = db[0]
        return db
    def get_image(data,name):
        db = data[data['name'] == name]
        if db['image'].empty:
            db = 'empty.jpg'
        else:
            db = db['image'].values
            db = db[0]
        return db
    def get_link(data,name):
        db = data[data['name'] == name]
        if db['link'].empty:
            db = 'https://en.wikipedia.org/wiki/Canadian_Premier_League'
        else:
            db = db['link'].values
            db = db[0]
        return db
    def get_flag(data,name):
        db = data[data['name'] == name]
        if db['flag'].empty:
            db = 'empty.png'
        else:
            db = db['flag'].values
            db = db[0]
        return db
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query].copy()
    roster = roster[['name','first','last','number','position']] # scale the dataframe down to what we need
    #roster.insert(3,'overall',a)
    a = []
    b = []
    c = []
    d = []
    for i in range(0,roster.shape[0]):
        if roster.iloc[i]['position'] == 'f':
            score = str(get_score(rated_forwards,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'm':
            score = str(get_score(rated_midfielders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'd':
            score = str(get_score(rated_defenders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'g':
            score = str(get_score(rated_keepers,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
    roster['overall'] = a
    roster['flag'] = c
    roster['link'] = d
    roster.insert(0,'image',b)
    #roster['image'] = b
    roster = index_reset(roster)
    roster.pop('name')
    return roster

def get_power_rankings(db,df,dc):
    a = []
    for team in dc['team']:
        crest = dc[dc['team'] == team]
        colour = crest['colour'].values
        colour = colour[0]
        crest = crest['crest'].values
        crest = crest[0]
        
        rank1 = df[df['team'] == team]
        rank2 = db[db['team'] == team]
        
        if rank1.iloc[0]['rank'] == 1:
            bonus = 4
        elif rank1.iloc[0]['rank'] == 2:
            bonus = 3
        elif rank1.iloc[0]['rank'] == 3:
            bonus = 2
        else:
            bonus =0
        
        if db.iloc[0]['gp'] == 0:
            bonus = 0
            
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = (rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']) * - 1
            
        if rank1.iloc[0]['gd'] == rank2.iloc[0]['gd']:
            gd_bonus = 0
        else:
            gd_bonus = (rank1.iloc[0]['gd'] - rank2.iloc[0]['gd']) * - 1
        
        if rank1.iloc[0]['ga'] == rank2.iloc[0]['ga']:
            ga_nerf = 0
        else:
            ga_nerf = (rank1.iloc[0]['ga'] - rank2.iloc[0]['ga']) * - 1
            
        if rank1.iloc[0]['w'] == rank2.iloc[0]['w']:
            w_bonus = 0
        else:
            w_bonus = (rank1.iloc[0]['w'] - rank2.iloc[0]['w']) * - 1
        
        goal_bonus = gd_bonus - ga_nerf
        change = change + bonus + goal_bonus + w_bonus
        
        a.append([team,change,goal_bonus,w_bonus,crest,colour])
    db = pd.DataFrame(a,columns = ['team','change','goal_bonus','w_bonus','crest','colour'])
    #db = pd.DataFrame(a)
    #db = pd.DataFrame({'team': db.iloc[:][0], 'change': db.iloc[:][1]})
    db = db.sort_values(by=['change'],ascending=False)
    db = index_reset(db)
    rank = db.index + 1
    db.insert(0,'rank',rank)
    return db

In [3]:
team_ref = pd.read_csv('datasets/teams.csv')

In [4]:
year = '2019'

In [5]:
results = pd.read_csv(f'datasets/{year}/cpl-{year}-results.csv')
stats = pd.read_csv(f'datasets/{year}/cpl-{year}-stats.csv')
player_info = pd.read_csv(f'datasets/{year}/player-{year}-info.csv')

In [6]:
if year == '2019':
    team_ref = team_ref[1:]
    results_old = results[:-7].copy()
else:
    results_old = results[results['hr'] != 'E'].copy()

In [7]:
results.tail(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
95,I96,1,19,10,0,2,York9 FC,L,HFX Wanderers FC,W,0,1,10-19-2019 York9 FC L 0-2 W HFX Wanderers FC,York Lions Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
96,I97,1,19,10,3,1,Cavalry FC,W,FC Edmonton,L,0,0,10-19-2019 Cavalry FC W 3-1 L FC Edmonton,ATCO Field,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...
97,I98,1,19,10,2,0,Pacific FC,W,Valour FC,L,1,0,10-19-2019 Pacific FC W 2-0 L Valour FC,Westhills Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
98,I99,2,26,10,1,0,Forge FC,W,Cavalry FC,L,1,0,10-26-2019 Forge FC W 1-0 L Cavalry FC,Tim Hortons Field,https://canpl.ca/matchcentre/7exzoqwijjljpmysg...
99,I100,2,2,11,0,1,Cavalry FC,L,Forge FC,W,0,1,11-2-2019 Cavalry FC L 0-1 W Forge FC,ATCO Field,https://canpl.ca/matchcentre/7f2xl06opfb8koy1j...


In [8]:
results_old.tail(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
88,I89,1,6,10,1,0,Forge FC,W,York9 FC,L,1,0,10-6-2019 Forge FC W 1-0 L York9 FC,Tim Hortons Field,https://canpl.ca/matchcentre/5lomje72gq8r3x9xt...
89,I90,1,9,10,1,1,HFX Wanderers FC,D,Pacific FC,D,0,0,10-9-2019 HFX Wanderers FC D 1-1 D Pacific FC,Wanderers Grounds,https://canpl.ca/matchcentre/5m1xy5ksfqgdrzx3g...
90,I91,1,9,10,2,1,Cavalry FC,W,Forge FC,L,0,0,10-9-2019 Cavalry FC W 2-1 L Forge FC,ATCO Field,https://canpl.ca/matchcentre/548tpbwhe2nvkm81p...
91,I92,1,12,10,4,0,York9 FC,W,Forge FC,L,1,0,10-12-2019 York9 FC W 4-0 L Forge FC,York Lions Stadium,https://canpl.ca/matchcentre/5ncl09fzjg81svtrb...
92,I93,1,16,10,1,0,Forge FC,W,Cavalry FC,L,1,0,10-16-2019 Forge FC W 1-0 L Cavalry FC,Tim Hortons Field,https://canpl.ca/matchcentre/5nossz36w45wbtz1z...


In [9]:
results_diff = pd.concat([results, results_old]).drop_duplicates(keep=False)

In [10]:
results_diff.head(2)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
93,I94,1,16,10,0,4,Valour FC,L,York9 FC,W,0,1,10-16-2019 Valour FC L 0-4 W York9 FC,IG Field,https://canpl.ca/matchcentre/5oyl3jwgr2padsvpk...
94,I95,1,16,10,3,1,FC Edmonton,W,Pacific FC,L,0,0,10-16-2019 FC Edmonton W 3-1 L Pacific FC,Clarke Stadium,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...


In [11]:
standings = get_standings(results,1,team_ref)
if results_old.empty == True:
    standings_old = get_standings(results,1,team_ref)
else:
    standings_old = get_standings(results_old,1,team_ref)

In [12]:
standings

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Cavalry FC,28,62,84,2.21,19,4,5,32,51,19,26,8,25,11
1,2,Forge FC,28,56,84,2.0,17,6,5,19,45,26,25,10,20,16
2,3,York9 FC,28,34,84,1.21,9,12,7,2,39,37,18,19,21,18
3,4,FC Edmonton,28,32,84,1.14,8,12,8,-6,27,33,15,14,12,19
4,5,Pacific FC,28,31,84,1.11,8,13,7,-11,35,46,22,20,13,26
5,6,Valour FC,28,28,84,1.0,8,16,4,-22,30,52,17,32,13,20
6,7,HFX Wanderers FC,28,28,84,1.0,6,12,10,-14,21,35,13,9,8,26


In [13]:
standings_old

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Cavalry FC,27,59,81,2.19,18,4,5,30,48,18,23,7,25,11
1,2,Forge FC,28,56,84,2.0,17,6,5,19,45,26,25,10,20,16
2,3,York9 FC,26,31,78,1.19,8,11,7,0,35,35,18,17,17,18
3,4,FC Edmonton,26,29,78,1.12,7,11,8,-6,23,29,12,13,11,16
4,5,Valour FC,26,28,78,1.08,8,14,4,-16,30,46,17,28,13,18
5,6,Pacific FC,26,28,78,1.08,7,12,7,-11,32,43,20,20,12,23
6,7,HFX Wanderers FC,27,25,81,0.93,5,12,10,-16,19,35,13,9,6,26


In [14]:
compare_standings_test = compare_standings(standings,standings_old,team_ref)

In [15]:
compare_standings_test

Unnamed: 0,team,change
0,Pacific FC,1
1,Cavalry FC,0
2,FC Edmonton,0
3,Forge FC,0
4,HFX Wanderers FC,0
5,York9 FC,0
6,Valour FC,-1


In [16]:
power_rankings = get_power_rankings(standings,standings_old,team_ref)
power_rankings

Unnamed: 0,rank,team,change,goal_bonus,w_bonus,crest,colour
0,1,Cavalry FC,6,1,1,cavalry_fc_nav.png,w3-2019-fiesta
1,2,Forge FC,3,0,0,Forge_FC_nav.png,w3-2019-turmeric
2,3,HFX Wanderers FC,3,2,1,HFX_Wanderers_FC.png,w3-vivid-blue
3,4,York9 FC,3,0,1,York_9_FC_nav.png,w3-vivid-yellow-green
4,5,FC Edmonton,-3,-4,1,FC_Edmonton_nav.png,w3-2019-princess-blue
5,6,Pacific FC,-3,-3,1,Pacific_FC_nav.png,w3-vivid-reddish-purple
6,7,Valour FC,-11,-12,0,Valour_FC_nav.png,w3-2019-biking-red


In [17]:
game_week, goals, big_win, top_result, low_result,other_result = get_weeks_results(results[results['s'] <= 1],standings,team_ref)
game_week

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
97,I98,1,19,10,2,0,Pacific FC,W,Valour FC,L,1,0,10-19-2019 Pacific FC W 2-0 L Valour FC,Westhills Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
96,I97,1,19,10,3,1,Cavalry FC,W,FC Edmonton,L,0,0,10-19-2019 Cavalry FC W 3-1 L FC Edmonton,ATCO Field,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...
95,I96,1,19,10,0,2,York9 FC,L,HFX Wanderers FC,W,0,1,10-19-2019 York9 FC L 0-2 W HFX Wanderers FC,York Lions Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
94,I95,1,16,10,3,1,FC Edmonton,W,Pacific FC,L,0,0,10-16-2019 FC Edmonton W 3-1 L Pacific FC,Clarke Stadium,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...
93,I94,1,16,10,0,4,Valour FC,L,York9 FC,W,0,1,10-16-2019 Valour FC L 0-4 W York9 FC,IG Field,https://canpl.ca/matchcentre/5oyl3jwgr2padsvpk...
92,I93,1,16,10,1,0,Forge FC,W,Cavalry FC,L,1,0,10-16-2019 Forge FC W 1-0 L Cavalry FC,Tim Hortons Field,https://canpl.ca/matchcentre/5nossz36w45wbtz1z...


In [18]:
big_win

Unnamed: 0,home,hs,away,as
0,VFC,0,Y9,4


In [19]:
top_result

Unnamed: 0,home,hs,away,as
0,CFC,3,FCE,1


In [20]:
other_result

Unnamed: 0,home,hs,away,as
0,PFC,2,VFC,0


In [21]:
low_result

Unnamed: 0,home,hs,away,as
0,Y9,0,HFX,2


In [22]:
goals

17

In [23]:
#championship = get_standings(results,2)
#championship = championship[championship['gp'] > 1]
#championship

In [24]:
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)

In [25]:
#standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)

In [26]:
results_brief = get_results_brief(results,team_ref)

In [27]:
results_brief.head(5)

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,4,5,2,1,Cavalry FC,W,York9 FC,L,W A 2 - 1 Y9,Cavalry FC
1,8,5,1,0,Cavalry FC,W,Valour FC,L,W A 1 - 0 VFC,Cavalry FC
2,12,5,1,2,Forge FC,L,Cavalry FC,W,W A 1 - 2 FFC,Cavalry FC
3,18,5,1,0,Cavalry FC,W,FC Edmonton,L,W A 1 - 0 FCE,Cavalry FC
4,25,5,2,0,Cavalry FC,W,HFX Wanderers FC,L,W A 2 - 0 HFX,Cavalry FC


In [28]:
#results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)

In [29]:
stats.head(5)

Unnamed: 0,game,team,position,number,name,first,last,minutes,touches,passes,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
0,I1,Forge FC,m,1,Alexander Achinioti-Jönsson,Alexander,Achinioti-Jönsson,77.0,47.0,41.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,I1,Forge FC,d,16,Bertrand Owundi,Bertrand,Owundi,77.0,69.0,56.0,...,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,I1,Forge FC,f,13,Christopher Nanco,Christopher,Nanco,56.0,42.0,27.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,I1,Forge FC,d,22,Dominic Samuel,Dominic,Samuel,90.0,59.0,48.0,...,5.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,I1,Forge FC,m,6,Elimane Cissé,Elimane,Cissé,90.0,75.0,61.0,...,1.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [30]:
stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
count,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,...,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0
mean,13.82925,71.540768,44.576965,30.244241,0.73657,1.116271,0.093119,0.060695,0.628154,0.087751,...,1.210238,0.790128,0.112249,0.006216,0.938574,0.995978,0.318099,0.227422,0.036563,0.019744
std,11.355872,28.545983,23.277512,18.645196,0.233204,1.904354,0.235873,0.246352,0.98912,0.311297,...,1.859761,1.13105,0.31573,0.078609,1.119698,1.125445,1.295603,0.956352,0.251072,0.139145
min,1.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,61.0,28.0,16.0,0.67,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,12.0,90.0,45.0,29.0,0.78,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
75%,19.0,90.0,61.0,42.0,0.85,2.0,0.0,0.0,1.0,0.0,...,2.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0
max,92.0,90.0,118.0,112.0,1.0,15.0,1.0,2.0,7.0,3.0,...,11.0,7.0,1.0,1.0,6.0,7.0,13.0,10.0,4.0,1.0


In [31]:
team_stats = get_stats_all(stats,team_ref)

In [32]:
team_stats

Unnamed: 0,team,name,number,position,first,last,minutes,touches,passes,pass-acc,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
0,Cavalry FC,Aribim Pepple,24,f,Aribim,Pepple,60.0,21.0,9.0,1.00,...,0.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0,0.0,0.0
1,Cavalry FC,Carlos Patiño,20,m,Carlos,Patiño,320.0,238.0,122.0,0.88,...,1.0,5.0,0.0,0.0,13.0,12.0,0.0,0.0,0.0,0.0
2,Cavalry FC,Dean Northover,12,d,Dean,Northover,609.0,478.0,255.0,0.67,...,10.0,15.0,2.0,1.0,14.0,13.0,0.0,0.0,0.0,0.0
3,Cavalry FC,Dominick Zator,4,d,Dominick,Zator,2335.0,1866.0,1402.0,0.83,...,75.0,51.0,0.0,0.0,19.0,15.0,0.0,0.0,0.0,0.0
4,Cavalry FC,Dominique Malonga,19,f,Dominique,Malonga,1871.0,760.0,489.0,0.80,...,12.0,2.0,1.0,0.0,31.0,8.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,York9 FC,Ryan Telfer,18,m,Ryan,Telfer,2216.0,1533.0,832.0,0.60,...,8.0,23.0,2.0,0.0,45.0,25.0,0.0,0.0,0.0,0.0
160,York9 FC,Simon Karlsson Adjei,12,f,Simon,Karlsson Adjei,1422.0,575.0,341.0,0.88,...,6.0,2.0,0.0,0.0,18.0,18.0,0.0,0.0,0.0,0.0
161,York9 FC,Stefan Lamanna,21,f,Stefan,Lamanna,77.0,55.0,31.0,0.86,...,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0
162,York9 FC,Steven Furlano,77,d,Steven,Furlano,517.0,325.0,225.0,0.85,...,6.0,10.0,4.0,0.0,4.0,15.0,0.0,0.0,0.0,0.0


In [36]:
team_stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
count,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,...,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0
mean,15.0,1193.073171,743.402439,504.378049,0.734939,18.615854,0.06189,1.012195,10.47561,1.463415,...,20.182927,13.176829,1.871951,0.103659,15.652439,16.609756,5.304878,3.792683,0.609756,0.329268
std,12.78947,694.973598,500.156058,367.663695,0.224651,28.796174,0.200023,1.356592,11.323168,2.509834,...,25.700793,13.268319,1.796914,0.305751,13.363073,12.806015,19.381662,13.906557,2.492965,1.325122
min,1.0,9.0,10.0,3.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,7.0,560.0,322.5,198.75,0.67,1.0,0.0,0.0,2.0,0.0,...,3.0,2.0,0.0,0.0,5.0,7.0,0.0,0.0,0.0,0.0
50%,13.0,1243.0,664.0,453.5,0.77,6.0,0.0,0.0,7.0,0.0,...,9.0,9.0,2.0,0.0,12.0,14.0,0.0,0.0,0.0,0.0
75%,20.0,1775.0,1134.75,824.25,0.86,22.0,0.0,2.0,16.0,2.0,...,27.25,21.0,3.0,0.0,23.25,24.0,0.0,0.0,0.0,0.0
max,92.0,2520.0,1985.0,1536.0,1.0,164.0,1.0,5.0,66.0,13.0,...,117.0,51.0,9.0,1.0,57.0,62.0,122.0,88.0,17.0,9.0


In [37]:
rated_goalscorers = top_tracked(team_stats,'goals')
rated_assists = top_tracked(team_stats,'assists')
rated_assists.head(2)

Unnamed: 0,rank,team,name,position,number,minutes,assists
0,1,Pacific FC,Blake Smith,d,4,2004.0,5.0
1,2,Valour FC,Michael Petrasso,m,9,1301.0,5.0


In [38]:
rated_goalscorers.head(2)

Unnamed: 0,rank,team,name,position,number,minutes,goals
0,1,Forge FC,Tristan Borges,m,4,1884.0,13.0
1,2,Cavalry FC,Dominique Malonga,f,19,1871.0,11.0


In [39]:
'''rated_g10 = rated_goalscorers.head(10)
rated_g10 = rated_g10[['rank','team','name','position','goals']]
rated_g10'''

"rated_g10 = rated_goalscorers.head(10)\nrated_g10 = rated_g10[['rank','team','name','position','goals']]\nrated_g10"

In [40]:
rated_forwards = top_position(team_stats,'f')
rated_midfielders = top_position(team_stats,'m')
rated_defenders = top_position(team_stats,'d')
rated_keepers = top_position(team_stats,'g')
rated_offenders = top_offenders(team_stats)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [41]:
rated_forwards.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,s-target,passes,crosses,duels,tackles,overall
0,Pacific FC,Terran Campbell,14,f,2334.0,11.0,31.0,2.0,53.0,23.0,569.0,29.0,324.0,39.0,0.85
1,York9 FC,Rodrigo Gattas,22,f,1958.0,9.0,23.0,0.0,77.0,36.0,728.0,38.0,214.0,17.0,0.75
2,Cavalry FC,Dominique Malonga,19,f,1871.0,11.0,19.0,1.0,68.0,34.0,489.0,5.0,185.0,2.0,0.69
3,HFX Wanderers FC,Mohamed Kourouma,12,f,1988.0,1.0,44.0,3.0,68.0,17.0,702.0,156.0,264.0,56.0,0.67
4,Cavalry FC,Nico Pasquotti,17,f,1853.0,5.0,24.0,3.0,33.0,16.0,457.0,61.0,374.0,47.0,0.63
5,FC Edmonton,Oumar Diouck,45,f,1977.0,6.0,30.0,2.0,45.0,16.0,583.0,53.0,269.0,36.0,0.6
6,FC Edmonton,Easton Ongaro,19,f,1167.0,10.0,11.0,2.0,38.0,20.0,249.0,6.0,188.0,5.0,0.57
7,York9 FC,Simon Karlsson Adjei,12,f,1422.0,7.0,21.0,2.0,53.0,16.0,341.0,4.0,210.0,15.0,0.47
8,Cavalry FC,Oliver Minatel,7,f,1450.0,7.0,17.0,1.0,37.0,12.0,348.0,21.0,194.0,29.0,0.43
9,Forge FC,Christopher Nanco,13,f,1429.0,4.0,21.0,2.0,29.0,12.0,380.0,22.0,208.0,19.0,0.42


In [42]:
rated_midfielders.head(2)

Unnamed: 0,team,name,number,position,minutes,goals,assists,touches,passes,pass-acc,crosses,cross-acc,chances,duels,tackles,overall
0,Forge FC,Tristan Borges,4,m,1884.0,13.0,5.0,1307.0,680.0,0.74,77.0,0.0,42.0,356.0,59.0,0.7
1,York9 FC,Ryan Telfer,18,m,2216.0,8.0,1.0,1533.0,832.0,0.6,164.0,0.0,66.0,372.0,52.0,0.69


In [43]:
rated_defenders.head(2)

Unnamed: 0,team,name,number,position,minutes,tackles,t-won,clearances,interceptions,duels,d-won,overall
0,FC Edmonton,Mélé Temguia,8,d,2430.0,61.0,41.0,82.0,51.0,244.0,140.0,0.86
1,HFX Wanderers FC,André Bona,3,d,2053.0,66.0,50.0,76.0,32.0,257.0,150.0,0.83


In [44]:
rated_keepers.head(2)

Unnamed: 0,team,name,number,position,minutes,cs,saves,shots faced,claimed crosses,overall
0,York9 FC,Nathan Ingham,1,g,2340.0,6.0,88.0,122.0,17.0,0.93
1,Cavalry FC,Marco Carducci,1,g,2160.0,9.0,65.0,81.0,11.0,0.79


In [45]:
rated_offenders.head(2)

Unnamed: 0,team,name,position,number,minutes,yellow,red,f-conceded,overall
0,York9 FC,Manuel Aparicio,m,10,2159.0,5.0,1.0,56.0,0.82
1,Forge FC,Dominic Samuel,d,22,2044.0,5.0,1.0,38.0,0.74


In [46]:
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

In [47]:
schedule = get_schedule(results_diff)
schedule

Unnamed: 0,game,home,away
0,I94,Valour FC,York9 FC
1,I95,FC Edmonton,Pacific FC
2,I96,York9 FC,HFX Wanderers FC
3,I97,Cavalry FC,FC Edmonton
4,I98,Pacific FC,Valour FC


In [48]:
# home side
q1 = schedule.iloc[0]['home']
# away side
q2 = schedule.iloc[0]['away']
print(q1,q2)

Valour FC York9 FC


In [49]:
compare = get_team_comparison(results_brief,q1,q2)

In [50]:
compare

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,1,7,1,3,Valour FC,L,York9 FC,W,L A 1 - 3 Y9,Valour FC
1,10,8,0,2,York9 FC,L,Valour FC,W,W A 0 - 2 Y9,Valour FC
2,15,9,2,4,York9 FC,L,Valour FC,W,W A 2 - 4 Y9,Valour FC
3,16,10,0,4,Valour FC,L,York9 FC,W,L A 0 - 4 Y9,Valour FC


In [51]:
t1_x, t1_y = get_NB_data(compare,q1)
t2_x, t2_y = get_NB_data(compare,q2)

### Game Prediction

use Gaussian Naive Bayes model to predict the outcome of a home match.

In [52]:
#Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB,BernoulliNB
import statistics

In [53]:
def get_gnb_prediction(query,x,y,result):
    
    gnb = GaussianNB()
    bnb = BernoulliNB()
    # Train the model using the training sets
    
    gnb.fit(x,y)
    bnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    gnb_pred = np.round(gnb.predict_proba([result])[:, 1],decimals=2)
    bnb_pred = np.round(bnb.predict_proba([result])[:, 1],decimals=2)
    
    pred = round((gnb_pred[0] + bnb_pred[0]) / 2,2)
    #print(gnb_pred[0], bnb_pred[0], pred)
    
    return pred

def get_match_prediction_result(query,x,y,array):
    prediction = get_gnb_prediction(query,x,y,array)
    return prediction

def get_match_prediction(q1,q2,x1,y1,x2,y2):
    if len(x1) == 0:
        x = round(1/3,2)
        home_win, away_win,draw = x,x,x
        return home_win,away_win,draw
    home_win = get_match_prediction_result(q1,x1,y1,[1,2])
    draw = get_match_prediction_result(q1,x1,y1,[1,1])
    away_win = get_match_prediction_result(q2,x2,y2,[2,2])
    return home_win, draw, away_win

In [54]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y)

In [55]:
print(q1,'\nwin probability: ', round(home_win,2))

Valour FC 
win probability:  0.31


In [56]:
print(q2,'\nwin probability: ', round(away_win,2))

York9 FC 
win probability:  0.31


In [57]:
print('Draw probability: ', round(draw,2))

Draw probability:  0.26


In [58]:
round(home_win + draw + away_win,1)

0.9

In [59]:
team_form_results = get_form_results(results,team_ref)
team_form_results

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,24,25,26,27
0,Cavalry FC,W A 2 - 1 Y9,W A 1 - 0 VFC,W A 1 - 2 FFC,W A 1 - 0 FCE,W A 2 - 0 HFX,W A 0 - 3 FCE,W A 1 - 2 HFX,L A 0 - 1 FFC,W A 0 - 2 Y9,...,D A 1 - 1 PFC,W A 0 - 8 VFC,W A 0 - 1 FCE,W A 4 - 1 PFC,D A 1 - 1 Y9,W A 4 - 1 VFC,W A 2 - 0 HFX,W A 2 - 1 FFC,L A 1 - 0 FFC,W A 3 - 1 FCE
1,FC Edmonton,W A 1 - 2 VFC,D A 0 - 0 PFC,L A 1 - 0 CFC,L A 2 - 0 FFC,L A 0 - 1 VFC,L A 0 - 3 CFC,D A 0 - 0 Y9,W A 1 - 3 PFC,W A 1 - 0 FFC,...,D A 2 - 2 Y9,D A 0 - 0 HFX,L A 0 - 1 CFC,L A 1 - 0 PFC,L A 1 - 3 Y9,D A 1 - 1 HFX,L A 0 - 1 FFC,L A 3 - 1 VFC,W A 3 - 1 PFC,L A 3 - 1 CFC
2,Forge FC,D A 1 - 1 Y9,L A 2 - 1 HFX,W A 3 - 0 PFC,L A 1 - 2 CFC,W A 0 - 2 VFC,W A 0 - 2 Y9,W A 2 - 0 FCE,W A 2 - 1 VFC,W A 0 - 1 CFC,...,W A 2 - 1 Y9,W A 1 - 3 VFC,D A 2 - 2 HFX,D A 1 - 1 HFX,W A 3 - 0 PFC,W A 0 - 1 FCE,W A 1 - 0 Y9,L A 2 - 1 CFC,L A 4 - 0 Y9,W A 1 - 0 CFC
3,HFX Wanderers FC,L A 1 - 0 PFC,W A 2 - 1 FFC,L A 1 - 0 VFC,L A 2 - 0 CFC,D A 1 - 1 Y9,W A 2 - 1 PFC,L A 1 - 2 CFC,D A 0 - 0 Y9,W A 2 - 0 VFC,...,L A 2 - 0 VFC,D A 0 - 0 FCE,D A 2 - 2 FFC,D A 1 - 1 FFC,D A 0 - 0 VFC,D A 1 - 1 FCE,D A 1 - 1 PFC,L A 2 - 0 CFC,D A 1 - 1 PFC,W A 0 - 2 Y9
4,Pacific FC,W A 1 - 0 HFX,L A 1 - 2 VFC,L A 3 - 0 FFC,D A 0 - 0 FCE,D A 2 - 2 Y9,L A 2 - 1 HFX,L A 1 - 0 Y9,W A 1 - 2 VFC,L A 1 - 3 FCE,...,D A 1 - 1 CFC,D A 1 - 1 FFC,L A 0 - 2 Y9,W A 1 - 0 FCE,L A 4 - 1 CFC,L A 3 - 0 FFC,D A 1 - 1 HFX,D A 1 - 1 HFX,L A 3 - 1 FCE,W A 2 - 0 VFC
5,Valour FC,W A 1 - 2 PFC,L A 1 - 2 FCE,L A 1 - 0 CFC,W A 1 - 0 HFX,L A 0 - 2 FFC,W A 0 - 1 FCE,L A 2 - 1 FFC,L A 1 - 2 PFC,L A 2 - 0 HFX,...,L A 2 - 1 PFC,W A 2 - 0 HFX,L A 0 - 8 CFC,L A 1 - 3 FFC,W A 2 - 4 Y9,D A 0 - 0 HFX,L A 4 - 1 CFC,W A 3 - 1 FCE,L A 0 - 4 Y9,L A 2 - 0 PFC
6,York9 FC,D A 1 - 1 FFC,L A 2 - 1 CFC,D A 2 - 2 PFC,L A 0 - 2 FFC,D A 1 - 1 HFX,W A 1 - 0 PFC,D A 0 - 0 FCE,D A 0 - 0 HFX,L A 0 - 2 CFC,...,D A 2 - 2 FCE,L A 2 - 1 FFC,W A 0 - 2 PFC,L A 2 - 4 VFC,W A 1 - 3 FCE,D A 1 - 1 CFC,L A 1 - 0 FFC,W A 4 - 0 FFC,W A 0 - 4 VFC,L A 0 - 2 HFX


In [60]:
standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)
power_rankings.to_csv(f'datasets/{year}/cpl-{year}-power_rankings.csv',index=False)
results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)
schedule.to_csv(f'datasets/{year}/cpl-{year}-schedule.csv',index=False)
team_stats.to_csv(f'datasets/{year}/cpl-{year}-team_stats.csv',index=False)
rated_forwards.to_csv(f'datasets/{year}/cpl-{year}-forwards.csv',index=False)
rated_midfielders.to_csv(f'datasets/{year}/cpl-{year}-midfielders.csv',index=False)
rated_defenders.to_csv(f'datasets/{year}/cpl-{year}-defenders.csv',index=False)
rated_keepers.to_csv(f'datasets/{year}/cpl-{year}-keepers.csv',index=False)
rated_offenders.to_csv(f'datasets/{year}/cpl-{year}-discipline.csv',index=False)
rated_goalscorers.to_csv(f'datasets/{year}/cpl-{year}-rated_goalscorers.csv',index=False)
rated_assists.to_csv(f'datasets/{year}/cpl-{year}-rated_assists.csv',index=False)
team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)
#h1_roster.to_csv(f'datasets/{year}/teams/cpl-{year}-{q1}_final_scores.csv')
#team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)

In [59]:
h1_roster = get_compare_roster(results,q1,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers)
h1_roster

Unnamed: 0,name,number,position,overall
0,Tyson Farago,1,g,0.55
1,Skylar Thomas,3,d,0.63
2,Martín Arguiñarena,6,d,0.61
3,Jordan Murrell,4,d,0.6
4,Raphaël Garcia,2,d,0.45
5,Marco Bustos,22,m,0.61
6,Louis Béland-Goyette,5,m,0.54
7,Diego Gutiérrez,8,m,0.41
8,Michael Petrasso,9,m,0.4
9,Michele Paolucci,17,f,0.3


In [60]:
roster = get_roster_overall('HFX Wanderers FC',stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
roster

Unnamed: 0,image,first,last,number,position,overall,flag,link
0,Akeem-Garcia.jpg,Akeem,Garcia,11,m,0.37,Trinidad and Tobago.png,https://en.wikipedia.org/wiki/Akeem_Garcia
1,Alex-DeCarolis.jpg,Alex,De Carolis,24,d,0.4,Canada.png,https://en.wikipedia.org/wiki/Alex_De_Carolis
2,Andre-Rampersad.jpg,Andre,Rampersad,18,m,0.47,Trinidad and Tobago.png,https://en.wikipedia.org/wiki/Andre_Rampersad
3,André-Bona.jpg,André,Bona,3,d,0.83,France.png,https://en.wikipedia.org/wiki/André_Bona
4,Chakib-Hocine.jpg,Chakib,Hocine,4,d,0.08,Algeria.png,https://en.wikipedia.org/wiki/Chakib_Hocine
5,Chrisnovic-N_sa.jpg,Chrisnovic,N'sa,6,d,0.41,Canada.png,https://en.wikipedia.org/wiki/Chrisnovic_N'sa
6,Christian-Oxner.jpg,Christian,Oxner,50,g,0.54,Canada.png,https://en.wikipedia.org/wiki/Christian_Oxner
7,empty.jpg,Duran,Lee,16,d,0.28,Canada.png,https://en.wikipedia.org/wiki/Duran_Lee
8,Elliot-Simmons.jpg,Elliot,Simmons,8,m,0.27,Canada.png,https://en.wikipedia.org/wiki/Elliot_Simmons
9,Elton-John.jpg,Elton,John,5,m,0.35,Trinidad and Tobago.png,https://en.wikipedia.org/wiki/Elton_John_(foot...


In [62]:
game_info = schedule[schedule['home'] == q1]
game_info

Unnamed: 0,game,home,away
0,I94,Valour FC,York9 FC


In [66]:
q1 = game_info.iloc[0]['home']
q2 = game_info.iloc[0]['away']

'York9 FC'

In [67]:
game_h = get_home_away_comparison(stats,game,q1)
game_a = get_home_away_comparison(stats,game,q2)

In [68]:
h1_roster = get_compare_roster(results,q1,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers)
h1_roster

Unnamed: 0,name,number,position,overall
0,Tyson Farago,1,g,0.55
1,Skylar Thomas,3,d,0.63
2,Martín Arguiñarena,6,d,0.61
3,Jordan Murrell,4,d,0.6
4,Raphaël Garcia,2,d,0.45
5,Marco Bustos,22,m,0.61
6,Louis Béland-Goyette,5,m,0.54
7,Diego Gutiérrez,8,m,0.41
8,Michael Petrasso,9,m,0.4
9,Michele Paolucci,17,f,0.3


In [84]:
h2_roster = get_compare_roster(results,q2,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers)
h2_roster

Unnamed: 0,name,number,position,overall
0,Nathan Ingham,1,g,0.93
1,Luca Gasparotto,13,d,0.83
2,Diyaeddine Abzi,20,d,0.72
3,Morey Doner,3,d,0.66
4,Daniel Gogarty,2,d,0.41
5,Ryan Telfer,18,m,0.69
6,Manuel Aparicio,10,m,0.64
7,Kyle Porter,19,m,0.55
8,Joseph Di Chiara,8,m,0.46
9,Rodrigo Gattas,22,f,0.75


In [70]:
def get_team_history(data,query):
    df = data[data['away'] == query].copy()
    df = df[['d','m','as','hs','away','ar','home','hr']]
    df = df.rename(columns={'as':'hs','hs':'as','away':'home','ar':'hr','home':'away','hr':'ar'})
    db = data[data['home'] == query].copy()
    db = db[['d','m','hs','as','home','hr','away','ar']]
    db = pd.concat([db,df])
    db = db.tail(5)
    db = db.sort_values(by=['m','d'],ascending=False)
    return db

def get_five_game_form(data,query):
    db = get_team_history(data,query)
    db = db.pop('hr')
    a = []
    for i in db:
        if i == 'W':
            j = [1,0,0]
            a.append(j)
        if i == 'L':
            j = [0,1,0]
            a.append(j)
        if i == 'D':
            j = [0,0,1]
            a.append(j)
    db = pd.DataFrame(a,columns=['w','l','d'])
    db = pd.DataFrame(db.sum())
    return db

In [71]:
team1_history = get_team_history(results,q1)
team1_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
97,19,10,0,2,Valour FC,L,Pacific FC,W
83,2,10,1,4,Valour FC,L,Cavalry FC,W
77,21,9,0,0,Valour FC,D,HFX Wanderers FC,D
75,15,9,4,2,Valour FC,W,York9 FC,L
61,24,8,1,2,Valour FC,L,Pacific FC,W


In [72]:
team1_history = get_five_game_form(results,q1)

In [73]:
team1_history

Unnamed: 0,0
w,1
l,3
d,1


In [74]:
for i in team1_history:
    if i == 'W':
        print(3)
    if i == 'L':
        print(0)
    if i == 'D':
        print(1)

In [75]:
team2_history = get_team_history(results,q2)
team2_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
93,16,10,4,0,York9 FC,W,Valour FC,L
88,6,10,0,1,York9 FC,L,Forge FC,W
78,21,9,3,1,York9 FC,W,FC Edmonton,L
71,11,9,2,0,York9 FC,W,Pacific FC,L
69,8,9,1,2,York9 FC,L,Forge FC,W
