In [1]:
import re
import numpy as np
import pandas as pd

In [2]:
def get_string(data):
    data = str(data*100)
    data = data[0:4]
    return data

def get_long_name(string,team_ref):
    for short in team_ref['short']:
        row = team_ref[team_ref['short'] == short]
        if string == short:
            string = row.iloc[0]['team']
    return string

def get_shortest_name(string,team_ref):
    for team in team_ref['team']:
        row = team_ref[team_ref['team'] == team]
        if string == team:
            string = str(row.iloc[0]['short'])
    return string

def get_schedule(data):
    db = data.copy()
    db = db[db['s'] <= 1]
    #db = db.tail(4)
    db = db[['game','home','away']]
    db = index_reset(db)
    db = db.sort_values(by=['game'])
    return db

def fix_db_na(data):
    db = data.copy()
    if db['team'].isnull().values.any():
        for row in range(db.shape[0]):
            if pd.isna(db.iloc[row]['team']) == True:
                print(True)
                db.iloc[row]['team'] = get_long_name(db.iloc[row]['team'],data)
    return db

def index_reset(data):
    data = data.reset_index()
    data.pop('index')
    return data

def get_team_results(data,query):
    db = data[data['home'] == query]
    da = data[data['away'] == query]
    db = pd.concat([db,da])
    db = index_reset(db)
    return db

def get_team_brief(data,query,df):
    db = get_team_results(data,query)
    cols = ['game','s','csh','csa','combined','venue','links']
    for col in cols:
        db.pop(col)
    db = db.sort_values(by=['m','d'])
    db = index_reset(db)
    db['summary'] = '0'
    for i in range(0,db.shape[0]):
        if db.iloc[i]['home'] == query:
            x = df[df['team'] == db.iloc[i]['away']]#.reset_index()
            opponent = x.iloc[0]['short']
            outcome = db.iloc[i]['hr'] + ' A'
        else:
            x = df[df['team'] == db.iloc[i]['home']]#.reset_index() NOT SURE WHY THIS BROKE SUDDENLY
            opponent = x.iloc[0]['short']
            outcome = db.iloc[i]['ar'] + ' A'
        score = str(db.iloc[i]['hs']) + ' - ' + str(db.iloc[i]['as'])
        db.loc[i,'summary'] = outcome + ' ' + score +  ' ' + opponent
    db['team'] = query
    return db

def get_results_brief(data,dc):
    db = pd.DataFrame()
    for team in dc['team']:
        df = get_team_brief(data,team,dc)
        db = pd.concat([db,df])
    db = index_reset(db)
    return db

def get_club_statistics(data,query):
    df = data
    a = []
    cols = df.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                vals = [1,3,3,1,0,0,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
            if df.iloc[row]['hr'] == 'L':
                vals = [1,0,3,0,1,0,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
            if df.iloc[row]['hr'] == 'D':
                vals = [1,1,3,0,0,1,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                vals = [1,3,3,1,0,0,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
            if df.iloc[row]['ar'] == 'L':
                vals = [1,0,3,0,1,0,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
            if df.iloc[row]['ar'] == 'D':
                vals = [1,1,3,0,0,1,df.iloc[row]['as'],df.iloc[row]['hs'],0,0,df.iloc[row]['as'],df.iloc[row]['hs']]
                a.append(vals)
    db= pd.DataFrame(a,columns=['gp','pts','tpp','w','l','d','gf','ga','gfh','gah','gfa','gaa'])
    db = pd.DataFrame(db.sum())
    db = db.T
    return db

def get_standings(data,season,ref):
    db = pd.DataFrame()
    if season == 1:
        data = data[data['s'] <= 1]
    if season == 2:
        data = data[data['s'] > 1]
    teams = ref['team']
    #teams = np.sort(teams,axis=-1)
    for team in teams:
        df = get_team_brief(data,team,ref)
        df = get_club_statistics(df,team)
        ppg = round(df['pts']/df['gp'],2)
        gd = df['gf'] - df['ga']
        df.insert(0,'team',team)
        df.insert(4,'ppg',ppg)
        df.insert(8,'gd',gd)
        db = pd.concat([db,df])
    db = db.sort_values(by=['pts','w','gf'],ascending=False)
    db = index_reset(db)
    db = db.reset_index()
    db = db.rename(columns={'index':'rank'})
    db['rank'] = db['rank'] + 1
    db = db.fillna(0)
    return db

def compare_standings(db,df,dc):
    a = []
    for team in dc['team']:
        rank1 = df[df['team'] == team]
        rank2 = db[db['team'] == team]
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']
        a.append([team,change])
    db = pd.DataFrame(a)
    db = pd.DataFrame({'team': db.iloc[:][0], 'change': db.iloc[:][1]})
    db = db.sort_values(by=['change'],ascending=False)
    db = index_reset(db)
    return db

def clean_team_game(data,db,check): # Fix this section for teams that haven't played yet
    if check == 0:
        df = data.iloc[0]['team'] # Getting the name of the top team
    else:
        df = data.iloc[-1]['team'] # Getting the name of the bottom placed team
    if data.iloc[-1]['gp'] == 0 and check == 1:
        db = pd.DataFrame([(df,0,df,0)],columns=['home','hs','away','as']) # make an empty set if the game is empty
    else:
        df = db[(db['home'] == df) | (db['away'] == df)] # get appropirate game results for specified team
        db = index_reset(df)
        db = db.iloc[0][['home','hs','away','as']]
        db = pd.DataFrame(db)
        db = db.T
    return db

def get_longest_name(da,db,dc,team_ref):
    def get_long(data,dd):
        db = data.copy()
        for team in db['home']:
            row = dd[dd['short'] == team]
            db.at[0,'home'] = row.iloc[0]['team']
        for team in db['away']:
            row = dd[dd['short'] == team]
            db.at[0,'away'] = row.iloc[0]['team']
        return db
    da = get_long(da,team_ref)
    db = get_long(db,team_ref)
    dc = get_long(dc,team_ref)
    teams_in = pd.DataFrame([da.iloc[0]['home'],da.iloc[0]['away'],db.iloc[0]['home'],db.iloc[0]['away'],dc.iloc[0]['home'],dc.iloc[0]['away']],columns=['teams'])
    teams_in = teams_in.teams.unique()
    return teams_in
    
def get_short_name(data,dc):
    for team in data['home']:
        row = dc[dc['team'] == team]
        data.at[0,'home'] = row.iloc[0]['short']
    for team in data['away']:
        row = dc[dc['team'] == team]
        data.at[0,'away'] = row.iloc[0]['short']
    return data

def get_weeks_results(data,standings,team_ref):
    if data.iloc[0]['hr'] == 'E':
        db = pd.DataFrame([('NA',0,'NA',0)],columns=['home','hs','away','as'])
        big_win, top_team, low_team,other_team = db,db,db,db
        goals = 0
        return db,goals,big_win,top_team,low_team,other_team
    df = data
    month = df.iloc[-1]['m']
    week = df.iloc[-1]['d']
    db = df[df['m'] == month]
    db = db[db['d'] >= week - 6]
    db = db.sort_values(by=['game'],ascending=False)
    goals = db['hs'].sum() + db['as'].sum()
    max_home = db[db['hs'] == db['hs'].max()]
    max_away = db[db['as'] == db['as'].max()]
    if max_home.iloc[0]['hs'] > max_away.iloc[0]['as']:
        max_home_win = max_home
    else:
        max_home_win = max_away
    big_win = max_home_win[['home','hs','away','as']]
    big_win = index_reset(big_win)
    big_win = get_short_name(big_win,team_ref)
    big_win = pd.DataFrame(big_win.loc[0])
    big_win = big_win.T
    top_team = clean_team_game(standings,db,0)
    top_team = get_short_name(top_team,team_ref)
    low_team = clean_team_game(standings,db,1)
    low_team = get_short_name(low_team,team_ref)
    teams_in = get_longest_name(big_win,top_team,low_team,team_ref)
    other_team = db[(~db['home'].isin(teams_in)) | (~db['away'].isin(teams_in))]
    other_team = index_reset(other_team)
    other_team = pd.DataFrame(other_team.loc[0][['home','hs','away','as']])
    other_team = other_team.T
    other_team = get_short_name(other_team,team_ref)
    return db,goals,big_win,top_team,low_team,other_team

def get_team_stats(data,query):
    db = data[data['team'] == query]
    names = db['name'].unique()
    information = data.copy()
    db.pop('number')
    db = db.groupby(['name']).sum()
    db.insert(0,'last','empty')
    db.insert(0,'first','empty')
    db.insert(0,'position','empty')
    db.insert(0,'number',0)
    #db.insert(0,'team',team)
    i = 0
    for name in names:
        player = information[information['name'] == name].head(1)
        db.at[name,'first'] = player.iloc[0]['first']
        db.at[name,'last'] = player.iloc[0]['last']
        db.at[name,'number'] = int(player.iloc[0]['number'])
        db.at[name,'position'] = player.iloc[0]['position']
        db.at[name,'pass-acc'] = player.iloc[0]['pass-acc'].mean()
        db.at[name,'cross-acc'] = player.iloc[0]['cross-acc'].mean()
    db = db.reset_index()
    return db

def get_stats_all(data,dc):
    db = pd.DataFrame()
    for team in dc['team']:
        df = get_team_stats(data,team)
        df.insert(0,'team',team)
        db = pd.concat([db,df])
    db = index_reset(db)
    return db

# get associated information for players league wide and calculate an overall score for each position
def get_evaluation(condensed_player_info,full_player_info):
    names = condensed_player_info.name.unique() # grab the list of names at the specified position
    eval_ = condensed_player_info.describe().T # get the evalution scores
    checks = condensed_player_info.columns[4:] # slice away the first three columns (name,number,postion) not needed
    condensed_player_info['overall'] = 0.0 # create the final column overall
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the player name to search for a specific player
    for name in names: # iterate through the names in the lisst
        player = full_player_info[full_player_info['name'] == name].head(1) # get the players details
        a = [] # create an empty array to store the scores
        for check in checks: # iterate through the columns of remaining data
            result = player.iloc[0][check] / eval_['max'][check] # calculate the score for the value found value/max
            a.append(result) # append the result into the list
            score = sum(a) / len(checks) #calculate the final score sum(list) / num of checks
            overall = str(score)
            overall = overall[0:4]
            condensed_player_info.at[name,'overall'] = overall # assign the value as the overall score
    condensed_player_info = condensed_player_info.reset_index() # reset the index, making the name column a column again
    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False) # sort using overall, descending
    return condensed_player_info

def top_tracked(team_stats,tracked):
    if team_stats.minutes.sum() == 0:
        tracked_player_stat = pd.DataFrame([('NA',0,0,0,0)],columns=['team','name','number','minutes','goals'])
        return tracked_player_stat
    df = team_stats.copy()
    cols = ['team','name','position','number','minutes',tracked]
    tracked_player_stat = df[cols]
    #tracked_player_stat = get_evaluation(tracked_player_stat,df)
    tracked_player_stat = tracked_player_stat.sort_values(by=[tracked],ascending=False)
    tracked_player_stat = tracked_player_stat.reset_index()
    tracked_player_stat.pop('index')
    team = tracked_player_stat.pop('team')
    tracked_player_stat.insert(0,'team',team)
    tracked_player_stat = tracked_player_stat[tracked_player_stat[tracked] >= 1]
    rank = tracked_player_stat.index + 1
    tracked_player_stat.insert(0,'rank',rank)
    return tracked_player_stat

def top_position(team_stats,position): # get the forwards in the league
    if team_stats.minutes.sum() == 0:
        if position == 'f':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','chances','assists','shots','s-target','passes','crosses','duels','tackles','overall'])
        if position == 'm':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','assists','touches','passes','pass-acc','crosses','cross-acc','chances','duels','tackles','overall'])
        if position == 'd':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','tackles','t-won','clearances','interceptions','duels','d-won','overall'])
        if position == 'g':
            condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','cs','saves','shots faced','claimed crosses','overall'])
        condensed_player_info = pd.DataFrame([('NA',0,0,0,0,0,0,0,0,0,0,0,0,0,0)],columns=['team','name','number','position','minutes','goals','chances','assists','shots','shots on target','passes','crosses','duels','tackles','overall'])
        return condensed_player_info
    player_information = team_stats.copy() # load player information
    if position == 'f':
        cols = ['team','name','number','position','minutes','goals','chances','assists','shots','s-target','passes','crosses','duels','tackles']
    if position == 'm':
        cols = ['team','name','number','position','minutes','goals','assists','touches','passes','pass-acc','crosses','cross-acc','chances','duels','tackles']
    if position == 'd':
        cols = ['team','name','number','position','minutes','tackles','t-won','clearances','interceptions','duels','d-won']
    if position == 'g':
        cols = ['team','name','number','position','minutes','cs','saves','shots faced','claimed crosses']
    full_player_info = player_information[player_information['position'] == position] # get the forwards where position = f
    condensed_player_info = full_player_info[cols] # select specific columns associated with the evaluation
    condensed_player_info = get_evaluation(condensed_player_info,full_player_info) # condensed Dataframe and full Dataframe being passes
    condensed_player_info = index_reset(condensed_player_info)
    names = condensed_player_info.name.unique() # get the names of the players who fit the criteria
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the name column to make the search possible
    
    for name in names:
        player = full_player_info[full_player_info['name'] == name].head(1) # forwards main purpose is to score goals
        if player.iloc[0]['assists'] > 2.0: # reward getting more than 3 assists
            new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
            new_overall = new_overall[0:4]
            condensed_player_info.at[name,'overall'] = new_overall
        if position == 'm':
            if player.iloc[0]['goals'] >= 5.0: # reward scoring greater than 5 goals
                new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall
            if player.iloc[0]['pass-acc'] >= 0.85: # reward scoring greater than 5 goals
                new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall
        if position == 'f':
            if (player.iloc[0]['goals'] <= 2.0 and player.iloc[0]['minutes'] >= 1000.0): # if player scores less than 2 & has minutes greater than 1000
                new_overall = str(condensed_player_info.at[name,'overall'] - 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall
            if player.iloc[0]['goals'] >= 8.0: # reward scoring greater than 8 goals
                new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall
        if position == 'd':
            if (player.iloc[0]['interceptions'] > 200.0 and player.iloc[0]['minutes'] >= 1000.0): # if player scores less than 2 & has minutes greater than 1000
                new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall
            if player.iloc[0]['d-won'] > 110.0: # reward scoring greater than 8 goals
                new_overall = str(condensed_player_info.at[name,'overall'] + 0.1)
                new_overall = new_overall[0:4]
                condensed_player_info.at[name,'overall'] = new_overall

    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False)
    condensed_player_info = condensed_player_info.reset_index()
    team = condensed_player_info.pop('team')
    condensed_player_info.insert(0,'team',team)
    return condensed_player_info

def top_offenders(data):  # get the offences handed out in the league
    if data.minutes.sum() == 0:
        db = pd.DataFrame([('NA',0,0,0,0,0,0)],columns=['team','name','number','minutes','yellow','red','f-conceded'])
        return db
    player_information = data.copy()
    cols = ['team','name','position','number','minutes','yellow','red','f-conceded']
    df = player_information
    db = df[cols]
    db = get_evaluation(db,df)
    db = db.sort_values(by=['red','yellow'],ascending=False)
    db = db.reset_index()
    db.pop('index')
    team = db.pop('team')
    db.insert(0,'team',team)
    return db

def get_team_form(data,query):
    db = data[data['team'] == query]
    db = pd.DataFrame(db['summary'])
    return db

def get_form_results(data,dc):
    db = pd.DataFrame()
    form = get_results_brief(data[data['s'] <= 1],dc)
    teams = data.home.unique()
    teams = np.sort(teams,axis=-1)
    for team in teams:
        df = get_team_form(form,team)
        #print(team,'\n',df)
        db[team] = pd.Series(df['summary'].values)
    db = db.T
    db = db.reset_index()
    db = db.fillna('E')
    return db

def get_roster(query,stats,team_ref): # use team stats to get the player information
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query]
    roster = roster[['name','number','position']]
    roster.insert(3,'overall',0)
    roster = index_reset(roster)
    return roster

def get_home_away_comparison(stats,game,team):
    db = stats[stats['game'] == game].copy()
    db = db[db['team'] == team]
    db = db.sort_values(by=['minutes'],ascending=False)
    db = db#[0:11]
    db = db['name']
    return db

def get_compare_roster(results,query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info):
    roster = get_roster_overall(query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
    def get_player(data,string):
        dz = data[data['position'] == string]
        dz = dz[['first','last','number','position','overall']]
        dz.insert(0,'name',dz['first'] + ' ' + dz['last'])
        dz.pop('first')
        dz.pop('last')
        return dz
    dk = get_player(roster,'g')
    dk = dk.sort_values(by=['overall'],ascending=False)
    dd = get_player(roster,'d')
    dd = dd.sort_values(by=['overall'],ascending=False)
    dm = get_player(roster,'m')
    dm = dm.sort_values(by=['overall'],ascending=False)
    df = get_player(roster,'f')
    df = df.sort_values(by=['overall'],ascending=False)
    db = pd.concat([dk[0:1],dd[0:4],dm[0:4],df[0:2]])
    db = index_reset(db)
    return db

def get_roster_overall(query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info): # use team stats to get the player information
    def get_score(data,name):
        db = data[data['name'] == name]
        if db.empty:
            previous = player_info[player_info['name'] == name]
            if previous.empty:
                db = 0
            else:
                previous = previous['overall'].values
                db = previous[0]
        else:
            db = db['overall'].values
            db = db[0]
        return db
    def get_image(data,name):
        db = data[data['name'] == name]
        if db['image'].empty:
            db = 'empty.jpg'
        else:
            db = db['image'].values
            db = db[0]
        return db
    def get_link(data,name):
        db = data[data['name'] == name]
        if db['link'].empty:
            db = 'https://en.wikipedia.org/wiki/Canadian_Premier_League'
        else:
            db = db['link'].values
            db = db[0]
        return db
    def get_flag(data,name):
        db = data[data['name'] == name]
        if db['flag'].empty:
            db = 'empty.png'
        else:
            db = db['flag'].values
            db = db[0]
        return db
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query].copy()
    roster = roster[['name','first','last','number','position']] # scale the dataframe down to what we need
    #roster.insert(3,'overall',a)
    a = []
    b = []
    c = []
    d = []
    for i in range(0,roster.shape[0]):
        if roster.iloc[i]['position'] == 'f':
            score = str(get_score(rated_forwards,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'm':
            score = str(get_score(rated_midfielders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'd':
            score = str(get_score(rated_defenders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'g':
            score = str(get_score(rated_keepers,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
    roster['overall'] = a
    roster['flag'] = c
    roster['link'] = d
    roster.insert(0,'image',b)
    #roster['image'] = b
    roster = index_reset(roster)
    roster.pop('name')
    roster = roster.sort_values(by=['overall'],ascending=False)
    return roster

def get_power_rankings(standings,standings_old,team_ref,results,previous_rankings):
    a = []
    for team in team_ref['team']:
        old_rank = previous_rankings[previous_rankings['team'] == team]
        old_rank = old_rank['rank'].values
        old_rank = old_rank[0]
        form = get_five_game_form(results,team)
        form = str(round(form.at['w',0],1))+'-'+str(form.at['l',0])+'-'+str(form.at['d',0])
        crest = team_ref[team_ref['team'] == team]
        colour = crest['colour'].values
        colour = colour[0]
        crest = crest['crest'].values
        crest = crest[0]
        
        rank1 = standings_old[standings_old['team'] == team]
        rank2 = standings[standings['team'] == team]
        
        if rank1.iloc[0]['rank'] == 1:
            bonus = 4
        elif rank1.iloc[0]['rank'] == 2:
            bonus = 3
        elif rank1.iloc[0]['rank'] == 3:
            bonus = 2
        else:
            bonus =0
        
        if standings.iloc[0]['gp'] == 0:
            bonus = 0
            
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = (rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']) * - 1
            
        if rank1.iloc[0]['gd'] == rank2.iloc[0]['gd']:
            gd_bonus = 0
        else:
            gd_bonus = (rank1.iloc[0]['gd'] - rank2.iloc[0]['gd']) * - 1
        
        if rank1.iloc[0]['ga'] == rank2.iloc[0]['ga']:
            ga_nerf = 0
        else:
            ga_nerf = (rank1.iloc[0]['ga'] - rank2.iloc[0]['ga']) * - 1
            
        if rank1.iloc[0]['w'] == rank2.iloc[0]['w']:
            w_bonus = 0
        else:
            w_bonus = (rank1.iloc[0]['w'] - rank2.iloc[0]['w']) * - 1
        
        goal_bonus = gd_bonus - ga_nerf
        change = change + bonus + goal_bonus + w_bonus
        
        a.append([team,form,old_rank,change,goal_bonus,w_bonus,crest,colour])
    power_rankings = pd.DataFrame(a,columns = ['team','form','old_rank','change','goal_bonus','w_bonus','crest','colour'])
    power_rankings = power_rankings.sort_values(by=['change'],ascending=False)
    power_rankings = index_reset(power_rankings)
    rank = power_rankings.index + 1
    power_rankings.insert(0,'rank',rank)
    power_rankings['previous'] = (power_rankings['rank'] - power_rankings['old_rank'])*-1
    return power_rankings

def get_best_eleven(team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info):
    def get_image(data,name):
        db = data[data['name'] == name]
        if db['image'].empty:
            db = 'empty.jpg'
        else:
            db = db['image'].values
            db = db[0]
        return db
    def get_link(data,name):
        db = data[data['name'] == name]
        if db['link'].empty:
            db = 'https://en.wikipedia.org/wiki/Canadian_Premier_League'
        else:
            db = db['link'].values
            db = db[0]
        return db
    def get_flag(data,name):
        db = data[data['name'] == name]
        if db['flag'].empty:
            db = 'empty.png'
        else:
            db = db['flag'].values
            db = db[0]
        return db
    
    check = team_stats.describe()
    if check.loc['max']['minutes'] == 0:
        best_eleven = pd.read_csv('datasets/2019/cpl-2019-best_eleven.csv')
        #best_eleven = pd.DataFrame([['empty.jpg','empty.png',0,'NA',0,'NA','NA','https://canpl.ca/']],columns=['image','flag','number','position','overall','first','last','link'])
        #best_eleven = pd.concat([best_eleven]*11)
        return best_eleven
    else:
        roster = team_stats.copy()
        roster = roster[['name','first','last']]

        top_keeper = rated_keepers.head(1)
        top_keeper = top_keeper[['name','number','position','overall']]
        top_defenders = rated_defenders.iloc[0:3][['name','number','position','overall']]
        top_midfielders = rated_midfielders.iloc[0:5][['name','number','position','overall']]
        top_forwards = rated_forwards.iloc[0:2][['name','number','position','overall']]
        best_eleven = pd.DataFrame(columns=['name','number','position','overall'])
        best_eleven = pd.concat([best_eleven,top_keeper,top_defenders,top_midfielders,top_forwards])
        a,b,c,d,e = [],[],[],[],[]


        names = best_eleven['name'].values

        for i in range(0,best_eleven.shape[0]):
            player = roster[roster['name'] == best_eleven.iloc[i]['name']]
            player= index_reset(player)
            first = player.iloc[0]['first']
            last = player.iloc[0]['last']
            a.append(first)
            b.append(last)
            c.append(get_image(player_info,best_eleven.iloc[i]['name']))
            d.append(get_flag(player_info,best_eleven.iloc[i]['name']))
            e.append(get_link(player_info,best_eleven.iloc[i]['name']))

        best_eleven.insert(0,'image',c)
        best_eleven.insert(1,'first',a)
        best_eleven.insert(2,'last',b)
        best_eleven.insert(3,'flag',d)
        best_eleven['link'] = e
        best_eleven.pop('name')
        best_eleven = index_reset(best_eleven)
        return best_eleven
    
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

def get_team_history(data,query):
    df = data[data['away'] == query].copy()
    df = df[['d','m','as','hs','away','ar','home','hr']]
    df = df.rename(columns={'as':'hs','hs':'as','away':'home','ar':'hr','home':'away','hr':'ar'})
    db = data[data['home'] == query].copy()
    db = db[['d','m','hs','as','home','hr','away','ar']]
    db = pd.concat([db,df])
    db = db.tail(5)
    db = db.sort_values(by=['m','d'],ascending=False)
    return db

def get_five_game_form(data,query):
    db = get_team_history(data,query)
    db = db.pop('hr')
    a = []
    for i in db:
        if i == 'W':
            j = [1,0,0]
            a.append(j)
        if i == 'L':
            j = [0,1,0]
            a.append(j)
        if i == 'D':
            j = [0,0,1]
            a.append(j)
    db = pd.DataFrame(a,columns=['w','l','d'])
    db = pd.DataFrame(db.sum())
    return db

In [3]:
team_ref = pd.read_csv('datasets/teams.csv')

In [4]:
year = input('enter the year: ')

enter the year: 2020


In [5]:
results = pd.read_csv(f'datasets/{year}/cpl-{year}-results.csv')
stats = pd.read_csv(f'datasets/{year}/cpl-{year}-stats.csv')
player_info = pd.read_csv(f'datasets/{year}/player-{year}-info.csv')

In [6]:
if year == '2019':
    team_ref = team_ref[1:]
    results_old = results[:-7].copy()
else:
    results_old = results[results['hr'] != 'E'].copy()

In [7]:
results.tail(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
3,II4,1,25,5,0,0,Valour FC,E,Atletico Ottawa,E,0,0,5-25-2019 Valour FC E 0-0 D Atletico Ottawa,IG Field,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
4,II5,1,4,6,0,0,FC Edmonton,E,Valour FC,E,0,0,6-4-2019 FC Edmonton E 0-0 D Valour FC,Clarke Stadium,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
5,II6,1,4,6,0,0,Atletico Ottawa,E,York9 FC,E,0,0,6-4-2019 Atletico Ottawa E 0-0 D York9 FC,TD Place Stadium,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
6,II7,1,4,6,0,0,Cavalry FC,E,Pacific FC,E,0,0,6-4-2019 Cavalry FC E 0-0 D Pacific FC,ATCO Field,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
7,II8,1,4,6,0,0,HFX Wanderers FC,E,Forge FC,E,0,0,6-4-2019 HFX Wanderers FC E 0-0 D Forge FC,Wanderers Grounds,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...


In [8]:
results_old.tail(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links


In [9]:
results_diff = pd.concat([results, results_old]).drop_duplicates(keep=False)

In [10]:
results_diff.head(2)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
0,II1,1,25,5,0,0,Forge FC,E,Cavalry FC,E,0,0,5-25-2019 Forge FC E 0-0 D Cavalry FC,Tim Hortons Field,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
1,II2,1,25,5,0,0,Pacific FC,E,FC Edmonton,E,0,0,5-25-2019 Pacific FC E 0-0 D FC Edmonton,Westhills Stadium,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...


In [11]:
standings = get_standings(results,1,team_ref)
if results_old.empty == True:
    standings_old = get_standings(results,1,team_ref)
else:
    standings_old = get_standings(results_old,1,team_ref)

In [12]:
standings

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Atletico Ottawa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Cavalry FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,FC Edmonton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Forge FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,HFX Wanderers FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6,Pacific FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7,Valour FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,York9 FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
standings_old

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Atletico Ottawa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,Cavalry FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3,FC Edmonton,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4,Forge FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,HFX Wanderers FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,6,Pacific FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,7,Valour FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,8,York9 FC,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
compare_standings_test = compare_standings(standings,standings_old,team_ref)

In [15]:
compare_standings_test

Unnamed: 0,team,change
0,Atletico Ottawa,0
1,Cavalry FC,0
2,FC Edmonton,0
3,Forge FC,0
4,HFX Wanderers FC,0
5,Pacific FC,0
6,Valour FC,0
7,York9 FC,0


In [16]:
previous_rankings = pd.read_csv(f'datasets/{year}/cpl-{year}-power_rankings.csv')
previous_rankings.to_csv(f'datasets/{year}/cpl-{year}-previous_rankings.csv',index=False)

In [17]:
power_rankings = get_power_rankings(standings,standings_old,team_ref,results,previous_rankings)
power_rankings

Unnamed: 0,rank,team,form,old_rank,change,goal_bonus,w_bonus,crest,colour,previous
0,1,Atletico Ottawa,0.0-0.0-0.0,1,0,0,0,Atletico_Ottawa_nav.png,cpl-ao,0
1,2,Cavalry FC,0.0-0.0-0.0,2,0,0,0,cavalry_fc_nav.png,cpl-cfc,0
2,3,FC Edmonton,0.0-0.0-0.0,3,0,0,0,FC_Edmonton_nav.png,cpl-fce,0
3,4,Forge FC,0.0-0.0-0.0,4,0,0,0,Forge_FC_nav.png,cpl-ffc,0
4,5,HFX Wanderers FC,0.0-0.0-0.0,5,0,0,0,HFX_Wanderers_FC.png,cpl-hfx,0
5,6,Pacific FC,0.0-0.0-0.0,6,0,0,0,Pacific_FC_nav.png,cpl-pfc,0
6,7,Valour FC,0.0-0.0-0.0,7,0,0,0,Valour_FC_nav.png,cpl-vfc,0
7,8,York9 FC,0.0-0.0-0.0,8,0,0,0,York_9_FC_nav.png,cpl-y9,0


In [18]:
game_week, goals, big_win, top_result, low_result,other_result = get_weeks_results(results[results['s'] <= 1],standings,team_ref)
game_week

Unnamed: 0,home,hs,away,as
0,,0,,0


In [19]:
big_win

Unnamed: 0,home,hs,away,as
0,,0,,0


In [20]:
top_result

Unnamed: 0,home,hs,away,as
0,,0,,0


In [21]:
other_result

Unnamed: 0,home,hs,away,as
0,,0,,0


In [22]:
low_result

Unnamed: 0,home,hs,away,as
0,,0,,0


In [23]:
goals

0

In [24]:
#championship = get_standings(results,2)
#championship = championship[championship['gp'] > 1]
#championship

In [25]:
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)

In [26]:
#standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)

In [27]:
results_brief = get_results_brief(results,team_ref)

In [28]:
results_brief.head(5)

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,25,5,0,0,Valour FC,E,Atletico Ottawa,E,E A 0 - 0 VFC,Atletico Ottawa
1,4,6,0,0,Atletico Ottawa,E,York9 FC,E,E A 0 - 0 Y9,Atletico Ottawa
2,25,5,0,0,Forge FC,E,Cavalry FC,E,E A 0 - 0 FFC,Cavalry FC
3,4,6,0,0,Cavalry FC,E,Pacific FC,E,E A 0 - 0 PFC,Cavalry FC
4,25,5,0,0,Pacific FC,E,FC Edmonton,E,E A 0 - 0 PFC,FC Edmonton


In [29]:
#results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)

In [30]:
stats.head(5)

Unnamed: 0,game,team,position,number,name,first,last,minutes,touches,passes,...,clearances,interceptions,yellow,red,fouls won,fouls conceded,shots faced,saves,claimed crosses,cs
0,II1,Atletico Ottawa,m,0,Ajay Khabra,Ajay,Khabra,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,II1,Atletico Ottawa,m,0,Antoine Coupland,Antoine,Coupland,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,II1,Atletico Ottawa,m,0,Ben Fisk,Ben,Fisk,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,II1,Atletico Ottawa,m,0,Bernardinho,Bernardinho,,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,II1,Atletico Ottawa,d,0,Brandon John,Brandon,John,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [31]:
stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,fouls won,fouls conceded,shots faced,saves,claimed crosses,cs
count,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,...,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0
mean,9.427632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,11.555568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
team_stats = get_stats_all(stats,team_ref)

In [33]:
team_stats

Unnamed: 0,team,name,number,position,first,last,minutes,touches,passes,pass-acc,...,clearances,interceptions,yellow,red,fouls won,fouls conceded,shots faced,saves,claimed crosses,cs
0,Atletico Ottawa,Ajay Khabra,0,m,Ajay,Khabra,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Atletico Ottawa,Antoine Coupland,0,m,Antoine,Coupland,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Atletico Ottawa,Ben Fisk,0,m,Ben,Fisk,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Atletico Ottawa,Bernardinho,0,m,Bernardinho,,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Atletico Ottawa,Brandon John,0,d,Brandon,John,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,York9 FC,Morey Doner,3,d,Morey,Doner,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
148,York9 FC,Nathan Ingham,29,g,Nathan,Ingham,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
149,York9 FC,Nicholas Hamilton,0,f,Nicholas,Hamilton,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
150,York9 FC,Roger Thompson,6,d,Roger,Thompson,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
team_stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,fouls won,fouls conceded,shots faced,saves,claimed crosses,cs
count,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,...,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0,152.0
mean,9.427632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,11.555568,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,77.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
rated_goalscorers = top_tracked(team_stats,'goals')
rated_assists = top_tracked(team_stats,'assists')

In [36]:
rated_assists.describe()

Unnamed: 0,name,number,minutes,goals
count,1.0,1.0,1.0,1.0
mean,0.0,0.0,0.0,0.0
std,,,,
min,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0


In [37]:
rated_goalscorers.describe()

Unnamed: 0,name,number,minutes,goals
count,1.0,1.0,1.0,1.0
mean,0.0,0.0,0.0,0.0
std,,,,
min,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0


In [38]:
'''rated_g10 = rated_goalscorers.head(10)
rated_g10 = rated_g10[['rank','team','name','position','goals']]
rated_g10'''

"rated_g10 = rated_goalscorers.head(10)\nrated_g10 = rated_g10[['rank','team','name','position','goals']]\nrated_g10"

In [39]:
rated_forwards = top_position(team_stats,'f')
rated_midfielders = top_position(team_stats,'m')
rated_defenders = top_position(team_stats,'d')
rated_keepers = top_position(team_stats,'g')
rated_offenders = top_offenders(team_stats)

In [40]:
rated_forwards.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,shots on target,passes,crosses,duels,tackles,overall
0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [41]:
rated_midfielders.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,shots on target,passes,crosses,duels,tackles,overall
0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
rated_defenders.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,shots on target,passes,crosses,duels,tackles,overall
0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [43]:
rated_keepers

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,shots on target,passes,crosses,duels,tackles,overall
0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
rated_offenders.head(2)

Unnamed: 0,team,name,number,minutes,yellow,red,f-conceded
0,,0,0,0,0,0,0


In [45]:
best_eleven = get_best_eleven(team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
best_eleven

Unnamed: 0,image,first,last,flag,number,position,overall,link
0,Nathan-Ingham.jpg,Nathan,Ingham,Canada.png,1,g,0.93,https://en.wikipedia.org/wiki/Nathan_Ingham
1,Mélé-Temguia.jpg,Mélé,Temguia,Germany.png,8,d,0.96,https://en.wikipedia.org/wiki/Mélé_Temguia
2,Luca-Gasparotto.jpg,Luca,Gasparotto,Canada.png,13,d,0.92,https://en.wikipedia.org/wiki/Luca_Gasparotto
3,André-Bona.jpg,André,Bona,France.png,3,d,0.92,https://en.wikipedia.org/wiki/André_Bona
4,Marco-Bustos.jpg,Marco,Bustos,Canada.png,22,m,0.9,https://en.wikipedia.org/wiki/Marco_Bustos
5,Tristan-Borges.jpg,Tristan,Borges,Canada.png,4,m,0.89,https://en.wikipedia.org/wiki/Tristan_Borges
6,Julian-Buscher.jpg,Julian,Büscher,Germany.png,8,m,0.87,https://en.wikipedia.org/wiki/Julian_Büscher
7,Ryan-Telfer.jpg,Ryan,Telfer,Canada.png,18,m,0.78,https://en.wikipedia.org/wiki/Ryan_Telfer
8,Kyle-Bekker.jpg,Kyle,Bekker,Canada.png,3,m,0.77,https://en.wikipedia.org/wiki/Kyle_Bekker
9,Terran-Campbell.jpg,Terran,Campbell,Canada.png,14,f,0.79,https://en.wikipedia.org/wiki/Terran_Campbell


In [46]:
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

In [47]:
schedule = get_schedule(results_diff)
schedule

Unnamed: 0,game,home,away
0,II1,Forge FC,Cavalry FC
1,II2,Pacific FC,FC Edmonton
2,II3,York9 FC,HFX Wanderers FC
3,II4,Valour FC,Atletico Ottawa
4,II5,FC Edmonton,Valour FC
5,II6,Atletico Ottawa,York9 FC
6,II7,Cavalry FC,Pacific FC
7,II8,HFX Wanderers FC,Forge FC


In [48]:
# home side
q1 = schedule.iloc[0]['home']
# away side
q2 = schedule.iloc[0]['away']
print(q1,q2)

Forge FC Cavalry FC


In [49]:
compare = get_team_comparison(results_brief,q1,q2)

In [50]:
compare

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,25,5,0,0,Forge FC,E,Cavalry FC,E,E A 0 - 0 CFC,Forge FC


In [51]:
t1_x, t1_y = get_NB_data(compare,q1)
t2_x, t2_y = get_NB_data(compare,q2)

### Game Prediction

use Gaussian Naive Bayes model to predict the outcome of a home match.

In [52]:
#Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB,BernoulliNB
import statistics

In [53]:
def get_gnb_prediction(query,x,y,result):
    
    gnb = GaussianNB()
    bnb = BernoulliNB()
    # Train the model using the training sets
    
    gnb.fit(x,y)
    bnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    gnb_pred = np.round(gnb.predict_proba([result])[:, 1],decimals=2)
    bnb_pred = np.round(bnb.predict_proba([result])[:, 1],decimals=2)
    
    pred = round((gnb_pred[0] + bnb_pred[0]) / 2,2)
    #print(gnb_pred[0], bnb_pred[0], pred)
    
    return pred

def get_match_prediction_result(query,x,y,array):
    prediction = get_gnb_prediction(query,x,y,array)
    return prediction

def get_match_prediction(q1,q2,x1,y1,x2,y2):
    if len(x1) == 0:
        x = round(1/3,2)
        home_win, away_win,draw = x,x,x
        return home_win,away_win,draw
    home_win = get_match_prediction_result(q1,x1,y1,[1,2])
    draw = get_match_prediction_result(q1,x1,y1,[1,1])
    away_win = get_match_prediction_result(q2,x2,y2,[2,2])
    return home_win, draw, away_win

In [54]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y)

In [55]:
print(q1,'\nwin probability: ', round(home_win,2))

Forge FC 
win probability:  0.33


In [56]:
print(q2,'\nwin probability: ', round(away_win,2))

Cavalry FC 
win probability:  0.33


In [57]:
print('Draw probability: ', round(draw,2))

Draw probability:  0.33


In [58]:
round(home_win + draw + away_win,1)

1.0

In [59]:
team_form_results = get_form_results(results,team_ref)
team_form_results

Unnamed: 0,index,0,1
0,Atletico Ottawa,E A 0 - 0 VFC,E A 0 - 0 Y9
1,Cavalry FC,E A 0 - 0 FFC,E A 0 - 0 PFC
2,FC Edmonton,E A 0 - 0 PFC,E A 0 - 0 VFC
3,Forge FC,E A 0 - 0 CFC,E A 0 - 0 HFX
4,HFX Wanderers FC,E A 0 - 0 Y9,E A 0 - 0 FFC
5,Pacific FC,E A 0 - 0 FCE,E A 0 - 0 CFC
6,Valour FC,E A 0 - 0 AO,E A 0 - 0 FCE
7,York9 FC,E A 0 - 0 HFX,E A 0 - 0 AO


In [60]:
forwards_19 =pd.read_csv('datasets/2019/cpl-2019-forwards.csv')
midfielders_19 =pd.read_csv('datasets/2019/cpl-2019-midfielders.csv')
defenders_19 =pd.read_csv('datasets/2019/cpl-2019-defenders.csv')
keepers_19 =pd.read_csv('datasets/2019/cpl-2019-keepers.csv')

In [61]:
midfielders_19.head(2)

Unnamed: 0,team,name,number,position,minutes,goals,assists,touches,passes,pass-acc,crosses,cross-acc,chances,duels,tackles,overall
0,VFC,Marco Bustos,22,m,2249.0,7.0,3.0,1417.0,912.0,0.93,74.0,0.0,43.0,299.0,22.0,0.9
1,FFC,Tristan Borges,4,m,1884.0,13.0,5.0,1307.0,680.0,0.74,77.0,0.0,42.0,356.0,59.0,0.89


In [60]:
def update_player_info(player_info,rated_forwards,rated_midfielders,rated_defenders,rated_keepers):
    def get_player_score(data,name):
        name = [name]
        if data[data['name'].isin(name)].empty:
            pass
        else:
            overall = data[data['name'].isin(name)]
            new_overall = overall['overall'].values
            return new_overall
    combine = [rated_forwards,rated_midfielders,rated_defenders,rated_keepers]
    names = player_info['name'].values
    a = []
    for name in names:
        j = 1
        for i in range(0,4):
            score = get_player_score(combine[i],name)
            if score == None:
                j += 1
                pass
            if score != None:
                overall = score[0]
                a.append(overall)
            if j == 5:
                overall = 0.0
                a.append(overall)
    player_info['overall'] = a
    return player_info

In [61]:
player_info = update_player_info(player_info,forwards_19,midfielders_19,defenders_19,keepers_19)

NameError: name 'forwards_19' is not defined

In [70]:
standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)
power_rankings.to_csv(f'datasets/{year}/cpl-{year}-power_rankings.csv',index=False)
results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)
schedule.to_csv(f'datasets/{year}/cpl-{year}-schedule.csv',index=False)
team_stats.to_csv(f'datasets/{year}/cpl-{year}-team_stats.csv',index=False)
rated_forwards.to_csv(f'datasets/{year}/cpl-{year}-forwards.csv',index=False)
rated_midfielders.to_csv(f'datasets/{year}/cpl-{year}-midfielders.csv',index=False)
rated_defenders.to_csv(f'datasets/{year}/cpl-{year}-defenders.csv',index=False)
rated_keepers.to_csv(f'datasets/{year}/cpl-{year}-keepers.csv',index=False)
rated_offenders.to_csv(f'datasets/{year}/cpl-{year}-discipline.csv',index=False)
rated_goalscorers.to_csv(f'datasets/{year}/cpl-{year}-rated_goalscorers.csv',index=False)
rated_assists.to_csv(f'datasets/{year}/cpl-{year}-rated_assists.csv',index=False)
team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)
#h1_roster.to_csv(f'datasets/{year}/teams/cpl-{year}-{q1}_final_scores.csv')
team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)
best_eleven.to_csv(f'datasets/{year}/cpl-{year}-best_eleven.csv',index=False)
player_info.to_csv(f'datasets/{year}/player-{year}-info.csv',index=False)

In [74]:
# home side
q1 = schedule.iloc[3]['home']
# away side
q2 = schedule.iloc[3]['away']
print(q1,q2)

Valour FC Atletico Ottawa


In [75]:
game_info = schedule[schedule['home'] == q1]
game_info = game_info[game_info['away'] == q2]
game_info

Unnamed: 0,game,home,away
3,II4,Valour FC,Atletico Ottawa


In [76]:
game_h = get_home_away_comparison(stats,game_info,q1)
game_a = get_home_away_comparison(stats,game_info,q2)

  res_values = method(rvalues)


In [77]:
h1_roster = get_compare_roster(results,q1,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
h1_roster

Unnamed: 0,name,number,position,overall
0,Tyson Farago,1,g,0.55
1,Raphaël Garcia,2,d,0.45
2,Yohan LeBourhis,25,d,0.21
3,Amir Soto,0,d,0.0
4,Andrew Jean-Baptiste,0,d,0.0
5,Diego Gutiérrez,8,m,0.41
6,Raphael Ohin,27,m,0.35
7,Dylan Carreiro,10,m,0.33
8,Nicolás Galvis,14,m,0.16
9,Michele Paolucci,17,f,0.3


In [78]:
h2_roster = get_compare_roster(results,q2,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
h2_roster

  res_values = method(rvalues)


Unnamed: 0,name,number,position,overall
0,Nacho Zabal,0,g,0.0
1,Brandon John,0,d,0.0
2,Vashon Neufville,0,d,0.0
3,Ben Fisk,0,m,0.69
4,Ajay Khabra,0,m,0.35
5,Malyk Hamilton,0,m,0.22
6,Antoine Coupland,0,m,0.0
7,Kunle Dada-Luke,0,f,0.0


In [66]:
team1_history = get_team_history(results,q1)
team1_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
7,4,6,0,0,Forge FC,E,HFX Wanderers FC,E
0,25,5,0,0,Forge FC,E,Cavalry FC,E


In [67]:
team1_history = get_five_game_form(results,q1)

In [68]:
team1_history

Unnamed: 0,0
w,0.0
l,0.0
d,0.0


In [69]:
for i in team1_history:
    if i == 'W':
        print(3)
    if i == 'L':
        print(0)
    if i == 'D':
        print(1)

In [70]:
team2_history = get_team_history(results,q2)
team2_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
6,4,6,0,0,Cavalry FC,E,Pacific FC,E
0,25,5,0,0,Cavalry FC,E,Forge FC,E
