In [1]:
import re
import numpy as np
import pandas as pd

In [47]:
def get_string(data):
    data = str(data*100)
    data = data[0:4]
    return data

def get_long_name(string,team_ref):
    for short in team_ref['short']:
        row = team_ref[team_ref['short'] == short]
        if string == short:
            string = row.iloc[0]['team']
    return string

def get_shortest_name(string,team_ref):
    for team in team_ref['team']:
        row = team_ref[team_ref['team'] == team]
        if string == team:
            string = str(row.iloc[0]['short'])
    return string

def get_schedule(data):
    schedule = data.copy()
    schedule = schedule[schedule['s'] <= 1]
    #db = db.tail(4)
    schedule = schedule[['game','home','away']]
    schedule = index_reset(schedule)
    schedule = schedule.sort_values(by=['game'])
    return schedule

def fix_db_na(data):
    db = data.copy()
    if db['team'].isnull().values.any():
        for row in range(db.shape[0]):
            if pd.isna(db.iloc[row]['team']) == True:
                print(True)
                db.iloc[row]['team'] = get_long_name(db.iloc[row]['team'],data)
    return db

def index_reset(data):
    data = data.reset_index()
    data.pop('index')
    return data

def get_team_results(results_db,query): # get all the games played by the specific team
    db = results_db[results_db['home'] == query] # all home games
    da = results_db[results_db['away'] == query] # all away games
    db = pd.concat([db,da]) # create dataframe from the combined search
    db = index_reset(db)
    return db # return the dataframe

def get_team_brief(results_db,query,df):
    db = get_team_results(results_db,query) # get team games function
    cols = ['game','s','csh','csa','combined','venue','links'] # create list to pop specific unecessary columns
    for col in cols:
        db.pop(col)
    db = db.sort_values(by=['m','d']) # sort the values by month and day
    db = index_reset(db) # reset the index and drop the column named index that is created  from the old index
    db['summary'] = '0' # create summary column holding 0 values
    for i in range(0,db.shape[0]): # sort through the games
        if db.iloc[i]['home'] == query:
            away_team = df[df['team'] == db.iloc[i]['away']] # get the opponents name
            opponent = away_team.iloc[0]['short'] # convert the name to the short name
            outcome = db.iloc[i]['hr'] + ' H' # combine home result with H
        else:
            home_team = df[df['team'] == db.iloc[i]['home']] # get the opponents name
            opponent = home_team.iloc[0]['short'] # convert the name to the short name
            outcome = db.iloc[i]['ar'] + ' A' # combine away result with A
        score = str(db.iloc[i]['hs']) + ' - ' + str(db.iloc[i]['as'])
        db.loc[i,'summary'] = outcome + ' ' + score +  ' ' + opponent
    db['team'] = query # create team column with holding the team's name in all rows
    return db # return the dataframe

def get_results_brief(results,team_ref):
    db = pd.DataFrame()
    for team in team_ref['team']:
        df = get_team_brief(results,team,team_ref)
        db = pd.concat([db,df])
    results_brief = index_reset(db)
    return results_brief

def get_club_statistics(team_results,query):
    
    df = team_results
    a = [] 
    cols = df.columns # get the columns of the dataframe
    
    def get_game_results(result_check):
        if result_check == 'W':
            r,w,l,d = 3,1,0,0
        elif result_check == 'L':
            r,w,l,d = 0,0,1,0
        elif result_check == 'D':
            r,w,l,d = 1,0,0,1
        else:
            r,w,l,d = 0,0,0,0
        return r,w,l,d
    for row in range(0,df.shape[0]):
        # cycling through to get the appropiate data for each game
        # depending on the results of, w,l or d 
        # game played, win/loss/draw/ points, possible points,win,loss,draw,home-score,away-score,home-score,away-score
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] != 'E':
                points,w,l,d = get_game_results(df.iloc[row]['hr'])
                vals = [1,points,3,w,l,d,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] != 'E':
                points,w,l,d = get_game_results(df.iloc[row]['ar'])
                vals = [1,points,3,w,l,d,df.iloc[row]['hs'],df.iloc[row]['as'],df.iloc[row]['hs'],df.iloc[row]['as'],0,0]
                a.append(vals)
    db= pd.DataFrame(a,columns=['gp','pts','tpp','w','l','d','gf','ga','gfh','gah','gfa','gaa'])
    db = pd.DataFrame(db.sum())
    db = db.T
    return db

def get_standings(results,season_number,team_ref):
    standings = pd.DataFrame()
    # select the appropriate season, regular/championship
    if season_number == 1:
        results_db = results[results['s'] <= 1]
    if season_number == 2:
        results_db = results[results['s'] > 1]
    teams = team_ref['team']
    #teams = np.sort(teams,axis=-1)
    for team in teams: # loop through the teams of the league
        team_results = get_team_brief(results_db,team,team_ref)
        team_results = get_club_statistics(team_results,team)
        ppg = round(team_results['pts']/team_results['gp'],2) # calculate points per game
        gd = team_results['gf'] - team_results['ga'] #  calculate goal differential
        team_results.insert(0,'team',team)
        team_results.insert(4,'ppg',ppg)
        team_results.insert(8,'gd',gd)
        standings = pd.concat([standings,team_results])
    standings = standings.sort_values(by=['pts','w','gf'],ascending=False)
    standings = index_reset(standings)
    standings = standings.reset_index()
    standings = standings.rename(columns={'index':'rank'})
    standings['rank'] = standings['rank'] + 1
    standings = standings.fillna(0)
    
    columns = standings.select_dtypes(include=['float']).columns
    for column in columns:
        if column == 'ppg':
            continue
        standings[column] = standings[column].astype(int)
    
    return standings

def compare_standings(standings_current,standings_old,team_ref):
    # getting the change in team standings between current week and previous week
    a = []
    for team in team_ref['team']:
        rank1 = standings_old[standings_old['team'] == team] # get team's previous rank
        rank2 = standings_current[standings_current['team'] == team] # get teams current rank
        # calculate the change in team's ranking
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']
        a.append([team,change])
    current_rankings = pd.DataFrame(a)
    current_rankings = pd.DataFrame({'team': current_rankings.iloc[:][0], 'change': current_rankings.iloc[:][1]})
    current_rankings = current_rankings.sort_values(by=['change'],ascending=False) # sort by change
    current_rankings = index_reset(current_rankings)
    return current_rankings

def clean_team_game(data,db,check): # Fix this section for teams that haven't played yet
    if check == 0:
        df = data.iloc[0]['team'] # Getting the name of the top team
    else:
        df = data.iloc[-1]['team'] # Getting the name of the bottom placed team
    if data.iloc[-1]['gp'] == 0 and check == 1:
        db = pd.DataFrame([(df,0,df,0)],columns=['home','hs','away','as']) # make an empty set if the game is empty
    else:
        df = db[(db['home'] == df) | (db['away'] == df)] # get appropirate game results for specified team
        db = index_reset(df)
        db = db.iloc[0][['home','hs','away','as']]
        db = pd.DataFrame(db)
        db = db.T
    return db

def get_longest_name(da,db,dc,team_ref):
    def get_long(data,dd):
        db = data.copy()
        for team in db['home']:
            row = dd[dd['short'] == team]
            db.at[0,'home'] = row.iloc[0]['team']
        for team in db['away']:
            row = dd[dd['short'] == team]
            db.at[0,'away'] = row.iloc[0]['team']
        return db
    da = get_long(da,team_ref)
    db = get_long(db,team_ref)
    dc = get_long(dc,team_ref)
    teams_in = pd.DataFrame([da.iloc[0]['home'],da.iloc[0]['away'],db.iloc[0]['home'],db.iloc[0]['away'],dc.iloc[0]['home'],dc.iloc[0]['away']],columns=['teams'])
    teams_in = teams_in.teams.unique()
    return teams_in
    
def get_short_name(data,dc):
    for team in data['home']:
        row = dc[dc['team'] == team]
        data.at[0,'home'] = row.iloc[0]['short']
    for team in data['away']:
        row = dc[dc['team'] == team]
        data.at[0,'away'] = row.iloc[0]['short']
    return data

def get_weeks_results(data,standings,team_ref):
    if data.iloc[0]['hr'] == 'E':
        db = pd.DataFrame([('NA',0,'NA',0)],columns=['home','hs','away','as'])
        big_win, top_team, low_team,other_team = db,db,db,db
        goals = 0
        return db,goals,big_win,top_team,low_team,other_team
    df = data
    month = df.iloc[-1]['m']
    week = df.iloc[-1]['d']
    db = df[df['m'] == month]
    db = db[db['d'] >= week - 6]
    db = db.sort_values(by=['game'],ascending=False)
    goals = db['hs'].sum() + db['as'].sum()
    max_home = db[db['hs'] == db['hs'].max()]
    max_away = db[db['as'] == db['as'].max()]
    if max_home.iloc[0]['hs'] > max_away.iloc[0]['as']:
        max_home_win = max_home
    else:
        max_home_win = max_away
    big_win = max_home_win[['home','hs','away','as']]
    big_win = index_reset(big_win)
    big_win = get_short_name(big_win,team_ref)
    big_win = pd.DataFrame(big_win.loc[0])
    big_win = big_win.T
    top_team = clean_team_game(standings,db,0)
    top_team = get_short_name(top_team,team_ref)
    low_team = clean_team_game(standings,db,1)
    low_team = get_short_name(low_team,team_ref)
    teams_in = get_longest_name(big_win,top_team,low_team,team_ref)
    other_team = db[(~db['home'].isin(teams_in)) | (~db['away'].isin(teams_in))]
    other_team = index_reset(other_team)
    other_team = pd.DataFrame(other_team.loc[0][['home','hs','away','as']])
    other_team = other_team.T
    other_team = get_short_name(other_team,team_ref)
    return db,goals,big_win,top_team,low_team,other_team

def get_team_stats(data,query):
    db = data[data['team'] == query]
    names = db['name'].unique()
    information = data.copy()
    db.pop('number')
    db = db.groupby(['name']).sum()
    db.insert(0,'last','empty')
    db.insert(0,'first','empty')
    db.insert(0,'position','empty')
    db.insert(0,'number',0)
    #db.insert(0,'team',team)
    i = 0
    for name in names:
        player = information[information['name'] == name].head(1)
        db.at[name,'first'] = player.iloc[0]['first']
        db.at[name,'last'] = player.iloc[0]['last']
        db.at[name,'number'] = int(player.iloc[0]['number'])
        db.at[name,'position'] = player.iloc[0]['position']
        db.at[name,'pass-acc'] = player.iloc[0]['pass-acc'].mean()
        db.at[name,'cross-acc'] = player.iloc[0]['cross-acc'].mean()
    db = db.reset_index()
    return db

def get_stats_all(stats,team_ref):
    stats_all = pd.DataFrame()
    for team in team_ref['team']:
        df = get_team_stats(stats,team)
        short_team = get_shortest_name(team,team_ref)
        df.insert(0,'team',short_team)
        stats_all = pd.concat([stats_all,df])
    stats_all = index_reset(stats_all)
    return stats_all

# get associated information for players league wide and calculate an overall score for each position
def get_evaluation(condensed_player_info,full_player_info):
    names = condensed_player_info.name.unique() # grab the list of names at the specified position
    eval_ = condensed_player_info.describe().T # get the evalution scores
    checks = condensed_player_info.columns[4:] # slice away the first three columns (name,number,postion) not needed
    condensed_player_info['overall'] = 0.0 # create the final column overall
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the player name to search for a specific player
    for name in names: # iterate through the names in the lisst
        player = full_player_info[full_player_info['name'] == name].head(1) # get the players details
        a = [] # create an empty array to store the scores
        for check in checks: # iterate through the columns of remaining data
            result = player.iloc[0][check] / eval_['max'][check] # calculate the score for the value found value/max
            a.append(result) # append the result into the list
            score = sum(a) / len(checks) #calculate the final score sum(list) / num of checks
            overall = str(score)
            overall = overall[0:4]
            condensed_player_info.at[name,'overall'] = overall # assign the value as the overall score
    condensed_player_info = condensed_player_info.reset_index() # reset the index, making the name column a column again
    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False) # sort using overall, descending
    return condensed_player_info

def top_tracked(team_stats,tracked):
    cols = ['team','name','position','number','minutes',tracked]
    if team_stats.minutes.sum() == 0:
        lst = ['rank']
        lst.extend(cols)
        tracked_player_stat = pd.DataFrame([(0,'NA','NA',0,0,0,0)],columns=lst)
        return tracked_player_stat
    player_information = team_stats.copy()
    tracked_player_stat = player_information[cols]
    tracked_player_stat = tracked_player_stat.sort_values(by=[tracked],ascending=False)
    tracked_player_stat = tracked_player_stat.reset_index()
    tracked_player_stat.pop('index')
    team = tracked_player_stat.pop('team')
    tracked_player_stat.insert(0,'team',team)
    tracked_player_stat = tracked_player_stat[tracked_player_stat[tracked] >= 1]
    rank = tracked_player_stat.index + 1
    tracked_player_stat.insert(0,'rank',rank)
    
    columns = tracked_player_stat.select_dtypes(include=['float']).columns
    for column in columns:
        if column == 'overall':
            continue
        tracked_player_stat[column] = tracked_player_stat[column].astype(int)
    
    return tracked_player_stat

def top_position(team_stats,position): # get the forwards in the league
    colf = ['team','name','number','position','minutes','goals','chances','assists','shots','s-target','passes','crosses','duels','tackles']
    colm = ['team','name','number','position','minutes','goals','assists','touches','passes','pass-acc','crosses','cross-acc','chances','duels','tackles']
    cold = ['team','name','number','position','minutes','tackles','t-won','clearances','interceptions','duels','d-won']
    colg = ['team','name','number','position','minutes','cs','saves','shots faced','claimed crosses']

    def create_blank_frame(columns):
        lst = [np.zeros(len(columns), dtype=int)]
        dataframe = pd.DataFrame(lst,columns=columns)
        dataframe['team'] = 'NA'
        dataframe['name'] = 'NA'
        dataframe['overall'] = 0
        return dataframe

    if team_stats.minutes.sum() == 0:
        if position == 'f':
            condensed_player_info = create_blank_frame(colf)
        if position == 'm':
            condensed_player_info = create_blank_frame(colm)
        if position == 'd':
            condensed_player_info = create_blank_frame(cold)
        if position == 'g':
            condensed_player_info = create_blank_frame(colg)
        return condensed_player_info

    if position == 'f':
        cols = colf
    if position == 'm':
        cols = colm
    if position == 'd':
        cols = cold
    if position == 'g':
        cols = colg

    player_information = team_stats.copy() # load player information
    full_player_info = player_information[player_information['position'] == position] # filter the players by selected position
    condensed_player_info = full_player_info[cols] # select specific columns associated with the evaluation
    condensed_player_info = get_evaluation(condensed_player_info,full_player_info) # condensed Dataframe and full Dataframe being passes
    condensed_player_info = index_reset(condensed_player_info)
    names = condensed_player_info.name.unique() # get the names of the players who fit the criteria
    condensed_player_info = condensed_player_info.set_index('name') # set the index to the name column to make the search possible

    for name in names:
        player = full_player_info[full_player_info['name'] == name].head(1) # forwards main purpose is to score goals
        if player.iloc[0]['assists'] > 2.0: # reward getting more than 3 assists
            condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1
        if position == 'm':
            if player.iloc[0]['goals'] >= 5.0: # reward scoring greater than 5 goals
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1
            if player.iloc[0]['pass-acc'] >= 0.85: # reward scoring greater than 5 goals
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1
        if position == 'f':
            if (player.iloc[0]['goals'] <= 2.0 and player.iloc[0]['minutes'] >= 1000.0): # if player scores less than 2 & has minutes greater than 1000
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] - 0.1
            if player.iloc[0]['goals'] >= 8.0: # reward scoring greater than 8 goals
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1
        if position == 'd':
            if (player.iloc[0]['interceptions'] > 200.0 and player.iloc[0]['minutes'] >= 1000.0): # if player scores less than 2 & has minutes greater than 1000
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1
            if player.iloc[0]['d-won'] > 110.0: # reward scoring greater than 8 goals
                condensed_player_info.at[name,'overall'] = condensed_player_info.at[name,'overall'] + 0.1

    condensed_player_info = condensed_player_info.sort_values(by=['overall'],ascending=False)
    condensed_player_info = condensed_player_info.reset_index()
    team = condensed_player_info.pop('team')
    condensed_player_info.insert(0,'team',team)
    
    columns = condensed_player_info.select_dtypes(include=['float']).columns
    for column in columns:
        if column == 'overall':
            condensed_player_info[column] = condensed_player_info[column].round(4).astype(str)
            condensed_player_info[column] = condensed_player_info[column].astype(float)
            continue
        condensed_player_info[column] = condensed_player_info[column].astype(int)
    
    return condensed_player_info

def top_offenders(data):  # get the offences handed out in the league
    cols = ['team','name','position','number','minutes','yellow','red','f-conceded']
    if data.minutes.sum() == 0:
        top_offenders = pd.DataFrame([('NA','NA','NA',0,0,0,0,0)],columns=cols)
        return top_offenders
    player_information = data.copy()
    top_offenders = player_information[cols]
    top_offenders = get_evaluation(top_offenders,player_information)
    top_offenders = top_offenders.sort_values(by=['red','yellow'],ascending=False)
    top_offenders = top_offenders.reset_index()
    top_offenders.pop('index')
    team = top_offenders.pop('team')
    top_offenders.insert(0,'team',team)
    
    columns = top_offenders.select_dtypes(include=['float']).columns
    for column in columns:
        if column == 'overall':
            continue
        top_offenders[column] = top_offenders[column].astype(int)
    
    return top_offenders

def get_team_form(data,query):
    db = data[data['team'] == query]
    db = pd.DataFrame(db['summary'])
    return db

def get_form_results(data,dc):
    db = pd.DataFrame()
    form = get_results_brief(data[data['s'] <= 1],dc)
    teams = data.home.unique()
    teams = np.sort(teams,axis=-1)
    for team in teams:
        df = get_team_form(form,team)
        #print(team,'\n',df)
        db[team] = pd.Series(df['summary'].values)
    db = db.T
    db = db.reset_index()
    db = db.fillna('E')
    return db

def get_roster(query,stats,team_ref): # use team stats to get the player information
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query]
    roster = roster[['name','number','position']]
    roster.insert(3,'overall',0)
    roster = index_reset(roster)
    return roster

def get_home_away_comparison(stats,game,team):
    db = stats[stats['game'] == game].copy()
    db = db[db['team'] == team]
    db = db.sort_values(by=['minutes'],ascending=False)
    db = db#[0:11]
    db = db['name']
    return db

def get_compare_roster(results,query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info):
    roster = get_roster_overall(query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
    def get_player(data,string):
        dz = data[data['position'] == string]
        dz = dz[['first','last','number','position','overall']]
        dz.insert(0,'name',dz['first'] + ' ' + dz['last'])
        dz.pop('first')
        dz.pop('last')
        return dz
    dk = get_player(roster,'g')
    dk = dk.sort_values(by=['overall'],ascending=False)
    dd = get_player(roster,'d')
    dd = dd.sort_values(by=['overall'],ascending=False)
    dm = get_player(roster,'m')
    dm = dm.sort_values(by=['overall'],ascending=False)
    df = get_player(roster,'f')
    df = df.sort_values(by=['overall'],ascending=False)
    db = pd.concat([dk[0:1],dd[0:4],dm[0:4],df[0:2]])
    db = index_reset(db)
    return db

def get_roster_overall(query,stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info): # use team stats to get the player information
    def get_score(data,name):
        db = data[data['name'] == name]
        if db.empty:
            previous = player_info[player_info['name'] == name]
            if previous.empty:
                db = 0
            else:
                previous = previous['overall'].values
                db = previous[0]
        else:
            db = db['overall'].values
            db = db[0]
        return db
    def get_image(data,name):
        db = data[data['name'] == name]
        if db['image'].empty:
            db = 'empty.jpg'
        else:
            db = db['image'].values
            db = db[0]
        return db
    def get_link(data,name):
        db = data[data['name'] == name]
        if db['link'].empty:
            db = 'https://en.wikipedia.org/wiki/Canadian_Premier_League'
        else:
            db = db['link'].values
            db = db[0]
        return db
    def get_flag(data,name):
        db = data[data['name'] == name]
        if db['flag'].empty:
            db = 'empty.png'
        else:
            db = db['flag'].values
            db = db[0]
        return db
    roster = get_stats_all(stats,team_ref)
    roster = roster[roster['team'] == query].copy()
    roster = roster[['name','first','last','number','position']] # scale the dataframe down to what we need
    #roster.insert(3,'overall',a)
    a = []
    b = []
    c = []
    d = []
    for i in range(0,roster.shape[0]):
        if roster.iloc[i]['position'] == 'f':
            score = str(get_score(rated_forwards,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'm':
            score = str(get_score(rated_midfielders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'd':
            score = str(get_score(rated_defenders,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
        if roster.iloc[i]['position'] == 'g':
            score = str(get_score(rated_keepers,roster.iloc[i]['name']))
            a.append(score[0:4])
            b.append(get_image(player_info,roster.iloc[i]['name']))
            c.append(get_flag(player_info,roster.iloc[i]['name']))
            d.append(get_link(player_info,roster.iloc[i]['name']))
    roster['overall'] = a
    roster['flag'] = c
    roster['link'] = d
    roster.insert(0,'image',b)
    #roster['image'] = b
    roster = index_reset(roster)
    roster.pop('name')
    roster = roster.sort_values(by=['overall'],ascending=False)
    return roster

def get_power_rankings(standings,standings_old,team_ref,results,previous_rankings):
    a = []
    for team in team_ref['team']:
        old_rank = previous_rankings[previous_rankings['team'] == team]
        old_rank = old_rank['rank'].values
        old_rank = old_rank[0]
        form = get_five_game_form(results,team)
        form = str(round(form.at['w',0],1))+'-'+str(form.at['l',0])+'-'+str(form.at['d',0])
        crest = team_ref[team_ref['team'] == team]
        colour = crest['colour'].values
        colour = colour[0]
        crest = crest['crest'].values
        crest = crest[0]
        
        rank1 = standings_old[standings_old['team'] == team]
        rank2 = standings[standings['team'] == team]
        
        if rank1.iloc[0]['rank'] == 1:
            bonus = 4
        elif rank1.iloc[0]['rank'] == 2:
            bonus = 3
        elif rank1.iloc[0]['rank'] == 3:
            bonus = 2
        else:
            bonus =0
        
        if standings.iloc[0]['gp'] == 0:
            bonus = 0
            
        if rank1.iloc[0]['rank'] == rank2.iloc[0]['rank']:
            change = 0
        else:
            change = (rank1.iloc[0]['rank'] - rank2.iloc[0]['rank']) * - 1
            
        if rank1.iloc[0]['gd'] == rank2.iloc[0]['gd']:
            gd_bonus = 0
        else:
            gd_bonus = (rank1.iloc[0]['gd'] - rank2.iloc[0]['gd']) * - 1
        
        if rank1.iloc[0]['ga'] == rank2.iloc[0]['ga']:
            ga_nerf = 0
        else:
            ga_nerf = (rank1.iloc[0]['ga'] - rank2.iloc[0]['ga']) * - 1
            
        if rank1.iloc[0]['w'] == rank2.iloc[0]['w']:
            w_bonus = 0
        else:
            w_bonus = (rank1.iloc[0]['w'] - rank2.iloc[0]['w']) * - 1
        
        goal_bonus = gd_bonus - ga_nerf
        change = change + bonus + goal_bonus + w_bonus
        
        a.append([team,form,old_rank,change,goal_bonus,w_bonus,crest,colour])
    power_rankings = pd.DataFrame(a,columns = ['team','form','old_rank','change','goal_bonus','w_bonus','crest','colour'])
    power_rankings = power_rankings.sort_values(by=['change'],ascending=False)
    power_rankings = index_reset(power_rankings)
    rank = power_rankings.index + 1
    power_rankings.insert(0,'rank',rank)
    power_rankings['previous'] = (power_rankings['rank'] - power_rankings['old_rank'])*-1
    return power_rankings

def get_best_eleven(team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info):
    def get_image(data,name):
        db = data[data['name'] == name]
        if db['image'].empty:
            db = 'empty.jpg'
        else:
            db = db['image'].values
            db = db[0]
        return db
    def get_link(data,name):
        db = data[data['name'] == name]
        if db['link'].empty:
            db = 'https://en.wikipedia.org/wiki/Canadian_Premier_League'
        else:
            db = db['link'].values
            db = db[0]
        return db
    def get_flag(data,name):
        db = data[data['name'] == name]
        if db['flag'].empty:
            db = 'empty.png'
        else:
            db = db['flag'].values
            db = db[0]
        return db
    
    check = team_stats.describe()
    if check.loc['max']['minutes'] == 0:
        best_eleven = pd.read_csv('datasets/2019/cpl-2019-best_eleven.csv')
        #best_eleven = pd.DataFrame([['empty.jpg','empty.png',0,'NA',0,'NA','NA','https://canpl.ca/']],columns=['image','flag','number','position','overall','first','last','link'])
        #best_eleven = pd.concat([best_eleven]*11)
        return best_eleven
    else:
        roster = team_stats.copy()
        roster = roster[['name','first','last']]

        top_keeper = rated_keepers.head(1)
        top_keeper = top_keeper[['name','number','position','overall']]
        top_defenders = rated_defenders.iloc[0:3][['name','number','position','overall']]
        top_midfielders = rated_midfielders.iloc[0:5][['name','number','position','overall']]
        top_forwards = rated_forwards.iloc[0:2][['name','number','position','overall']]
        best_eleven = pd.DataFrame(columns=['name','number','position','overall'])
        best_eleven = pd.concat([best_eleven,top_keeper,top_defenders,top_midfielders,top_forwards])
        a,b,c,d,e = [],[],[],[],[]


        names = best_eleven['name'].values

        for i in range(0,best_eleven.shape[0]):
            player = roster[roster['name'] == best_eleven.iloc[i]['name']]
            player= index_reset(player)
            first = player.iloc[0]['first']
            last = player.iloc[0]['last']
            a.append(first)
            b.append(last)
            c.append(get_image(player_info,best_eleven.iloc[i]['name']))
            d.append(get_flag(player_info,best_eleven.iloc[i]['name']))
            e.append(get_link(player_info,best_eleven.iloc[i]['name']))

        best_eleven.insert(0,'image',c)
        best_eleven.insert(1,'first',a)
        best_eleven.insert(2,'last',b)
        best_eleven.insert(3,'flag',d)
        best_eleven['link'] = e
        best_eleven.pop('name')
        best_eleven = index_reset(best_eleven)
        return best_eleven
    
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

def get_team_history(data,query):
    df = data[data['away'] == query].copy()
    df = df[['d','m','as','hs','away','ar','home','hr']]
    df = df.rename(columns={'as':'hs','hs':'as','away':'home','ar':'hr','home':'away','hr':'ar'})
    db = data[data['home'] == query].copy()
    db = db[['d','m','hs','as','home','hr','away','ar']]
    db = pd.concat([db,df])
    db = db.tail(5)
    db = db.sort_values(by=['m','d'],ascending=False)
    return db

def get_five_game_form(data,query):
    db = get_team_history(data,query)
    db = db.pop('hr')
    a = []
    for i in db:
        if i == 'W':
            j = [1,0,0]
            a.append(j)
        if i == 'L':
            j = [0,1,0]
            a.append(j)
        if i == 'D':
            j = [0,0,1]
            a.append(j)
    db = pd.DataFrame(a,columns=['w','l','d'])
    db = pd.DataFrame(db.sum())
    return db

In [3]:
team_ref = pd.read_csv('datasets/teams.csv')

In [4]:
year = input('enter the year: ')

enter the year: 2019


In [5]:
results = pd.read_csv(f'datasets/{year}/cpl-{year}-results.csv')
stats = pd.read_csv(f'datasets/{year}/cpl-{year}-stats.csv')
player_info = pd.read_csv(f'datasets/{year}/player-{year}-info.csv')

In [6]:
if year == '2019':
    team_ref = team_ref[1:]
    results_old = results[:-7].copy()
else:
    results_old = results[results['hr'] != 'E'].copy()

In [7]:
results.head(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
0,I1,0,27,4,1,1,Forge FC,D,York9 FC,D,0,0,4-27-2019 Forge FC D 1-1 D York9 FC,Tim Hortons Field,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
1,I2,0,28,4,1,0,Pacific FC,W,HFX Wanderers FC,L,1,0,4-28-2019 Pacific FC W 1-0 L HFX Wanderers FC,Westhills Stadium,https://canpl.ca/matchcentre/4itgc6bq5l5c7iv5k...
2,I3,0,1,5,1,2,Pacific FC,L,Valour FC,W,0,0,5-1-2019 Pacific FC L 1-2 W Valour FC,Westhills Stadium,https://canpl.ca/matchcentre/4j6hzym2ji5zgrm0w...
3,I4,0,4,5,2,1,HFX Wanderers FC,W,Forge FC,L,0,0,5-4-2019 HFX Wanderers FC W 2-1 L Forge FC,Wanderers Grounds,https://canpl.ca/matchcentre/4jopa68wp2k7cntkj...
4,I5,0,4,5,2,1,Cavalry FC,W,York9 FC,L,0,0,5-4-2019 Cavalry FC W 2-1 L York9 FC,ATCO Field,https://canpl.ca/matchcentre/4kj2hnlgvv3ncwouf...


In [8]:
results_old.head(5)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
0,I1,0,27,4,1,1,Forge FC,D,York9 FC,D,0,0,4-27-2019 Forge FC D 1-1 D York9 FC,Tim Hortons Field,https://canpl.ca/matchcentre/4ilfbdmlp4zuj7k3c...
1,I2,0,28,4,1,0,Pacific FC,W,HFX Wanderers FC,L,1,0,4-28-2019 Pacific FC W 1-0 L HFX Wanderers FC,Westhills Stadium,https://canpl.ca/matchcentre/4itgc6bq5l5c7iv5k...
2,I3,0,1,5,1,2,Pacific FC,L,Valour FC,W,0,0,5-1-2019 Pacific FC L 1-2 W Valour FC,Westhills Stadium,https://canpl.ca/matchcentre/4j6hzym2ji5zgrm0w...
3,I4,0,4,5,2,1,HFX Wanderers FC,W,Forge FC,L,0,0,5-4-2019 HFX Wanderers FC W 2-1 L Forge FC,Wanderers Grounds,https://canpl.ca/matchcentre/4jopa68wp2k7cntkj...
4,I5,0,4,5,2,1,Cavalry FC,W,York9 FC,L,0,0,5-4-2019 Cavalry FC W 2-1 L York9 FC,ATCO Field,https://canpl.ca/matchcentre/4kj2hnlgvv3ncwouf...


In [9]:
results_diff = pd.concat([results, results_old]).drop_duplicates(keep=False)

In [10]:
results_diff.head(2)

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
93,I94,1,16,10,0,4,Valour FC,L,York9 FC,W,0,1,10-16-2019 Valour FC L 0-4 W York9 FC,IG Field,https://canpl.ca/matchcentre/5oyl3jwgr2padsvpk...
94,I95,1,16,10,3,1,FC Edmonton,W,Pacific FC,L,0,0,10-16-2019 FC Edmonton W 3-1 L Pacific FC,Clarke Stadium,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...


In [11]:
schedule = get_schedule(results_diff)
schedule

Unnamed: 0,game,home,away
0,I94,Valour FC,York9 FC
1,I95,FC Edmonton,Pacific FC
2,I96,York9 FC,HFX Wanderers FC
3,I97,Cavalry FC,FC Edmonton
4,I98,Pacific FC,Valour FC


In [12]:
def get_team_files(schedule,team_ref):
    team1 = get_shortest_name(schedule.iloc[0]['home'],team_ref)
    team2 = get_shortest_name(schedule.iloc[0]['away'],team_ref)
    team3 = get_shortest_name(schedule.iloc[1]['home'],team_ref)
    team4 = get_shortest_name(schedule.iloc[1]['away'],team_ref)
    team5 = get_shortest_name(schedule.iloc[2]['home'],team_ref)
    team6 = get_shortest_name(schedule.iloc[2]['away'],team_ref)
    team7 = get_shortest_name(schedule.iloc[3]['home'],team_ref)
    team8 = get_shortest_name(schedule.iloc[3]['away'],team_ref)
    return team1, team2, team3, team4, team5, team6, team7, team8

In [13]:
team1, team2, team3, team4, team5, team6, team7, team8 = get_team_files(schedule,team_ref)

In [14]:
print(team1, team2, team3, team4, team5, team6, team7, team8)

VFC Y9 FCE PFC Y9 HFX CFC FCE


In [15]:
standings = get_standings(results,1,team_ref)
if results_old.empty == True:
    standings_old = get_standings(results,1,team_ref)
else:
    standings_old = get_standings(results_old,1,team_ref)

In [16]:
standings

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Cavalry FC,28,62,84,2.21,19,4,5,4,37,33,37,33,0,0
1,2,Forge FC,28,56,84,2.0,17,6,5,11,41,30,41,30,0,0
2,3,York9 FC,28,34,84,1.21,9,12,7,-4,36,40,36,40,0,0
3,4,FC Edmonton,28,32,84,1.14,8,12,8,8,34,26,34,26,0,0
4,5,Pacific FC,28,31,84,1.11,8,13,7,15,48,33,48,33,0,0
5,6,Valour FC,28,28,84,1.0,8,16,4,-8,37,45,37,45,0,0
6,7,HFX Wanderers FC,28,28,84,1.0,6,12,10,22,39,17,39,17,0,0


In [17]:
standings_old

Unnamed: 0,rank,team,gp,pts,tpp,ppg,w,l,d,gd,gf,ga,gfh,gah,gfa,gaa
0,1,Cavalry FC,27,59,81,2.19,18,4,5,2,34,32,34,32,0,0
1,2,Forge FC,28,56,84,2.0,17,6,5,11,41,30,41,30,0,0
2,3,York9 FC,26,31,78,1.19,8,11,7,2,36,34,36,34,0,0
3,4,FC Edmonton,26,29,78,1.12,7,11,8,4,28,24,28,24,0,0
4,5,Valour FC,26,28,78,1.08,8,14,4,-6,35,41,35,41,0,0
5,6,Pacific FC,26,28,78,1.08,7,12,7,11,43,32,43,32,0,0
6,7,HFX Wanderers FC,27,25,81,0.93,5,12,10,24,39,15,39,15,0,0


In [18]:
compare_standings_test = compare_standings(standings,standings_old,team_ref)

In [19]:
compare_standings_test

Unnamed: 0,team,change
0,Pacific FC,1
1,Cavalry FC,0
2,FC Edmonton,0
3,Forge FC,0
4,HFX Wanderers FC,0
5,York9 FC,0
6,Valour FC,-1


In [20]:
previous_rankings = pd.read_csv(f'datasets/{year}/cpl-{year}-power_rankings.csv')
previous_rankings.to_csv(f'datasets/{year}/cpl-{year}-previous_rankings.csv',index=False)

In [21]:
power_rankings = get_power_rankings(standings,standings_old,team_ref,results,previous_rankings)
power_rankings

Unnamed: 0,rank,team,form,old_rank,change,goal_bonus,w_bonus,crest,colour,previous
0,1,Cavalry FC,2-2-1,1,6,1,1,cavalry_fc_nav.png,cpl-cfc,0
1,2,FC Edmonton,0-3-2,5,3,2,1,FC_Edmonton_nav.png,cpl-fce,3
2,3,Forge FC,2-2-1,2,3,0,0,Forge_FC_nav.png,cpl-ffc,-1
3,4,Pacific FC,0-3-2,6,3,3,1,Pacific_FC_nav.png,cpl-pfc,2
4,5,HFX Wanderers FC,1-2-2,3,-3,-4,1,HFX_Wanderers_FC.png,cpl-hfx,-2
5,6,Valour FC,1-3-1,7,-5,-6,0,Valour_FC_nav.png,cpl-vfc,1
6,7,York9 FC,3-2-0,4,-9,-12,1,York_9_FC_nav.png,cpl-y9,-3


In [22]:
game_week, goals, big_win, top_result, low_result,other_result = get_weeks_results(results[results['s'] <= 1],standings,team_ref)
game_week

Unnamed: 0,game,s,d,m,hs,as,home,hr,away,ar,csh,csa,combined,venue,links
97,I98,1,19,10,2,0,Pacific FC,W,Valour FC,L,1,0,10-19-2019 Pacific FC W 2-0 L Valour FC,Westhills Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
96,I97,1,19,10,3,1,Cavalry FC,W,FC Edmonton,L,0,0,10-19-2019 Cavalry FC W 3-1 L FC Edmonton,ATCO Field,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...
95,I96,1,19,10,0,2,York9 FC,L,HFX Wanderers FC,W,0,1,10-19-2019 York9 FC L 0-2 W HFX Wanderers FC,York Lions Stadium,https://canpl.ca/matchcentre/5opkqc01qgmjdcbho...
94,I95,1,16,10,3,1,FC Edmonton,W,Pacific FC,L,0,0,10-16-2019 FC Edmonton W 3-1 L Pacific FC,Clarke Stadium,https://canpl.ca/matchcentre/5nw6ub2q95ptd6n71...
93,I94,1,16,10,0,4,Valour FC,L,York9 FC,W,0,1,10-16-2019 Valour FC L 0-4 W York9 FC,IG Field,https://canpl.ca/matchcentre/5oyl3jwgr2padsvpk...
92,I93,1,16,10,1,0,Forge FC,W,Cavalry FC,L,1,0,10-16-2019 Forge FC W 1-0 L Cavalry FC,Tim Hortons Field,https://canpl.ca/matchcentre/5nossz36w45wbtz1z...


In [23]:
#championship = get_standings(results,2)
#championship = championship[championship['gp'] > 1]
#championship

In [24]:
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)

In [25]:
standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)

In [26]:
results_brief = get_results_brief(results,team_ref)

In [27]:
results_brief.head(5)

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,4,5,2,1,Cavalry FC,W,York9 FC,L,W H 2 - 1 Y9,Cavalry FC
1,8,5,1,0,Cavalry FC,W,Valour FC,L,W H 1 - 0 VFC,Cavalry FC
2,12,5,1,2,Forge FC,L,Cavalry FC,W,W A 1 - 2 FFC,Cavalry FC
3,18,5,1,0,Cavalry FC,W,FC Edmonton,L,W H 1 - 0 FCE,Cavalry FC
4,25,5,2,0,Cavalry FC,W,HFX Wanderers FC,L,W H 2 - 0 HFX,Cavalry FC


In [28]:
#results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)

In [29]:
stats.head(5)

Unnamed: 0,game,team,position,number,name,first,last,minutes,touches,passes,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
0,I1,Forge FC,m,1,Alexander Achinioti-Jönsson,Alexander,Achinioti-Jönsson,77.0,47.0,41.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,I1,Forge FC,d,16,Bertrand Owundi,Bertrand,Owundi,77.0,69.0,56.0,...,5.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,I1,Forge FC,f,13,Chris Nanco,Christopher,Nanco,56.0,42.0,27.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,I1,Forge FC,d,22,Dominic Samuel,Dominic,Samuel,90.0,59.0,48.0,...,5.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,I1,Forge FC,m,6,Elimane Cissé,Elimane,Cissé,90.0,75.0,61.0,...,1.0,5.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [30]:
stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
count,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,...,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0,2735.0
mean,13.82925,71.540768,44.576965,30.244241,0.73657,1.116271,0.093119,0.060695,0.628154,0.087751,...,1.210238,0.790128,0.112249,0.006216,0.938574,0.995978,0.318099,0.227422,0.036563,0.019744
std,11.355872,28.545983,23.277512,18.645196,0.233204,1.904354,0.235873,0.246352,0.98912,0.311297,...,1.859761,1.13105,0.31573,0.078609,1.119698,1.125445,1.295603,0.956352,0.251072,0.139145
min,1.0,1.0,0.0,0.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,61.0,28.0,16.0,0.67,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,12.0,90.0,45.0,29.0,0.78,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0
75%,19.0,90.0,61.0,42.0,0.85,2.0,0.0,0.0,1.0,0.0,...,2.0,1.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0
max,92.0,90.0,118.0,112.0,1.0,15.0,1.0,2.0,7.0,3.0,...,11.0,7.0,1.0,1.0,6.0,7.0,13.0,10.0,4.0,1.0


In [49]:
team_stats = get_stats_all(stats,team_ref)

In [50]:
team_stats

Unnamed: 0,team,name,number,position,first,last,minutes,touches,passes,pass-acc,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
0,CFC,Aribim Pepple,24,f,Aribim,Pepple,60.0,21.0,9.0,1.00,...,0.0,0.0,0.0,0.0,1.0,4.0,0.0,0.0,0.0,0.0
1,CFC,Carlos Patiño,20,m,Carlos,Patiño,320.0,238.0,122.0,0.88,...,1.0,5.0,0.0,0.0,13.0,12.0,0.0,0.0,0.0,0.0
2,CFC,Dean Northover,12,d,Dean,Northover,609.0,478.0,255.0,0.67,...,10.0,15.0,2.0,1.0,14.0,13.0,0.0,0.0,0.0,0.0
3,CFC,Dominick Zator,4,d,Dominick,Zator,2335.0,1866.0,1402.0,0.83,...,75.0,51.0,0.0,0.0,19.0,15.0,0.0,0.0,0.0,0.0
4,CFC,Dominique Malonga,19,f,Dominique,Malonga,1871.0,760.0,489.0,0.80,...,12.0,2.0,1.0,0.0,31.0,8.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,Y9,Ryan Telfer,18,m,Ryan,Telfer,2216.0,1533.0,832.0,0.60,...,8.0,23.0,2.0,0.0,45.0,25.0,0.0,0.0,0.0,0.0
160,Y9,Simon Karlsson Adjei,12,f,Simon,Karlsson Adjei,1422.0,575.0,341.0,0.88,...,6.0,2.0,0.0,0.0,18.0,18.0,0.0,0.0,0.0,0.0
161,Y9,Stefan Lamanna,21,f,Stefan,Lamanna,77.0,55.0,31.0,0.86,...,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0
162,Y9,Steven Furlano,77,d,Steven,Furlano,517.0,325.0,225.0,0.85,...,6.0,10.0,4.0,0.0,4.0,15.0,0.0,0.0,0.0,0.0


In [33]:
team_stats.describe()

Unnamed: 0,number,minutes,touches,passes,pass-acc,crosses,cross-acc,assists,chances,goals,...,clearances,interceptions,yellow,red,f-won,f-conceded,shots faced,saves,claimed crosses,cs
count,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,...,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0,164.0
mean,15.0,1193.073171,743.402439,504.378049,0.734939,18.615854,0.06189,1.012195,10.47561,1.463415,...,20.182927,13.176829,1.871951,0.103659,15.652439,16.609756,5.304878,3.792683,0.609756,0.329268
std,12.78947,694.973598,500.156058,367.663695,0.224651,28.796174,0.200023,1.356592,11.323168,2.509834,...,25.700793,13.268319,1.796914,0.305751,13.363073,12.806015,19.381662,13.906557,2.492965,1.325122
min,1.0,9.0,10.0,3.0,-1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,7.0,560.0,322.5,198.75,0.67,1.0,0.0,0.0,2.0,0.0,...,3.0,2.0,0.0,0.0,5.0,7.0,0.0,0.0,0.0,0.0
50%,13.0,1243.0,664.0,453.5,0.77,6.0,0.0,0.0,7.0,0.0,...,9.0,9.0,2.0,0.0,12.0,14.0,0.0,0.0,0.0,0.0
75%,20.0,1775.0,1134.75,824.25,0.86,22.0,0.0,2.0,16.0,2.0,...,27.25,21.0,3.0,0.0,23.25,24.0,0.0,0.0,0.0,0.0
max,92.0,2520.0,1985.0,1536.0,1.0,164.0,1.0,5.0,66.0,13.0,...,117.0,51.0,9.0,1.0,57.0,62.0,122.0,88.0,17.0,9.0


In [34]:
rated_goalscorers = top_tracked(team_stats,'goals')
rated_assists = top_tracked(team_stats,'assists')

In [35]:
rated_assists

Unnamed: 0,rank,team,name,position,number,minutes,assists
0,1,Pacific FC,Blake Smith,d,4,2004,5
1,2,Valour FC,Michael Petrasso,m,9,1301,5
2,3,Pacific FC,Ben Fisk,m,21,1662,5
3,4,Forge FC,Tristan Borges,m,4,1884,5
4,5,Forge FC,Kyle Bekker,m,3,2354,5
...,...,...,...,...,...,...,...
74,75,Cavalry FC,Nik Ledgerwood,m,6,1227,1
75,76,Cavalry FC,Nathan Mavila,d,3,1952,1
76,77,York9 FC,Emmanuel Zambazis,m,14,277,1
77,78,Forge FC,Jonathan Grant,d,8,822,1


In [36]:
rated_assists.describe()

Unnamed: 0,rank,number,minutes,assists
count,79.0,79.0,79.0,79.0
mean,40.0,13.632911,1405.696203,2.101266
std,22.949219,11.626867,603.860728,1.236153
min,1.0,1.0,77.0,1.0
25%,20.5,6.5,994.5,1.0
50%,40.0,11.0,1530.0,2.0
75%,59.5,18.0,1877.5,3.0
max,79.0,77.0,2381.0,5.0


In [37]:
rated_goalscorers.describe()

Unnamed: 0,rank,number,minutes,goals
count,76.0,76.0,76.0,76.0
mean,38.5,13.578947,1423.894737,3.157895
std,22.083176,7.180089,637.472647,2.875425
min,1.0,2.0,137.0,1.0
25%,19.75,8.75,1010.25,1.0
50%,38.5,12.5,1490.0,2.0
75%,57.25,17.25,1926.5,4.25
max,76.0,45.0,2520.0,13.0


In [38]:
'''rated_g10 = rated_goalscorers.head(10)
rated_g10 = rated_g10[['rank','team','name','position','goals']]
rated_g10'''

"rated_g10 = rated_goalscorers.head(10)\nrated_g10 = rated_g10[['rank','team','name','position','goals']]\nrated_g10"

In [55]:
rated_forwards = top_position(team_stats,'f')
rated_midfielders = top_position(team_stats,'m')
rated_defenders = top_position(team_stats,'d')
rated_keepers = top_position(team_stats,'g')
rated_offenders = top_offenders(team_stats)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [56]:
rated_forwards.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,chances,assists,shots,s-target,passes,crosses,duels,tackles,overall
0,PFC,Terran Campbell,14,f,2334,11,31,2,53,23,569,29,324,39,0.8
1,HFX,Mohamed Kourouma,12,f,1988,1,44,3,68,17,702,156,264,56,0.77
2,CFC,Nico Pasquotti,17,f,1853,5,24,3,33,16,457,61,374,47,0.73
3,Y9,Rodrigo Gattas,22,f,1958,9,23,0,77,36,728,38,214,17,0.7
4,CFC,Dominique Malonga,19,f,1871,11,19,1,68,34,489,5,185,2,0.64
5,FCE,Oumar Diouck,45,f,1977,6,30,2,45,16,583,53,269,36,0.6
6,FCE,Easton Ongaro,19,f,1167,10,11,2,38,20,249,6,188,5,0.52
7,FFC,Anthony Novak,23,f,1027,6,14,3,24,13,235,3,169,10,0.48
8,Y9,Simon Karlsson Adjei,12,f,1422,7,21,2,53,16,341,4,210,15,0.47
9,CFC,Oliver Minatel,7,f,1450,7,17,1,37,12,348,21,194,29,0.43


In [57]:
rated_midfielders.head(10)

Unnamed: 0,team,name,number,position,minutes,goals,assists,touches,passes,pass-acc,crosses,cross-acc,chances,duels,tackles,overall
0,VFC,Marco Bustos,22,m,2249,7,3,1417,912,0,74,0,43,299,22,0.91
1,FFC,Tristan Borges,4,m,1884,13,5,1307,680,0,77,0,42,356,59,0.9
2,CFC,Julian Büscher,8,m,1678,5,4,1432,971,0,113,0,38,282,57,0.87
3,Y9,Ryan Telfer,18,m,2216,8,1,1533,832,0,164,0,66,372,52,0.79
4,FFC,Kyle Bekker,3,m,2354,4,5,1748,1362,0,98,0,44,173,23,0.77
5,PFC,Victor Blasco,19,m,1586,6,3,1040,589,0,47,0,27,349,43,0.72
6,PFC,Ben Fisk,21,m,1662,6,5,859,546,0,65,0,29,153,28,0.69
7,CFC,Elijah Adekugbe,16,m,1575,1,4,1185,949,0,10,0,12,196,42,0.67
8,CFC,Sergio Camargo,10,m,1171,6,3,580,359,0,20,0,22,134,16,0.67
9,Y9,Kyle Porter,19,m,1916,2,4,1131,729,0,88,0,31,216,47,0.65


In [58]:
rated_defenders.head(10)

Unnamed: 0,team,name,number,position,minutes,tackles,t-won,clearances,interceptions,duels,d-won,overall
0,FCE,Mélé Temguia,8,d,2430,61,41,82,51,244,140,0.96
1,Y9,Luca Gasparotto,13,d,2520,31,24,117,49,254,182,0.93
2,HFX,André Bona,3,d,2053,66,50,76,32,257,150,0.93
3,CFC,Dominick Zator,4,d,2335,46,32,75,51,265,169,0.92
4,Y9,Diyaeddine Abzi,20,d,1814,48,35,31,48,267,134,0.92
5,FFC,Kwame Awuah,2,d,1885,57,40,17,39,258,124,0.9
6,Y9,Morey Doner,3,d,2250,40,27,35,32,239,145,0.86
7,PFC,Kadin Chung,7,d,2032,57,42,42,31,203,111,0.79
8,HFX,Ndzemdzela Langwa,20,d,1281,65,47,20,14,272,154,0.77
9,CFC,Nathan Mavila,3,d,1952,51,32,42,20,257,134,0.75


In [59]:
rated_keepers

Unnamed: 0,team,name,number,position,minutes,cs,saves,shots faced,claimed crosses,overall
0,Y9,Nathan Ingham,1,g,2340,6,88,122,17,0.93
1,CFC,Marco Carducci,1,g,2160,9,65,81,11,0.79
2,FFC,Triston Henry,9,g,1890,8,53,70,12,0.71
3,FCE,Connor James,1,g,1980,6,67,91,9,0.7
4,VFC,Tyson Farago,1,g,1440,2,47,77,13,0.55
5,HFX,Christian Oxner,50,g,1489,5,41,56,10,0.54
6,PFC,Mark Village,1,g,1440,4,47,69,10,0.54
7,PFC,Nolan Wirth,6,g,1080,1,59,83,6,0.45
8,VFC,Mathias Janssens,26,g,1080,4,43,65,0,0.38
9,HFX,Jan-Michael Williams,21,g,1028,2,40,59,4,0.36


In [60]:
rated_offenders.head(2)

Unnamed: 0,team,name,position,number,minutes,yellow,red,f-conceded,overall
0,Y9,Manny Aparicio,m,10,2159,5,1,56,0.82
1,FFC,Dominic Samuel,d,22,2044,5,1,38,0.74


In [45]:
best_eleven = get_best_eleven(team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
best_eleven

Unnamed: 0,image,first,last,flag,number,position,overall,link
0,Nathan-Ingham.jpg,Nathan,Ingham,Canada.png,1,g,0.93,https://en.wikipedia.org/wiki/Nathan_Ingham
1,Mélé-Temguia.jpg,Mélé,Temguia,Germany.png,8,d,0.96,https://en.wikipedia.org/wiki/Mélé_Temguia
2,Luca-Gasparotto.jpg,Luca,Gasparotto,Canada.png,13,d,0.92,https://en.wikipedia.org/wiki/Luca_Gasparotto
3,André-Bona.jpg,André,Bona,France.png,3,d,0.92,https://en.wikipedia.org/wiki/André_Bona
4,Marco-Bustos.jpg,Marco,Bustos,Canada.png,22,m,0.9,https://en.wikipedia.org/wiki/Marco_Bustos
5,Tristan-Borges.jpg,Tristan,Borges,Canada.png,4,m,0.89,https://en.wikipedia.org/wiki/Tristan_Borges
6,Julian-Buscher.jpg,Julian,Büscher,Germany.png,8,m,0.87,https://en.wikipedia.org/wiki/Julian_Büscher
7,Ryan-Telfer.jpg,Ryan,Telfer,Canada.png,18,m,0.78,https://en.wikipedia.org/wiki/Ryan_Telfer
8,Kyle-Bekker.jpg,Kyle,Bekker,Canada.png,3,m,0.77,https://en.wikipedia.org/wiki/Kyle_Bekker
9,Terran-Campbell.jpg,Terran,Campbell,Canada.png,14,f,0.79,https://en.wikipedia.org/wiki/Terran_Campbell


In [46]:
def get_match_tables(data,query):
    db = data[data['home'] == query]
    db = pd.concat([db,data[data['away'] == query]])
    db = db.sort_values(by=['m','d'])
    return db

def likelihood_input(array,a_list):
    b = a_list[0]
    c = a_list[1]
    d = a_list[2]
    array.append(b)
    array.append(c)
    array.append(d)
    return array

def likelihood_table(data,query):
    df = get_match_tables(data,query)
    array = []
    cols = data.columns
    for row in range(0,df.shape[0]):
        if df.iloc[row]['home'] == query:
            if df.iloc[row]['hr'] == 'W':
                array = likelihood_input(array,[[1,2,1],[1,0,0],[1,1,0]])
            if df.iloc[row]['hr'] == 'L':
                array = likelihood_input(array,[[1,2,0],[1,0,1],[1,1,0]])
            if df.iloc[row]['hr'] == 'D':
                array = likelihood_input(array,[[1,2,0],[1,0,0],[1,1,1]])
        if df.iloc[row]['away'] == query:
            if df.iloc[row]['ar'] == 'W':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,0]])
            if df.iloc[row]['ar'] == 'L':
                array = likelihood_input(array,[[2,2,0],[2,0,1],[2,1,0]])
            if df.iloc[row]['ar'] == 'D':
                array = likelihood_input(array,[[2,2,1],[2,0,0],[2,1,1]])
    db= pd.DataFrame(array,columns=['h/a','w/l/d','y/n'])
    return db

def get_team_comparison(data,q1,q2):
    # getting games with q1 in both home or away
    db = data[data['team'] == q1]
    db = db.reset_index()
    db.pop('index')
    # filering down more to get only the games against q2
    db = db.sort_values(by=['m','d'])
    db = db[(db['home'] == q2) | (db['away'] == q2)]
    db = db.reset_index()
    db.pop('index')
    if db.empty == True:
        db = pd.DataFrame([(0,0,0,0,q1,'D',q2,'D','empty',q1)],columns=['d','m','hs','as','home','hr','away','ar','summary','team'])
    return db

def get_NB_data(data,query):
    db = likelihood_table(data,query)
    dy = db.pop('y/n').to_list()
    dx = [tuple(x) for x in db.values]
    return dx, dy

In [47]:
# home side
q1 = schedule.iloc[3]['home']
# away side
q2 = schedule.iloc[3]['away']
print(q1,q2)

HFX Wanderers FC Valour FC


In [48]:
compare = get_team_comparison(results_brief,q1,q2)

In [49]:
compare

Unnamed: 0,d,m,hs,as,home,hr,away,ar,summary,team
0,7,1,0,0,HFX Wanderers FC,E,Valour FC,E,E H 0 - 0 VFC,HFX Wanderers FC
1,7,1,0,0,HFX Wanderers FC,E,Valour FC,E,E H 0 - 0 VFC,HFX Wanderers FC
2,7,1,0,0,Valour FC,E,HFX Wanderers FC,E,E A 0 - 0 VFC,HFX Wanderers FC
3,7,1,0,0,Valour FC,E,HFX Wanderers FC,E,E A 0 - 0 VFC,HFX Wanderers FC


In [50]:
t1_x, t1_y = get_NB_data(compare,q1)
t2_x, t2_y = get_NB_data(compare,q2)

### Game Prediction

use Gaussian Naive Bayes model to predict the outcome of a home match.

In [51]:
#Import Gaussian Naive Bayes model
from sklearn.naive_bayes import GaussianNB,BernoulliNB
import statistics

In [52]:
def get_gnb_prediction(query,x,y,result):
    
    gnb = GaussianNB()
    bnb = BernoulliNB()
    # Train the model using the training sets
    
    gnb.fit(x,y)
    bnb.fit(x,y)
    
    # use below instead of predicted = model.predict([result]) because we want the probability
    gnb_pred = np.round(gnb.predict_proba([result])[:, 1],decimals=2)
    bnb_pred = np.round(bnb.predict_proba([result])[:, 1],decimals=2)
    
    pred = round((gnb_pred[0] + bnb_pred[0]) / 2,2)
    #print(gnb_pred[0], bnb_pred[0], pred)
    
    return pred

def get_match_prediction_result(query,x,y,array):
    prediction = get_gnb_prediction(query,x,y,array)
    return prediction

def get_match_prediction(q1,q2,x1,y1,x2,y2):
    if len(x1) == 0:
        x = round(1/3,2)
        home_win, away_win,draw = x,x,x
        return home_win,away_win,draw
    home_win = get_match_prediction_result(q1,x1,y1,[1,2])
    draw = get_match_prediction_result(q1,x1,y1,[1,1])
    away_win = get_match_prediction_result(q2,x2,y2,[2,2])
    return home_win, draw, away_win

In [53]:
home_win, draw, away_win = get_match_prediction(q1,q2,t1_x,t1_y,t2_x,t2_y)

In [54]:
print(q1,'\nwin probability: ', round(home_win,2))

HFX Wanderers FC 
win probability:  0.33


In [55]:
print(q2,'\nwin probability: ', round(away_win,2))

Valour FC 
win probability:  0.33


In [56]:
print('Draw probability: ', round(draw,2))

Draw probability:  0.33


In [57]:
round(home_win + draw + away_win,1)

1.0

In [58]:
team_form_results = get_form_results(results,team_ref)
team_form_results

Unnamed: 0,index,0,1,2,3,4,5,6,7,8,...,18,19,20,21,22,23,24,25,26,27
0,Atletico Ottawa,E H 0 - 0 HFX,E H 0 - 0 PFC,E H 0 - 0 Y9,E H 0 - 0 CFC,E H 0 - 0 Y9,E H 0 - 0 CFC,E H 0 - 0 FFC,E H 0 - 0 VFC,E H 0 - 0 FCE,...,E A 0 - 0 HFX,E A 0 - 0 FCE,E A 0 - 0 FFC,E A 0 - 0 PFC,E A 0 - 0 VFC,E A 0 - 0 CFC,E A 0 - 0 Y9,E A 0 - 0 FCE,E A 0 - 0 PFC,E A 0 - 0 CFC
1,Cavalry FC,E H 0 - 0 Y9,E H 0 - 0 PFC,E H 0 - 0 Y9,E H 0 - 0 VFC,E H 0 - 0 VFC,E H 0 - 0 FFC,E H 0 - 0 FCE,E H 0 - 0 AO,E H 0 - 0 HFX,...,E A 0 - 0 AO,E A 0 - 0 HFX,E A 0 - 0 Y9,E A 0 - 0 FCE,E A 0 - 0 AO,E A 0 - 0 VFC,E A 0 - 0 Y9,E A 0 - 0 VFC,E A 0 - 0 PFC,E A 0 - 0 FFC
2,FC Edmonton,E H 0 - 0 CFC,E H 0 - 0 FFC,E H 0 - 0 PFC,E H 0 - 0 AO,E H 0 - 0 VFC,E H 0 - 0 HFX,E H 0 - 0 VFC,E H 0 - 0 CFC,E H 0 - 0 AO,...,E A 0 - 0 FFC,E A 0 - 0 HFX,E A 0 - 0 CFC,E A 0 - 0 PFC,E A 0 - 0 VFC,E A 0 - 0 AO,E A 0 - 0 CFC,E A 0 - 0 AO,E A 0 - 0 FFC,E A 0 - 0 HFX
3,Forge FC,E H 0 - 0 CFC,E H 0 - 0 AO,E H 0 - 0 HFX,E H 0 - 0 VFC,E H 0 - 0 FCE,E H 0 - 0 AO,E H 0 - 0 PFC,E H 0 - 0 Y9,E H 0 - 0 PFC,...,E A 0 - 0 CFC,E A 0 - 0 VFC,E A 0 - 0 VFC,E A 0 - 0 Y9,E A 0 - 0 AO,E A 0 - 0 CFC,E A 0 - 0 FCE,E A 0 - 0 HFX,E A 0 - 0 AO,E A 0 - 0 PFC
4,HFX Wanderers FC,E H 0 - 0 AO,E H 0 - 0 CFC,E H 0 - 0 FFC,E H 0 - 0 AO,E H 0 - 0 FCE,E H 0 - 0 CFC,E H 0 - 0 Y9,E H 0 - 0 PFC,E H 0 - 0 PFC,...,E A 0 - 0 VFC,E A 0 - 0 FCE,E A 0 - 0 VFC,E A 0 - 0 CFC,E A 0 - 0 Y9,E A 0 - 0 PFC,E A 0 - 0 FFC,E A 0 - 0 AO,E A 0 - 0 FCE,E A 0 - 0 CFC
5,Pacific FC,E H 0 - 0 FCE,E H 0 - 0 HFX,E H 0 - 0 VFC,E H 0 - 0 FFC,E H 0 - 0 CFC,E H 0 - 0 VFC,E H 0 - 0 AO,E H 0 - 0 FCE,E H 0 - 0 Y9,...,E A 0 - 0 Y9,E A 0 - 0 FFC,E A 0 - 0 HFX,E A 0 - 0 FFC,E A 0 - 0 HFX,E A 0 - 0 FCE,E A 0 - 0 CFC,E A 0 - 0 VFC,E A 0 - 0 Y9,E A 0 - 0 AO
6,Valour FC,E H 0 - 0 AO,E H 0 - 0 FCE,E H 0 - 0 HFX,E H 0 - 0 PFC,E H 0 - 0 FFC,E H 0 - 0 AO,E H 0 - 0 FFC,E H 0 - 0 HFX,E H 0 - 0 FCE,...,E A 0 - 0 PFC,E A 0 - 0 CFC,E A 0 - 0 Y9,E A 0 - 0 FCE,E A 0 - 0 FCE,E A 0 - 0 AO,E A 0 - 0 HFX,E A 0 - 0 FFC,E A 0 - 0 HFX,E A 0 - 0 AO
7,York9 FC,E H 0 - 0 HFX,E H 0 - 0 VFC,E H 0 - 0 FCE,E H 0 - 0 AO,E H 0 - 0 FCE,E H 0 - 0 FFC,E H 0 - 0 VFC,E H 0 - 0 PFC,E H 0 - 0 CFC,...,E A 0 - 0 AO,E A 0 - 0 FFC,E A 0 - 0 PFC,E A 0 - 0 FCE,E A 0 - 0 VFC,E A 0 - 0 FFC,E A 0 - 0 HFX,E A 0 - 0 FCE,E A 0 - 0 VFC,E A 0 - 0 PFC


forwards_19 = pd.read_csv('datasets/2019/cpl-2019-forwards.csv')
midfielders_19 = pd.read_csv('datasets/2019/cpl-2019-midfielders.csv')
defenders_19 = pd.read_csv('datasets/2019/cpl-2019-defenders.csv')
keepers_19 = pd.read_csv('datasets/2019/cpl-2019-keepers.csv')

midfielders_19.head(2)

In [59]:
def update_player_info(player_info,rated_forwards,rated_midfielders,rated_defenders,rated_keepers):
    def get_player_score(data,name):
        name = [name]
        if data[data['name'].isin(name)].empty:
            pass
        else:
            overall = data[data['name'].isin(name)]
            new_overall = overall['overall'].values
            return new_overall
    combine = [rated_forwards,rated_midfielders,rated_defenders,rated_keepers]
    names = player_info['name'].values
    a = []
    for name in names:
        j = 1
        for i in range(0,4):
            score = get_player_score(combine[i],name)
            if score == None:
                j += 1
                pass
            if score != None:
                overall = score[0]
                a.append(overall)
            if j == 5:
                overall = 0.0
                a.append(overall)
    player_info['overall'] = a
    return player_info

In [60]:
#player_info = update_player_info(player_info,forwards_19,midfielders_19,defenders_19,keepers_19)

In [61]:
standings.to_csv(f'datasets/{year}/cpl-{year}-standings.csv',index=False)
#championship.to_csv(f'datasets/{year}/cpl-{year}-championship.csv',index=False)
power_rankings.to_csv(f'datasets/{year}/cpl-{year}-power_rankings.csv',index=False)
results_brief.to_csv(f'datasets/{year}/cpl-{year}-results_brief.csv',index=False)
schedule.to_csv(f'datasets/{year}/cpl-{year}-schedule.csv',index=False)
team_stats.to_csv(f'datasets/{year}/cpl-{year}-team_stats.csv',index=False)

In [61]:
rated_forwards.to_csv(f'datasets/{year}/cpl-{year}-forwards.csv',index=False)
rated_midfielders.to_csv(f'datasets/{year}/cpl-{year}-midfielders.csv',index=False)
rated_defenders.to_csv(f'datasets/{year}/cpl-{year}-defenders.csv',index=False)
rated_keepers.to_csv(f'datasets/{year}/cpl-{year}-keepers.csv',index=False)
rated_offenders.to_csv(f'datasets/{year}/cpl-{year}-discipline.csv',index=False)
rated_goalscorers.to_csv(f'datasets/{year}/cpl-{year}-rated_goalscorers.csv',index=False)
rated_assists.to_csv(f'datasets/{year}/cpl-{year}-rated_assists.csv',index=False)

In [30]:
team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)
#h1_roster.to_csv(f'datasets/{year}/teams/cpl-{year}-{q1}_final_scores.csv')
team_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)
best_eleven.to_csv(f'datasets/{year}/cpl-{year}-best_eleven.csv',index=False)
player_info.to_csv(f'datasets/{year}/player-{year}-info.csv',index=False)

"team_stats.to_csv(f'datasets/{year}/cpl-{year}-team_stats.csv',index=False)\nrated_forwards.to_csv(f'datasets/{year}/cpl-{year}-forwards.csv',index=False)\nrated_midfielders.to_csv(f'datasets/{year}/cpl-{year}-midfielders.csv',index=False)\nrated_defenders.to_csv(f'datasets/{year}/cpl-{year}-defenders.csv',index=False)\nrated_keepers.to_csv(f'datasets/{year}/cpl-{year}-keepers.csv',index=False)\nrated_offenders.to_csv(f'datasets/{year}/cpl-{year}-discipline.csv',index=False)\nrated_goalscorers.to_csv(f'datasets/{year}/cpl-{year}-rated_goalscorers.csv',index=False)\nrated_assists.to_csv(f'datasets/{year}/cpl-{year}-rated_assists.csv',index=False)\nteam_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)\n#h1_roster.to_csv(f'datasets/{year}/teams/cpl-{year}-{q1}_final_scores.csv')\nteam_form_results.to_csv(f'datasets/{year}/cpl-{year}-team_form.csv',index=False)\nbest_eleven.to_csv(f'datasets/{year}/cpl-{year}-best_eleven.csv',index=False)\nplayer_info.to_csv(f'

In [67]:
# home side
q1 = schedule.iloc[3]['home']
# away side
q2 = schedule.iloc[3]['away']
print(q1,q2)

Cavalry FC FC Edmonton


In [64]:
game_info = schedule[schedule['home'] == q1]
game_info = game_info[game_info['away'] == q2]
game_info

Unnamed: 0,game,home,away
100,II101,HFX Wanderers FC,Valour FC
83,II84,HFX Wanderers FC,Valour FC


In [65]:
game_h = get_home_away_comparison(stats,game_info,q1)
game_a = get_home_away_comparison(stats,game_info,q2)

  res_values = method(rvalues)


In [66]:
h1_roster = get_compare_roster(results,q1,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
h1_roster

Unnamed: 0,name,number,position,overall
0,Christian Oxner,50,g,0.54
1,Peter Schaale,2,d,0.62
2,Chrisnovic N'sa,6,d,0.41
3,Alex DeCarolis,24,d,0.4
4,Jems Geffrard,3,d,0.0
5,Louis Béland-Goyette,5,m,0.64
6,Andre Rampersad,18,m,0.57
7,Scott Firth,15,m,0.1
8,Omar Kreim,8,m,0.0
9,Akeem Garcia,11,f,0.47


In [78]:
h2_roster = get_compare_roster(results,q2,team_stats,team_ref,rated_forwards,rated_midfielders,rated_defenders,rated_keepers,player_info)
h2_roster

  res_values = method(rvalues)


Unnamed: 0,name,number,position,overall
0,Nacho Zabal,0,g,0.0
1,Brandon John,0,d,0.0
2,Vashon Neufville,0,d,0.0
3,Ben Fisk,0,m,0.69
4,Ajay Khabra,0,m,0.35
5,Malyk Hamilton,0,m,0.22
6,Antoine Coupland,0,m,0.0
7,Kunle Dada-Luke,0,f,0.0


In [66]:
team1_history = get_team_history(results,q1)
team1_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
7,4,6,0,0,Forge FC,E,HFX Wanderers FC,E
0,25,5,0,0,Forge FC,E,Cavalry FC,E


In [67]:
team1_history = get_five_game_form(results,q1)

In [68]:
team1_history

Unnamed: 0,0
w,0.0
l,0.0
d,0.0


In [69]:
for i in team1_history:
    if i == 'W':
        print(3)
    if i == 'L':
        print(0)
    if i == 'D':
        print(1)

In [70]:
team2_history = get_team_history(results,q2)
team2_history

Unnamed: 0,d,m,hs,as,home,hr,away,ar
6,4,6,0,0,Cavalry FC,E,Pacific FC,E
0,25,5,0,0,Cavalry FC,E,Forge FC,E
