In [1]:
import pandas as pd

In [2]:
# Get Fanfooty match data with a row for each player (including match notes and injury tags)
df_fanfooty_player_raw = pd.read_csv(r'inputs/fanfooty_raw.csv', low_memory=False)

# Pre-Processing
## Full Name
df_fanfooty_player_raw['Full Name'] = df_fanfooty_player_raw['First Name'] + ' ' + df_fanfooty_player_raw['Surname']

## Set standard Team Names
df_team_names = pd.read_csv(r'Inputs/all_sources_team_names.csv')
team_mapping = dict(df_team_names[['fanfooty_team_name', 'footywire_team_name']].values)
df_fanfooty_player_raw['Team'] = df_fanfooty_player_raw['Team'].map(team_mapping)

df_fanfooty_player_season_list = df_fanfooty_player_raw.groupby(['Player ID', 'Year', 'Team', 'Full Name'])['SC'].mean().reset_index().sort_values(['Full Name', 'Year', 'Team'])
df_fanfooty_player_season_list['season_team_name_id'] = df_fanfooty_player_season_list['Year'].astype(str) + "_" + df_fanfooty_player_season_list['Team'].astype(str) + "_" + df_fanfooty_player_season_list['Full Name'].astype(str)
df_fanfooty_player_season_list
# df_fanfooty_player_raw.groupby(['Player ID', 'Year', 'Team', 'Full Name'])['SC'].mean().reset_index().sort_values(['Full Name', 'Year', 'Team']).to_csv('fanfooty_players.csv')

Unnamed: 0,Player ID,Year,Team,Full Name,SC,season_team_name_id
4079,291672,2011,North Melbourne,Aaron Black,33.000000,2011_North Melbourne_Aaron Black
4080,291672,2012,North Melbourne,Aaron Black,49.666667,2012_North Melbourne_Aaron Black
4081,291672,2013,North Melbourne,Aaron Black,76.388889,2013_North Melbourne_Aaron Black
4082,291672,2014,North Melbourne,Aaron Black,56.680000,2014_North Melbourne_Aaron Black
4083,291672,2015,North Melbourne,Aaron Black,42.000000,2015_North Melbourne_Aaron Black
...,...,...,...,...,...,...
5877,991988,2017,Sydney,Zak Jones,82.521739,2017_Sydney_Zak Jones
5878,991988,2018,Sydney,Zak Jones,78.473684,2018_Sydney_Zak Jones
5879,991988,2019,Sydney,Zak Jones,85.705882,2019_Sydney_Zak Jones
2801,280442,2011,Western Bulldogs,Zephaniah Skinner,4.000000,2011_Western Bulldogs_Zephaniah Skinner


In [3]:
# Get Footywire match data with a row for each player
df_footywire_data = pd.read_csv(r'inputs/footywire_player_matches.csv', index_col=0)

# Exclude rounds 1 to 4 of 2010 season (couldn't scrape these from fanfooty)
exclude_rounds = ['Round 1', 'Round 2', 'Round 3', 'Round 4']
df_footywire_data.drop(df_footywire_data.loc[(df_footywire_data['Season'] == 2010) & (df_footywire_data['Round'].isin(exclude_rounds))].index, inplace=True)
df_footywire_player_season_list = df_footywire_data.groupby(['Season', 'Team', 'Player'])['SC'].mean().reset_index().sort_values(['Player', 'Season', 'Team'])
df_footywire_player_season_list['season_team_name_id'] = df_footywire_player_season_list['Season'].astype(str) + "_" + df_footywire_player_season_list['Team'].astype(str) +"_" + df_footywire_player_season_list['Player'].astype(str)
df_footywire_player_season_list

Unnamed: 0,Season,Team,Player,SC,season_team_name_id
950,2011,North Melbourne,Aaron Black,33.000000,2011_North Melbourne_Aaron Black
1640,2012,North Melbourne,Aaron Black,49.666667,2012_North Melbourne_Aaron Black
2308,2013,North Melbourne,Aaron Black,76.555556,2013_North Melbourne_Aaron Black
2969,2014,North Melbourne,Aaron Black,56.680000,2014_North Melbourne_Aaron Black
3638,2015,North Melbourne,Aaron Black,42.000000,2015_North Melbourne_Aaron Black
...,...,...,...,...,...
5130,2017,Sydney,Zak Jones,82.521739,2017_Sydney_Zak Jones
5788,2018,Sydney,Zak Jones,78.473684,2018_Sydney_Zak Jones
6447,2019,Sydney,Zak Jones,85.947368,2019_Sydney_Zak Jones
1208,2011,Western Bulldogs,Zephaniah Skinner,4.000000,2011_Western Bulldogs_Zephaniah Skinner


In [4]:
df_merged = df_footywire_player_season_list.merge(df_fanfooty_player_season_list, on='season_team_name_id', how='left', suffixes=['_footywire', '_fanfooty'])
display(df_merged)
df_no_match = df_merged.loc[df_merged['Player ID'].isnull()]
df_no_match

Unnamed: 0,Season,Team_footywire,Player,SC_footywire,season_team_name_id,Player ID,Year,Team_fanfooty,Full Name,SC_fanfooty
0,2011,North Melbourne,Aaron Black,33.000000,2011_North Melbourne_Aaron Black,291672.0,2011.0,North Melbourne,Aaron Black,33.000000
1,2012,North Melbourne,Aaron Black,49.666667,2012_North Melbourne_Aaron Black,291672.0,2012.0,North Melbourne,Aaron Black,49.666667
2,2013,North Melbourne,Aaron Black,76.555556,2013_North Melbourne_Aaron Black,291672.0,2013.0,North Melbourne,Aaron Black,76.388889
3,2014,North Melbourne,Aaron Black,56.680000,2014_North Melbourne_Aaron Black,291672.0,2014.0,North Melbourne,Aaron Black,56.680000
4,2015,North Melbourne,Aaron Black,42.000000,2015_North Melbourne_Aaron Black,291672.0,2015.0,North Melbourne,Aaron Black,42.000000
...,...,...,...,...,...,...,...,...,...,...
6511,2017,Sydney,Zak Jones,82.521739,2017_Sydney_Zak Jones,991988.0,2017.0,Sydney,Zak Jones,82.521739
6512,2018,Sydney,Zak Jones,78.473684,2018_Sydney_Zak Jones,991988.0,2018.0,Sydney,Zak Jones,78.473684
6513,2019,Sydney,Zak Jones,85.947368,2019_Sydney_Zak Jones,991988.0,2019.0,Sydney,Zak Jones,85.705882
6514,2011,Western Bulldogs,Zephaniah Skinner,4.000000,2011_Western Bulldogs_Zephaniah Skinner,280442.0,2011.0,Western Bulldogs,Zephaniah Skinner,4.000000


Unnamed: 0,Season,Team_footywire,Player,SC_footywire,season_team_name_id,Player ID,Year,Team_fanfooty,Full Name,SC_fanfooty
187,2015,Melbourne,Alex N-Bullen,42.818182,2015_Melbourne_Alex N-Bullen,,,,,
188,2016,Melbourne,Alex N-Bullen,54.000000,2016_Melbourne_Alex N-Bullen,,,,,
189,2017,Melbourne,Alex N-Bullen,75.473684,2017_Melbourne_Alex N-Bullen,,,,,
190,2018,Melbourne,Alex N-Bullen,75.920000,2018_Melbourne_Alex N-Bullen,,,,,
191,2019,Melbourne,Alex N-Bullen,61.066667,2019_Melbourne_Alex N-Bullen,,,,,
...,...,...,...,...,...,...,...,...,...,...
6497,2015,GWS,Zachary Williams,69.583333,2015_GWS_Zachary Williams,,,,,
6498,2016,GWS,Zachary Williams,85.727273,2016_GWS_Zachary Williams,,,,,
6499,2017,GWS,Zachary Williams,91.391304,2017_GWS_Zachary Williams,,,,,
6500,2018,GWS,Zachary Williams,97.000000,2018_GWS_Zachary Williams,,,,,


In [5]:
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

def get_year_team_df(year, team):
    df = df_fanfooty_player_season_list.loc[(df_fanfooty_player_season_list['Year'] == year) & (df_fanfooty_player_season_list['Team'] == team)]
    return df

def get_fuzzy_match(row):
    df_year_team = get_year_team_df(row['Season'], row['Team_footywire'])
    fuzzy_match = process.extractOne(row['Player'], df_year_team['Full Name'])
    fuzzy_match_name = fuzzy_match[0]
    fuzzy_match_score = fuzzy_match[1]
    fuzzy_match_index = fuzzy_match[2]
    if fuzzy_match[1] >= 70:
        return pd.Series([fuzzy_match_name, fuzzy_match_score, fuzzy_match_index])

    
df_no_match[['fuzzy_match_name', 'fuzzy_match_score', 'fuzzy_match_index']] = df_no_match.apply(lambda row: get_fuzzy_match(row), axis=1)
df_no_match['fuzzy_season_team_name'] = df_no_match['Season'].astype(str) + "_" + df_no_match['Team_footywire'].astype(str) +"_" + df_no_match['fuzzy_match_name'].astype(str)
df_no_match

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,Season,Team_footywire,Player,SC_footywire,season_team_name_id,Player ID,Year,Team_fanfooty,Full Name,SC_fanfooty,fuzzy_match_name,fuzzy_match_score,fuzzy_match_index,fuzzy_season_team_name
187,2015,Melbourne,Alex N-Bullen,42.818182,2015_Melbourne_Alex N-Bullen,,,,,,Alex Neal-Bullen,90.0,5392.0,2015_Melbourne_Alex Neal-Bullen
188,2016,Melbourne,Alex N-Bullen,54.000000,2016_Melbourne_Alex N-Bullen,,,,,,Alex Neal-Bullen,90.0,5393.0,2016_Melbourne_Alex Neal-Bullen
189,2017,Melbourne,Alex N-Bullen,75.473684,2017_Melbourne_Alex N-Bullen,,,,,,Alex Neal-Bullen,90.0,5394.0,2017_Melbourne_Alex Neal-Bullen
190,2018,Melbourne,Alex N-Bullen,75.920000,2018_Melbourne_Alex N-Bullen,,,,,,Alex Neal-Bullen,90.0,5395.0,2018_Melbourne_Alex Neal-Bullen
191,2019,Melbourne,Alex N-Bullen,61.066667,2019_Melbourne_Alex N-Bullen,,,,,,Alex Neal-Bullen,90.0,5396.0,2019_Melbourne_Alex Neal-Bullen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6497,2015,GWS,Zachary Williams,69.583333,2015_GWS_Zachary Williams,,,,,,Zac Williams,86.0,4954.0,2015_GWS_Zac Williams
6498,2016,GWS,Zachary Williams,85.727273,2016_GWS_Zachary Williams,,,,,,Zac Williams,86.0,4955.0,2016_GWS_Zac Williams
6499,2017,GWS,Zachary Williams,91.391304,2017_GWS_Zachary Williams,,,,,,Zac Williams,86.0,4956.0,2017_GWS_Zac Williams
6500,2018,GWS,Zachary Williams,97.000000,2018_GWS_Zachary Williams,,,,,,Zac Williams,86.0,4957.0,2018_GWS_Zac Williams


In [6]:
def replace_with_fuzzy_results(row):
    original_row = row[['Player ID', 'Year', 'Team_fanfooty', 'Full Name', 'SC_fanfooty']]
    if row[['Player ID']].isnull()[0]:
        matched_row = df_fanfooty_player_season_list.loc[df_fanfooty_player_season_list['season_team_name_id'] == row['fuzzy_season_team_name'], ['Player ID', 'Year', 'Team', 'Full Name', 'SC']].squeeze()
        if len(matched_row) ==5:
            original_row['Player ID'] = matched_row['Player ID']
            original_row['Year'] = matched_row['Year']
            original_row['Team_fanfooty'] = matched_row['Team']
            original_row['Full Name'] = matched_row['Full Name']
            original_row['SC_fanfooty'] = matched_row['SC']
    return original_row

df_final_player_ids = df_merged.merge(df_no_match[['season_team_name_id', 'fuzzy_match_name', 'fuzzy_season_team_name']], how='left', left_on='season_team_name_id', right_on='season_team_name_id')
df_final_player_ids[['Player ID', 'Year', 'Team_fanfooty', 'Full Name', 'SC_fanfooty']] = df_final_player_ids.apply(lambda row: replace_with_fuzzy_results(row), axis=1)
df_final_player_ids
# df_final_player_ids.to_csv('Outputs/final_player_ids_08-02-20.csv')

Unnamed: 0,Season,Team_footywire,Player,SC_footywire,season_team_name_id,Player ID,Year,Team_fanfooty,Full Name,SC_fanfooty,fuzzy_match_name,fuzzy_season_team_name
0,2011,North Melbourne,Aaron Black,33.000000,2011_North Melbourne_Aaron Black,291672.0,2011.0,North Melbourne,Aaron Black,33.000000,,
1,2012,North Melbourne,Aaron Black,49.666667,2012_North Melbourne_Aaron Black,291672.0,2012.0,North Melbourne,Aaron Black,49.666667,,
2,2013,North Melbourne,Aaron Black,76.555556,2013_North Melbourne_Aaron Black,291672.0,2013.0,North Melbourne,Aaron Black,76.388889,,
3,2014,North Melbourne,Aaron Black,56.680000,2014_North Melbourne_Aaron Black,291672.0,2014.0,North Melbourne,Aaron Black,56.680000,,
4,2015,North Melbourne,Aaron Black,42.000000,2015_North Melbourne_Aaron Black,291672.0,2015.0,North Melbourne,Aaron Black,42.000000,,
...,...,...,...,...,...,...,...,...,...,...,...,...
6511,2017,Sydney,Zak Jones,82.521739,2017_Sydney_Zak Jones,991988.0,2017.0,Sydney,Zak Jones,82.521739,,
6512,2018,Sydney,Zak Jones,78.473684,2018_Sydney_Zak Jones,991988.0,2018.0,Sydney,Zak Jones,78.473684,,
6513,2019,Sydney,Zak Jones,85.947368,2019_Sydney_Zak Jones,991988.0,2019.0,Sydney,Zak Jones,85.705882,,
6514,2011,Western Bulldogs,Zephaniah Skinner,4.000000,2011_Western Bulldogs_Zephaniah Skinner,280442.0,2011.0,Western Bulldogs,Zephaniah Skinner,4.000000,,
