In [1]:
%matplotlib inline

import pandas as pd
import glob
import os
import numpy as np
import matplotlib.pyplot as plt

transfers_path = '/home/tdraths/sb_assignments/Transfers_Capstone/data/data_cleaning_outputs/transfers_best.csv'
spi_path = '/home/tdraths/sb_assignments/Transfers_Capstone/data/data_cleaning_outputs/spi_best.csv'

transfers = pd.read_csv(transfers_path)
spi = pd.read_csv(spi_path)

### I. Organizing dataframes and some initial analysis


In [2]:
display(transfers.columns)
spi.columns

Index(['Unnamed: 0', 'club_name', 'team_short', 'player_name', 'age',
       'position', 'club_involved_name', 'transfer_movement', 'fee_cleaned',
       'league_name', 'year', 'season'],
      dtype='object')

Index(['Unnamed: 0', 'season', 'date', 'league_id', 'league', 'team1',
       'team1_short', 'team2', 'team2_short', 'spi1', 'spi2'],
      dtype='object')

In [3]:
transfers.drop(columns=['Unnamed: 0'], inplace=True)
spi.drop(columns=['Unnamed: 0'], inplace=True)

display(transfers.columns)
spi.columns

Index(['club_name', 'team_short', 'player_name', 'age', 'position',
       'club_involved_name', 'transfer_movement', 'fee_cleaned', 'league_name',
       'year', 'season'],
      dtype='object')

Index(['season', 'date', 'league_id', 'league', 'team1', 'team1_short',
       'team2', 'team2_short', 'spi1', 'spi2'],
      dtype='object')

In [4]:
spi_home = spi[['season', 'date', 'league', 'team1', 'team1_short', 'spi1']]
spi_away = spi[['season', 'date', 'league', 'team2', 'team2_short', 'spi2']]

display(spi.head())
display(spi_home.head())
display(spi_away.head())

Unnamed: 0,season,date,league_id,league,team1,team1_short,team2,team2_short,spi1,spi2
0,2017,2017-08-04,2412,English League Championship,Sunderland,SUN,Derby County,DER,50.39,40.83
1,2017,2017-08-04,2412,English League Championship,Nottingham Forest,NOT,Millwall,MIL,35.55,28.23
2,2017,2017-08-05,2412,English League Championship,Sheffield United,SHU,Brentford,BRE,27.72,39.7
3,2017,2017-08-05,2412,English League Championship,Queens Park Rangers,QPR,Reading,REA,36.33,34.9
4,2017,2017-08-05,2412,English League Championship,Fulham,FUL,Norwich City,NOR,43.0,42.6


Unnamed: 0,season,date,league,team1,team1_short,spi1
0,2017,2017-08-04,English League Championship,Sunderland,SUN,50.39
1,2017,2017-08-04,English League Championship,Nottingham Forest,NOT,35.55
2,2017,2017-08-05,English League Championship,Sheffield United,SHU,27.72
3,2017,2017-08-05,English League Championship,Queens Park Rangers,QPR,36.33
4,2017,2017-08-05,English League Championship,Fulham,FUL,43.0


Unnamed: 0,season,date,league,team2,team2_short,spi2
0,2017,2017-08-04,English League Championship,Derby County,DER,40.83
1,2017,2017-08-04,English League Championship,Millwall,MIL,28.23
2,2017,2017-08-05,English League Championship,Brentford,BRE,39.7
3,2017,2017-08-05,English League Championship,Reading,REA,34.9
4,2017,2017-08-05,English League Championship,Norwich City,NOR,42.6


In [5]:
spi_home = spi_home.groupby(['season', 'league', 'team1_short'], 
                            as_index=False).mean().loc[:,['season', 'league', 'team1_short', 'spi1']]
spi_home.columns = ['year', 'league', 'team', 'avg_home_spi']
spi_home[spi_home['year'] == 2018].sort_values(by=['avg_home_spi'], ascending=False)

Unnamed: 0,year,league,team,avg_home_spi
76,2018,Barclays Premier League,MNC,93.55
75,2018,Barclays Premier League,LIV,91.81
69,2018,Barclays Premier League,CHE,85.693158
80,2018,Barclays Premier League,TOT,84.935263
64,2018,Barclays Premier League,ARS,80.714211
77,2018,Barclays Premier League,MNU,79.308947
74,2018,Barclays Premier League,LEI,71.798421
71,2018,Barclays Premier League,EVE,70.269474
70,2018,Barclays Premier League,CRY,70.152632
81,2018,Barclays Premier League,WAT,69.451053


In [6]:
spi_away = spi_away.groupby(['season', 'league', 'team2_short'], 
                            as_index=False).mean().loc[:,['season', 'league', 'team2_short', 'spi2']]
spi_away.columns = ['year', 'league', 'team', 'avg_away_spi']
spi_away[spi_away['year'] == 2018].sort_values(by=['avg_away_spi'], ascending=False)

Unnamed: 0,year,league,team,avg_away_spi
76,2018,Barclays Premier League,MNC,93.579474
75,2018,Barclays Premier League,LIV,91.626316
69,2018,Barclays Premier League,CHE,85.461053
80,2018,Barclays Premier League,TOT,84.589474
64,2018,Barclays Premier League,ARS,80.506842
77,2018,Barclays Premier League,MNU,79.445263
74,2018,Barclays Premier League,LEI,71.273158
71,2018,Barclays Premier League,EVE,70.414211
70,2018,Barclays Premier League,CRY,69.804211
81,2018,Barclays Premier League,WAT,69.436316


In [7]:
spi = spi_home.merge(spi_away, how='inner', on=['year', 'team', 'league'])
spi[spi['year'] == 2018].head()

Unnamed: 0,year,league,team,avg_home_spi,avg_away_spi
64,2018,Barclays Premier League,ARS,80.714211,80.506842
65,2018,Barclays Premier League,BHA,61.455263,61.482105
66,2018,Barclays Premier League,BOU,65.690526,65.685263
67,2018,Barclays Premier League,BUR,61.341579,61.136842
68,2018,Barclays Premier League,CAR,58.476316,58.371053


##### What's happened so far:
 - I took the spi dataframe and subset it into two new dataframes, spi_home and spi_away
 - Each new one was simply a step on the path to a standard spi dataframe that has the average home and away spi for each club each season
 - I merged spi_home and spi_away into a 'new dataframe' called spi.
 - The new and improved **spi** has five columns:
     - Year
     - League
     - Team
     - Avg_Home_SPI
     - Avg_Away_SPI

In [8]:
transfers.columns

Index(['club_name', 'team_short', 'player_name', 'age', 'position',
       'club_involved_name', 'transfer_movement', 'fee_cleaned', 'league_name',
       'year', 'season'],
      dtype='object')

In [9]:
transfers.columns = ['team_name', 'team', 'player_name', 'age', 'position',
                     'club_involved_name', 'movement', 'fee_cleaned', 'league', 'year', 'season']
transfers

Unnamed: 0,team_name,team,player_name,age,position,club_involved_name,movement,fee_cleaned,league,year,season
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021
...,...,...,...,...,...,...,...,...,...,...,...
8450,Barnsley FC,BAR,Samy Morsy,25,Central Midfield,Wigan,out,0.0,Championship,2016,2016/2017
8451,Barnsley FC,BAR,Ryan Kent,20,Left Winger,Liverpool U23,out,0.0,Championship,2016,2016/2017
8452,Barnsley FC,BAR,Matty James,25,Central Midfield,Leicester,out,0.0,Championship,2016,2016/2017
8453,Barnsley FC,BAR,Gethin Jones,21,Right-Back,Everton U23,out,0.0,Championship,2016,2016/2017


In [10]:
spi_transfers = transfers.merge(spi, how='outer', on=['team', 'year'])
spi_transfers.head()

Unnamed: 0,team_name,team,player_name,age,position,club_involved_name,movement,fee_cleaned,league_x,year,season,league_y,avg_home_spi,avg_away_spi
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021,Barclays Premier League,78.847895,78.742105
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021,Barclays Premier League,78.847895,78.742105
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021,Barclays Premier League,78.847895,78.742105
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021,Barclays Premier League,78.847895,78.742105
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021,Barclays Premier League,78.847895,78.742105


In [11]:
spi_transfers.drop(columns='league_x', inplace=True)
spi_transfers.head()

Unnamed: 0,team_name,team,player_name,age,position,club_involved_name,movement,fee_cleaned,year,season,league_y,avg_home_spi,avg_away_spi
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,2020,2020/2021,Barclays Premier League,78.847895,78.742105
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,2020,2020/2021,Barclays Premier League,78.847895,78.742105
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,2020,2020/2021,Barclays Premier League,78.847895,78.742105
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,2020,2020/2021,Barclays Premier League,78.847895,78.742105
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,2020,2020/2021,Barclays Premier League,78.847895,78.742105


In [12]:
spi_transfers.rename(columns={'team_name': 'Team Name', 'team': 'Team', 'player_name': 'Player', 
                              'age': 'Age', 'position': 'Position', 'club_involved_name': 'Club Involved', 
                              'movement': 'Movement', 'fee_cleaned': 'Fee', 'year': 'Year', 
                              'season': 'Season', 'league_y': 'League', 'avg_home_spi': 'Average Home SPI', 
                              'avg_away_spi': 'Average Away SPI'}, inplace=True)

spi_transfers.head()

Unnamed: 0,Team Name,Team,Player,Age,Position,Club Involved,Movement,Fee,Year,Season,League,Average Home SPI,Average Away SPI
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,2020,2020/2021,Barclays Premier League,78.847895,78.742105
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,2020,2020/2021,Barclays Premier League,78.847895,78.742105
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,2020,2020/2021,Barclays Premier League,78.847895,78.742105
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,2020,2020/2021,Barclays Premier League,78.847895,78.742105
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,2020,2020/2021,Barclays Premier League,78.847895,78.742105


In [13]:
spi_transfers = spi_transfers[['Year', 'Team Name', 'Team', 'Player', 'Age', 'Position',
                               'Club Involved', 'Movement', 'Fee', 'League', 
                               'Average Home SPI', 'Average Away SPI']]
spi_transfers.head()

Unnamed: 0,Year,Team Name,Team,Player,Age,Position,Club Involved,Movement,Fee,League,Average Home SPI,Average Away SPI
0,2020,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Barclays Premier League,78.847895,78.742105
1,2020,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Barclays Premier League,78.847895,78.742105
2,2020,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Barclays Premier League,78.847895,78.742105
3,2020,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Barclays Premier League,78.847895,78.742105
4,2020,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Barclays Premier League,78.847895,78.742105


##### What's happened so far:
 - I have a clean, organized data frame from 'transfers' and my new 'spi' dataframe
 - I am ready to start adding features to my dataframe so that I can move toward a model to predict performance improvements by using SPI scores
 
Below I'll list some examples of features that I might work on in the coming days 

In [14]:
cond_in = (spi_transfers['Movement'] == 'in')

spi_transfers = spi_transfers.loc[cond_in,:].groupby(
    ['Year', 'Team Name', 'Team', 'League'],as_index=False).agg(
        Transfers_In=('Fee', 'count'),
        Total_Fee=('Fee','sum'),
        Max_Fee=('Fee','max'),
        Min_Fee=('Fee', 'min'),
        Average_Home_SPI=('Average Home SPI', 'mean'), 
        Average_Away_SPI=('Average Away SPI', 'mean'))

In [15]:
spi_transfers.head()

Unnamed: 0,Year,Team Name,Team,League,Transfers_In,Total_Fee,Max_Fee,Min_Fee,Average_Home_SPI,Average_Away_SPI
0,2016,AFC Bournemouth,BOU,Barclays Premier League,19,36.626,16.2,0.0,60.945789,60.804211
1,2016,Arsenal FC,ARS,Barclays Premier League,12,101.736,40.5,0.0,82.174211,81.785789
2,2016,Burnley FC,BUR,Barclays Premier League,26,41.04,13.59,0.0,57.487368,57.947895
3,2016,Chelsea FC,CHE,Barclays Premier League,27,119.52,35.1,0.0,84.546842,84.528421
4,2016,Crystal Palace,CRY,Barclays Premier League,22,91.17,28.08,0.0,58.646842,58.364737


In [16]:
spi_transfers.columns = ['Year', 'Team Name', 'Team', 'League', 'Transfers In', 'Total Fees Paid',
                         'Max Fee Paid', 'Min Fee Paid', 'Average Home SPI', 'Average Away SPI']

spi_transfers['Average Season SPI'] = (spi_transfers['Average Home SPI'] + spi_transfers['Average Away SPI']) / 2
spi_transfers

Unnamed: 0,Year,Team Name,Team,League,Transfers In,Total Fees Paid,Max Fee Paid,Min Fee Paid,Average Home SPI,Average Away SPI,Average Season SPI
0,2016,AFC Bournemouth,BOU,Barclays Premier League,19,36.626,16.20,0.0,60.945789,60.804211,60.875000
1,2016,Arsenal FC,ARS,Barclays Premier League,12,101.736,40.50,0.0,82.174211,81.785789,81.980000
2,2016,Burnley FC,BUR,Barclays Premier League,26,41.040,13.59,0.0,57.487368,57.947895,57.717632
3,2016,Chelsea FC,CHE,Barclays Premier League,27,119.520,35.10,0.0,84.546842,84.528421,84.537632
4,2016,Crystal Palace,CRY,Barclays Premier League,22,91.170,28.08,0.0,58.646842,58.364737,58.505789
...,...,...,...,...,...,...,...,...,...,...,...
191,2020,Watford FC,WAT,English League Championship,18,0.000,0.00,0.0,64.058261,64.019130,64.038696
192,2020,West Bromwich Albion,WBA,Barclays Premier League,10,22.320,12.15,0.0,58.420526,58.345263,58.382895
193,2020,West Ham United,WHU,Barclays Premier League,9,19.980,14.58,0.0,69.082105,68.997368,69.039737
194,2020,Wolverhampton Wanderers,WLV,Barclays Premier League,13,73.620,36.00,0.0,78.464211,78.435789,78.450000


In [17]:
spi_transfers.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 196 entries, 0 to 195
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Year                196 non-null    int64  
 1   Team Name           196 non-null    object 
 2   Team                196 non-null    object 
 3   League              196 non-null    object 
 4   Transfers In        196 non-null    int64  
 5   Total Fees Paid     196 non-null    float64
 6   Max Fee Paid        196 non-null    float64
 7   Min Fee Paid        196 non-null    float64
 8   Average Home SPI    196 non-null    float64
 9   Average Away SPI    196 non-null    float64
 10  Average Season SPI  196 non-null    float64
dtypes: float64(6), int64(2), object(3)
memory usage: 18.4+ KB


In [18]:
transfers.head()

Unnamed: 0,team_name,team,player_name,age,position,club_involved_name,movement,fee_cleaned,league,year,season
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021


In [19]:
idx = transfers.groupby(['team_name','team', 'year', 'league'])['fee_cleaned'].transform(max) == transfers['fee_cleaned']

max_position = transfers.loc[idx, ['team_name','team', 'year', 'position', 'fee_cleaned', 'league']]

max_position.head()

Unnamed: 0,team_name,team,year,position,fee_cleaned,league
0,Arsenal FC,ARS,2020,Defensive Midfield,45.0,Premier League
19,Aston Villa,AST,2020,Centre-Forward,27.72,Premier League
63,Brighton & Hove Albion,BHA,2020,Right Winger,10.53,Premier League
81,Burnley FC,BUR,2020,Central Midfield,0.99,Premier League
92,Chelsea FC,CHE,2020,Attacking Midfield,72.0,Premier League


In [20]:
max_position.rename(columns={'team_name': 'Team Name', 'team': 'Team', 'year': 'Year',
                            'position': 'Position', 'fee_cleaned': 'Max Fee Paid', 'league': 'League'}, 
                    inplace=True)

max_position = max_position[['Year', 'Team Name', 'Team', 'League', 'Max Fee Paid', 'Position']]

max_position.head()

Unnamed: 0,Year,Team Name,Team,League,Max Fee Paid,Position
0,2020,Arsenal FC,ARS,Premier League,45.0,Defensive Midfield
19,2020,Aston Villa,AST,Premier League,27.72,Centre-Forward
63,2020,Brighton & Hove Albion,BHA,Premier League,10.53,Right Winger
81,2020,Burnley FC,BUR,Premier League,0.99,Central Midfield
92,2020,Chelsea FC,CHE,Premier League,72.0,Attacking Midfield


In [21]:
display(max_position[max_position['Year'] == 2016].head())
spi_transfers[spi_transfers['Year'] == 2016].head()

Unnamed: 0,Year,Team Name,Team,League,Max Fee Paid,Position
3301,2016,Leicester City,LEI,Premier League,32.22,Central Midfield
3319,2016,Arsenal FC,ARS,Premier League,40.5,Defensive Midfield
3343,2016,Tottenham Hotspur,TOT,Premier League,31.5,Central Midfield
3359,2016,Manchester City,MNC,Premier League,50.04,Centre-Back
3411,2016,Manchester United,MNU,Premier League,94.5,Central Midfield


Unnamed: 0,Year,Team Name,Team,League,Transfers In,Total Fees Paid,Max Fee Paid,Min Fee Paid,Average Home SPI,Average Away SPI,Average Season SPI
0,2016,AFC Bournemouth,BOU,Barclays Premier League,19,36.626,16.2,0.0,60.945789,60.804211,60.875
1,2016,Arsenal FC,ARS,Barclays Premier League,12,101.736,40.5,0.0,82.174211,81.785789,81.98
2,2016,Burnley FC,BUR,Barclays Premier League,26,41.04,13.59,0.0,57.487368,57.947895,57.717632
3,2016,Chelsea FC,CHE,Barclays Premier League,27,119.52,35.1,0.0,84.546842,84.528421,84.537632
4,2016,Crystal Palace,CRY,Barclays Premier League,22,91.17,28.08,0.0,58.646842,58.364737,58.505789


In [22]:
transfers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8455 entries, 0 to 8454
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   team_name           8455 non-null   object 
 1   team                8455 non-null   object 
 2   player_name         8455 non-null   object 
 3   age                 8455 non-null   int64  
 4   position            8455 non-null   object 
 5   club_involved_name  8455 non-null   object 
 6   movement            8455 non-null   object 
 7   fee_cleaned         8455 non-null   float64
 8   league              8455 non-null   object 
 9   year                8455 non-null   int64  
 10  season              8455 non-null   object 
dtypes: float64(1), int64(2), object(8)
memory usage: 726.7+ KB


In [23]:
max_position.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 342 entries, 0 to 8423
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Year          342 non-null    int64  
 1   Team Name     342 non-null    object 
 2   Team          342 non-null    object 
 3   League        342 non-null    object 
 4   Max Fee Paid  342 non-null    float64
 5   Position      342 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 18.7+ KB


In [24]:
max_position.Team.value_counts()

ROT    46
MIL    42
LUT    21
WYC    19
DER     6
CAR     6
CHE     6
AST     5
ARS     5
MNC     5
EVE     5
BHA     5
REA     5
BOU     5
BRM     5
BUR     5
WHU     5
SWA     5
LIV     5
WLV     5
BRS     5
BRE     5
MNU     5
NOR     5
LEE     5
LEI     5
HUD     5
PRE     5
WAT     5
SOU     5
STO     5
CRY     5
WBA     5
MID     5
NEW     5
FUL     5
NOT     5
QPR     5
SHW     5
TOT     5
SHU     4
BAR     4
WIG     4
BLA     4
HUL     4
IPS     3
SUN     2
BOL     2
BRT     2
COV     1
CHA     1
Name: Team, dtype: int64

In [25]:
max_position[max_position.Team == 'ROT']

Unnamed: 0,Year,Team Name,Team,League,Max Fee Paid,Position
927,2020,Rotherham United,ROT,Championship,0.248,Central Midfield
6063,2018,Rotherham United,ROT,Championship,0.0,Right-Back
6064,2018,Rotherham United,ROT,Championship,0.0,Centre-Back
6065,2018,Rotherham United,ROT,Championship,0.0,Central Midfield
6066,2018,Rotherham United,ROT,Championship,0.0,Centre-Back
6067,2018,Rotherham United,ROT,Championship,0.0,Goalkeeper
6068,2018,Rotherham United,ROT,Championship,0.0,Left Midfield
6069,2018,Rotherham United,ROT,Championship,0.0,Centre-Forward
6070,2018,Rotherham United,ROT,Championship,0.0,Defensive Midfield
6071,2018,Rotherham United,ROT,Championship,0.0,Right-Back


In [26]:
max_not_zero = max_position[(max_position['Max Fee Paid'] != 0.00) & (max_position['League'] == 'Premier League') ]
max_not_zero.shape

(101, 6)

In [27]:
max_not_zero = max_not_zero[['Year', 'Team Name', 'Team', 'Max Fee Paid', 'Position']]

In [28]:
spi_PL = spi_transfers[spi_transfers.League == 'Barclays Premier League']
spi_PL.shape


(100, 11)

In [29]:
spi_PL_merge = spi_PL.merge(max_not_zero, how='left', on=['Year', 'Team Name', 'Team', 'Max Fee Paid'])

spi_PL_merge.head()

Unnamed: 0,Year,Team Name,Team,League,Transfers In,Total Fees Paid,Max Fee Paid,Min Fee Paid,Average Home SPI,Average Away SPI,Average Season SPI,Position
0,2016,AFC Bournemouth,BOU,Barclays Premier League,19,36.626,16.2,0.0,60.945789,60.804211,60.875,Right Winger
1,2016,Arsenal FC,ARS,Barclays Premier League,12,101.736,40.5,0.0,82.174211,81.785789,81.98,Defensive Midfield
2,2016,Burnley FC,BUR,Barclays Premier League,26,41.04,13.59,0.0,57.487368,57.947895,57.717632,Left Midfield
3,2016,Chelsea FC,CHE,Barclays Premier League,27,119.52,35.1,0.0,84.546842,84.528421,84.537632,
4,2016,Crystal Palace,CRY,Barclays Premier League,22,91.17,28.08,0.0,58.646842,58.364737,58.505789,Centre-Forward


In [36]:
spi_PL_merge[spi_PL_merge.Position.isna()]

Unnamed: 0,Year,Team Name,Team,League,Transfers In,Total Fees Paid,Max Fee Paid,Min Fee Paid,Average Home SPI,Average Away SPI,Average Season SPI,Position
3,2016,Chelsea FC,CHE,Barclays Premier League,27,119.52,35.1,0.0,84.546842,84.528421,84.537632,
5,2016,Everton FC,EVE,Barclays Premier League,18,77.31,26.01,0.0,69.925789,70.190526,70.058158,
7,2016,Leicester City,LEI,Barclays Premier League,17,82.44,27.45,0.0,62.715789,62.918947,62.817368,
12,2016,Southampton FC,SOU,Barclays Premier League,12,62.01,16.83,0.0,68.128421,67.990526,68.059474,
15,2016,Swansea City,SWA,Barclays Premier League,21,52.38,16.2,0.0,57.245263,57.192105,57.218684,
17,2016,Watford FC,WAT,Barclays Premier League,29,62.965,13.5,0.0,59.052632,58.881053,58.966842,
19,2016,West Ham United,WHU,Barclays Premier League,20,75.15,21.69,0.0,61.657895,61.418421,61.538158,
23,2017,Burnley FC,BUR,Barclays Premier League,18,32.166,14.76,0.0,60.564211,60.332105,60.448158,
27,2017,Everton FC,EVE,Barclays Premier League,24,182.88,44.46,0.0,60.066316,60.378421,60.222368,
29,2017,Leicester City,LEI,Barclays Premier League,16,79.061,24.93,0.0,63.318421,63.369474,63.343947,
