## Transfers Capstone - Features Creation

Follow along with the rest of the notebooks from this project here: https://github.com/tdraths/transfers_capstone/tree/main/notebooks.

In this notebook, I create features I'll use to model a club team's performance improvements based on their activities during the transfer window.

### I. Packages & Data

In [1]:
import pandas as pd

transfers_path = '/home/tdraths/sb_assignments/Transfers_Capstone/data/data_cleaning_outputs/transfers_best.csv'
spi_path = '/home/tdraths/sb_assignments/Transfers_Capstone/data/data_cleaning_outputs/spi_best.csv'

transfers = pd.read_csv(transfers_path)
spi = pd.read_csv(spi_path)

In [2]:
# Checking columns
display(transfers.columns)
spi.columns

Index(['Unnamed: 0', 'club_name', 'team_short', 'player_name', 'age',
       'position', 'club_involved_name', 'transfer_movement', 'fee_cleaned',
       'league_name', 'year', 'season'],
      dtype='object')

Index(['Unnamed: 0', 'season', 'date', 'league_id', 'league', 'team1',
       'team1_short', 'team2', 'team2_short', 'spi1', 'spi2'],
      dtype='object')

In [3]:
# Dropping 'Unnamed: 0'
transfers.drop(columns=['Unnamed: 0'], inplace=True)
spi.drop(columns=['Unnamed: 0'], inplace=True)

display(transfers.head(3))
spi.head(3)

Unnamed: 0,club_name,team_short,player_name,age,position,club_involved_name,transfer_movement,fee_cleaned,league_name,year,season
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021


Unnamed: 0,season,date,league_id,league,team1,team1_short,team2,team2_short,spi1,spi2
0,2017,2017-08-04,2412,English League Championship,Sunderland,SUN,Derby County,DER,50.39,40.83
1,2017,2017-08-04,2412,English League Championship,Nottingham Forest,NOT,Millwall,MIL,35.55,28.23
2,2017,2017-08-05,2412,English League Championship,Sheffield United,SHU,Brentford,BRE,27.72,39.7


### II. Working with the SPI DataFrame
 - As a reminder, the SPI dataframe shows the season, date, league_id, league, team names and team spis for games in the English Premier and English Championship Leagues
 - spi1 is the **SPI score for 'team 1'**, which I'll call 'home team'.
 - spi2 is the **SPI score for 'team2'**, which I'll call 'away team'.
 
__GOALS:__ 
 - Create a data frame that shows the 'home' and 'away' spi scores for each team listed in dataframe.
 - Create columns that show the average home, average away, and season average SPI score for each team per year.

In [4]:
spi_home = spi[['season', 'league', 'team1', 'team1_short', 'spi1']]
spi_away = spi[['season', 'league', 'team2', 'team2_short', 'spi2']]

display(spi.head(3))
display(spi_home.head(3))
spi_away.head(3)

Unnamed: 0,season,date,league_id,league,team1,team1_short,team2,team2_short,spi1,spi2
0,2017,2017-08-04,2412,English League Championship,Sunderland,SUN,Derby County,DER,50.39,40.83
1,2017,2017-08-04,2412,English League Championship,Nottingham Forest,NOT,Millwall,MIL,35.55,28.23
2,2017,2017-08-05,2412,English League Championship,Sheffield United,SHU,Brentford,BRE,27.72,39.7


Unnamed: 0,season,league,team1,team1_short,spi1
0,2017,English League Championship,Sunderland,SUN,50.39
1,2017,English League Championship,Nottingham Forest,NOT,35.55
2,2017,English League Championship,Sheffield United,SHU,27.72


Unnamed: 0,season,league,team2,team2_short,spi2
0,2017,English League Championship,Derby County,DER,40.83
1,2017,English League Championship,Millwall,MIL,28.23
2,2017,English League Championship,Brentford,BRE,39.7


In [5]:
# Create SPI_HOME and look at the wide range of avg_home_spi between the top PL clubs and the not-top ECL clubs

spi_home = spi_home.groupby(['season', 'league', 'team1', 'team1_short'],
                           as_index=False).mean().loc[:, ['season', 'league', 'team1', 'team1_short', 'spi1']]

spi_home.columns = ['year', 'league', 'team name', 'team', 'avg_home_spi']

spi_home[spi_home['year'] == 2018].sort_values(by=['avg_home_spi'], ascending=False).head(3)

Unnamed: 0,year,league,team name,team,avg_home_spi
76,2018,Barclays Premier League,Manchester City,MNC,93.55
75,2018,Barclays Premier League,Liverpool,LIV,91.81
69,2018,Barclays Premier League,Chelsea,CHE,85.693158


In [6]:
# Create SPI_AWAY and look at the wide range of avg_away_spi between the top PL clubs and the not-top ECL clubs

spi_away = spi_away.groupby(['season', 'league', 'team2', 'team2_short'],
                           as_index=False).mean().loc[:, ['season', 'league', 'team2', 'team2_short', 'spi2']]

spi_away.columns = ['year', 'league', 'team name', 'team', 'avg_away_spi']

spi_away[spi_away['year'] == 2018].sort_values(by=['avg_away_spi'], ascending=False).head(3)

Unnamed: 0,year,league,team name,team,avg_away_spi
76,2018,Barclays Premier League,Manchester City,MNC,93.579474
75,2018,Barclays Premier League,Liverpool,LIV,91.626316
69,2018,Barclays Premier League,Chelsea,CHE,85.461053


In [7]:
spi = spi_home.merge(spi_away, how='inner', on=['year', 'team name', 'team', 'league'])
spi.head()

Unnamed: 0,year,league,team name,team,avg_home_spi,avg_away_spi
0,2016,Barclays Premier League,AFC Bournemouth,BOU,60.945789,60.804211
1,2016,Barclays Premier League,Arsenal,ARS,82.174211,81.785789
2,2016,Barclays Premier League,Burnley,BUR,57.487368,57.947895
3,2016,Barclays Premier League,Chelsea,CHE,84.546842,84.528421
4,2016,Barclays Premier League,Crystal Palace,CRY,58.646842,58.364737


In [8]:
# Create an Average SPI column to show the averagee per club per year
spi['avg_spi_score'] = round((spi['avg_home_spi'] + spi['avg_away_spi']) / 2, 2)
spi.head()

Unnamed: 0,year,league,team name,team,avg_home_spi,avg_away_spi,avg_spi_score
0,2016,Barclays Premier League,AFC Bournemouth,BOU,60.945789,60.804211,60.88
1,2016,Barclays Premier League,Arsenal,ARS,82.174211,81.785789,81.98
2,2016,Barclays Premier League,Burnley,BUR,57.487368,57.947895,57.72
3,2016,Barclays Premier League,Chelsea,CHE,84.546842,84.528421,84.54
4,2016,Barclays Premier League,Crystal Palace,CRY,58.646842,58.364737,58.51


In [9]:
# Check for any null values in the dataframe
spi.isna().sum()

year             0
league           0
team name        0
team             0
avg_home_spi     0
avg_away_spi     0
avg_spi_score    0
dtype: int64

In [10]:
# I have different values in this 'league' column than what I want to use. These are a bit wordy
display(spi.league.value_counts())

# Goingt to fix these to match the 'transfers' dataframe
transfers.league_name.value_counts()

Barclays Premier League        100
English League Championship     96
Name: league, dtype: int64

Championship      5002
Premier League    3453
Name: league_name, dtype: int64

In [11]:
spi.league.replace({'Barclays Premier League': 'Premier League',
                    'English League Championship': 'Championship'},inplace=True)
display(spi.league.unique())
transfers.league_name.unique()


array(['Premier League', 'Championship'], dtype=object)

array(['Premier League', 'Championship'], dtype=object)

##### What's happened so far:
 - I took the spi dataframe and subset it into two new dataframes, spi_home and spi_away
 - Each new one was simply a step on the path to a standard spi dataframe that has the average home and away spi for each club each season
 - I merged spi_home and spi_away into a 'new dataframe' called spi.
 - The new and improved **spi** has five columns:
     - Year
     - League
     - Team
     - Avg_Home_SPI
     - Avg_Away_SPI
     - Avg_SPI_Score (the average of 'Home' and 'Away')

### II. Working with the Transfers DataFrame and merging with SPI
 - As a reminder, the transfers dataframe shows the team_name, team, player_name, age, position, club_involved_name, movement, fee_cleaned, league, year and season of every player transfer involving an English Premier League or English Championship League club
 
 
__GOAL:__
 - Create a dataframe that can be merged with SPI.
 - Merge transfers and SPI, and clean up the merge so we can move on to features.

_After this, I'll build features that I can use in a model_

In [12]:
display(transfers.columns)
spi.columns

Index(['club_name', 'team_short', 'player_name', 'age', 'position',
       'club_involved_name', 'transfer_movement', 'fee_cleaned', 'league_name',
       'year', 'season'],
      dtype='object')

Index(['year', 'league', 'team name', 'team', 'avg_home_spi', 'avg_away_spi',
       'avg_spi_score'],
      dtype='object')

In [13]:
transfers.columns = ['team name', 'team', 'player_name', 'age', 'position',
                    'club_involved_name', 'movement in/out', 'fee', 'league', 'year', 'season']
transfers.head()

Unnamed: 0,team name,team,player_name,age,position,club_involved_name,movement in/out,fee,league,year,season
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021


In [14]:
spi_transfers = transfers.merge(spi, how='outer', on=['team', 'year'])
spi_transfers.head()

Unnamed: 0,team name_x,team,player_name,age,position,club_involved_name,movement in/out,fee,league_x,year,season,league_y,team name_y,avg_home_spi,avg_away_spi,avg_spi_score
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79


In [15]:
spi_transfers.shape

(8455, 16)

In [16]:
display(spi_transfers.isna().sum())
# I have a good few null values in here, and I expect they are from missing data for Championship teams in 2016

spi_transfers[(spi_transfers.league_x == 'Championship') & (spi_transfers.year == 2016)].isna().sum()

team name_x              0
team                     0
player_name              0
age                      0
position                 0
club_involved_name       0
movement in/out          0
fee                      0
league_x                 0
year                     0
season                   0
league_y              1200
team name_y           1200
avg_home_spi          1200
avg_away_spi          1200
avg_spi_score         1200
dtype: int64

team name_x              0
team                     0
player_name              0
age                      0
position                 0
club_involved_name       0
movement in/out          0
fee                      0
league_x                 0
year                     0
season                   0
league_y              1200
team name_y           1200
avg_home_spi          1200
avg_away_spi          1200
avg_spi_score         1200
dtype: int64

In [17]:
# Since those missing values match, I'm going to filter this to just Premier League clubs.
spi_transfers = spi_transfers[spi_transfers.league_x == 'Premier League']
display(spi_transfers.head())
display(spi_transfers.shape)
spi_transfers.isna().sum()

Unnamed: 0,team name_x,team,player_name,age,position,club_involved_name,movement in/out,fee,league_x,year,season,league_y,team name_y,avg_home_spi,avg_away_spi,avg_spi_score
0,Arsenal FC,ARS,Thomas,27,Defensive Midfield,Atlético Madrid,in,45.0,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
1,Arsenal FC,ARS,Gabriel,22,Centre-Back,LOSC Lille,in,23.4,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
2,Arsenal FC,ARS,Pablo Marí,26,Centre-Back,Flamengo,in,7.2,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
3,Arsenal FC,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,in,1.8,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79
4,Arsenal FC,ARS,Cédric Soares,28,Right-Back,Southampton,in,0.0,Premier League,2020,2020/2021,Premier League,Arsenal,78.847895,78.742105,78.79


(3453, 16)

team name_x           0
team                  0
player_name           0
age                   0
position              0
club_involved_name    0
movement in/out       0
fee                   0
league_x              0
year                  0
season                0
league_y              0
team name_y           0
avg_home_spi          0
avg_away_spi          0
avg_spi_score         0
dtype: int64

Cleaning up the spi_transfers merge
 - I will clean up the new dataframe by dropping a few columns that popped up in the merge but aren't required
 - team name_x will be dropped, and team name_y will be kept/changed to 'team name'
 - Both 'league' columns will be dropped because I have filtered everything to Premier League clubs
 - season will be dropped because I have year
 - club_involved_name will be dropped because it isn't part of my analysis

In [18]:
spi_transfers.drop(columns=['team name_x', 'league_x','league_y', 'season', 'club_involved_name'], inplace=True)

In [19]:
spi_transfers.columns

Index(['team', 'player_name', 'age', 'position', 'movement in/out', 'fee',
       'year', 'team name_y', 'avg_home_spi', 'avg_away_spi', 'avg_spi_score'],
      dtype='object')

In [20]:
spi_transfers.columns = ['team', 'player name', 'age', 'position', 'movement in/out', 'fee', 
                         'year', 'team name', 'avg home spi', 'avg away spi', 'avg spi score']

In [21]:
display(spi_transfers.head())
display(spi_transfers.shape)
spi_transfers.isna().sum()

Unnamed: 0,team,player name,age,position,movement in/out,fee,year,team name,avg home spi,avg away spi,avg spi score
0,ARS,Thomas,27,Defensive Midfield,in,45.0,2020,Arsenal,78.847895,78.742105,78.79
1,ARS,Gabriel,22,Centre-Back,in,23.4,2020,Arsenal,78.847895,78.742105,78.79
2,ARS,Pablo Marí,26,Centre-Back,in,7.2,2020,Arsenal,78.847895,78.742105,78.79
3,ARS,Rúnar Alex Rúnarsson,25,Goalkeeper,in,1.8,2020,Arsenal,78.847895,78.742105,78.79
4,ARS,Cédric Soares,28,Right-Back,in,0.0,2020,Arsenal,78.847895,78.742105,78.79


(3453, 11)

team               0
player name        0
age                0
position           0
movement in/out    0
fee                0
year               0
team name          0
avg home spi       0
avg away spi       0
avg spi score      0
dtype: int64

In [22]:
# Reogranizing my columns
spi_transfers = spi_transfers[['year', 'team', 'team name', 'player name', 'age', 
                               'position', 'movement in/out', 'fee',
                               'avg home spi', 'avg away spi', 'avg spi score']]
spi_transfers.head()

Unnamed: 0,year,team,team name,player name,age,position,movement in/out,fee,avg home spi,avg away spi,avg spi score
0,2020,ARS,Arsenal,Thomas,27,Defensive Midfield,in,45.0,78.847895,78.742105,78.79
1,2020,ARS,Arsenal,Gabriel,22,Centre-Back,in,23.4,78.847895,78.742105,78.79
2,2020,ARS,Arsenal,Pablo Marí,26,Centre-Back,in,7.2,78.847895,78.742105,78.79
3,2020,ARS,Arsenal,Rúnar Alex Rúnarsson,25,Goalkeeper,in,1.8,78.847895,78.742105,78.79
4,2020,ARS,Arsenal,Cédric Soares,28,Right-Back,in,0.0,78.847895,78.742105,78.79


In [23]:
# Renaming the columns
spi_transfers.rename(columns={
    'year': 'Year', 'team': 'Team', 'team name': 'Team Name', 'player name': 'Player Name', 'age': 'Age',
    'position': 'Position', 'movement in/out': 'Movement In/Out', 'fee': 'Fee',
    'avg home spi': 'Average Home SPI Score', 'avg away spi': 'Average Away SPI Score',
    'avg spi score': 'Average Season SPI Score'}, inplace=True)
spi_transfers.head(3)

Unnamed: 0,Year,Team,Team Name,Player Name,Age,Position,Movement In/Out,Fee,Average Home SPI Score,Average Away SPI Score,Average Season SPI Score
0,2020,ARS,Arsenal,Thomas,27,Defensive Midfield,in,45.0,78.847895,78.742105,78.79
1,2020,ARS,Arsenal,Gabriel,22,Centre-Back,in,23.4,78.847895,78.742105,78.79
2,2020,ARS,Arsenal,Pablo Marí,26,Centre-Back,in,7.2,78.847895,78.742105,78.79


In [24]:
display(spi_transfers.shape)
spi_transfers.isna().sum()

(3453, 11)

Year                        0
Team                        0
Team Name                   0
Player Name                 0
Age                         0
Position                    0
Movement In/Out             0
Fee                         0
Average Home SPI Score      0
Average Away SPI Score      0
Average Season SPI Score    0
dtype: int64

##### What's happened so far:
 - I have a clean data frame of Premier League clubs organized by year
 - spi_transfers shows the players, ages, positions, movement and fees associated with the transfers between 2016 and 2020
 - It also shows the average home, away and season SPI scores that will help me build a model to determine the impact of transfers on a team's performance

### III. Creating Features for spi_transfers
Some questions I'd like to answer through feature creation
 - How many transfers in and out were there per team per year?
 - What is the total amount that each club spent per year?
 - What are the largest and smallest sums spent on a player per club per year?
 - What is the average fee spent per year?
 - Which positions were brought in the most per season per club?
 - Which position attracted the highest fee per club per season?
 
The first four questions can be answered by creating new features from the quantitative data in spi_transfers, 
but the position-specific questions will require more work. I think I'll try to assign the positions cat codes and see if I can work out the details from there!

In [25]:
# First, I'm going to replace some of the values from the position column and standardize things a bit
positions = spi_transfers['Position'].unique()
display(spi_transfers.Position.value_counts())
print(positions)

Centre-Forward        653
Centre-Back           540
Central Midfield      416
Goalkeeper            387
Right-Back            265
Right Winger          261
Left Winger           237
Defensive Midfield    208
Left-Back             201
Attacking Midfield    184
Left Midfield          54
Second Striker         27
Right Midfield         16
Midfielder              2
Forward                 2
Name: Position, dtype: int64

['Defensive Midfield' 'Centre-Back' 'Goalkeeper' 'Right-Back'
 'Right Winger' 'Attacking Midfield' 'Central Midfield' 'Centre-Forward'
 'Left Winger' 'Left-Back' 'Right Midfield' 'Left Midfield'
 'Second Striker' 'Midfielder' 'Forward']


In [26]:
new_position = {'Defensive Midfield': 'Defensive Midfielder',
                'Centre-Back': 'Centre-Back',
                'Goalkeeper': 'Goalkeeper',
                'Right-Back': 'Right-Back',
                'Right Winger': 'Right Winger',
                'Attacking Midfield': 'Attacking Midfielder',
                'Central Midfield': 'Attacking Midfielder',
                'Centre-Forward': 'Striker',
                'Left Winger': 'Left Winger',
                'Left-Back': 'Left-Back',
                'Right Midfield': 'Right Winger',
                'Left Midfield': 'Left Winger',
                'Second Striker': 'Striker',
                'Midfielder': 'Attacking Midfielder',
                'Forward': 'Striker'
               } 

In [27]:
spi_transfers['Position'] = spi_transfers['Position'].replace(new_position)
spi_transfers.Position.value_counts()
# This looks better; nine different positions, all of which can be assigned an integer I can use for analysis

Striker                 682
Attacking Midfielder    602
Centre-Back             540
Goalkeeper              387
Left Winger             291
Right Winger            277
Right-Back              265
Defensive Midfielder    208
Left-Back               201
Name: Position, dtype: int64

In [28]:
position_map = {
    'Goalkeeper': 1,
    'Left-Back': 2,
    'Right-Back': 3,
    'Centre-Back': 4,
    'Defensive Midfielder': 5,
    'Left Winger': 6,
    'Attacking Midfielder': 7,
    'Right Winger': 8,
    'Striker': 9
}
spi_transfers['Num Position'] = spi_transfers['Position'].replace(position_map)
spi_transfers['Num Position'].value_counts()

9    682
7    602
4    540
1    387
6    291
8    277
3    265
5    208
2    201
Name: Num Position, dtype: int64

In [29]:
# Creating a function to help me groupby and aggregate
def my_agg(x):
    features = {
        'Total Transfers In': x[x['Movement In/Out'] == 'in']['Fee'].count(),
        'Total Transfers Out': x[x['Movement In/Out'] == 'out']['Fee'].count(),
        'Total Spent (In)': x[x['Movement In/Out'] == 'in']['Fee'].sum(),
        'Total Earned (Out)': x[x['Movement In/Out'] == 'out']['Fee'].sum(),
        'Average Fee (In)': x[x['Movement In/Out'] == 'in']['Fee'].mean(),
        'Max Fee Spent': x[x['Movement In/Out'] == 'in']['Fee'].max(),
        'Min Fee Spent': x[x['Movement In/Out'] == 'out']['Fee'].max(),
        'Average Home SPI Score': x['Average Home SPI Score'].mean(),
        'Average Away SPI Score': x['Average Away SPI Score'].mean(),
        'Average Season SPI Score': x['Average Season SPI Score'].mean(),
        'Position - Most Purchased': x[x['Movement In/Out'] == 'in']['Num Position'].count()
    }
    
    return pd.Series(features)

In [30]:
df = spi_transfers.groupby(['Year', 'Team', 'Team Name'], as_index=False).apply(my_agg)

In [31]:
df.head(10)

Unnamed: 0,Year,Team,Team Name,Total Transfers In,Total Transfers Out,Total Spent (In),Total Earned (Out),Average Fee (In),Max Fee Spent,Min Fee Spent,Average Home SPI Score,Average Away SPI Score,Average Season SPI Score,Position - Most Purchased
0,2016,ARS,Arsenal,12.0,12.0,101.736,9.32,8.478,40.5,4.5,82.174211,81.785789,81.98,12.0
1,2016,BOU,AFC Bournemouth,19.0,26.0,36.626,22.941,1.927684,16.2,10.8,60.945789,60.804211,60.88,19.0
2,2016,BUR,Burnley,26.0,25.0,41.04,1.08,1.578462,13.59,1.08,57.487368,57.947895,57.72,26.0
3,2016,CHE,Chelsea,27.0,30.0,119.52,97.56,4.426667,35.1,54.0,84.546842,84.528421,84.54,27.0
4,2016,CRY,Crystal Palace,22.0,25.0,91.17,45.18,4.144091,28.08,26.01,58.646842,58.364737,58.51,22.0
5,2016,EVE,Everton,18.0,20.0,77.31,54.72,4.295,26.01,50.04,69.925789,70.190526,70.06,18.0
6,2016,HUL,Hull City,25.0,16.0,36.0,26.01,1.44,13.86,10.8,47.56,47.643684,47.6,25.0
7,2016,LEI,Leicester City,17.0,18.0,82.44,59.45,4.849412,27.45,32.22,62.715789,62.918947,62.82,17.0
8,2016,LIV,Liverpool,19.0,19.0,71.91,76.842,3.784737,37.08,28.08,81.343684,80.923158,81.13,19.0
9,2016,MID,Middlesbrough,18.0,20.0,43.16,12.15,2.397778,9.45,5.31,52.787368,53.230526,53.01,18.0


In [32]:
display(df.shape)
df.isna().sum()

(100, 14)

Year                         0
Team                         0
Team Name                    0
Total Transfers In           0
Total Transfers Out          0
Total Spent (In)             0
Total Earned (Out)           0
Average Fee (In)             0
Max Fee Spent                0
Min Fee Spent                0
Average Home SPI Score       0
Average Away SPI Score       0
Average Season SPI Score     0
Position - Most Purchased    0
dtype: int64

In [33]:
df.dtypes

Year                           int64
Team                          object
Team Name                     object
Total Transfers In           float64
Total Transfers Out          float64
Total Spent (In)             float64
Total Earned (Out)           float64
Average Fee (In)             float64
Max Fee Spent                float64
Min Fee Spent                float64
Average Home SPI Score       float64
Average Away SPI Score       float64
Average Season SPI Score     float64
Position - Most Purchased    float64
dtype: object

In [34]:
df['Year'] = pd.to_datetime(df['Year'], format='%Y')
df.describe()

Unnamed: 0,Total Transfers In,Total Transfers Out,Total Spent (In),Total Earned (Out),Average Fee (In),Max Fee Spent,Min Fee Spent,Average Home SPI Score,Average Away SPI Score,Average Season SPI Score,Position - Most Purchased
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,17.19,17.34,78.3107,37.19676,4.834574,29.3411,22.00693,70.112568,70.062268,70.0876,17.19
std,6.734046,6.996709,54.703505,37.192549,3.445289,19.676078,23.232908,11.739065,11.743365,11.740474,6.734046
min,4.0,3.0,0.0,0.0,0.0,0.0,0.0,47.467895,47.195263,47.33,4.0
25%,12.0,12.75,40.905,8.79,2.289671,15.3,5.1975,60.850395,60.366842,60.555,12.0
50%,16.0,17.0,66.715,25.8975,3.960481,22.95,14.67,69.067368,68.970263,69.02,16.0
75%,21.25,21.25,101.2065,52.8975,6.547147,37.71,30.195,80.378684,80.529474,80.52,21.25
max,39.0,45.0,285.75,180.54,18.5,94.5,130.5,94.928947,94.827895,94.88,39.0
