# 410_prep_RQ2_Dataframe
## Purpose 
In this notebook we prepare a dataframe for our Research Question 2.  
## Datasets 
* _Input_: Joined1617.csv, Joined1516.csv, Joined1415.csv, Joined1314.csv, Joined1213.csv
* _Output_: RQ2.csv

In [1]:
import math
import os.path
import numpy as np
import pandas as pd

#### Reading in our cleaned Joined datasets from 16-17 to 12-13.

In [2]:
# Load the cleaned per-season joined tables, newest season (16/17) first.
J16_17, J15_16, J14_15, J13_14, J12_13 = map(
    pd.read_csv,
    [
        "../../data/prep/Joined1617.csv",
        "../../data/prep/Joined1516.csv",
        "../../data/prep/Joined1415.csv",
        "../../data/prep/Joined1314.csv",
        "../../data/prep/Joined1213.csv",
    ],
)

In [3]:
# Player-level columns used as the shared key when the seasons are combined.
index = [
    'Players', 'Position', 'league', 'age', 'nationality', 'photo',
    'overall', 'skill_moves', 'pac', 'def', 'pas', 'dri',
    'eur_value', 'eur_wage',
]

# Per-season statistic columns kept from each joined table.
fantasyinfo = [
    'Apps', 'Points', 'Start', 'Off',
    'Goals', 'A', 'CS',
]

In [4]:
# Key each season's table by the shared player columns (dropping rows that
# repeat the same key) and keep only the per-season statistic columns.
s1, s2, s3, s4, s5 = [
    season_table.drop_duplicates(index).set_index(index)[fantasyinfo]
    for season_table in (J16_17, J15_16, J14_15, J13_14, J12_13)
]

# Put the seasons side by side under a top-level season label. A player who is
# absent from a season gets 0 for that season's statistics.
season_keys = ('16/17','15/16','14/15','13/14','12/13')
combined = pd.concat([s1, s2, s3, s4, s5], axis=1, keys=season_keys)
RQ2 = combined.fillna(0).astype(float).reset_index()

In [5]:
# A player is "Homegrown" when their nationality matches the country of the
# league they play in, for the five listed (nationality, league) pairs.
home_pairs = [
    ('England', 'English Premier League'),
    ('Germany', 'German Bundesliga'),
    ('Spain', 'Spanish Primera Division'),
    ('France', 'French Ligue 1'),
    ('Italy', 'Italian Serie A'),
]
player_nationality = RQ2['nationality']
player_league = RQ2['league']

homegrown_mask = pd.Series(False, index=RQ2.index)
for nation, domestic_league in home_pairs:
    homegrown_mask = homegrown_mask | (
        (player_nationality == nation) & (player_league == domestic_league)
    )
RQ2['Homegrown'] = homegrown_mask

In [6]:
# A player is "Abroad" when they hold one of the five listed nationalities and
# play in one of the other four listed leagues (not their own country's).
league_by_nation = [
    ('England', 'English Premier League'),
    ('Germany', 'German Bundesliga'),
    ('Spain', 'Spanish Primera Division'),
    ('France', 'French Ligue 1'),
    ('Italy', 'Italian Serie A'),
]
nationality_col = RQ2['nationality']
league_col = RQ2['league']

abroad_mask = pd.Series(False, index=RQ2.index)
for nation, own_league in league_by_nation:
    foreign_leagues = [lg for _, lg in league_by_nation if lg != own_league]
    abroad_mask = abroad_mask | (
        (nationality_col == nation) & league_col.isin(foreign_leagues)
    )
RQ2['Abroad'] = abroad_mask

In [7]:
# Total appearances across the five seasons.
RQ2['Overall Apps'] = sum(
    RQ2[season]['Apps']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)

In [8]:
# Share of appearances that were starts, over all five seasons.
total_starts = sum(
    RQ2[season]['Start']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Starts/Apps'] = total_starts / RQ2['Overall Apps']

In [9]:
# 'Off' total per appearance, over all five seasons.
total_off = sum(
    RQ2[season]['Off']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Subbed Off/Apps'] = total_off / RQ2['Overall Apps']

In [10]:
# Points per appearance, over all five seasons.
total_points = sum(
    RQ2[season]['Points']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Average Form'] = total_points / RQ2['Overall Apps']

In [11]:
# Goals per appearance, over all five seasons.
total_goals = sum(
    RQ2[season]['Goals']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Goals/Apps'] = total_goals / RQ2['Overall Apps']

In [12]:
# 'A' total per appearance, over all five seasons.
total_assists = sum(
    RQ2[season]['A']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Assists/Apps'] = total_assists / RQ2['Overall Apps']

In [13]:
# 'CS' total per appearance, over all five seasons.
total_clean_sheets = sum(
    RQ2[season]['CS']
    for season in ('16/17', '15/16', '14/15', '13/14', '12/13')
)
RQ2['Clean Sheets/Apps'] = total_clean_sheets / RQ2['Overall Apps']

In [14]:
# Clamp negative per-appearance values to 0 in every derived column.
# The target column must be the same column that is tested. Assigning to a
# misspelt or non-existent name (e.g. 'Starts/Apps Ratio', 'Cleen Sheets/Apps')
# leaves the real column unclamped and adds a stray column instead.
ratio_columns = [
    'Starts/Apps',
    'Subbed Off/Apps',
    'Average Form',
    'Goals/Apps',
    'Assists/Apps',
    'Clean Sheets/Apps',
]
for ratio_column in ratio_columns:
    RQ2.loc[RQ2[ratio_column] < 0, ratio_column] = 0

### Filtering

In [15]:
# Keep only rows whose 'Average Form' is below 10000. NaN and +inf fail this
# comparison, so rows with an undefined or infinite ratio are dropped too.
form_is_finite_and_small = RQ2['Average Form'].lt(10000)
RQ2 = RQ2.loc[form_is_finite_and_small]

In [16]:
# Keep rows with a non-negative appearance total.
# NOTE(review): `>= 0` also keeps rows with exactly zero appearances - confirm
# whether a strict `> 0` was intended.
has_non_negative_apps = RQ2['Overall Apps'].ge(0)
RQ2 = RQ2.loc[has_non_negative_apps]

### Tidying Up

In [17]:
# Rank players by 'Average Form' (best first) and renumber rows 0..n-1.
# reset_index(drop=True) discards the old index directly, replacing the
# `.reset_index().drop('index', 1)` round-trip, whose positional `axis`
# argument is no longer accepted by current pandas.
RQ2 = RQ2.sort_values('Average Form', ascending=False).reset_index(drop=True)

  """Entry point for launching an IPython kernel.


In [18]:
# Show every column label currently in the frame.
RQ2.columns.tolist()

[('Players', ''),
 ('Position', ''),
 ('league', ''),
 ('age', ''),
 ('nationality', ''),
 ('photo', ''),
 ('overall', ''),
 ('skill_moves', ''),
 ('pac', ''),
 ('def', ''),
 ('pas', ''),
 ('dri', ''),
 ('eur_value', ''),
 ('eur_wage', ''),
 ('16/17', 'Apps'),
 ('16/17', 'Points'),
 ('16/17', 'Start'),
 ('16/17', 'Off'),
 ('16/17', 'Goals'),
 ('16/17', 'A'),
 ('16/17', 'CS'),
 ('15/16', 'Apps'),
 ('15/16', 'Points'),
 ('15/16', 'Start'),
 ('15/16', 'Off'),
 ('15/16', 'Goals'),
 ('15/16', 'A'),
 ('15/16', 'CS'),
 ('14/15', 'Apps'),
 ('14/15', 'Points'),
 ('14/15', 'Start'),
 ('14/15', 'Off'),
 ('14/15', 'Goals'),
 ('14/15', 'A'),
 ('14/15', 'CS'),
 ('13/14', 'Apps'),
 ('13/14', 'Points'),
 ('13/14', 'Start'),
 ('13/14', 'Off'),
 ('13/14', 'Goals'),
 ('13/14', 'A'),
 ('13/14', 'CS'),
 ('12/13', 'Apps'),
 ('12/13', 'Points'),
 ('12/13', 'Start'),
 ('12/13', 'Off'),
 ('12/13', 'Goals'),
 ('12/13', 'A'),
 ('12/13', 'CS'),
 ('Homegrown', ''),
 ('Abroad', ''),
 ('Overall Apps', ''),
 ('Starts

In [19]:
# Keep only the player attributes and derived measures used for the output.
# The per-season statistic columns are dropped here.
output_columns = [
    'Players', 'Position', 'league', 'age', 'nationality', 'overall', 'photo',
    'skill_moves', 'pac', 'def', 'pas', 'dri', 'eur_value', 'eur_wage',
    'Homegrown', 'Abroad',
    'Starts/Apps', 'Subbed Off/Apps', 'Overall Apps', 'Average Form',
    'Goals/Apps', 'Assists/Apps', 'Clean Sheets/Apps',
]
RQ2 = RQ2[output_columns]
RQ2

Unnamed: 0_level_0,Players,Position,league,age,nationality,overall,photo,skill_moves,pac,def,...,eur_wage,Homegrown,Abroad,Starts/Apps,Subbed Off/Apps,Overall Apps,Average Form,Goals/Apps,Assists/Apps,Clean Sheets/Apps
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Lionel Messi,Forward,Spanish Primera Division,30,Argentina,93,https://cdn.sofifa.org/18/players/158023.png,4,89,26,...,565000.0,False,False,0.934524,0.059524,168.0,8.255952,1.065476,0.386905,0.000000
1,Cristiano Ronaldo,Forward,Spanish Primera Division,32,Portugal,94,https://cdn.sofifa.org/18/players/20801.png,5,90,33,...,565000.0,False,False,0.975610,0.109756,164.0,7.926829,1.054878,0.317073,0.000000
2,Luis Suarez,Forward,Spanish Primera Division,30,Uruguay,92,https://cdn.sofifa.org/18/players/176580.png,4,82,42,...,510000.0,False,False,0.975758,0.151515,165.0,7.018182,0.836364,0.363636,0.000000
3,Denis Bouanga,Midfielder,French Ligue 2,22,Gabon,71,https://cdn.sofifa.org/18/players/225951.png,3,77,22,...,6000.0,False,False,1.000000,1.000000,1.0,7.000000,1.000000,0.000000,0.000000
4,Josh Harrop,Midfielder,English Championship,21,England,65,https://cdn.sofifa.org/18/players/213692.png,4,72,37,...,4000.0,False,False,1.000000,0.000000,1.0,7.000000,1.000000,0.000000,0.000000
5,Dylan Saint-Louis,Midfielder,French Ligue 2,22,France,68,https://cdn.sofifa.org/18/players/227547.png,3,78,32,...,5000.0,False,False,1.000000,1.000000,1.0,7.000000,1.000000,0.000000,0.000000
6,Luca Crosta,Goalkeeper,Italian Serie A,19,Italy,64,https://cdn.sofifa.org/18/players/238847.png,1,63,24,...,4000.0,True,False,1.000000,0.000000,1.0,7.000000,0.000000,0.000000,0.000000
7,Zlatan Ibrahimovic,Forward,English Premier League,35,Sweden,88,https://cdn.sofifa.org/18/players/41236.png,4,65,32,...,240000.0,False,False,0.960000,0.093333,150.0,6.900000,0.866667,0.286667,0.000000
8,Gabriel Jesus,Forward,English Premier League,20,Brazil,81,https://cdn.sofifa.org/18/players/230666.png,4,87,26,...,115000.0,False,False,0.800000,0.300000,10.0,6.300000,0.700000,0.400000,0.000000
9,Robert Lewandowski,Forward,German Bundesliga,28,Poland,91,https://cdn.sofifa.org/18/players/188545.png,3,81,38,...,355000.0,False,False,0.925000,0.150000,160.0,6.050000,0.756250,0.150000,0.000000


In [20]:
# Persist the prepared frame for the analysis notebooks. No options are passed,
# so pandas' defaults apply (the row index is written as the first column).
RQ2.to_csv('../../data/analysis/RQ2.csv')

In [21]:
# Sanity check: row count, column dtypes and non-null counts of the saved frame.
RQ2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3455 entries, 0 to 3454
Data columns (total 23 columns):
(Players, )              3455 non-null object
(Position, )             3455 non-null object
(league, )               3455 non-null object
(age, )                  3455 non-null int64
(nationality, )          3455 non-null object
(overall, )              3455 non-null int64
(photo, )                3455 non-null object
(skill_moves, )          3455 non-null int64
(pac, )                  3455 non-null int64
(def, )                  3455 non-null int64
(pas, )                  3455 non-null int64
(dri, )                  3455 non-null int64
(eur_value, )            3455 non-null float64
(eur_wage, )             3455 non-null float64
(Homegrown, )            3455 non-null bool
(Abroad, )               3455 non-null bool
(Starts/Apps, )          3455 non-null float64
(Subbed Off/Apps, )      3455 non-null float64
(Overall Apps, )         3455 non-null float64
(Average Form, )         