# API Cleaning

In [1]:
import pandas as pd
import numpy as np
import unicodedata

## Fixture Gameweek Data Cleaning

In [2]:
df=pd.read_csv('fixture_gw/fixture_gw1.csv')

In [3]:
df.head()

Unnamed: 0.1,Unnamed: 0,code,event,finished,finished_provisional,id,kickoff_time,minutes,provisional_start_time,started,team_a,team_a_score,team_h,team_h_score,stats,team_h_difficulty,team_a_difficulty
0,0,1059702,1,True,True,1,2019-08-09T19:00:00Z,90,False,True,14,1,10,4,"[{'identifier': 'goals_scored', 'a': [{'value'...",2,5
1,1,1059709,1,True,True,8,2019-08-10T11:30:00Z,90,False,True,11,5,19,0,"[{'identifier': 'goals_scored', 'a': [{'value'...",4,2
2,2,1059703,1,True,True,2,2019-08-10T14:00:00Z,90,False,True,15,1,3,1,"[{'identifier': 'goals_scored', 'a': [{'value'...",3,3
3,3,1059704,1,True,True,3,2019-08-10T14:00:00Z,90,False,True,16,0,5,3,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,3
4,4,1059705,1,True,True,4,2019-08-10T14:00:00Z,90,False,True,8,0,7,0,"[{'identifier': 'goals_scored', 'a': [], 'h': ...",2,2


In [4]:
# Index the fixtures by the id column that ships with the data.
df = df.set_index('id')

Fixture gw already has an id feature, so this is used. 

In [5]:
df.columns

Index(['Unnamed: 0', 'code', 'event', 'finished', 'finished_provisional',
       'kickoff_time', 'minutes', 'provisional_start_time', 'started',
       'team_a', 'team_a_score', 'team_h', 'team_h_score', 'stats',
       'team_h_difficulty', 'team_a_difficulty'],
      dtype='object')

In [6]:
# Columns removed from the fixture table before analysis.
fixture_cols_to_drop = [
    'finished',
    'finished_provisional',
    'minutes',
    'provisional_start_time',
    'started',
    'stats',
    'Unnamed: 0',
]
df = df.drop(columns=fixture_cols_to_drop)

The removed columns are not important to the analysis. finished, finished provisional, minutes, provisional start time and started each hold the same value for every entry. Stats is a feature used by the website to display the other features, so it adds nothing to the analysis. Unnamed: 0 is an unneeded column that was created by the way in which the data was gathered.

In [7]:
df.dtypes

code                  int64
event                 int64
kickoff_time         object
team_a                int64
team_a_score          int64
team_h                int64
team_h_score          int64
team_h_difficulty     int64
team_a_difficulty     int64
dtype: object

In [8]:
# Apply the index/drop steps to fixture files 1-27 and save each cleaned copy.
# A loop-local name is used so the `df` explored in the cells above is not
# silently replaced by the last gameweek's frame.
# NOTE(review): these are absolute, machine-specific paths — consider a
# configurable base directory.
fixture_raw_stem = 'C:/Users/nath1/Documents/CS/105/project/fixture_gw/fixture_gw'
fixture_clean_stem = 'C:/Users/nath1/Documents/CS/105/project/fixture_gw_clean/fixture_gw_clean'
fixture_unused_cols = [
    'finished',
    'finished_provisional',
    'minutes',
    'provisional_start_time',
    'started',
    'stats',
    'Unnamed: 0',
]
for gw in range(1, 28):
    gw_fixtures = (
        pd.read_csv(fixture_raw_stem + str(gw) + '.csv')
        .set_index('id')
        .drop(columns=fixture_unused_cols)
    )
    gw_fixtures.to_csv(fixture_clean_stem + str(gw) + '.csv')

The above cleaning processes were executed on all fixture_gw files

In [9]:
# Stack the 27 cleaned fixture files into one table and save it.
fixture_clean_stem = 'C:/Users/nath1/Documents/CS/105/project/fixture_gw_clean/fixture_gw_clean'
# Read every file first and concatenate once: calling concat inside a loop
# re-copies all previously accumulated rows on each pass.
df_all = pd.concat(
    [pd.read_csv(fixture_clean_stem + str(gw) + '.csv') for gw in range(1, 28)],
    axis=0,
)
# NOTE(review): the files are read without index_col, so 'id' is an ordinary
# column and each file's 0..n-1 row labels are written out as an unnamed first
# column. Confirm what downstream readers expect before changing this.
df_all.to_csv(fixture_clean_stem + '_all.csv')

All fixture data concatenated into one CSV file.

## Player Gameweek Data Cleaning

In [10]:
df2=pd.read_csv('player_gw/player_gw1.csv')

In [11]:
df2.head()

Unnamed: 0.1,Unnamed: 0,id,explain,stats.minutes,stats.goals_scored,stats.assists,stats.clean_sheets,stats.goals_conceded,stats.own_goals,stats.penalties_saved,...,stats.red_cards,stats.saves,stats.bonus,stats.bps,stats.influence,stats.creativity,stats.threat,stats.ict_index,stats.total_points,stats.in_dreamteam
0,0,14,"[{'fixture': 10, 'stats': [{'identifier': 'min...",90,0,0,1,0,0,0,...,0,2,1,27,18.0,0.0,0.0,1.8,7,False
1,1,27,"[{'fixture': 6, 'stats': [{'identifier': 'minu...",0,0,0,0,0,0,0,...,0,0,0,0,0.0,0.0,0.0,0.0,0,False
2,2,47,"[{'fixture': 7, 'stats': [{'identifier': 'minu...",90,0,0,1,0,0,0,...,0,3,2,29,25.4,0.0,0.0,2.5,9,False
3,3,48,"[{'fixture': 7, 'stats': [{'identifier': 'minu...",0,0,0,0,0,0,0,...,0,0,0,0,0.0,0.0,0.0,0.0,0,False
4,4,72,"[{'fixture': 2, 'stats': [{'identifier': 'minu...",0,0,0,0,0,0,0,...,0,0,0,0,0.0,0.0,0.0,0.0,0,False


In [12]:
# Capture the digits that follow 'fixture': with a pattern instead of slicing at
# a fixed character offset. The old slice began on the space before the digits,
# so the three-character cut in the next cell would truncate a three-digit id.
df2['explain']=df2['explain'].str.extract(r"'fixture':\s*(\d+)", expand=False)

In [13]:
# Keep only the first three characters of what remains in 'explain'.
df2['explain']=df2['explain'].str[:3]

In [14]:
# Trim any leading or trailing commas, spaces and single quotes.
df2['explain']=df2['explain'].str.strip(', \'')

In [15]:
# The column now holds a fixture id, so name it accordingly.
df2 = df2.rename(columns={'explain': 'fixture_id'})

In [16]:
# Convert the extracted id text to integers.
df2 = df2.astype({'fixture_id': int})

Player gw data 'explain' column carries a lot of unneeded information that is for FPL website use. Above code modifies it to extract the useful data (fixture id) and rename the column as such.

In [17]:
df2.columns

Index(['Unnamed: 0', 'id', 'fixture_id', 'stats.minutes', 'stats.goals_scored',
       'stats.assists', 'stats.clean_sheets', 'stats.goals_conceded',
       'stats.own_goals', 'stats.penalties_saved', 'stats.penalties_missed',
       'stats.yellow_cards', 'stats.red_cards', 'stats.saves', 'stats.bonus',
       'stats.bps', 'stats.influence', 'stats.creativity', 'stats.threat',
       'stats.ict_index', 'stats.total_points', 'stats.in_dreamteam'],
      dtype='object')

In [18]:
# Discard the leftover 'Unnamed: 0' column.
df2 = df2.drop(columns='Unnamed: 0')

Unnamed: 0 is unneeded data that was created due to the way in which data was gathered/saved.

In [19]:
# Index the player rows by the id column.
df2 = df2.set_index('id')

In [20]:
df2.head()

Unnamed: 0_level_0,fixture_id,stats.minutes,stats.goals_scored,stats.assists,stats.clean_sheets,stats.goals_conceded,stats.own_goals,stats.penalties_saved,stats.penalties_missed,stats.yellow_cards,stats.red_cards,stats.saves,stats.bonus,stats.bps,stats.influence,stats.creativity,stats.threat,stats.ict_index,stats.total_points,stats.in_dreamteam
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
14,10,90,0,0,1,0,0,0,0,0,0,2,1,27,18.0,0.0,0.0,1.8,7,False
27,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False
47,7,90,0,0,1,0,0,0,0,0,0,3,2,29,25.4,0.0,0.0,2.5,9,False
48,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False
72,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0,False


In [21]:
df2.dtypes

fixture_id                  int32
stats.minutes               int64
stats.goals_scored          int64
stats.assists               int64
stats.clean_sheets          int64
stats.goals_conceded        int64
stats.own_goals             int64
stats.penalties_saved       int64
stats.penalties_missed      int64
stats.yellow_cards          int64
stats.red_cards             int64
stats.saves                 int64
stats.bonus                 int64
stats.bps                   int64
stats.influence           float64
stats.creativity          float64
stats.threat              float64
stats.ict_index           float64
stats.total_points          int64
stats.in_dreamteam           bool
dtype: object

In [22]:
# Clean player gameweek files 1-27: extract the fixture id, drop rows that have
# none, tag each row with its gameweek number and save the result.
# NOTE(review): these are absolute, machine-specific paths — consider a
# configurable base directory.
player_raw_stem = 'C:/Users/nath1/Documents/CS/105/project/player_gw/player_gw'
player_clean_stem = 'C:/Users/nath1/Documents/CS/105/project/player_gw_clean/player_gw_clean'
for gw in range(1, 28):
    player_gw = (
        pd.read_csv(player_raw_stem + str(gw) + '.csv')
        .set_index('id')
        .drop(columns=['Unnamed: 0'])
    )
    # Match the digits after 'fixture': rather than slicing at fixed character
    # offsets. Only the first fixture listed in 'explain' is captured, as before.
    player_gw['explain'] = player_gw['explain'].str.extract(
        r"'fixture':\s*(\d+)", expand=False
    )
    # Chaining on the frame avoids assigning a column on a boolean-filtered
    # slice, which raised SettingWithCopyWarning in the previous version.
    player_gw = (
        player_gw
        .rename(columns={'explain': 'fixture_id'})
        # Rows whose 'explain' contains no fixture come back as NaN; drop them.
        .dropna(subset=['fixture_id'])
        .astype({'fixture_id': int})
        .assign(gw=gw)
    )
    player_gw.to_csv(player_clean_stem + str(gw) + '.csv')

Applies the above cleaning process to all player gw files. Also removes players who did not play in a given gameweek (blank gameweek), and adds a feature that shows which gameweek each player's stats are from.

In [23]:
# Stack the 27 cleaned player gameweek files into one table and save it.
player_clean_stem = 'C:/Users/nath1/Documents/CS/105/project/player_gw_clean/player_gw_clean'
# Read every file first and concatenate once: calling concat inside a loop
# re-copies all previously accumulated rows on each pass.
df_all = pd.concat(
    [pd.read_csv(player_clean_stem + str(gw) + '.csv') for gw in range(1, 28)],
    axis=0,
)
# NOTE(review): the files are read without index_col, so 'id' is an ordinary
# column and each file's 0..n-1 row labels are written out as an unnamed first
# column. Confirm what downstream readers expect before changing this.
df_all.to_csv(player_clean_stem + '_all.csv')

Concatenates all player_gw files into one csv file.

## Teams Data Cleaning

In [24]:
df3=pd.read_csv('data/teams.csv')

In [25]:
df3.head()

Unnamed: 0.1,Unnamed: 0,code,draw,form,id,loss,name,played,points,position,...,strength,team_division,unavailable,win,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
0,0,3,0,,1,0,Arsenal,0,0,0,...,4,,False,0,1230,1270,1150,1190,1280,1330
1,1,7,0,,2,0,Aston Villa,0,0,0,...,2,,False,0,1040,1080,1030,1060,1030,1050
2,2,91,0,,3,0,Bournemouth,0,0,0,...,3,,False,0,1090,1120,1030,1070,1110,1180
3,3,36,0,,4,0,Brighton,0,0,0,...,3,,False,0,1050,1120,1040,1160,1030,1130
4,4,90,0,,5,0,Burnley,0,0,0,...,3,,False,0,1050,1110,1060,1130,1050,1050


In [26]:
# Index the teams by the id column that ships with the data.
df3 = df3.set_index('id')

Uses unique identifier already given in data.

In [27]:
df3.columns

Index(['Unnamed: 0', 'code', 'draw', 'form', 'loss', 'name', 'played',
       'points', 'position', 'short_name', 'strength', 'team_division',
       'unavailable', 'win', 'strength_overall_home', 'strength_overall_away',
       'strength_attack_home', 'strength_attack_away', 'strength_defence_home',
       'strength_defence_away'],
      dtype='object')

In [28]:
# Columns removed from the teams table before analysis.
team_cols_to_drop = [
    'Unnamed: 0',
    'draw',
    'form',
    'loss',
    'played',
    'points',
    'position',
    'team_division',
    'unavailable',
    'win',
]
df3 = df3.drop(columns=team_cols_to_drop)

Removes features that are not valuable to analysis. Unnamed: 0 is irrelevant data, as before. Draw, form, loss, played, points, position, team division, unavailable and win are all either null, NaN or irrelevant.

In [29]:
df3.head()

Unnamed: 0_level_0,code,name,short_name,strength,strength_overall_home,strength_overall_away,strength_attack_home,strength_attack_away,strength_defence_home,strength_defence_away
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,3,Arsenal,ARS,4,1230,1270,1150,1190,1280,1330
2,7,Aston Villa,AVL,2,1040,1080,1030,1060,1030,1050
3,91,Bournemouth,BOU,3,1090,1120,1030,1070,1110,1180
4,36,Brighton,BHA,3,1050,1120,1040,1160,1030,1130
5,90,Burnley,BUR,3,1050,1110,1060,1130,1050,1050


In [30]:
df3.dtypes

code                      int64
name                     object
short_name               object
strength                  int64
strength_overall_home     int64
strength_overall_away     int64
strength_attack_home      int64
strength_attack_away      int64
strength_defence_home     int64
strength_defence_away     int64
dtype: object

In [31]:
df3.to_csv('C:/Users/nath1/Documents/CS/105/project/data_clean/teams_clean.csv')

Cleaned data file saved.

## Events (Gameweeks) Data Cleaning

In [32]:
df4=pd.read_csv('data/events.csv')

In [33]:
# Index the gameweek events by the id column that ships with the data.
df4 = df4.set_index('id')

Using given unique identifier

In [34]:
df4.head()

Unnamed: 0_level_0,Unnamed: 0,name,deadline_time,average_entry_score,finished,data_checked,highest_scoring_entry,deadline_time_epoch,deadline_time_game_offset,highest_score,...,chip_plays,most_selected,most_transferred_in,top_element,transfers_made,most_captained,most_vice_captained,top_element_info.id,top_element_info.points,top_element_info
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,Gameweek 1,2019-08-09T18:00:00Z,65,True,False,3493085.0,1565373600,0,142.0,...,"[{'chip_name': 'bboost', 'num_played': 128770}...",183.0,1.0,214.0,0,191.0,189.0,214.0,20.0,
2,1,Gameweek 2,2019-08-17T10:30:00Z,41,True,False,6106693.0,1566037800,0,119.0,...,"[{'chip_name': 'bboost', 'num_played': 114585}...",183.0,235.0,278.0,9998839,191.0,214.0,278.0,17.0,
3,2,Gameweek 3,2019-08-23T18:00:00Z,44,True,False,5087809.0,1566583200,0,107.0,...,"[{'chip_name': 'bboost', 'num_played': 79958},...",183.0,278.0,191.0,13387638,214.0,191.0,191.0,15.0,
4,3,Gameweek 4,2019-08-31T10:30:00Z,57,True,False,6372519.0,1567247400,0,136.0,...,"[{'chip_name': 'bboost', 'num_played': 72741},...",183.0,278.0,166.0,13215999,214.0,191.0,166.0,16.0,
5,4,Gameweek 5,2019-09-14T10:30:00Z,52,True,False,6223449.0,1568457000,0,128.0,...,"[{'chip_name': 'bboost', 'num_played': 58835},...",183.0,243.0,342.0,12363678,191.0,214.0,342.0,16.0,


In [35]:
df4.columns

Index(['Unnamed: 0', 'name', 'deadline_time', 'average_entry_score',
       'finished', 'data_checked', 'highest_scoring_entry',
       'deadline_time_epoch', 'deadline_time_game_offset', 'highest_score',
       'is_previous', 'is_current', 'is_next', 'chip_plays', 'most_selected',
       'most_transferred_in', 'top_element', 'transfers_made',
       'most_captained', 'most_vice_captained', 'top_element_info.id',
       'top_element_info.points', 'top_element_info'],
      dtype='object')

In [36]:
# Columns removed from the events table before analysis.
event_cols_to_drop = [
    'Unnamed: 0',
    'data_checked',
    'highest_scoring_entry',
    'deadline_time_game_offset',
    'is_previous',
    'is_current',
    'is_next',
    'top_element_info',
]
df4 = df4.drop(columns=event_cols_to_drop)

Unnamed: 0 is again irrelevant data. Data checked, highest scoring entry, deadline time game offset, is previous, is current, is next and top element info are all features that are primarily for internal FPL website use and are not important to analysis.

In [37]:
df4.head()

Unnamed: 0_level_0,name,deadline_time,average_entry_score,finished,deadline_time_epoch,highest_score,chip_plays,most_selected,most_transferred_in,top_element,transfers_made,most_captained,most_vice_captained,top_element_info.id,top_element_info.points
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,Gameweek 1,2019-08-09T18:00:00Z,65,True,1565373600,142.0,"[{'chip_name': 'bboost', 'num_played': 128770}...",183.0,1.0,214.0,0,191.0,189.0,214.0,20.0
2,Gameweek 2,2019-08-17T10:30:00Z,41,True,1566037800,119.0,"[{'chip_name': 'bboost', 'num_played': 114585}...",183.0,235.0,278.0,9998839,191.0,214.0,278.0,17.0
3,Gameweek 3,2019-08-23T18:00:00Z,44,True,1566583200,107.0,"[{'chip_name': 'bboost', 'num_played': 79958},...",183.0,278.0,191.0,13387638,214.0,191.0,191.0,15.0
4,Gameweek 4,2019-08-31T10:30:00Z,57,True,1567247400,136.0,"[{'chip_name': 'bboost', 'num_played': 72741},...",183.0,278.0,166.0,13215999,214.0,191.0,166.0,16.0
5,Gameweek 5,2019-09-14T10:30:00Z,52,True,1568457000,128.0,"[{'chip_name': 'bboost', 'num_played': 58835},...",183.0,243.0,342.0,12363678,191.0,214.0,342.0,16.0


In [38]:
# Remove every row that has a missing value in any remaining column.
df4 = df4.dropna()

Drops gameweek entries for gameweeks that have not been played yet (28 and above)

In [39]:
df4.dtypes

name                        object
deadline_time               object
average_entry_score          int64
finished                      bool
deadline_time_epoch          int64
highest_score              float64
chip_plays                  object
most_selected              float64
most_transferred_in        float64
top_element                float64
transfers_made               int64
most_captained             float64
most_vice_captained        float64
top_element_info.id        float64
top_element_info.points    float64
dtype: object

In [40]:
# Cast the float columns listed below to int in a single astype call.
event_int_cols = [
    'most_selected',
    'most_transferred_in',
    'top_element',
    'most_captained',
    'most_vice_captained',
    'top_element_info.id',
]
df4 = df4.astype({col: int for col in event_int_cols})

Converts these float columns to the int data type.

In [41]:
df4.to_csv('C:/Users/nath1/Documents/CS/105/project/data_clean/events_clean.csv')

Saves cleaned events data file

## Elements (Players) Data Cleaning

In [42]:
df5=pd.read_csv('data/elements.csv')

In [43]:
df5.head()

Unnamed: 0.1,Unnamed: 0,chance_of_playing_next_round,chance_of_playing_this_round,code,cost_change_event,cost_change_event_fall,cost_change_start,cost_change_start_fall,dreamteam_count,element_type,...,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index
0,0,100.0,100.0,69140,0,0,-4,4,0,2,...,0,0,0,0,1,144,174.2,15.4,107.0,29.8
1,1,100.0,100.0,98745,0,0,-1,1,0,2,...,0,2,0,0,3,113,116.6,37.7,38.0,19.4
2,2,0.0,100.0,111457,0,0,-3,3,0,2,...,0,3,0,0,1,222,190.0,157.3,67.0,41.2
3,3,100.0,100.0,154043,-1,1,-5,5,1,2,...,0,3,1,0,3,216,261.8,155.4,37.0,44.9
4,4,100.0,75.0,39476,-1,1,-2,2,1,2,...,0,5,0,0,5,286,431.6,31.8,106.0,57.1


In [44]:
df5.columns

Index(['Unnamed: 0', 'chance_of_playing_next_round',
       'chance_of_playing_this_round', 'code', 'cost_change_event',
       'cost_change_event_fall', 'cost_change_start', 'cost_change_start_fall',
       'dreamteam_count', 'element_type', 'ep_next', 'ep_this', 'event_points',
       'first_name', 'form', 'id', 'in_dreamteam', 'news', 'news_added',
       'now_cost', 'photo', 'points_per_game', 'second_name',
       'selected_by_percent', 'special', 'squad_number', 'status', 'team',
       'team_code', 'total_points', 'transfers_in', 'transfers_in_event',
       'transfers_out', 'transfers_out_event', 'value_form', 'value_season',
       'web_name', 'minutes', 'goals_scored', 'assists', 'clean_sheets',
       'goals_conceded', 'own_goals', 'penalties_saved', 'penalties_missed',
       'yellow_cards', 'red_cards', 'saves', 'bonus', 'bps', 'influence',
       'creativity', 'threat', 'ict_index'],
      dtype='object')

In [45]:
# Columns removed from the elements (players) table before analysis.
element_cols_to_drop = [
    'Unnamed: 0',
    'chance_of_playing_next_round',
    'chance_of_playing_this_round',
    'cost_change_event',
    'cost_change_event_fall',
    'cost_change_start',
    'cost_change_start_fall',
    'dreamteam_count',
    'ep_next',
    'ep_this',
    'in_dreamteam',
    'news',
    'news_added',
    'photo',
    'special',
    'squad_number',
]
df5 = df5.drop(columns=element_cols_to_drop)

Drops irrelevant data from the elements dataframe. Most of these features are for internal use by the FPL website and are not important to data analysis. chance_of_playing_next_round and chance_of_playing_this_round would be useful; however, there is no way to call the API for historical values of these features, so they are dropped, as there are not enough data points to build a meaningful chance-of-playing predictive model.

In [46]:
df5.head()

Unnamed: 0,code,element_type,event_points,first_name,form,id,now_cost,points_per_game,second_name,selected_by_percent,...,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index
0,69140,2,1,Shkodran,4.3,1,51,3.2,Mustafi,0.4,...,0,0,0,0,1,144,174.2,15.4,107.0,29.8
1,98745,2,1,Héctor,5.0,2,54,3.4,Bellerín,1.0,...,0,2,0,0,3,113,116.6,37.7,38.0,19.4
2,111457,2,1,Sead,0.3,3,52,2.1,Kolasinac,0.5,...,0,3,0,0,1,222,190.0,157.3,67.0,41.2
3,154043,2,0,Ainsley,0.0,4,45,2.6,Maitland-Niles,2.4,...,0,3,1,0,3,216,261.8,155.4,37.0,44.9
4,39476,2,0,Sokratis,0.0,5,48,2.9,Papastathopoulos,1.3,...,0,5,0,0,5,286,431.6,31.8,106.0,57.1


In [47]:
# Strip accents from the player name columns. The columns are addressed by
# label, so the result no longer depends on their position after the drop above.
# NFKD splits an accented letter into a base letter plus combining marks;
# encoding to ASCII with errors='ignore' then discards the marks (and any other
# non-ASCII character) before decoding back to text.
# NOTE(review): 'web_name' is left untouched, as before — confirm whether it
# should be normalised too.
for name_col in ['first_name', 'second_name']:
    df5[name_col] = (
        df5[name_col]
        .str.normalize('NFKD')
        .str.encode('ascii', errors='ignore')
        .str.decode('utf-8')
    )

Above code removes accents within player names

In [48]:
df5.head()

Unnamed: 0,code,element_type,event_points,first_name,form,id,now_cost,points_per_game,second_name,selected_by_percent,...,penalties_missed,yellow_cards,red_cards,saves,bonus,bps,influence,creativity,threat,ict_index
0,69140,2,1,Shkodran,4.3,1,51,3.2,Mustafi,0.4,...,0,0,0,0,1,144,174.2,15.4,107.0,29.8
1,98745,2,1,Hector,5.0,2,54,3.4,Bellerin,1.0,...,0,2,0,0,3,113,116.6,37.7,38.0,19.4
2,111457,2,1,Sead,0.3,3,52,2.1,Kolasinac,0.5,...,0,3,0,0,1,222,190.0,157.3,67.0,41.2
3,154043,2,0,Ainsley,0.0,4,45,2.6,Maitland-Niles,2.4,...,0,3,1,0,3,216,261.8,155.4,37.0,44.9
4,39476,2,0,Sokratis,0.0,5,48,2.9,Papastathopoulos,1.3,...,0,5,0,0,5,286,431.6,31.8,106.0,57.1


In [49]:
df5.dtypes

code                     int64
element_type             int64
event_points             int64
first_name              object
form                   float64
id                       int64
now_cost                 int64
points_per_game        float64
second_name             object
selected_by_percent    float64
status                  object
team                     int64
team_code                int64
total_points             int64
transfers_in             int64
transfers_in_event       int64
transfers_out            int64
transfers_out_event      int64
value_form             float64
value_season           float64
web_name                object
minutes                  int64
goals_scored             int64
assists                  int64
clean_sheets             int64
goals_conceded           int64
own_goals                int64
penalties_saved          int64
penalties_missed         int64
yellow_cards             int64
red_cards                int64
saves                    int64
bonus   

In [50]:
df5.to_csv('C:/Users/nath1/Documents/CS/105/project/data_clean/elements_clean.csv')

Saves cleaned elements data