In [377]:
import os

import numpy as np
import pandas as pd

In [378]:
from market_value_predictor.data import get_data_from_gcp

In [379]:
# Load the project dataset through the package helper
# (presumably fetched from Google Cloud storage, going by the helper's name — TODO confirm).
df = get_data_from_gcp()

In [380]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The shuffle seed is fixed so that
# Restart Kernel -> Run All reproduces exactly the same split every time.
df_train, df_test = train_test_split(df, test_size=0.2, random_state=42)

# FUT DB API

## API call

In [381]:
import requests
from requests.structures import CaseInsensitiveDict

# Player-search endpoint of the FUT DB API.
url = "https://futdb.app/api/players/search"

headers = CaseInsensitiveDict()
headers["accept"] = "application/json"
# The API token is read from the FUTDB_API_TOKEN environment variable so the
# secret is never stored in the notebook. A KeyError here means it is not set.
headers["X-AUTH-TOKEN"] = os.environ["FUTDB_API_TOKEN"]
headers["Content-Type"] = "application/json"

# JSON request body: search by player name.
data = """
{
  "name": "Lionel Messi"
}
"""


# The timeout keeps a stalled connection from hanging the kernel;
# raise_for_status surfaces HTTP errors here instead of as a KeyError later.
resp = requests.post(url, headers=headers, data=data, timeout=30)
resp.raise_for_status()

In [382]:
# Number of entries in the "items" list of the search response.
len(resp.json()["items"])

3

In [383]:
# Keep only the first entry of the search result; the response can hold
# more than one item for the same name, and the others are ignored here.
player_dict = resp.json()["items"][0]
player_dict

{'id': 1337,
 'resource_id': 158023,
 'name': 'Lionel Messi',
 'age': 34,
 'resource_base_id': 158023,
 'fut_bin_id': 371,
 'fut_wiz_id': 69,
 'first_name': 'Lionel',
 'last_name': 'Messi',
 'common_name': 'Messi',
 'height': 170,
 'weight': 72,
 'birth_date': '1987-06-24',
 'league': 16,
 'nation': 52,
 'club': 73,
 'rarity': 1,
 'traits': [{'id': 1, 'name': 'Chip Shot (CPU AI)'},
  {'id': 2, 'name': 'Outside Foot Shot'},
  {'id': 4, 'name': 'Finesse Shot'},
  {'id': 12, 'name': 'Technical Dribbler (CPU AI)'},
  {'id': 14, 'name': 'Playmaker (CPU AI)'},
  {'id': 15, 'name': 'Long Shot Taker (CPU AI)'},
  {'id': 31, 'name': 'One Club Player'}],
 'specialities': [],
 'position': 'RW',
 'skill_moves': 4,
 'weak_foot': 4,
 'foot': 'Left',
 'attack_work_rate': 'Med',
 'defense_work_rate': 'Low',
 'total_stats': 462,
 'total_stats_in_game': 2261,
 'rating': 93,
 'rating_average': 77,
 'pace': 85,
 'shooting': 92,
 'passing': 91,
 'dribbling': 95,
 'defending': 34,
 'physicality': 65,
 'pace

In [384]:
# Count of top-level keys in the raw player record.
len(player_dict)

42

In [385]:
import collections.abc

def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with ``sep``.
    Values that are not mutable mappings (including lists of dicts) are
    copied through unchanged.

    Args:
        d: Mapping to flatten; its keys must be strings.
        parent_key: Prefix prepended to every key (used by the recursion).
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new dict with one entry per leaf value.
    """
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        # The ABC lives in collections.abc; the collections.MutableMapping
        # alias was removed in Python 3.10 and raises AttributeError there.
        if isinstance(v, collections.abc.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

In [386]:
# Collapse nested dicts in the player record into underscore-joined top-level keys.
flat_player_dict = flatten(player_dict)

In [387]:
# Remove the list-valued "traits" entry (flatten() leaves lists untouched).
# The popped value is displayed; "removed" is returned if the key is already gone.
flat_player_dict.pop("traits", "removed")

[{'id': 1, 'name': 'Chip Shot (CPU AI)'},
 {'id': 2, 'name': 'Outside Foot Shot'},
 {'id': 4, 'name': 'Finesse Shot'},
 {'id': 12, 'name': 'Technical Dribbler (CPU AI)'},
 {'id': 14, 'name': 'Playmaker (CPU AI)'},
 {'id': 15, 'name': 'Long Shot Taker (CPU AI)'},
 {'id': 31, 'name': 'One Club Player'}]

In [388]:
# Remove the list-valued "specialities" entry as well.
# The popped value is displayed; "removed" is returned if the key is already gone.
flat_player_dict.pop("specialities", "removed")

[]

In [389]:
# Build a one-row DataFrame (row label 0) from the flattened record.
flat_df = pd.DataFrame(flat_player_dict, index=[0])

In [390]:
# Column names as delivered by the API, before renaming.
list(flat_df.columns)

['id',
 'resource_id',
 'name',
 'age',
 'resource_base_id',
 'fut_bin_id',
 'fut_wiz_id',
 'first_name',
 'last_name',
 'common_name',
 'height',
 'weight',
 'birth_date',
 'league',
 'nation',
 'club',
 'rarity',
 'position',
 'skill_moves',
 'weak_foot',
 'foot',
 'attack_work_rate',
 'defense_work_rate',
 'total_stats',
 'total_stats_in_game',
 'rating',
 'rating_average',
 'pace',
 'shooting',
 'passing',
 'dribbling',
 'defending',
 'physicality',
 'pace_attributes_acceleration',
 'pace_attributes_sprint_speed',
 'shooting_attributes_positioning',
 'shooting_attributes_finishing',
 'shooting_attributes_shot_power',
 'shooting_attributes_long_shots',
 'shooting_attributes_volleys',
 'shooting_attributes_penalties',
 'passing_attributes_vision',
 'passing_attributes_crossing',
 'passing_attributes_free_kick_accuracy',
 'passing_attributes_short_passing',
 'passing_attributes_long_passing',
 'passing_attributes_curve',
 'dribbling_attributes_agility',
 'dribbling_attributes_balance'

In [391]:
# Rename the API field names to the names used in the rest of this notebook
# (presumably the column names of the training table — TODO confirm).
# A target name starting with "_" marks a field that has no counterpart:
# those columns are collected into `no_match` and dropped in later cells.
# Keys that are not present in the frame are ignored by DataFrame.rename.
flat_df = flat_df.rename(columns={
    'id': "_id",
     'resource_id': "_resource_id",
     'name': "_name",
     'age': "age",
     'resource_base_id': "_resource_base_id",
     'fut_bin_id': "_fut_bin_id",
     'fut_wiz_id': "_fut_wiz_id",
     'first_name': "_first_name",
     'last_name': "_last_name",
     'common_name': "_common_name",
     'height': "height_cm",
     'weight': "weight_kg",
     'birth_date': "_birth_date",
     'league': "league_name",
     'nation': "nationality",
     'club': "club_name",
     'rarity': "_rarity",
     'position': "team_position",
     'skill_moves': "skill_moves",
     'weak_foot': "weak_foot",
     'foot': "preferred_foot",
     'attack_work_rate': "_attack_work_rate",
     'defense_work_rate': "_defense_work_rate",
     'total_stats': "_total_stats",
     'total_stats_in_game': "_total_stats_in_game",
     'rating': "overall",
     'rating_average': "_rating_average",
     'pace': "pace",
     'shooting': "shooting",
     'passing': "passing",
     'dribbling': "dribbling",
     'defending': "defending",
     'physicality': "physic",
     'pace_attributes_acceleration': "movement_acceleration",
     'pace_attributes_sprint_speed': "movement_sprint_speed",
     'shooting_attributes_positioning': "mentality_positioning",
     'shooting_attributes_finishing': "attacking_finishing",
     'shooting_attributes_shot_power': "power_shot_power",
     'shooting_attributes_long_shots': "power_long_shots",
     'shooting_attributes_volleys': "attacking_volleys",
     'shooting_attributes_penalties': "mentality_penalties",
     'passing_attributes_vision': "mentality_vision",
     'passing_attributes_crossing': "attacking_crossing",
     'passing_attributes_free_kick_accuracy': "skill_fk_accuracy",
     'passing_attributes_short_passing': "attacking_short_passing",
     'passing_attributes_long_passing': "skill_long_passing",
     'passing_attributes_curve': "skill_curve",
     'dribbling_attributes_agility': "movement_agility",
     'dribbling_attributes_balance': "movement_balance",
     'dribbling_attributes_reactions': "movement_reactions",
     'dribbling_attributes_ball_control': "skill_ball_control",
     'dribbling_attributes_dribbling': "skill_dribbling",
     'dribbling_attributes_composure': "mentality_composure",
     'defending_attributes_interceptions': "mentality_interceptions",
     'defending_attributes_heading_accuracy': "attacking_heading_accuracy",
     'defending_attributes_standing_tackle': "defending_standing_tackle",
     'defending_attributes_sliding_tackle': "defending_sliding_tackle",
     'physicality_attributes_jumping': "power_jumping",
     'physicality_attributes_stamina': "power_stamina",
     'physicality_attributes_strength': "power_strength",
     'physicality_attributes_aggression': "mentality_aggression",
     'goalkeeper_attributes_diving': "gk_diving",
     'goalkeeper_attributes_handling': "gk_handling",
     'goalkeeper_attributes_kicking': "gk_kicking",
     'goalkeeper_attributes_positioning': "gk_positioning",
     'goalkeeper_attributes_reflexes': "gk_reflexes"
})

In [392]:
# Accumulator for the column names that carry the "_" (no counterpart) prefix.
no_match = []

In [393]:
# Append every column whose name begins with an underscore.
# Note: this extends the existing list, so re-running the cell adds the names again.
no_match.extend(
    column_name for column_name in flat_df.columns if column_name.startswith("_")
)

In [394]:
# Show the columns that were flagged for removal.
no_match

['_id',
 '_resource_id',
 '_name',
 '_resource_base_id',
 '_fut_bin_id',
 '_fut_wiz_id',
 '_first_name',
 '_last_name',
 '_common_name',
 '_birth_date',
 '_rarity',
 '_attack_work_rate',
 '_defense_work_rate',
 '_total_stats',
 '_total_stats_in_game',
 '_rating_average']

In [395]:
# Drop every column collected in `no_match`.
flat_df = flat_df.drop(columns=no_match)

In [396]:
# Inspect the nationality column; at this point it still holds the numeric id from the API.
flat_df.nationality

0    52
Name: nationality, dtype: int64

### Adding back player_traits

In [397]:
# Pull the "name" of each trait from the raw player record, then store the
# names in the frame as a single comma-separated string.
traits = [trait["name"] for trait in player_dict["traits"]]

traits_joined = ", ".join(traits)

flat_df["player_traits"] = traits_joined

In [398]:
# Load the master table from CSV and drop the `fee_cleaned` column
# (presumably the prediction target — TODO confirm).
X = pd.read_csv("../../raw_data/master_df_with_webscraping.csv").drop(columns="fee_cleaned")

In [399]:
# Print each column of the prepared frame that is absent from X, in frame order.
# No output means every prepared column exists in X.
missing_from_X = flat_df.columns[~flat_df.columns.isin(X.columns)]
for column_name in missing_from_X:
    print(column_name)

## Get league matching table

# Download the league lookup (id -> league fields) from the FUT DB API
# and cache it as a CSV file.
init_league_list = []

headers = CaseInsensitiveDict()
headers["accept"] = "application/json"
# The API token is read from the FUTDB_API_TOKEN environment variable so the
# secret is never stored in the notebook. A KeyError here means it is not set.
headers["X-AUTH-TOKEN"] = os.environ["FUTDB_API_TOKEN"]

# The endpoint is paginated; pages 1-3 are requested (page count is hard-coded).
for i in [1, 2, 3]:
    league_resp = requests.get("https://futdb.app/api/leagues?page=" + str(i),
                               headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError on a missing "items" key.
    league_resp.raise_for_status()
    init_league_list.append(league_resp.json()["items"])

# Flatten the per-page lists into one table indexed by the league id.
leagues_matching_table = pd.DataFrame([item for sublist in init_league_list for item in sublist]).set_index("id")
leagues_matching_table.to_csv("../../raw_data/leagues_matching_from_API.csv")

## Get clubs matching table

# Download the club lookup (id -> club fields) from the FUT DB API
# and cache it as a CSV file.
init_club_list = []

headers = CaseInsensitiveDict()
headers["accept"] = "application/json"
# The API token is read from the FUTDB_API_TOKEN environment variable so the
# secret is never stored in the notebook. A KeyError here means it is not set.
headers["X-AUTH-TOKEN"] = os.environ["FUTDB_API_TOKEN"]

# The endpoint is paginated; pages 1-34 are requested (page count is hard-coded).
for i in range(1, 35):
    club_resp = requests.get("https://futdb.app/api/clubs?page=" + str(i),
                             headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError on a missing "items" key.
    club_resp.raise_for_status()
    init_club_list.append(club_resp.json()["items"])

# Flatten the per-page lists into one table indexed by the club id;
# the "league" column is removed so the club name is the first column.
clubs_matching_table = pd.DataFrame([
    item for sublist in init_club_list for item in sublist
]).set_index("id").drop(columns="league")
clubs_matching_table.to_csv("../../raw_data/clubs_matching_from_API.csv")

## Get nations matching table

# Download the nation lookup (id -> nation fields) from the FUT DB API
# and cache it as a CSV file.
init_nation_list = []

headers = CaseInsensitiveDict()
headers["accept"] = "application/json"
# The API token is read from the FUTDB_API_TOKEN environment variable so the
# secret is never stored in the notebook. A KeyError here means it is not set.
headers["X-AUTH-TOKEN"] = os.environ["FUTDB_API_TOKEN"]

# The endpoint is paginated; pages 1-8 are requested (page count is hard-coded).
for i in range(1, 9):
    nation_resp = requests.get("https://futdb.app/api/nations?page=" + str(i),
                               headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a KeyError on a missing "items" key.
    nation_resp.raise_for_status()
    init_nation_list.append(nation_resp.json()["items"])

# Flatten the per-page lists into one table indexed by the nation id.
nations_matching_table = pd.DataFrame([
    item for sublist in init_nation_list for item in sublist
]).set_index("id")
nations_matching_table.to_csv("../../raw_data/nations_matching_from_API.csv")

# Adding strings to league, club, nation

In [400]:
# Replace the numeric nation id with the first column of its row in the nations lookup.
flat_df["nationality"] = flat_df["nationality"].map(
    lambda nation_id: nations_matching_table.loc[nation_id].iloc[0]
)

In [401]:
# Replace the numeric club id with the first column of its row in the clubs lookup.
flat_df["club_name"] = flat_df["club_name"].map(
    lambda club_id: clubs_matching_table.loc[club_id].iloc[0]
)

In [402]:
# Replace the numeric league id with the first column of its row in the leagues lookup.
flat_df["league_name"] = flat_df["league_name"].map(
    lambda league_id: leagues_matching_table.loc[league_id].iloc[0]
)

In [403]:
# Column names after renaming, dropping the "_" columns and adding player_traits.
list(flat_df.columns)

['age',
 'height_cm',
 'weight_kg',
 'league_name',
 'nationality',
 'club_name',
 'team_position',
 'skill_moves',
 'weak_foot',
 'preferred_foot',
 'overall',
 'pace',
 'shooting',
 'passing',
 'dribbling',
 'defending',
 'physic',
 'movement_acceleration',
 'movement_sprint_speed',
 'mentality_positioning',
 'attacking_finishing',
 'power_shot_power',
 'power_long_shots',
 'attacking_volleys',
 'mentality_penalties',
 'mentality_vision',
 'attacking_crossing',
 'skill_fk_accuracy',
 'attacking_short_passing',
 'skill_long_passing',
 'skill_curve',
 'movement_agility',
 'movement_balance',
 'movement_reactions',
 'skill_ball_control',
 'skill_dribbling',
 'mentality_composure',
 'mentality_interceptions',
 'attacking_heading_accuracy',
 'defending_standing_tackle',
 'defending_sliding_tackle',
 'power_jumping',
 'power_stamina',
 'power_strength',
 'mentality_aggression',
 'gk_diving',
 'gk_handling',
 'gk_kicking',
 'gk_positioning',
 'gk_reflexes',
 'player_traits']

In [404]:
# Feature columns, grouped by theme.
# NOTE(review): the goalkeeper entries use the "goalkeeping_" prefix, while the
# renamed frame in this notebook names those columns "gk_*" — confirm which
# spelling the consumer of this list expects.
usable_columns = [
    # Profile
    "age", "height_cm", "weight_kg",
    "league_name", "nationality", "club_name",
    "team_position", "skill_moves", "weak_foot", "preferred_foot",
    # Headline ratings
    "overall", "pace", "shooting", "passing", "dribbling", "defending", "physic",
    # Pace
    "movement_acceleration", "movement_sprint_speed",
    # Shooting
    "mentality_positioning", "attacking_finishing", "power_shot_power",
    "power_long_shots", "attacking_volleys", "mentality_penalties",
    # Passing
    "mentality_vision", "attacking_crossing", "skill_fk_accuracy",
    "attacking_short_passing", "skill_long_passing", "skill_curve",
    # Dribbling
    "movement_agility", "movement_balance", "movement_reactions",
    "skill_ball_control", "skill_dribbling", "mentality_composure",
    # Defending
    "mentality_interceptions", "attacking_heading_accuracy",
    "defending_standing_tackle", "defending_sliding_tackle",
    # Physical
    "power_jumping", "power_stamina", "power_strength", "mentality_aggression",
    # Goalkeeping
    "goalkeeping_diving", "goalkeeping_handling", "goalkeeping_kicking",
    "goalkeeping_positioning", "goalkeeping_reflexes",
    # Traits
    "player_traits",
]

In [427]:
# Inspect the dtype of every column in the prepared frame.
flat_df.dtypes

age                             int64
height_cm                       int64
weight_kg                       int64
league_name                    object
nationality                    object
club_name                      object
team_position                  object
skill_moves                     int64
weak_foot                       int64
preferred_foot                 object
overall                         int64
pace                            int64
shooting                        int64
passing                         int64
dribbling                       int64
defending                       int64
physic                          int64
movement_acceleration           int64
movement_sprint_speed           int64
mentality_positioning           int64
attacking_finishing             int64
power_shot_power                int64
power_long_shots                int64
attacking_volleys               int64
mentality_penalties             int64
mentality_vision                int64
attacking_cr

In [423]:
# Fill missing entries with np.nan
# (presumably to turn None values from the API into NaN before casting — TODO confirm).
flat_df = flat_df.fillna(value=np.nan)

In [426]:
# Cast the five goalkeeper attribute columns to float, one column at a time.
for gk_column in (
    "gk_diving",
    "gk_handling",
    "gk_kicking",
    "gk_positioning",
    "gk_reflexes",
):
    flat_df[gk_column] = flat_df[gk_column].astype("float64")