# In [9]:
# Purpose:
# To house common functions

def create_fifa_players_dataframe(dataset_location):
    """Read the CSV at ``dataset_location`` into a pandas DataFrame.

    Args:
        dataset_location: Anything ``pandas.read_csv`` accepts as its first
            argument (a path or a file-like object).

    Returns:
        The DataFrame parsed from the CSV, decoded as UTF-8.
    """
    from pandas import read_csv

    players = read_csv(dataset_location, encoding='UTF-8')
    return players

def drop_columns_from_dataframe(data, columns):
    """Return a copy of ``data`` without the columns named in ``columns``.

    Names in ``columns`` that are not present in ``data`` are ignored.
    The input frame is left untouched; a new DataFrame is returned.

    Args:
        data: Source DataFrame.
        columns: Container of column names to remove.

    Returns:
        A new DataFrame holding the remaining columns of ``data``.
    """
    # Only ask pandas to drop labels that exist, so unknown names never raise.
    present = [column for column in data.columns if column in columns]

    # DataFrame.drop returns a new frame, so the caller's data is not mutated.
    return data.drop(columns=present)

def create_dataset_by_pos(data, position, ex_columns, show):
    """Filter ``data`` on its 'Position' column and drop ``ex_columns``.

    When ``position`` is 'GK' the rows whose 'Position' equals 'GK' are kept;
    for any other value the rows whose 'Position' differs from ``position``
    are kept.

    NOTE(review): for a non-'GK' ``position`` the result still contains
    goalkeeper rows unless ``position`` itself is what excludes them --
    confirm this is the intended filter.

    Args:
        data: DataFrame with a 'Position' column.
        position: Position label that drives the filter.
        ex_columns: Column names handed to ``drop_columns_from_dataframe``.
        show: When truthy, print the resulting frame.

    Returns:
        The filtered frame as returned by ``drop_columns_from_dataframe``.
    """
    player_positions = data['Position']
    if position == 'GK':
        row_mask = player_positions == position
    else:
        row_mask = player_positions != position

    subset = drop_columns_from_dataframe(data.loc[row_mask], ex_columns)

    if show:
        print(subset)

    return subset

def create_schema_off_dataset(data, file_name):
    """Write the table schema of ``data`` to ``file_name`` as JSON.

    The schema comes from ``pandas.io.json.build_table_schema`` with the index
    excluded, no primary key, and the pandas version field included.

    Args:
        data: DataFrame whose schema is exported.
        file_name: Destination passed to ``open``; it is opened in 'w' mode.

    Returns:
        None.
    """
    import json
    import pandas

    table_schema = pandas.io.json.build_table_schema(
        data,
        index=False,
        primary_key=None,
        version=True,
    )
    with open(file_name, 'w') as schema_out:
        json.dump(table_schema, schema_out)
    
def encode_field(data, column):
    """Return a copy of ``data`` with ``column`` label-encoded.

    The values of ``data[column]`` are run through scikit-learn's
    ``LabelEncoder.fit_transform`` and the result replaces that column in the
    copy. The input frame is not modified.

    Args:
        data: Source DataFrame.
        column: Name of the column to encode.

    Returns:
        A new DataFrame with the encoded column.
    """
    from sklearn.preprocessing import LabelEncoder

    encoded = data.copy()
    encoded[column] = LabelEncoder().fit_transform(data[column])
    return encoded

# Column names to exclude for goalkeepers (presumably passed as ``ex_columns``
# to create_dataset_by_pos -- confirm against the calling notebook cell).
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously fused 'Work Rate' and
# 'Body Type' into a single non-existent column name.
cols_to_drop_for_keepers = [
    'Photo', 'Flag',
    'International Reputation', 'Weak Foot', 'Skill Moves', 'Work Rate',
    'Body Type', 'Real Face', 'Jersey Number',
    'Joined', 'Loaned From', 'Contract Valid Until',
    'Crossing', 'Finishing', 'HeadingAccuracy', 'ShortPassing', 'Volleys',
    'Dribbling', 'Curve', 'FKAccuracy', 'LongPassing', 'BallControl',
    'Acceleration', 'SprintSpeed',
    'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
    'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM',
    'RM', 'LWB', 'LDM', 'CDM', 'RDM', 'RWB', 'LB',
    'LCB', 'CB', 'RCB', 'RB',
    'Club Logo', 'Release Clause', 'Special', 'Preferred Foot',
    'Marking', 'StandingTackle', 'SlidingTackle',
]

# Column names to exclude for outfield players (presumably passed as
# ``ex_columns`` to create_dataset_by_pos -- confirm against the calling
# notebook cell).
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously fused 'Work Rate' and
# 'Joined' into a single non-existent column name. The repeated
# 'Photo' / 'Flag' entries were removed; membership is unchanged.
cols_to_drop_for_players = [
    'Photo', 'Flag',
    'International Reputation', 'Weak Foot', 'Jersey Number', 'Work Rate',
    'Joined', 'Loaned From', 'Contract Valid Until',
    'Body Type', 'Real Face', 'Skill Moves',
    'LS', 'ST', 'RS', 'LW', 'LF', 'CF', 'RF', 'RW',
    'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM', 'RCM',
    'RM', 'LWB', 'LDM', 'CDM', 'RDM', 'RWB', 'LB',
    'LCB', 'CB', 'RCB', 'RB',
    'Club Logo', 'Release Clause', 'Special', 'Preferred Foot',
    'GKDiving', 'GKHandling', 'GKKicking', 'GKPositioning', 'GKReflexes',
]

def create_fifa_players_dataset(data_in, file_loc, schema_file, csv_file, show=False):
    """Clean ``data_in``, add a 'Rating' column, and export schema + CSV.

    Steps:
      1. Drop every row containing a missing value.
      2. Add 'Rating' as the row-wise mean of the columns spanning
         'Overall' through 'Potential'.
      3. Re-infer object dtypes and drop 'Overall' and 'Potential'.
      4. Write the table schema (JSON) and the data (CSV, '\\n' line endings,
         no index) into ``file_loc``.

    The caller's frame is never modified.

    Args:
        data_in: DataFrame containing at least 'Overall' and 'Potential'.
        file_loc: Directory the two output files are written to.
        schema_file: File name of the JSON schema inside ``file_loc``.
        csv_file: File name of the CSV inside ``file_loc``.
        show: When truthy, print the rating series, frame info and a
            ``describe`` summary excluding object and int columns.

    Returns:
        The cleaned DataFrame that was written to disk.
    """
    from pathlib import Path

    # dropna() always returns a new frame, so no NaN pre-count is needed and
    # the caller's data is never touched by the steps below.
    cleaned = data_in.dropna()

    # Label slice: every column positioned from 'Overall' through 'Potential'
    # (inclusive) contributes to the mean.
    # https://stackoverflow.com/questions/48366506/calculate-new-column-as-the-mean-of-other-columns-pandas/48366525
    rating = cleaned.loc[:, 'Overall':'Potential'].mean(axis=1)

    # assign() builds a new frame, avoiding chained-assignment warnings.
    data_in = cleaned.assign(Rating=rating)
    data_in = data_in.infer_objects()

    # The source columns are redundant once 'Rating' exists.
    data_in = drop_columns_from_dataframe(data_in, ['Overall', 'Potential'])

    create_schema_off_dataset(data_in, Path(file_loc, schema_file))

    csv_path = Path(file_loc, csv_file)
    try:
        # pandas >= 1.5 spells the keyword 'lineterminator'; the old
        # 'line_terminator' spelling was removed in pandas 2.0.
        data_in.to_csv(csv_path, index=False, lineterminator='\n')
    except TypeError:
        # Older pandas releases only know the legacy keyword.
        data_in.to_csv(csv_path, index=False, line_terminator='\n')

    if show:
        print(rating)
        # info() prints its report itself and returns None.
        data_in.info()
        print(data_in.describe(exclude=[object, int]))

    return data_in

# In [10]:
def build_goal_keepers_stats_dataframe(goal_keepers_df):
    """Build the goalkeeper stats frame with renamed columns.

    Each output column is copied from one column of ``goal_keepers_df``; the
    input's index is preserved and the input itself is not modified. The
    result has a flat column index in the order listed below.

    Args:
        goal_keepers_df: DataFrame containing every source column named in
            the mapping below.

    Returns:
        A new DataFrame with one column per mapping entry.

    Raises:
        KeyError: If a source column is missing from ``goal_keepers_df``.
    """
    import pandas

    # output column -> source column, in output order.
    column_sources = {
        'rating': 'Rating',
        'movement_agility': 'Agility',
        'movement_reactions': 'Reactions',
        'movement_balance': 'Balance',
        'power_shot_power': 'ShotPower',
        'power_jumping': 'Jumping',
        'power_stamina': 'Stamina',
        'power_strength': 'Strength',
        'power_long_shots': 'LongShots',
        'mental_aggression': 'Aggression',
        'mental_interceptions': 'Interceptions',
        'mental_vision': 'Vision',
        'mental_penalties': 'Penalties',
        'mental_composure': 'Composure',
        'defending_diving': 'GKDiving',
        'defending_handling': 'GKHandling',
        'defending_positioning': 'GKPositioning',
        'defending_reflexes': 'GKReflexes',
    }

    # Building from a dict of Series yields plain (non-MultiIndex) columns;
    # passing a nested list to ``columns=`` would create a MultiIndex instead.
    return pandas.DataFrame(
        {target: goal_keepers_df[source] for target, source in column_sources.items()}
    )

# In [1]:
#player stat features
def build_players_stats_dataframe(players_df):
    """Build the outfield-player stats frame with renamed columns.

    Each output column is copied from one column of ``players_df``; the
    input's index is preserved and the input itself is not modified. The
    result has a flat column index in the order listed below.

    Args:
        players_df: DataFrame containing every source column named in the
            mapping below.

    Returns:
        A new DataFrame with one column per mapping entry.

    Raises:
        KeyError: If a source column is missing from ``players_df``.
    """
    import pandas

    # output column -> source column, in output order.
    column_sources = {
        'rating': 'Rating',
        'attacking_crossing': 'Crossing',
        'attacking_finishing': 'Finishing',
        'attacking_heading_accuracy': 'HeadingAccuracy',
        'attacking_short_passing': 'ShortPassing',
        'attacking_volleys': 'Volleys',
        'skill_dribbling': 'Dribbling',
        'skill_curve': 'Curve',
        'skill_fk_accuracy': 'FKAccuracy',
        'skill_long_passing': 'LongPassing',
        'skill_ball_control': 'BallControl',
        'movement_acceleration': 'Acceleration',
        'movement_sprint_speed': 'SprintSpeed',
        'movement_agility': 'Agility',
        'movement_reactions': 'Reactions',
        'movement_balance': 'Balance',
        'power_shot_power': 'ShotPower',
        'power_jumping': 'Jumping',
        'power_stamina': 'Stamina',
        'power_strength': 'Strength',
        'power_long_shots': 'LongShots',
        'mental_aggression': 'Aggression',
        'mental_interceptions': 'Interceptions',
        'mental_positioning': 'Positioning',
        'mental_vision': 'Vision',
        'mental_penalties': 'Penalties',
        'mental_composure': 'Composure',
        'defending_marking': 'Marking',
        'defending_standing_tackle': 'StandingTackle',
        'defending_sliding_tackle': 'SlidingTackle',
    }

    # Building from a dict of Series yields plain (non-MultiIndex) columns;
    # passing a nested list to ``columns=`` would create a MultiIndex instead.
    return pandas.DataFrame(
        {target: players_df[source] for target, source in column_sources.items()}
    )

# In [2]:
def adjust_rating(x):
    """Map a numeric rating to a coarse label.

    Args:
        x: Rating value.

    Returns:
        "Excellent" when ``x`` is above 85, "Good" when it is above 70 and at
        most 85, and "Decent" otherwise.
    """
    # Thresholds are checked from highest to lowest; the first one exceeded wins.
    for floor, label in ((85, "Excellent"), (70, "Good")):
        if x > floor:
            return label
    return "Decent"