### Import data

In [232]:
import pandas as pd
import numpy as np

# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data.csv')

### Drop unnecessary columns and rows having missing / null values

In [233]:
# Zero-based positions (in the order the CSV was read) of the columns to discard.
# NOTE(review): positional indices silently select other columns if the CSV
# layout ever changes -- confirm against the file before re-using this cell.
columns_to_drop = [0, 1, 3, 4, 5, 8, 9] + list(range(18, 27)) + [87]

# Remove those columns, then every row that still contains a missing value,
# and renumber the surviving rows from 0 (the old index is not kept).
df.drop(columns=df.columns[columns_to_drop], inplace=True)
df.dropna(inplace=True)
df.reset_index(inplace=True, drop=True)

### Remove currency symbols in Wage and Value columns, turn them into numeric type, and change the column names

In [234]:
def clean_value(v):
    """Turn a currency string into a number.

    The first and the last character of ``v`` are discarded without being
    inspected (presumably a currency symbol and a unit letter such as K/M --
    confirm against the data), and surrounding whitespace is stripped.

    Returns the remaining text as a ``float``, or the integer ``0`` when
    nothing remains. Raises ``ValueError`` if the remainder is not a number.
    """
    digits = v[1:-1].strip()
    return float(digits) if digits else 0
# clean_value() discards the trailing unit letter without inspecting it, so read
# that suffix first and rescale to the unit each renamed column claims to hold.
# Without this, a thousands amount in Value (presumably strings like '€600K' --
# TODO confirm against data.csv) would be stored as 600 in a column labelled
# millions. Entries whose last character is neither 'K' nor 'M' are left unscaled.
wage_multiplier = df.Wage.str[-1].map({'K': 1.0, 'M': 1000.0}).fillna(1.0)
value_divisor = df.Value.str[-1].map({'K': 1000.0, 'M': 1.0}).fillna(1.0)
df.Wage = df.Wage.apply(clean_value) * wage_multiplier
df.Value = df.Value.apply(clean_value) / value_divisor
# NOTE(review): 'Value_in M' (with a space) is inconsistent with 'Wage_in_K';
# the label is kept unchanged because other cells may reference it verbatim.
df.rename(columns={'Wage': 'Wage_in_K', 'Value': 'Value_in M'}, inplace=True)

### Remove plus signs in columns[11:37], add the two numbers, and convert the columns to numeric type

In [235]:
def clean_value(v):
    """Add up the '+'-separated integer parts of a string, e.g. '88+2' -> 90.

    NOTE: this definition replaces the currency-parsing ``clean_value`` from
    the earlier cell; after this cell runs, only this version is in scope.

    Parameters
    ----------
    v : str or number
        Text made of integers joined by '+'. A value that is not a string is
        assumed to be already converted and is returned unchanged, so the
        cell can be re-run without raising ``AttributeError``.

    Returns
    -------
    int
        Sum of the integer parts (or ``v`` itself when it is not a string).

    Raises
    ------
    ValueError
        If any part of the string is not a valid integer.
    """
    if not isinstance(v, str):
        # Already numeric, e.g. the conversion cell was executed a second time.
        return v
    return sum(int(part) for part in v.split('+'))
# Columns at positions 11..36 hold 'a+b' strings; replace each with its summed value.
# NOTE(review): the slice is positional -- it relies on the column order left
# by the earlier drop cell.
for col_name in df.columns[11:37]:
    df[col_name] = df[col_name].map(clean_value)

### Retrieve categorical columns

In [236]:
# Numeric columns, selected through the public dtype API rather than the
# private DataFrame._get_numeric_data(); booleans are included to match what
# that helper treated as numeric.
num_cols = df.select_dtypes(include=['number', 'bool']).columns
# Every remaining column, kept in DataFrame order. A set difference would give
# an arbitrary order that can change from one kernel start to the next.
cat_cols = [col for col in df.columns if col not in num_cols]

### Create a dictionary mapping the ordinal Work Rate labels to integers and apply it to the Work Rate column

In [237]:
from sklearn.preprocessing import LabelEncoder

# Integer score for each 'first/ second' work-rate label. Every score equals the
# sum of the two levels with Low=1, Medium=2, High=3, so different labels can
# share a score (e.g. 'Low/ High' and 'High/ Low' both map to 4).
work_rate_dict = {
    'Low/ Low': 2,
    'Low/ Medium': 3,
    'Low/ High': 4,
    'Medium/ Low': 3,
    'Medium/ Medium': 4,
    'Medium/ High': 5,
    'High/ Low': 4,
    'High/ Medium': 5,
    'High/ High': 6,
}

# Map first and validate before overwriting the column: Series.map() turns any
# label missing from the dictionary into NaN, so an unexpected label -- or a
# second run of this cell on already-mapped integers -- would otherwise
# silently wipe the column.
work_rate_scores = df['Work Rate'].map(work_rate_dict)
unmapped = df.loc[work_rate_scores.isna(), 'Work Rate'].unique()
if len(unmapped) > 0:
    raise ValueError(
        "Unexpected 'Work Rate' values (not in work_rate_dict): {}".format(list(unmapped))
    )
df['Work Rate'] = work_rate_scores

### Create two dummy columns for the Preferred Foot column

In [239]:
# One indicator column per distinct 'Preferred Foot' value, named 'Preferred Foot_<value>'.
# dtype is pinned to uint8 (the historical get_dummies default) so the columns
# hold 0/1 on every pandas version; pandas 2.x would otherwise return booleans.
foot_dummies = pd.get_dummies(df['Preferred Foot'], prefix='Preferred Foot', dtype='uint8')
# Replace the original text column with the indicators, appended at the end of the frame.
df = pd.concat([df.drop(columns='Preferred Foot'), foot_dummies], axis=1)

In [240]:
# Preview the first five rows of the processed frame.
df.head(5)

Unnamed: 0,Age,Overall,Potential,Value_in M,Wage_in_K,Special,International Reputation,Weak Foot,Skill Moves,Work Rate,...,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Preferred Foot_Left,Preferred Foot_Right
0,31,94,94,110.5,565.0,2202,5.0,4.0,4.0,4,...,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,1,0
1,33,94,94,77.0,405.0,2228,5.0,4.0,5.0,4,...,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,0,1
2,26,92,93,118.5,290.0,2143,5.0,5.0,5.0,5,...,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,0,1
3,27,91,92,102.0,355.0,2281,4.0,5.0,4.0,6,...,68.0,58.0,51.0,15.0,13.0,5.0,10.0,13.0,0,1
4,27,91,91,93.0,340.0,2142,4.0,4.0,4.0,5,...,34.0,27.0,22.0,11.0,12.0,6.0,8.0,8.0,0,1
