In [39]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
import math
import warnings
warnings.filterwarnings('ignore')
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Reviewing Original Data

In [None]:
# Load the original training CSV into `data` and preview its first rows.
# OVA column is our target
data = pd.read_csv('fifa21_training.csv')
data.head()

In [None]:
#data.dtypes

In [None]:
# Print, for every object-dtype (text) column, how many distinct values it holds.
# The 'object' string selector is used instead of np.object: that alias was
# deprecated in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
# nunique(dropna=False) counts NaN as a value, matching len(Series.unique()).
for c in data.select_dtypes(include='object').columns:
    print(c, data[c].nunique(dropna=False))

In [None]:
# Count the missing (NaN) entries in each column
data.isna().sum()  

In [None]:
# Summarise every categorical (object-dtype) column: name, dtype, number of
# distinct values, the distinct values themselves, and each value's share in %.
# The 'object' string selector replaces np.object, an alias deprecated in
# NumPy 1.20 and removed in 1.24 (AttributeError on current NumPy).

cat_cols = data.select_dtypes(include='object').columns    # categorical columns

print('Categorical Features:', len(cat_cols))
print('----------')
for c in cat_cols:
    column = data[c]
    distinct = column.unique()
    print('Name: {}'.format(column.name))    # column name
    print('Type: {}'.format(column.dtype))   # column type
    print('Unique values: {}'.format(len(distinct)))   # NaN counts as a value here
    print(distinct)
    # normalize=True gives each value's fraction of the non-null rows
    print(column.value_counts(normalize=True) * 100)   # percentage
    print('\n----------')

# Trimmed Data

In [40]:
# Load the trimmed training CSV into `data` and preview its first rows.
# OVA column is our target
data = pd.read_csv('fifa21_training_trimmed.csv')
data.head()

Unnamed: 0,Name,Age,Nationality,Club,Default Position,Height,Weight,Value,Attacking,Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Skill,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Movement,Acceleration,Sprint Speed,Agility,Reactions,Balance,Power,Shot Power,Jumping,Stamina,Strength,Long Shots,Mentality,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Defending,Marking,Standing Tackle,Sliding Tackle,Goalkeeping,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Total Stats,Base Stats,W/F,SM,AW,DW,IR,PAC,SHO,PAS,DRI,DEF,PHY,OVA
0,A. Pasche,26,Switzerland,FC Lausanne-Sport,CM,"5'9""",161lbs,€525K,258,54,47,43,70,44.0,286,61,44.0,55,63,63,346,64,73,61.0,66,82.0,306,62,73.0,71,55,45,290,54,52.0,62.0,68.0,54,54.0,148,49,56,43.0,48,7,12,14,9,6,1682,357,4 ★,2★,High,Medium,1 ★,69,51,63,63,51,60,64
1,Alan Carvalho,30,China PR,Beijing Sinobo Guoan FC,ST,"6'0""",159lbs,€8.5M,365,66,79,76,68,76.0,375,83,78.0,72,63,79,404,83,83,88.0,75,75.0,372,74,81.0,75,74,68,313,54,33.0,78.0,72.0,76,70.0,77,35,20,22.0,55,11,7,14,7,16,1961,412,3 ★,4★,High,Low,2 ★,83,75,68,82,33,71,77
2,S. Giovinco,33,Italy,Al Hilal,CAM,"5'4""",134lbs,€9M,336,73,76,34,78,75.0,424,85,89.0,91,74,85,424,84,76,93.0,78,93.0,308,79,34.0,75,42,78,332,75,26.0,80.0,78.0,73,82.0,80,23,29,28.0,21,6,3,6,3,3,1925,404,4 ★,4★,High,Medium,2 ★,80,77,78,86,27,56,80
3,J. Evans,22,Wales,Swansea City,CDM,"5'10""",152lbs,€275K,242,44,42,58,62,36.0,259,54,41.0,46,57,61,282,54,59,59.0,55,55.0,277,57,60.0,64,58,38,257,61,57.0,31.0,54.0,54,48.0,168,55,58,55.0,42,8,9,6,7,12,1527,329,2 ★,2★,Medium,Medium,1 ★,57,44,54,57,57,60,59
4,Y. Demoncy,23,France,US Orléans Loiret Football,CDM,"5'11""",150lbs,€725K,249,49,37,61,68,34.0,280,64,44.0,45,61,66,324,66,66,61.0,62,69.0,280,61,34.0,81,61,43,294,66,60.0,55.0,64.0,49,58.0,185,58,61,66.0,52,8,9,15,5,15,1664,360,2 ★,3★,Low,Medium,1 ★,66,44,60,64,60,66,65


In [41]:
#cleaning function for star ratings
def clean(x):
    """Convert a star-rating label such as '4 ★' or '4★' to the integer 4.

    Whitespace before, after, or between the digit and the star is ignored,
    so variants like ' 4  ★' are handled as well as the two spellings the
    data is known to contain.

    Parameters
    ----------
    x : any
        A rating label. Only strings consisting of a digit 1-5 followed by
        '★' are converted.

    Returns
    -------
    int or the original value
        The rating as an int, or `x` unchanged when it is not a recognised
        rating label (e.g. already an int, NaN, or unrelated text). Returning
        other values untouched keeps the function safe to apply twice.
    """
    if isinstance(x, str):
        label = x.strip()
        if label.endswith('★'):
            digits = label[:-1].strip()
            if digits in ('1', '2', '3', '4', '5'):
                return int(digits)
    return x

In [42]:
# Run clean() over each star-rating column, element by element
for star_col in ('SM', 'IR', 'W/F'):
    data[star_col] = [clean(label) for label in data[star_col]]

In [43]:
# Inspect the Weight column before cleaning; the values are strings such as '161lbs'
data['Weight']

0        161lbs
1        159lbs
2        134lbs
3        152lbs
4        150lbs
          ...  
13695    143lbs
13696    176lbs
13697    146lbs
13698    176lbs
13699    150lbs
Name: Weight, Length: 13700, dtype: object

In [68]:
# Remove the "lbs" unit suffix from Weight.
# One vectorised statement replaces the earlier pair, whose first line computed
# the stripped values and discarded them. astype(str) keeps this cell
# re-runnable after Weight has already been converted to numbers.
data['Weight'] = (
    data['Weight']
    .astype(str)
    .str.replace('lbs', '', regex=False)
    .str.strip()
)

In [70]:
# Cast Weight to an integer dtype, then display the column
data['Weight'] = data['Weight'].astype(int)
data['Weight']

0        161
1        159
2        134
3        152
4        150
        ... 
13695    143
13696    176
13697    146
13698    176
13699    150
Name: Weight, Length: 13700, dtype: int64

In [71]:
# pounds -> kilograms helper
def convert (x):
    """Return the weight `x` (pounds) expressed in whole kilograms.

    `x` is first coerced with int(); the quotient is then truncated towards
    zero by int(), not rounded.
    """
    pounds_per_kilo = 2.2046
    kilos = int(x) / pounds_per_kilo
    return int(kilos)

In [72]:
# Run every Weight value through convert(), then list the distinct results
data['Weight'] = [convert(pounds) for pounds in data['Weight']]
data['Weight'].unique()

array([ 73,  72,  60,  68,  69,  87,  78,  58,  77,  71,  74,  89,  76,
        79,  82,  55,  81,  88,  86,  63,  62,  64,  83,  66,  84,  59,
        67,  57,  93,  97,  91,  92,  96,  94, 102,  54,  53,  99, 107,
       101,  98, 110, 103, 106])

In [None]:
#If needed, standardize header names: lower-case, spaces replaced by underscores.
# The former `data.columns = cols` line was dropped: `cols` is not defined
# anywhere above, so on a fresh kernel it raised NameError.
data.columns = [header.lower().replace(' ', '_') for header in data.columns]
data.head()