# FIFA 18 Value Prediction
## Data Modeling

In [1]:
# Imports
import os

import numpy as np
import pandas as pd
from IPython.display import display, Markdown as md
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# Config
# IPython line magic that selects matplotlib's "notebook" backend.
# NOTE(review): no plotting call is visible in this notebook — confirm this magic is still needed.
%matplotlib notebook
# Show every column when a DataFrame is displayed (None removes the column limit)
pd.set_option("display.max_columns", None)

### Load the data
Loads the cleaned *.csv* file into a *pandas DataFrame*.

In [2]:
# Read the semicolon-delimited dataset into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="data/processed/CompleteDataset_cleaned.csv",
    delimiter=";",
)

### Prepare the data

In [3]:
# Boolean mask selecting rows whose goalkeeper indicator is 0.
# It is computed on the loaded frame; dropping a column below does not
# change the row index, so the mask lines up with the reduced frame.
exclude_GK = df["Preferred Position_GK"] == 0

# Remove the "Name" column, then keep only the rows selected by the mask
df_model = df.drop(columns="Name").loc[exclude_GK]

### Train and Test Split
Splits the dataset into a *train set* (80%) and *test set* (20%)

In [4]:
target = "Value"

# Every column except the target is used as a predictor
features = list(df_model.columns)
features.remove(target)

X = df_model[features]
y = df_model[[target]]  # double brackets keep y as a single-column DataFrame

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report the resulting shapes as rendered markdown
display(md("*X_train* shape: {0} - *X_test* shape: {1}".format(X_train.shape, X_test.shape)))
display(md("*y_train* shape: {0} - *y_test* shape: {1}".format(y_train.shape, y_test.shape)))


*X_train* shape: (12761, 81) - *X_test* shape: (3191, 81)

*y_train* shape: (12761, 1) - *y_test* shape: (3191, 1)

### Linear Regression Model

In [5]:
# Linear regression with the estimator's default settings
lin_reg = LinearRegression()
# Kept as the cell's final expression so the fitted estimator is echoed as output
lin_reg.fit(X=X_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [6]:
# Predict the target for the held-out test rows
predictions = lin_reg.predict(X=X_test)

### Model Evaluation

In [7]:
# Root Mean Squared Error on the held-out test set.
# scikit-learn metrics are documented as metric(y_true, y_pred): ground truth
# first, predictions second. MSE itself is symmetric, so the value is the same
# either way, but keeping the documented order avoids wrong results if this
# line is adapted to a metric that is not symmetric.
lin_mse = mean_squared_error(y_test, predictions)
lin_rmse = np.sqrt(lin_mse)

# int() truncates the RMSE to whole euros for display
display(md("**Root Mean Squared Error: €{0}**".format(int(lin_rmse))))

**Root Mean Squared Error: €2470056**

In [8]:
# Cross-Validation
# 10-fold cross-validation of the linear model on the training set only.
# The "neg_mean_squared_error" scorer returns negated MSE values, so they are
# negated again before taking the square root to obtain one RMSE per fold.
lin_scores = cross_val_score(lin_reg, X_train, y_train, scoring="neg_mean_squared_error", cv=10)
lin_rmse_scores = np.sqrt(-lin_scores)

# Per-fold RMSE, followed by its mean and spread across the folds
display(md("Scores: {0}".format(lin_rmse_scores)))
display(md("Mean: {0:.2f}".format(lin_rmse_scores.mean())))
display(md("Standard deviation: {0:.2f}".format(lin_rmse_scores.std())))

Scores: [2670552.59169354 2505697.50488006 2428020.66506239 2764196.83287113
 2068694.68587324 3192660.56906027 2463300.93038895 2546529.93772246
 2610852.22458909 2332197.58796222]

Mean: 2558270.35

Standard deviation: 279268.32

### Export the results

In [9]:
# Flatten the prediction array to 1-D and label it with the test-set index so
# that it aligns row-for-row with X_test and y_test in the concat below.
predictions_lr = pd.Series(data=predictions.ravel(), index=X_test.index, name="Prediction LR")
results_df = pd.concat([X_test, y_test, predictions_lr], axis=1, ignore_index=False)

display(results_df.head())

# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (no-op when it is already there).
os.makedirs("data/output", exist_ok=True)
results_df.to_csv("data/output/predictions.csv", sep=";", index=False)

Unnamed: 0,Age,Overall,Potential,Wage,Special,Acceleration,Aggression,Agility,Balance,Ball control,Composure,Crossing,Curve,Dribbling,Finishing,Free kick accuracy,GK diving,GK handling,GK kicking,GK positioning,GK reflexes,Heading accuracy,Interceptions,Jumping,Long passing,Long shots,Marking,Penalties,Positioning,Reactions,Short passing,Shot power,Sliding tackle,Sprint speed,Stamina,Standing tackle,Strength,Vision,Volleys,CAM,CB,CDM,CF,CM,ID,LAM,LB,LCB,LCM,LDM,LF,LM,LS,LW,LWB,RAM,RB,RCB,RCM,RDM,RF,RM,RS,RW,RWB,ST,Preferred Position_CAM,Preferred Position_CB,Preferred Position_CDM,Preferred Position_CF,Preferred Position_CM,Preferred Position_GK,Preferred Position_LB,Preferred Position_LM,Preferred Position_LW,Preferred Position_LWB,Preferred Position_RB,Preferred Position_RM,Preferred Position_RW,Preferred Position_RWB,Preferred Position_ST,Value,Prediction LR
1486,0.322581,0.625,0.625,0.024779,0.769034,0.823529,0.435294,0.792683,0.729412,0.804598,0.758242,0.732558,0.72093,0.789474,0.752688,0.764045,0.066667,0.122222,0.159574,0.111111,0.089888,0.733333,0.465909,0.817073,0.767442,0.764045,0.579545,0.735632,0.763441,0.647059,0.792683,0.824176,0.275862,0.847059,0.518072,0.340909,0.546512,0.761905,0.701149,0.738462,0.451613,0.542373,0.738462,0.701754,0.87523,0.738462,0.5,0.451613,0.701754,0.542373,0.738462,0.741935,0.704918,0.753846,0.54717,0.738462,0.5,0.451613,0.701754,0.542373,0.738462,0.741935,0.704918,0.753846,0.54717,0.704918,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,8500000.0,4685312.0
14611,0.16129,0.291667,0.458333,0.00354,0.516955,0.870588,0.552941,0.695122,0.517647,0.563218,0.527473,0.581395,0.546512,0.610526,0.387097,0.303371,0.066667,0.088889,0.12766,0.144444,0.067416,0.555556,0.579545,0.585366,0.453488,0.438202,0.568182,0.310345,0.473118,0.264706,0.560976,0.43956,0.574713,0.870588,0.698795,0.613636,0.581395,0.369048,0.16092,0.4,0.516129,0.491525,0.4,0.385965,0.935349,0.4,0.537037,0.516129,0.385965,0.491525,0.4,0.467742,0.327869,0.461538,0.528302,0.4,0.537037,0.516129,0.385965,0.491525,0.4,0.467742,0.327869,0.461538,0.528302,0.327869,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,260000.0,-1582336.0
5858,0.129032,0.479167,0.708333,0.028319,0.614203,0.788235,0.529412,0.890244,0.8,0.735632,0.736264,0.627907,0.767442,0.736842,0.677419,0.764045,0.111111,0.144444,0.138298,0.122222,0.146067,0.288889,0.284091,0.52439,0.662791,0.707865,0.136364,0.632184,0.634409,0.5,0.670732,0.626374,0.241379,0.741176,0.554217,0.272727,0.209302,0.678571,0.643678,0.630769,0.177419,0.355932,0.615385,0.561404,0.914918,0.630769,0.296296,0.177419,0.561404,0.355932,0.615385,0.629032,0.491803,0.646154,0.358491,0.630769,0.296296,0.177419,0.561404,0.355932,0.615385,0.629032,0.491803,0.646154,0.358491,0.491803,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1800000.0,5759232.0
11677,0.225806,0.375,0.479167,0.00708,0.661548,0.776471,0.658824,0.682927,0.670588,0.632184,0.615385,0.534884,0.453488,0.652632,0.419355,0.41573,0.1,0.111111,0.085106,0.088889,0.067416,0.566667,0.670455,0.731707,0.651163,0.662921,0.613636,0.436782,0.623656,0.485294,0.658537,0.769231,0.666667,0.741176,0.771084,0.670455,0.616279,0.619048,0.551724,0.538462,0.596774,0.644068,0.538462,0.578947,0.990962,0.538462,0.611111,0.596774,0.578947,0.644068,0.538462,0.564516,0.47541,0.538462,0.622642,0.538462,0.611111,0.596774,0.578947,0.644068,0.538462,0.564516,0.47541,0.538462,0.622642,0.47541,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,600000.0,587264.0
13011,0.322581,0.333333,0.395833,0.00177,0.637876,0.729412,0.741176,0.682927,0.588235,0.632184,0.626374,0.616279,0.616279,0.515789,0.569892,0.662921,0.166667,0.055556,0.085106,0.133333,0.078652,0.544444,0.568182,0.45122,0.569767,0.573034,0.409091,0.45977,0.698925,0.5,0.658537,0.681319,0.551724,0.741176,0.939759,0.568182,0.546512,0.547619,0.54023,0.523077,0.483871,0.559322,0.523077,0.54386,0.784464,0.523077,0.555556,0.483871,0.54386,0.559322,0.523077,0.548387,0.491803,0.538462,0.566038,0.523077,0.555556,0.483871,0.54386,0.559322,0.523077,0.548387,0.491803,0.538462,0.566038,0.491803,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,375000.0,-438528.0
