# FIFA 21

## ETL

In [None]:
import pandas as pd

# Read the FIFA 21 player table from the CSV under ../input/fifa-21 and
# preview its first ten rows.
df_data_1 = pd.read_csv(filepath_or_buffer="../input/fifa-21/players_fifa21.csv")
df_data_1.head(n=10)


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_predict
from sklearn.feature_selection import RFECV

### DATA EXPLORATION

In [None]:
# Second name for the loaded frame. This is an alias, not a copy: in-place
# changes made through either name are visible through the other.
df_fifa = df_data_1

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Scatter of Overall against Potential with a least-squares straight line
# drawn on top in red.
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(df_fifa["Potential"], df_fifa["Overall"])
# A degree-1 polyfit returns the coefficients as (slope, intercept).
m, b = np.polyfit(df_fifa["Potential"], df_fifa["Overall"], 1)
ax.plot(df_fifa["Potential"], m * df_fifa["Potential"] + b, "r-")
ax.set_xlabel("Potential")
ax.set_ylabel("Overall")
ax.set_title("Overall Rating vs Potential Rating", fontsize=16)
plt.show()

In [None]:
# Distribution of player ages (matplotlib's default binning).
fig, ax = plt.subplots(figsize=(12, 8))
ax.hist(df_fifa["Age"])
ax.set_xlabel("Age")
ax.set_ylabel("Frequency")
ax.set_title("Histogram of Age")

In [None]:
# Ten highest-rated players, used for the "Best Players" chart.
# Select them by value instead of taking the first ten rows, so the result
# does not depend on the CSV already being sorted by "Overall". nlargest
# returns rows in descending order and, with the default keep="first",
# prefers earlier rows on ties, so a pre-sorted file yields the same rows.
df_fifa_top = df_fifa.nlargest(10, "Overall")

In [None]:
# Horizontal bar chart of the players in df_fifa_top; the y-axis is inverted
# so the first row of the frame is drawn at the top.
fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(df_fifa_top["Name"], df_fifa_top["Overall"], color="blue")
# Zoom the rating axis to the 85-95 band.
ax.set_xlim((85, 95))
plt.xticks(rotation=90)
ax.invert_yaxis()
ax.set_ylabel("Player")
ax.set_xlabel("Overall Ratings")
ax.set_title("Best Players on FIFA 21", fontsize=16);

## MODELING

In [None]:
# Columns kept for modeling. 'Potential' comes first and 'Name' second; the
# later cells that slice this frame by position depend on that order.
df_fifa_new = df_fifa[
    [
        'Potential',
        'Name',
        'Growth',
        'WeakFoot',
        'SkillMoves',
        'AttackingWorkRate',
        'DefensiveWorkRate',
        'ReleaseClause',
        'ValueEUR',
        'WageEUR',
        'Age',
    ]
]

In [None]:
import seaborn as sns

# Use seaborn's "ticks" style for the plots that follow.
sns.set(style="ticks")

In [None]:
# Annotated correlation heatmap of the modeling columns.
# df_fifa_new also holds non-numeric columns (e.g. 'Name' and the work-rate
# labels). pandas >= 2.0 raises on those in DataFrame.corr() instead of
# silently dropping them, so restrict to numeric columns explicitly; this
# gives the same matrix on every pandas version.
plt.figure(figsize=(20, 8))
cor = df_fifa_new.select_dtypes(include="number").corr()
sns.heatmap(cor, annot=True, cmap="viridis")
plt.show();

In [None]:
# Display the (rows, columns) size of the modeling frame.
df_fifa_new.shape

In [None]:
# Display the column labels of the modeling frame, in order.
df_fifa_new.columns

In [None]:
# Target: the 'Potential' column, kept as a one-column frame.
# Features: every other column of df_fifa_new ('Name' is still included here).
df_fifa_y = df_fifa_new[["Potential"]]
df_fifa_x = df_fifa_new.drop(columns="Potential")
# 80/20 split with a fixed seed so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    df_fifa_x,
    df_fifa_y,
    train_size=0.8,
    random_state=10,
)

In [None]:
# Feature frames without the leading 'Name' column, which the column
# transformer does not consume.
x_train1 = x_train.drop(columns="Name")
x_test1 = x_test.drop(columns="Name")

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import SelectFromModel

In [None]:
### Pre-processing
# Work-rate levels in their natural order. Passing them explicitly makes the
# ordinal codes Low=0 < Medium=1 < High=2. The default ("auto") sorts labels
# alphabetically (High, Low, Medium), which scrambles the ordering that the
# linear Lasso inside RFECV sees. Fitting the encoder by hand beforehand has
# no effect, because ColumnTransformer fits a fresh clone of each transformer.
# NOTE(review): assumes both work-rate columns contain only Low/Medium/High;
# any other value now raises at fit time - confirm against the data.
work_rate_levels = ['Low', 'Medium', 'High']
df_fifa_new_list = df_fifa_new[['AttackingWorkRate', 'DefensiveWorkRate']]
ord_list = list(df_fifa_new_list)  # the two work-rate column names
ord_enc = OrdinalEncoder(categories=[work_rate_levels] * len(ord_list))
one_hot = OneHotEncoder()  # kept for compatibility; not used by column_pipeline
# Ordinal-encode the work rates and standardise the numeric features; columns
# not listed here are dropped (ColumnTransformer's default remainder).
column_pipeline = ColumnTransformer([
    ("ord", ord_enc, ord_list),
    ("std", StandardScaler(), ['Growth', 'WeakFoot', 'SkillMoves', 'ReleaseClause', 'ValueEUR', 'WageEUR', 'Age']),
])
lin_reg = LinearRegression()  # kept for compatibility; not used in the cells visible here

In [None]:
# Model used for hyper-parameter search: column preprocessing, then recursive
# feature elimination driven by a 5-fold cross-validated Lasso, then a random
# forest regressor with default settings.
full_pipeline = Pipeline(
    steps=[
        ("column", column_pipeline),
        ("feature selection", RFECV(estimator=Lasso(alpha=0.1), cv=5)),
        ("reg", RandomForestRegressor()),
    ]
)

In [None]:
# Search space for the forest step: 49 values of n_estimators (1..49) times
# 9 values of max_features (1..9), each scored by 5-fold cross-validated R^2.
param_grid = [
    {
        'reg__n_estimators': np.arange(1, 50),
        'reg__max_features': np.arange(1, 10),
    },
]
grid_search = GridSearchCV(
    estimator=full_pipeline,
    param_grid=param_grid,
    scoring="r2",
    cv=5,
)

In [None]:
# Flatten the one-column target frame into a 1-D array for scikit-learn.
y_train1 = y_train.to_numpy().ravel()
y_train1

In [None]:
# Run the grid search on the training features (without 'Name') and the
# flattened 1-D target.
grid_search.fit(x_train1, y_train1)

In [None]:
# Display the pipeline configured with the best parameter combination found.
grid_search.best_estimator_

In [None]:
# Display the best mean cross-validated score (R^2, per scoring="r2").
grid_search.best_score_

In [None]:
# Keep a handle on the best pipeline found by the grid search.
final_model = grid_search.best_estimator_

In [None]:
# Same three-step pipeline as the search pipeline, but with the forest's
# hyper-parameters fixed (max_features=7, n_estimators=22).
train_pipeline = Pipeline(
    steps=[
        ("column", column_pipeline),
        ("feature selection", RFECV(estimator=Lasso(alpha=0.1), cv=5)),
        ("reg", RandomForestRegressor(n_estimators=22, max_features=7)),
    ]
)
from sklearn.pipeline import make_pipeline

In [None]:
# Out-of-fold predictions for every training row (5-fold cross-validation).
y_pred_cv = cross_val_predict(
    estimator=train_pipeline,
    X=x_train1,
    y=y_train1,
    cv=5,
)

In [None]:
# Training rows side by side with the true 'Potential' and the out-of-fold
# prediction, renumbered from 0 for display.
x_train_new = x_train.assign(Potential=y_train, Predicted=y_pred_cv)
x_train_final = x_train_new.reset_index(drop=True)
x_train_final.head(20)

In [None]:
# Fit the final pipeline on the full training split.
# Use the flattened 1-D target (y_train1), as the grid search and the
# cross-validated predictions do; passing the one-column DataFrame y_train
# makes scikit-learn warn about a column-vector y.
train_pipeline.fit(x_train1, y_train1)

In [None]:
# Predict on the held-out split using the same feature columns the pipeline
# was trained on (x_test1, i.e. without 'Name'), not the raw x_test frame.
y_pred = train_pipeline.predict(x_test1)

In [None]:
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
# R^2 of the predictions against the true 'Potential' on the test split.
r2_score(y_test, y_pred)

In [None]:
import math
# Root mean squared error on the test split (square root of the MSE).
math.sqrt(mean_squared_error(y_test, y_pred))

In [None]:
# Test rows side by side with the true 'Potential' and the model prediction,
# renumbered from 0 for display.
x_test_new = x_test.assign(Potential=y_test, Predicted=y_pred)
x_test_final = x_test_new.reset_index(drop=True)
x_test_final.head(20)