In [1]:
import pandas as pd

data = pd.read_csv('./FIFA2.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18159 entries, 0 to 18158
Data columns (total 83 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Name                      18159 non-null  object 
 1   Age                       18159 non-null  int64  
 2   Nationality               18159 non-null  object 
 3   Overall                   18159 non-null  int64  
 4   Potential                 18159 non-null  int64  
 5   Club                      17918 non-null  object 
 6   Value                     18159 non-null  int64  
 7   Wage                      18159 non-null  int64  
 8   Special                   18159 non-null  int64  
 9   Preferred Foot            18159 non-null  object 
 10  International Reputation  18159 non-null  float64
 11  Weak Foot                 18159 non-null  float64
 12  Skill Moves               18159 non-null  float64
 13  Work Rate                 18159 non-null  object 
 14  Body T

In [2]:
map_position = {'FW':0, 'MD':1, 'DF':2, 'GK':3}
col = ['Position simplified']
data[col] = data[col].applymap(map_position.get)
data['Position simplified']

0        0
1        0
2        0
3        3
4        1
        ..
18154    1
18155    0
18156    0
18157    0
18158    1
Name: Position simplified, Length: 18159, dtype: int64

In [3]:
X = data.loc[:, 'Crossing':'GKReflexes']
y = data.loc[:, 'Position simplified']
xy = pd.concat([X, y], axis=1)

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

In [5]:
from sklearn.metrics import classification_report, f1_score, confusion_matrix
def get_model_train_eval(model, X_train=None, X_test=None, y_train=None, y_test=None):
    model.fit(X_train, y_train)
    print('{} Test Accuracy: {}%'.format(model, round(model.score(X_test, y_test)*100, 2)))


    pred_model = model.predict(X_test)
    print('{} report:{}\n'.format(model.__class__.__name__, classification_report(y_test, pred_model)))

In [12]:
import multiprocessing
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier

pipe = make_pipeline(StandardScaler(), LGBMClassifier())

param_grid = [
    {'lgbmclassifier__n_estimators': [1000],
    'lgbmclassifier__learning_rate': [0.01, 0.02, 0.03, 0.04, 0.05], 
    'lgbmclassifier__max_depth': [2, 12, 24],
    'lgbmclassifier__min_child_weight': [30, 60, 90],
    'lgbmclassifier__num_iterations': [50, 100, 150, 200],
    'lgbmclassifier__num_leaves': [300, 400, 500, 600] }
]

grid_model_lgbm = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_lgbm.fit(X_train, y_train)

Fitting 3 folds for each of 720 candidates, totalling 2160 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   25.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  2.0min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  6.6min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed: 11.1min
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed: 16.2min
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed: 21.8min
[Parallel(n_jobs=8)]: Done 2160 out of 2160 | elapsed: 25.7min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.01, 0.02, 0.03,
                                                            0.04, 0.05],
                          'lgbmclassifier__max_depth': [2, 12, 24],
                          'lgbmclassifier__min_child_weight': [30, 60, 90],
                          'lgbmclassifier__n_estimators': [1000],
                          'lgbmclassifier__num_iterations': [50, 100, 150, 200],
                          'lgbmclassifier__num_leaves': [300, 400, 500, 600]}],
             verbose=True)

In [13]:
grid_model_lgbm.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('lgbmclassifier',
                 LGBMClassifier(learning_rate=0.04, max_depth=12,
                                min_child_weight=30, n_estimators=1000,
                                num_iterations=150, num_leaves=300))])

In [14]:
get_model_train_eval(grid_model_lgbm, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 720 candidates, totalling 2160 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   11.2s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  1.7min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  5.8min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed:  9.9min
[Parallel(n_jobs=8)]: Done 1234 tasks      | elapsed: 14.8min
[Parallel(n_jobs=8)]: Done 1784 tasks      | elapsed: 20.1min
[Parallel(n_jobs=8)]: Done 2160 out of 2160 | elapsed: 24.0min finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.01, 0.02, 0.03,
                                                            0.04, 0.05],
                          'lgbmclassifier__max_depth': [2, 12, 24],
    

In [15]:
import multiprocessing
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from lightgbm import LGBMClassifier

pipe = make_pipeline(StandardScaler(), LGBMClassifier())

param_grid = [
    {'lgbmclassifier__n_estimators': [1000],
    'lgbmclassifier__learning_rate': [0.03, 0.04, 0.05], 
    'lgbmclassifier__max_depth': [7, 12, 17],
    'lgbmclassifier__min_child_weight': [10, 30, 50],
    'lgbmclassifier__num_iterations': [125, 150, 175],
    'lgbmclassifier__num_leaves': [100, 200, 300, 400] }
]

grid_model_lgbm = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_lgbm.fit(X_train, y_train)

Fitting 3 folds for each of 324 candidates, totalling 972 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   52.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  3.1min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  7.5min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed: 13.3min
[Parallel(n_jobs=8)]: Done 972 out of 972 | elapsed: 16.1min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.03, 0.04, 0.05],
                          'lgbmclassifier__max_depth': [7, 12, 17],
                          'lgbmclassifier__min_child_weight': [10, 30, 50],
                          'lgbmclassifier__n_estimators': [1000],
                          'lgbmclassifier__num_iterations': [125, 150, 175],
                          'lgbmclassifier__num_leaves': [100, 200, 300, 400]}],
             verbose=True)

In [18]:
grid_model_lgbm.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('lgbmclassifier',
                 LGBMClassifier(learning_rate=0.04, max_depth=12,
                                min_child_weight=30, n_estimators=1000,
                                num_iterations=150, num_leaves=100))])

In [19]:
get_model_train_eval(grid_model_lgbm, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 324 candidates, totalling 972 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   37.0s
[Parallel(n_jobs=8)]: Done 184 tasks      | elapsed:  3.2min
[Parallel(n_jobs=8)]: Done 434 tasks      | elapsed:  7.4min
[Parallel(n_jobs=8)]: Done 784 tasks      | elapsed: 12.9min
[Parallel(n_jobs=8)]: Done 972 out of 972 | elapsed: 16.0min finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.03, 0.04, 0.05],
                          'lgbmclassifier__max_depth': [7, 12, 17],
                          'lgbmclassifier__min_child_weight': [10, 30, 50],
                          'lgbmclassifier__n_estimators': [1000],
                          'lgbmclassifier__num_iterations': [

In [20]:
pipe = make_pipeline(StandardScaler(), LGBMClassifier())

param_grid = [
    {'lgbmclassifier__n_estimators': [1000],
    'lgbmclassifier__learning_rate': [0.04], 
    'lgbmclassifier__max_depth': [12],
    'lgbmclassifier__min_child_weight': [30],
    'lgbmclassifier__num_iterations': [150],
    'lgbmclassifier__num_leaves': [30, 60, 90, 100, 130, 160, 190] }
]

grid_model_lgbm = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_lgbm.fit(X_train, y_train)

Fitting 3 folds for each of 7 candidates, totalling 21 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  21 out of  21 | elapsed:   36.1s finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.04],
                          'lgbmclassifier__max_depth': [12],
                          'lgbmclassifier__min_child_weight': [30],
                          'lgbmclassifier__n_estimators': [1000],
                          'lgbmclassifier__num_iterations': [150],
                          'lgbmclassifier__num_leaves': [30, 60, 90, 100, 130,
                                                         160, 190]}],
             verbose=True)

In [21]:
grid_model_lgbm.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('lgbmclassifier',
                 LGBMClassifier(learning_rate=0.04, max_depth=12,
                                min_child_weight=30, n_estimators=1000,
                                num_iterations=150, num_leaves=90))])

In [22]:
get_model_train_eval(grid_model_lgbm, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 7 candidates, totalling 21 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  21 out of  21 | elapsed:   19.1s finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('lgbmclassifier', LGBMClassifier())]),
             n_jobs=8,
             param_grid=[{'lgbmclassifier__learning_rate': [0.04],
                          'lgbmclassifier__max_depth': [12],
                          'lgbmclassifier__min_child_weight': [30],
                          'lgbmclassifier__n_estimators': [1000],
                          'lgbmclassifier__num_iterations': [150],
                          'lgbmclassifier__num_leaves': [30, 60, 90, 100, 130,
                                                         160, 190]}],
             verbose=True) Test Accuracy: 88.0%
GridSearchCV report:              precision    recall  f1-score   suppo

In [23]:
model = LGBMClassifier(max_depth=4, min_child_weight=1,
                           learning_rate=0.1, subsample1=0.6,
                          num_leaves=30, num_iterations=100, n_estimators=100)
get_model_train_eval(model, X_train, X_test, y_train, y_test)

LGBMClassifier(max_depth=4, min_child_weight=1, num_iterations=100,
               num_leaves=30, subsample1=0.6) Test Accuracy: 88.0%
LGBMClassifier report:              precision    recall  f1-score   support

           0       0.83      0.75      0.79       684
           1       0.83      0.86      0.85      1368
           2       0.92      0.94      0.93      1173
           3       1.00      0.99      0.99       407

    accuracy                           0.88      3632
   macro avg       0.90      0.89      0.89      3632
weighted avg       0.88      0.88      0.88      3632




In [25]:
model2 = LGBMClassifier(learning_rate= 0.01, n_estimators= 500, objective='multiclass')
get_model_train_eval(model2, X_train, X_test, y_train, y_test)

LGBMClassifier(learning_rate=0.01, n_estimators=500, objective='multiclass') Test Accuracy: 88.11%
LGBMClassifier report:              precision    recall  f1-score   support

           0       0.85      0.76      0.80       684
           1       0.83      0.87      0.85      1368
           2       0.92      0.93      0.92      1173
           3       1.00      0.99      0.99       407

    accuracy                           0.88      3632
   macro avg       0.90      0.89      0.89      3632
weighted avg       0.88      0.88      0.88      3632




In [29]:
model3 = LGBMClassifier(learning_rate= 0.04, n_estimators= 1000,min_child_weight=30, objective='multiclass', num_iterations=150)
get_model_train_eval(model3, X_train, X_test, y_train, y_test)

LGBMClassifier(learning_rate=0.04, min_child_weight=30, n_estimators=1000,
               num_iterations=150, objective='multiclass') Test Accuracy: 88.13%
LGBMClassifier report:              precision    recall  f1-score   support

           0       0.84      0.76      0.80       684
           1       0.84      0.86      0.85      1368
           2       0.92      0.94      0.93      1173
           3       1.00      0.99      0.99       407

    accuracy                           0.88      3632
   macro avg       0.90      0.89      0.89      3632
weighted avg       0.88      0.88      0.88      3632


