In [1]:
import pandas as pd

data = pd.read_csv('./FIFA2.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18159 entries, 0 to 18158
Data columns (total 83 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Name                      18159 non-null  object 
 1   Age                       18159 non-null  int64  
 2   Nationality               18159 non-null  object 
 3   Overall                   18159 non-null  int64  
 4   Potential                 18159 non-null  int64  
 5   Club                      17918 non-null  object 
 6   Value                     18159 non-null  int64  
 7   Wage                      18159 non-null  int64  
 8   Special                   18159 non-null  int64  
 9   Preferred Foot            18159 non-null  object 
 10  International Reputation  18159 non-null  float64
 11  Weak Foot                 18159 non-null  float64
 12  Skill Moves               18159 non-null  float64
 13  Work Rate                 18159 non-null  object 
 14  Body T

In [2]:
map_position = {'FW':0, 'MD':1, 'DF':2, 'GK':3}
col = ['Position simplified']
data[col] = data[col].applymap(map_position.get)
data['Position simplified']

0        0
1        0
2        0
3        3
4        1
        ..
18154    1
18155    0
18156    0
18157    0
18158    1
Name: Position simplified, Length: 18159, dtype: int64

In [3]:
X = data.loc[:, 'Crossing':'GKReflexes']
XX = X.drop('Strength', axis=1)
y = data.loc[:, 'Position simplified']
xy = pd.concat([X, y], axis=1)
xxy = pd.concat([XX, y], axis=1)

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

In [5]:
from sklearn.metrics import classification_report, f1_score, confusion_matrix
def get_model_train_eval(model, X_train=None, X_test=None, y_train=None, y_test=None):
    model.fit(X_train, y_train)
    print('{} Test Accuracy: {}%'.format(model, round(model.score(X_test, y_test)*100, 2)))


    pred_model = model.predict(X_test)
    print('{} report:{}\n'.format(model.__class__.__name__, classification_report(y_test, pred_model)))

## 모든 열

In [6]:
import multiprocessing
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

pipe = make_pipeline(StandardScaler(), SVC(random_state=0))

param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

param_grid = [
    {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']}
]

grid_model_svc = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_svc.fit(X_train, y_train)

Fitting 3 folds for each of 64 candidates, totalling 192 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:  4.0min
[Parallel(n_jobs=8)]: Done 192 out of 192 | elapsed: 19.7min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0,
                                     1000.0],
                          'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0,
                                         100.0, 1000.0],
                          'svc__kernel': ['rbf']}],
             verbose=True)

In [7]:
grid_model_svc.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=10.0, gamma=0.01, random_state=0))])

In [8]:
get_model_train_eval(grid_model_svc.best_estimator_, X_train, X_test, y_train, y_test)

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=10.0, gamma=0.01, random_state=0))]) Test Accuracy: 88.49%
Pipeline report:              precision    recall  f1-score   support

           0       0.87      0.74      0.80       684
           1       0.83      0.89      0.86      1368
           2       0.92      0.93      0.93      1173
           3       1.00      0.99      0.99       407

    accuracy                           0.88      3632
   macro avg       0.91      0.89      0.89      3632
weighted avg       0.89      0.88      0.88      3632




## 열 제외

In [8]:
XX_train, XX_test, yy_train, yy_test = train_test_split(XX, y, test_size=0.2, random_state=0, stratify=y)

In [10]:
import multiprocessing
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

pipe = make_pipeline(StandardScaler(), SVC(random_state=0))

param_range = [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]

param_grid = [
    {'svc__C': param_range, 'svc__gamma': param_range, 'svc__kernel': ['rbf']}
]

grid_model_svc2 = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_svc2.fit(XX_train, yy_train)

Fitting 3 folds for each of 64 candidates, totalling 192 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:  3.4min
[Parallel(n_jobs=8)]: Done 192 out of 192 | elapsed: 17.3min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0,
                                     1000.0],
                          'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0,
                                         100.0, 1000.0],
                          'svc__kernel': ['rbf']}],
             verbose=True)

In [11]:
grid_model_svc2.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(gamma=0.1, random_state=0))])

In [12]:
get_model_train_eval(grid_model_svc2, XX_train, XX_test, yy_train, yy_test)

Fitting 3 folds for each of 64 candidates, totalling 192 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:  3.2min
[Parallel(n_jobs=8)]: Done 192 out of 192 | elapsed: 18.0min finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0,
                                     1000.0],
                          'svc__gamma': [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0,
                                         100.0, 1000.0],
                          'svc__kernel': ['rbf']}],
             verbose=True) Test Accuracy: 88.41%
GridSearchCV report:              precision    recall  f1-score   support

           0       0.86      0.75      0.80       684
           1       0.83      0.88      0.86      1

# 다시 전체

In [7]:
import multiprocessing
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

pipe = make_pipeline(StandardScaler(), SVC(random_state=0))

param_grid = [
    {'svc__C': [ 1.0, 5.0, 10.0, 40.0, 70.0, 100.0],
    'svc__gamma': [0.001,0.005, 0.01, 0.05, 0.1],
    'svc__kernel': ['rbf']}
]

grid_model_svc3 = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_svc3.fit(X_train, y_train)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:  1.1min
[Parallel(n_jobs=8)]: Done  90 out of  90 | elapsed:  2.6min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [1.0, 5.0, 10.0, 40.0, 70.0, 100.0],
                          'svc__gamma': [0.001, 0.005, 0.01, 0.05, 0.1],
                          'svc__kernel': ['rbf']}],
             verbose=True)

In [8]:
grid_model_svc3.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=70.0, gamma=0.005, random_state=0))])

In [9]:
get_model_train_eval(grid_model_svc3, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  34 tasks      | elapsed:   47.7s
[Parallel(n_jobs=8)]: Done  90 out of  90 | elapsed:  2.2min finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [1.0, 5.0, 10.0, 40.0, 70.0, 100.0],
                          'svc__gamma': [0.001, 0.005, 0.01, 0.05, 0.1],
                          'svc__kernel': ['rbf']}],
             verbose=True) Test Accuracy: 88.44%
GridSearchCV report:              precision    recall  f1-score   support

           0       0.87      0.74      0.80       684
           1       0.83      0.89      0.86      1368
           2       0.92      0.93      0.93      1173
           3       1.00      0.99      0.99       407

    

In [13]:
pipe = make_pipeline(StandardScaler(), SVC(random_state=0))

param_grid = [
    {'svc__C': [0.0, 5.0, 10.0, 20.0, 30.0],
    'svc__gamma': [0.001, 0.003, 0.005, 0.007, 0.01],
    'svc__kernel': ['rbf']}
]

grid_model_svc4 = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_svc4.fit(X_train, y_train)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  50 tasks      | elapsed:   53.3s
[Parallel(n_jobs=8)]: Done  75 out of  75 | elapsed:  1.3min finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [0.0, 5.0, 10.0, 20.0, 30.0],
                          'svc__gamma': [0.001, 0.003, 0.005, 0.007, 0.01],
                          'svc__kernel': ['rbf']}],
             verbose=True)

In [14]:
grid_model_svc4.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=20.0, gamma=0.01, random_state=0))])

In [15]:
get_model_train_eval(grid_model_svc4, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  49 tasks      | elapsed:   41.1s
[Parallel(n_jobs=8)]: Done  75 out of  75 | elapsed:  1.1min finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [0.0, 5.0, 10.0, 20.0, 30.0],
                          'svc__gamma': [0.001, 0.003, 0.005, 0.007, 0.01],
                          'svc__kernel': ['rbf']}],
             verbose=True) Test Accuracy: 88.41%
GridSearchCV report:              precision    recall  f1-score   support

           0       0.87      0.74      0.80       684
           1       0.83      0.89      0.86      1368
           2       0.92      0.93      0.93      1173
           3       1.00      0.99      0.99       407

    accu

In [21]:
pipe = make_pipeline(StandardScaler(), SVC(random_state=0))

param_grid = [
    {'svc__C': [10.0],
    'svc__gamma': [0.005],
    'svc__kernel': ['rbf']}
]

grid_model_svc5 = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    n_jobs=multiprocessing.cpu_count(),
    cv=3,
    verbose=True
)

grid_model_svc5.fit(X_train, y_train)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   3 out of   3 | elapsed:    6.3s finished


GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [10.0], 'svc__gamma': [0.005],
                          'svc__kernel': ['rbf']}],
             verbose=True)

In [22]:
grid_model_svc5.best_estimator_

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('svc', SVC(C=10.0, gamma=0.005, random_state=0))])

In [23]:
get_model_train_eval(grid_model_svc5, X_train, X_test, y_train, y_test)

Fitting 3 folds for each of 1 candidates, totalling 3 fits
[Parallel(n_jobs=8)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done   3 out of   3 | elapsed:    5.5s finished
GridSearchCV(cv=3,
             estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('svc', SVC(random_state=0))]),
             n_jobs=8,
             param_grid=[{'svc__C': [10.0], 'svc__gamma': [0.005],
                          'svc__kernel': ['rbf']}],
             verbose=True) Test Accuracy: 88.49%
GridSearchCV report:              precision    recall  f1-score   support

           0       0.88      0.74      0.80       684
           1       0.83      0.89      0.86      1368
           2       0.92      0.93      0.92      1173
           3       1.00      0.99      0.99       407

    accuracy                           0.88      3632
   macro avg       0.91      0.89      0.89      3632
weighted avg       0.89      0.88     