# Position fitness analysis

In [1]:
%matplotlib inline
%config InlineBackend.figure_formats = {'png', 'retina'}

import getpass
import os

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import MySQLdb

from sklearn.tree import export_graphviz
from sklearn.datasets import load_iris
from sklearn.cross_validation import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

### Connect DB & Make QUERY

In [2]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. Host, user and schema fall back to the values this
# analysis was written against; the password has no fallback on purpose.
DB_HOST = os.environ.get("FOOTBALL_DB_HOST", "db.fastcamp.us")
DB_USER = os.environ.get("FOOTBALL_DB_USER", "root")
DB_NAME = os.environ.get("FOOTBALL_DB_NAME", "football")
# Prompt interactively when FOOTBALL_DB_PASSWORD is not exported.
# NOTE(review): the password that used to be committed here should be rotated.
DB_PASSWORD = os.environ.get("FOOTBALL_DB_PASSWORD") or getpass.getpass("football DB password: ")

db = MySQLdb.connect(
    DB_HOST,
    DB_USER,
    DB_PASSWORD,
    DB_NAME,
    charset='utf8',
)

# forward features
# apps_sub, mins, goals, assists, yel, spg, motm, aw, tackles, inter, drb, blocks, keyp_x, fouled, unstch, avgp          

# defender features
# goals, assists, spg, ps_x, motm, aw, tackles, inter, fouls, clear, owng, keyp_x, fouled, unstch, avgp          

# midfielder features
# age, apps_sub, mins, goals, assists, spg, ps_x, motm, aw, tackles, inter, keyp_x, fouled, avgp

def make_query(position, min_minutes=270):
    """Build the SELECT statement for players listed under a single position.

    Rows whose ``position`` value contains a comma are excluded. The remaining
    rows must contain ``position`` as a substring and have ``mins`` greater
    than ``min_minutes``.

    parameter------------
    position : one of "M", "D", "F", "G"
    min_minutes : int, exclusive lower bound on ``mins`` (default 270)

    return---------------
    SQL_QUERY String

    raise----------------
    ValueError if position is not one of the four codes above

    """
    # The value is interpolated into the SQL text, so only the known one-letter
    # codes are let through (an empty string would otherwise match every row).
    if position not in ("M", "D", "F", "G"):
        raise ValueError(
            "position must be one of 'M', 'D', 'F', 'G', got {!r}".format(position)
        )

    SQL_QUERY = """
        SELECT
            tall, weight, apps_sub, mins, goals, assists
            , spg, ps_x, motm, aw, tackles, inter, fouls, clear, drb
            , owng, keyp_x, fouled, off, disp, unstch, avgp
        FROM player
        WHERE position not like "%,%" and position like "%{position}%" and mins > {min_minutes}

        ;
    """.format(position=position, min_minutes=int(min_minutes))

    return SQL_QUERY

### Get Each Position Dataframe

In [5]:
# Load one DataFrame per position code, in the order F, M, D, G.
position_frames = []
for position_code in ("F", "M", "D", "G"):
    # SQL_QUERY ends up holding the goalkeeper query once the loop finishes
    SQL_QUERY = make_query(position_code)
    position_frames.append(pd.read_sql(SQL_QUERY, db))

# forward, midfielder, defender, goalkeeper
forward_df, midfielder_df, defender_df, goalkeeper_df = position_frames

# Row count of each frame, in the same order
tuple(len(frame) for frame in position_frames)

(356, 997, 971, 213)

### Set Class per Position & Concat Dataframes

In [9]:
# Integer class label per position: 0 forward, 1 midfielder, 2 defender, 3 goalkeeper.
# The label is added as a new last column of each frame, so in the concatenated
# frame the features are every column but the last and the target is the last one.
labeled_frames = [forward_df, midfielder_df, defender_df, goalkeeper_df]
for position_label, position_df in enumerate(labeled_frames):
    position_df["position"] = position_label

# Row labels are carried over from the per-position frames, so they repeat
# across positions in merged_df.
merged_df = pd.concat(labeled_frames)
merged_df.tail()

Unnamed: 0,tall,weight,apps_sub,mins,goals,assists,spg,ps_x,motm,aw,...,clear,drb,owng,keyp_x,fouled,off,disp,unstch,avgp,position
208,187,82,0,2430,0,0,0.0,59.7,0,0.5,...,1.2,0.0,0,0.0,0.3,0.0,0.0,0.1,25.5,3
209,198,83,2,1397,0,0,0.0,47.0,0,0.4,...,1.1,0.1,0,0.0,0.2,0.0,0.0,0.0,19.8,3
210,199,91,0,2020,0,0,0.0,48.2,0,0.3,...,1.1,0.0,0,0.0,0.0,0.0,0.0,0.0,22.7,3
211,188,78,0,2970,0,0,0.0,54.2,0,0.2,...,0.8,0.1,0,0.0,0.2,0.0,0.0,0.0,28.9,3
212,185,81,0,450,0,0,0.0,62.8,0,0.2,...,0.8,0.0,0,0.2,0.0,0.0,0.0,0.0,27.4,3


### Classification Model

In [12]:
# Split train data and test data.
# Features are every column except the last; the target is the last column ("position").
# .iloc is used for the positional slices because .ix was deprecated in
# pandas 0.20 and removed in pandas 1.0.
X_train, X_test, y_train, y_test = train_test_split(
    merged_df.iloc[:, :-1],
    merged_df.iloc[:, -1],
    test_size=0.2,
    random_state=1,
)

# Make DecisionTreeClassifier.
# random_state is fixed because scikit-learn permutes the features at every
# split, so an unseeded tree can differ between runs when candidate splits tie.
model = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=3,
    random_state=1,
).fit(X_train, y_train)

In [13]:
# Confusion matrix of the fitted tree on the held-out test split
test_predictions = model.predict(X_test)
confusion_matrix(y_true=y_test, y_pred=test_predictions)

array([[ 28,  28,   2,   0],
       [  3, 189,  14,   0],
       [  1,  48, 154,   0],
       [  0,   0,   0,  41]])

In [14]:
# Per-class precision / recall / f1-score on the test split
report_text = classification_report(y_test, model.predict(X_test))
print(report_text)

             precision    recall  f1-score   support

          0       0.88      0.48      0.62        58
          1       0.71      0.92      0.80       206
          2       0.91      0.76      0.83       203
          3       1.00      1.00      1.00        41

avg / total       0.83      0.81      0.81       508



### Recommend Position

In [15]:
# Players whose `position` value contains a comma and whose mins exceed 270.
# The column list matches the one built by make_query, with the raw position
# string appended as the last column.
SQL_QUERY = """
    SELECT 
        tall, weight, apps_sub, mins, goals, assists
        , spg, ps_x, motm, aw, tackles, inter, fouls, clear, drb
        , owng, keyp_x, fouled, off, disp, unstch, avgp, position
    FROM player
    WHERE position like "%,%" and mins > 270
    ;
"""

multi_position_player_df = pd.read_sql(sql=SQL_QUERY, con=db)

# Number of rows returned
multi_position_player_df.shape[0]

586

In [16]:
# Predict a single-position class for every multi-position player.
# The feature matrix is built by dropping the non-feature columns by name
# rather than slicing "all but the last column": once recommend_position has
# been appended, a positional slice would feed the raw position strings to the
# model on a second run of this cell. errors="ignore" covers the first run,
# when recommend_position does not exist yet.
# (The previous .ix indexer was removed in pandas 1.0.)
feature_df = multi_position_player_df.drop(
    ["position", "recommend_position"], axis=1, errors="ignore"
)
test_data = model.predict(feature_df)
multi_position_player_df["recommend_position"] = test_data

In [18]:
# Recommendation result
# 0 : Forward, 1 : Midfielder, 2 : Defender, 3 : Goalkeeper
# .loc replaces the removed .ix indexer. On the integer row index the slice is
# label-based and inclusive (rows 0 through 10), which is how .ix behaved here;
# the two columns are selected by name instead of by position.
multi_position_player_df.loc[:10, ["position", "recommend_position"]]

Unnamed: 0,position,recommend_position
0,"M(CLR),FW",1
1,"D(C),DMC",1
2,"D(LR),M(CR)",2
3,"D(L),M(L)",1
4,"D(C),M(C)",2
5,"D(C),M(C)",1
6,"AM(L),FW",0
7,"AM(CLR),FW",1
8,"D(C),M(CLR)",1
9,"AM(CR),FW",1


### 결론
- 포지션 분류 모델의 정확도는 약 81%로 나왔으나, 공격수와 미드필더를 잘 분류하지 못하는 한계가 있었다(공격수 재현율 0.48). 그러나 포지션별이 아닌 Role별로 선수를 나눈다면 더 나은 모델이 될 것이라고 확신한다.