# 동점골과 경기 결과간의 상관관계

## 기본 컨셉
* 동점골 여부 혹은 횟수가 경기 결과에 영향을 줄 것이다

## 사용 데이터
* 2013~2017년 득점 기록 중 지고있던 상황에서 동점골을 득점한 기록을 추출
* 경기의 주체는 Home 팀으로 가정

## 데이터 추출

In [1]:
import db_conn
import pandas as pd
import numpy as np
import copy
import statsmodels.formula.api as sm
import scipy.stats as st

##### Game Records

In [2]:
sql = """SELECT * FROM game_records"""

In [3]:
# SQL 실행
game_records = db_conn.select_query(sql)

In [4]:
columns = ['game_id', 'winning_team', 'home_team_id', 'away_team_id']

data_source = [[item[key] for key in columns] for item in game_records]
game_records_pd = pd.DataFrame(data_source, columns=columns)
game_records_pd.index = game_records_pd.game_id
game_records_pd = game_records_pd.drop('game_id', axis=1)
get_game_info = lambda x: game_records_pd.loc[x]

game_records_pd.head()

Unnamed: 0_level_0,winning_team,home_team_id,away_team_id
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-1-001,0,10,25
2013-1-002,19,19,5
2013-1-003,23,21,23
2013-1-004,13,12,13
2013-1-005,0,20,2


##### Goal Records

In [5]:
sql = """SELECT id, score_player, match_id, score_team_id, play_time FROM goal_records_rev"""

In [6]:
# SQL 실행
goal_records = db_conn.select_query(sql)

In [7]:
columns = ['id', 'score_player', 'match_id', 'score_team_id', 'play_time']

data_source = [[item[key] for key in columns] for item in goal_records]
goal_records_pd = pd.DataFrame(data_source, columns=columns)
goal_records_pd = pd.DataFrame(goal_records_pd, columns=['id', 'location', 'score_player', 'match_id', 'home_team_id', 'score_team_id', 'play_time', 'score_point', 'diff_score', 'previous_status', 'tying_goal_flag'])

goal_records_pd.head()

Unnamed: 0,id,location,score_player,match_id,home_team_id,score_team_id,play_time,score_point,diff_score,previous_status,tying_goal_flag
0,2,,1,2013-1-001,,10,29,,,,
1,3,,2,2013-1-001,,25,32,,,,
2,4,,3,2013-1-001,,10,2,,,,
3,5,,4,2013-1-001,,25,38,,,,
4,6,,5,2013-1-002,,5,4,,,,


## 데이터 전처리

### 동점골이란?
* 1점차이로 추격하는 상황에서의 득점
* 그 이상의 점수차이에서 발생하는 추격골은 제외
* 각 경기의 득점 중 득점 전 상황이 -1인 상태(1점 뒤쳐진 상태)에서 득점한 경우 동점골로 판단

In [8]:
home_goal_pd = copy.deepcopy(goal_records_pd)
home_goal_pd.home_team_id = home_goal_pd.match_id.apply(get_game_info)['home_team_id']
home_goal_pd.score_point = np.where(home_goal_pd.home_team_id == home_goal_pd.score_team_id, 1, -1)
home_goal_pd.diff_score = home_goal_pd.groupby(['match_id'])['score_point'].cumsum()
home_goal_pd.previous_status = home_goal_pd.groupby(['match_id'])['diff_score'].shift(1).fillna(0)
home_goal_pd.tying_goal_flag = np.where(home_goal_pd.diff_score != 0, False, np.where(home_goal_pd.previous_status == -1, True, False))
home_goal_pd.location = 1
home_goal_pd.head()

Unnamed: 0,id,location,score_player,match_id,home_team_id,score_team_id,play_time,score_point,diff_score,previous_status,tying_goal_flag
0,2,1,1,2013-1-001,10,10,29,1,1,0.0,False
1,3,1,2,2013-1-001,10,25,32,-1,0,1.0,False
2,4,1,3,2013-1-001,10,10,2,1,1,0.0,False
3,5,1,4,2013-1-001,10,25,38,-1,0,1.0,False
4,6,1,5,2013-1-002,19,5,4,-1,-1,0.0,False


In [9]:
away_goal_pd = copy.deepcopy(goal_records_pd)
away_goal_pd.home_team_id = away_goal_pd.match_id.apply(get_game_info)['away_team_id']
away_goal_pd.score_point = np.where(away_goal_pd.home_team_id == away_goal_pd.score_team_id, 1, -1)
away_goal_pd.diff_score = away_goal_pd.groupby(['match_id'])['score_point'].cumsum()
away_goal_pd.previous_status = away_goal_pd.groupby(['match_id'])['diff_score'].shift(1).fillna(0)
away_goal_pd.tying_goal_flag = np.where(away_goal_pd.diff_score != 0, False, np.where(away_goal_pd.previous_status == -1, True, False))
away_goal_pd.location = 0
away_goal_pd.head()

Unnamed: 0,id,location,score_player,match_id,home_team_id,score_team_id,play_time,score_point,diff_score,previous_status,tying_goal_flag
0,2,0,1,2013-1-001,25,10,29,-1,-1,0.0,False
1,3,0,2,2013-1-001,25,25,32,1,0,-1.0,True
2,4,0,3,2013-1-001,25,10,2,-1,-1,0.0,False
3,5,0,4,2013-1-001,25,25,38,1,0,-1.0,True
4,6,0,5,2013-1-002,5,5,4,1,1,0.0,False


* 동점골 여부만 판단하는 tying_goal_pd와 동점골의 횟수까지 판단하는 tying_goal_count_pd로 분리

In [10]:
# target = 'home'
# target = 'away'
target = 'total'

if target == 'home':
    target_pd = home_goal_pd
elif target == 'away':
    target_pd = away_goal_pd
elif target == 'total':
    target_pd = home_goal_pd.append(away_goal_pd, ignore_index=True)
    
target_pd.head()

Unnamed: 0,id,location,score_player,match_id,home_team_id,score_team_id,play_time,score_point,diff_score,previous_status,tying_goal_flag
0,2,1,1,2013-1-001,10,10,29,1,1,0.0,False
1,3,1,2,2013-1-001,10,25,32,-1,0,1.0,False
2,4,1,3,2013-1-001,10,10,2,1,1,0.0,False
3,5,1,4,2013-1-001,10,25,38,-1,0,1.0,False
4,6,1,5,2013-1-002,19,5,4,-1,-1,0.0,False


In [11]:
tying_goal_pd = pd.DataFrame(target_pd.groupby(['match_id', 'location']).agg({'tying_goal_flag': 'sum', 'home_team_id': 'min'})
                             , columns=['home_team_id', 'tying_goal_flag', 'winning_flag'])
tying_goal_pd.reset_index(level=0, inplace=True)
tying_goal_pd.winning_flag = np.where(tying_goal_pd.match_id.apply(get_game_info)['winning_team'] == 0, 0, np.where(tying_goal_pd.home_team_id == tying_goal_pd.match_id.apply(get_game_info)['winning_team'], 1, -1))
tying_goal_count_pd = copy.deepcopy(tying_goal_pd)
tying_goal_pd.tying_goal_flag = np.where(tying_goal_pd.tying_goal_flag > 0, 1, 0)
tying_goal_pd.head()

Unnamed: 0_level_0,match_id,home_team_id,tying_goal_flag,winning_flag
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2013-1-001,25,1,0
1,2013-1-001,10,0,0
0,2013-1-002,5,0,-1
1,2013-1-002,19,1,1
0,2013-1-003,23,0,1


## 통계치 분석

### 동점골 여부와 경기 결과 Cross Table

In [12]:
total_match_counts = len(tying_goal_pd)
victory_count = [{'win': sum(np.where(tying_goal_pd.tying_goal_flag == 0, np.where(tying_goal_pd.winning_flag == 1, 1, 0), 0)), 
  'draw': sum(np.where(tying_goal_pd.tying_goal_flag == 0, np.where(tying_goal_pd.winning_flag == 0, 1, 0), 0)), 
  'lose': sum(np.where(tying_goal_pd.tying_goal_flag == 0, np.where(tying_goal_pd.winning_flag == -1, 1, 0), 0))}, 
 {'win': sum(np.where(tying_goal_pd.tying_goal_flag == 1, np.where(tying_goal_pd.winning_flag == 1, 1, 0), 0)), 
  'draw': sum(np.where(tying_goal_pd.tying_goal_flag == 1, np.where(tying_goal_pd.winning_flag == 0, 1, 0), 0)), 
  'lose': sum(np.where(tying_goal_pd.tying_goal_flag == 1, np.where(tying_goal_pd.winning_flag == -1, 1, 0), 0))}]

tying_goal_outcome_pd = pd.DataFrame(victory_count, columns = ['win', 'draw', 'lose'], index = ['non_tying_goal', 'tying_goal'])
tying_goal_outcome_pd = tying_goal_outcome_pd
tying_goal_outcome_pd / total_match_counts * 100

Unnamed: 0,win,draw,lose
non_tying_goal,32.755946,8.945191,31.902792
tying_goal,6.747673,12.04757,7.600827


* 동점골(Tying goal)이 없는 경우
    1. 승: 동점상황없이 계속 앞선 경우
    2. 무: 0:0
    3. 패: 돔정상황없이 계속 뒤진 경우
* 동점골이 있는 경우
    1. 승: 역전승
    2. 무: 동점상황을 만들고 종료 or 역전 후 다시 동점
    3. 패: 동점을 만들었으나 다시 격차가 벌어진 경우 or 역전을 했으나 다시 역전된 경우

#### 카이제곱 검정

In [13]:
print(tying_goal_outcome_pd.values)
result = st.chi2_contingency(tying_goal_outcome_pd.values)

result

[[1267  346 1234]
 [ 261  466  294]]


(509.9686491482283,
 1.8269003779208847e-111,
 2,
 array([[1124.6680455,  597.663909 , 1124.6680455],
        [ 403.3319545,  214.336091 ,  403.3319545]]))

* x^2는 509.97, P-value는 0.01보다 매우 작은 수치, 자유도는 2
* 동점골 여부에 따라 경기결과에 차이가 존재

#### 통계 확인

In [14]:
t2 = pd.DataFrame(tying_goal_outcome_pd, columns = ['win', 'draw', 'lose', 'total', 'win_prob', 'draw_prob', 'lose_prob'], index = ['non_tying_goal', 'tying_goal', 'total'])
t2.total = t2.win + t2.draw + t2.lose
t2.loc['total'] = pd.Series([sum(t2[:2].win), sum(t2[:2].draw), sum(t2[:2].lose), sum(t2[:2].total)], index = ['win', 'draw', 'lose', 'total'])
t2.win_prob = t2.win / t2.total
t2.draw_prob = t2.draw / t2.total
t2.lose_prob = t2.lose / t2.total
t2

Unnamed: 0,win,draw,lose,total,win_prob,draw_prob,lose_prob
non_tying_goal,1267.0,346.0,1234.0,2847.0,0.44503,0.121531,0.433439
tying_goal,261.0,466.0,294.0,1021.0,0.255632,0.456415,0.287953
total,1528.0,812.0,1528.0,3868.0,0.395036,0.209928,0.395036


* 동점골이 있는 경우 승/패의 비율은 줄어들지만, 무승부의 비율이 증가

### 동점골의 횟수와 경기 결과 Cross Table

In [15]:
max(tying_goal_count_pd.tying_goal_flag)

3.0

In [16]:
victory_count2 = \
[{'win': sum(np.where(tying_goal_count_pd.tying_goal_flag == 0, np.where(tying_goal_count_pd.winning_flag == 1, 1, 0), 0)), 
  'draw': sum(np.where(tying_goal_count_pd.tying_goal_flag == 0, np.where(tying_goal_count_pd.winning_flag == 0, 1, 0), 0)), 
  'lose': sum(np.where(tying_goal_count_pd.tying_goal_flag == 0, np.where(tying_goal_count_pd.winning_flag == -1, 1, 0), 0))}, 
 {'win': sum(np.where(tying_goal_count_pd.tying_goal_flag == 1, np.where(tying_goal_count_pd.winning_flag == 1, 1, 0), 0)), 
  'draw': sum(np.where(tying_goal_count_pd.tying_goal_flag == 1, np.where(tying_goal_count_pd.winning_flag == 0, 1, 0), 0)), 
  'lose': sum(np.where(tying_goal_count_pd.tying_goal_flag == 1, np.where(tying_goal_count_pd.winning_flag == -1, 1, 0), 0))},
{'win': sum(np.where(tying_goal_count_pd.tying_goal_flag == 2, np.where(tying_goal_count_pd.winning_flag == 1, 1, 0), 0)), 
  'draw': sum(np.where(tying_goal_count_pd.tying_goal_flag == 2, np.where(tying_goal_count_pd.winning_flag == 0, 1, 0), 0)), 
  'lose': sum(np.where(tying_goal_count_pd.tying_goal_flag == 2, np.where(tying_goal_count_pd.winning_flag == -1, 1, 0), 0))}]

if target != 'away':
    victory_count2.append({'win': sum(np.where(tying_goal_count_pd.tying_goal_flag == 3, np.where(tying_goal_count_pd.winning_flag == 1, 1, 0), 0)), 
      'draw': sum(np.where(tying_goal_count_pd.tying_goal_flag == 3, np.where(tying_goal_count_pd.winning_flag == 0, 1, 0), 0)), 
      'lose': sum(np.where(tying_goal_count_pd.tying_goal_flag == 3, np.where(tying_goal_count_pd.winning_flag == -1, 1, 0), 0))})

tying_goal_count_outcome_pd = pd.DataFrame(victory_count2, columns = ['win', 'draw', 'lose'])
tying_goal_count_outcome_pd

Unnamed: 0,win,draw,lose
0,1267,346,1234
1,245,412,274
2,16,53,19
3,0,1,1


#### 카이제곱 검정

In [17]:
print(tying_goal_count_outcome_pd.values)
result = st.chi2_contingency(tying_goal_count_outcome_pd.values)

result

[[1267  346 1234]
 [ 245  412  274]
 [  16   53   19]
 [   0    1    1]]


(522.9163223755935,
 9.710907562634509e-110,
 6,
 array([[1.12466805e+03, 5.97663909e+02, 1.12466805e+03],
        [3.67778697e+02, 1.95442606e+02, 3.67778697e+02],
        [3.47631851e+01, 1.84736298e+01, 3.47631851e+01],
        [7.90072389e-01, 4.19855222e-01, 7.90072389e-01]]))

* x^2는 522.92, P-value는 0.01보다 매우 작은 수치, 자유도는 6
* 동점골 횟수에 따라 경기결과에 차이가 존재

In [18]:
st.chi2_contingency(tying_goal_count_outcome_pd[['win', 'draw']].values)

(408.31214120924494,
 3.5046209376961894e-88,
 3,
 array([[1.05327521e+03, 5.59724786e+02],
        [4.29015385e+02, 2.27984615e+02],
        [4.50564103e+01, 2.39435897e+01],
        [6.52991453e-01, 3.47008547e-01]]))

In [19]:
st.chi2_contingency(tying_goal_count_outcome_pd[['win', 'lose']].values)

(3.3129925789111825, 0.3458384096037368, 3, array([[1.2505e+03, 1.2505e+03],
        [2.5950e+02, 2.5950e+02],
        [1.7500e+01, 1.7500e+01],
        [5.0000e-01, 5.0000e-01]]))

In [20]:
st.chi2_contingency(tying_goal_count_outcome_pd[['lose', 'draw']].values)

(357.25985525212417,
 3.99623356125984e-77,
 3,
 array([[1.03172650e+03, 5.48273504e+02],
        [4.47952137e+02, 2.38047863e+02],
        [4.70153846e+01, 2.49846154e+01],
        [1.30598291e+00, 6.94017094e-01]]))

* 무승부와 승/패 간에는 유의미한 차이가 존재하지만, 승리와 패배 사이에는 유의미한 차이가 존재하지 않음(p<0.01)

|       | HOME                                           | AWAY                                           | TOTAL                                      |
| ----- | ---------------------------------------------- | ---------------------------------------------- | ------------------------------------------ |
| W/D/L | 3~~11.74120350643744,  2.490740857336074e-64~~ | ~~220.23906253853656,  1.665273690589081e-46~~ | 522.9163223755935, 9.710907562634509e-110  |
| W/D   | ~~265.0451946706612,  3.642475777190881e-57~~  | ~~173.6019224685825,  2.0082672778738617e-38~~ | 408.31214120924494, 3.5046209376961894e-88 |
| W/L   | ~~8.25462939028626,  0.04103221604893905~~     | ~~1.1445635943916925,  0.5642364954521273~~    | 3.3129925789111825, 0.3458384096037368     |
| D/L   | ~~184.45299764006117,  9.632944153489867e-40~~ | ~~149.86311447197244, 2.868389816598579e-33~~  | 357.25985525212417, 3.99623356125984e-77   |

#### 통계 확인

In [21]:
if target != 'away':
    mr = 4
else:
    mr = 3
    
index = list(range(mr))
index = index.append('total')

t = pd.DataFrame(tying_goal_count_outcome_pd, columns = ['win', 'draw', 'lose', 'total', 'win_prob', 'draw_prob', 'lose_prob'], index = index)
t.total = t.win + t.draw + t.lose
t.loc['total'] = pd.Series([sum(t[:mr].win), sum(t[:mr].draw), sum(t[:mr].lose), sum(t[:mr].total)], index = ['win', 'draw', 'lose', 'total'])
t.loc[2] = pd.Series([sum(t[2:mr].win), sum(t[2:mr].draw), sum(t[2:mr].lose), sum(t[2:mr].total)], index = ['win', 'draw', 'lose', 'total'])
if mr == 4: t = t.drop(3)
t.win_prob = t.win / t.total
t.draw_prob = t.draw / t.total
t.lose_prob = t.lose / t.total
t.index = ['non tying goal', '1 tying goal', '2 or more tying goal', 'total']
t

Unnamed: 0,win,draw,lose,total,win_prob,draw_prob,lose_prob
non tying goal,1267.0,346.0,1234.0,2847.0,0.44503,0.121531,0.433439
1 tying goal,245.0,412.0,274.0,931.0,0.263158,0.442535,0.294307
2 or more tying goal,16.0,54.0,20.0,90.0,0.177778,0.6,0.222222
total,1528.0,812.0,1528.0,3868.0,0.395036,0.209928,0.395036


* 동점골 횟수가 증가함에 따라 승리비율은 감소, 패배 비율도 감소로 판단 가능
* 동점골 횟수가 증가하면서 무승부의 비율 증가

## 참고자료

#### 로지스틱 회귀분석
* 독립변수: 홈/어웨이, 동점골여부(혹은 횟수)
* 종속변수: 무승부여부(무 vs. 승/패)

In [22]:
logit_data = pd.DataFrame(tying_goal_pd, columns=['match_id', 'tying_goal_flag', 'winning_flag'])
logit_data.reset_index(level=0, inplace=True)
logit_data.match_id = logit_data.match_id.str.split('-').str.get(0)
logit_data.match_id = logit_data.match_id.apply(pd.to_numeric)
logit_data.columns = ['location', 'year', 'tying_goal', 'victory']
logit_data.victory = np.where(logit_data.victory == 0, 1, 0)

# 무승부의 경기 수가 적은 부분을 보완하기 위해 동일한 갯수의 Sample 추출
# 샘플 보완 없이 회귀분석 시 무조건 무승부 아님으로 예측 (이 경우에도 F1-score는 0.5 이상)
logit_data = logit_data[logit_data.victory == 1].append(logit_data[logit_data.victory == 0].sample(len(logit_data[logit_data.victory == 1])))

train_set = logit_data[logit_data.year < 2017]
test_set = logit_data[logit_data.year == 2017]

logit_data.head()

Unnamed: 0,location,year,tying_goal,victory
0,0,2013,1,1
1,1,2013,0,1
8,0,2013,1,1
9,1,2013,0,1
18,0,2013,0,1


* Training Data와 Testing Data 분리
    * 2013~2016년 vs. 2017년 / Random Sampling 두 가지 방식 사용   

In [23]:
from patsy import dmatrices
from sklearn.cross_validation import train_test_split

# 2017년을 test set으로 하는 sampling
y_train, x_train = dmatrices('victory ~ tying_goal', train_set, return_type='dataframe')
y_test, x_test = dmatrices('victory ~ tying_goal', test_set, return_type='dataframe')
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Random sampling
# y, X = dmatrices('victory ~ tying_goal', logit_data, return_type='dataframe')
# y = np.ravel(y)
# x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model = model.fit(x_train, y_train)

# 로지스틱 회귀식의 절편과 alpha 값
list(zip(x_train.columns, np.transpose(model.coef_)))



[('Intercept', array([-0.3183715])), ('tying_goal', array([1.73824051]))]

In [24]:
# Training Set 에서의 정확도
model.score(x_train, y_train)

0.6935105551211884

In [25]:
model.sparsify()

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [35]:
# Test Set을 이용하여 예측 수행

from sklearn import metrics

predicted = model.predict(x_test)
predicted_probs = model.predict_proba(x_test)[:, 1]

# Test Set 에서의 정확도
metrics.accuracy_score(y_test, predicted), metrics.roc_auc_score(y_test, predicted_probs)

(0.6985507246376812, 0.6995866102036701)

In [27]:
confusion_mt = metrics.confusion_matrix(y_test, predicted)
tn, fp, fn, tp = confusion_mt.ravel()
tn, fp, fn, tp, confusion_mt
pd.DataFrame([{'Real Positive': tn, 'Real Negative': fp}, {'Real Positive': fn, 'Real Negative': tn}], index=['Predicted Positive', 'Predicted Negative'])

Unnamed: 0,Real Negative,Real Positive
Predicted Positive,31,140
Predicted Negative,140,73


In [28]:
# F-measure 수행
print(metrics.classification_report(y_test, predicted))

             precision    recall  f1-score   support

        0.0       0.66      0.82      0.73       171
        1.0       0.77      0.58      0.66       174

avg / total       0.71      0.70      0.69       345



#### 동점골을 가장 많이 넣은 선수는??

In [29]:
sum(target_pd.location), len(target_pd.location), len(target_pd[target_pd.tying_goal_flag == True])

(5460, 10920, 1113)

In [30]:
target_pd[target_pd.tying_goal_flag == True].head()

Unnamed: 0,id,location,score_player,match_id,home_team_id,score_team_id,play_time,score_point,diff_score,previous_status,tying_goal_flag
5,7,1,6,2013-1-002,19,19,44,1,0,-1.0,True
9,11,1,10,2013-1-004,12,12,22,1,0,-1.0,True
25,27,1,25,2013-1-009,10,10,23,1,0,-1.0,True
29,31,1,29,2013-1-011,23,23,43,1,0,-1.0,True
43,45,1,32,2013-1-018,2,2,1,1,0,-1.0,True


In [31]:
player_list = db_conn.select_query('SELECT * FROM player_info')

In [32]:
player_list_pd = pd.DataFrame(player_list, columns=['player_id', 'player_name'])
get_player_name = lambda x: player_list_pd.loc[x-1].get(1)

##### 1. 전체(5년) 기간 중 동점골 득점 랭킹

In [33]:
tgpd = pd.DataFrame(target_pd.groupby(['score_player']).sum()[['tying_goal_flag']], columns = ['player_name', 'tying_goal_flag'])
tgpd.reset_index(level=0, inplace=True)
tgpd.score_player = tgpd.score_player.apply(pd.to_numeric)
tgpd.player_name = tgpd.score_player.apply(get_player_name)
tgpd = tgpd.sort_values(by=['tying_goal_flag'], ascending=False)
tgpd[tgpd.tying_goal_flag >= 10]

Unnamed: 0,score_player,player_name,tying_goal_flag
142,227,주민규,15.0
51,145,산토스,12.0
95,185,양동현,12.0
556,60,이종호,12.0
132,218,고경민,11.0
122,209,정조국,11.0
136,221,알렉스8,10.0


##### 2. 연도별 동점골 득점 랭킹

In [34]:
tgpd_year = pd.DataFrame(target_pd, columns = ['id', 'location', 'score_player', 'year', 'match_id', 'home_team_id', 'score_team_id', 'play_time', 'score_point', 'diff_score', 'previous_status', 'tying_goal_flag'])
tgpd_year.year = tgpd_year.match_id.str.split('-').str.get(0)
tgpd_year.year = tgpd_year.year.apply(pd.to_numeric)
tgpd_year = pd.DataFrame(tgpd_year.groupby(['year', 'score_player']).sum()[['tying_goal_flag']], columns= ['player_name', 'tying_goal_flag', 'rank'])
tgpd_year.reset_index(level=[0,1], inplace=True)
tgpd_year.score_player = tgpd_year.score_player.apply(pd.to_numeric)
tgpd_year.player_name = tgpd_year.score_player.apply(get_player_name)
tgpd_year['rank'] = tgpd_year.groupby('year')['tying_goal_flag'].rank(method='min', ascending=False)
tgpd_year = tgpd_year.sort_values(by=['rank', 'year'])
tgpd_year.head(20)

Unnamed: 0,year,score_player,player_name,tying_goal_flag,rank
0,2013,1,데얀,5.0,1.0
434,2014,318,스토야노비치,6.0,1.0
660,2015,218,고경민,6.0,1.0
661,2015,227,주민규,6.0,1.0
835,2015,524,타라바이,6.0,1.0
964,2016,209,정조국,8.0,1.0
1520,2017,752,말컹,6.0,1.0
131,2013,217,박종찬,4.0,2.0
251,2013,46,황진성,4.0,2.0
372,2014,221,알렉스8,5.0,2.0


동점골은 무승부로 갈 확률을 높게한다  - GOOD

이와 같은 개념과 방법으로 결승골 분석을 해보면 좋을 듯..


