### 강의에서 소개된 파이썬 주요 기능
- numpy.linalg.norm: https://numpy.org/doc/stable/reference/generated/numpy.linalg.norm.html
- numpy.arctan: https://numpy.org/doc/stable/reference/generated/numpy.arctan.html
- numpy.where: https://numpy.org/doc/stable/reference/generated/numpy.where.html

### 전경기 슈팅 데이터 불러오기

In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

##### (1) 이벤트 데이터 불러오기 

In [2]:
# Load every match's event log from each competition folder and stack all of
# them into a single DataFrame tagged with a `competition_name` column.
events_root = 'data/refined_events'

# os.listdir returns entries in arbitrary order, so sort them to get a
# reproducible row order in `events`. Hidden entries (names starting with '.')
# and stray non-directory files are skipped.
competitions = sorted(
    x for x in os.listdir(events_root)
    if not x.startswith('.') and os.path.isdir(os.path.join(events_root, x))
)
match_events_list = []

for competition_name in competitions:
    # The first column of matches.csv becomes the index and supplies the match
    # IDs; each ID is expected to have a sibling '<match_id>.pkl' file.
    match_df = pd.read_csv(f'{events_root}/{competition_name}/matches.csv', index_col=0, encoding='utf-8-sig')

    for match_id in tqdm(match_df.index, desc=f"{competition_name + ' ':10s}"):
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        match_events = pd.read_pickle(f'{events_root}/{competition_name}/{match_id}.pkl')
        match_events['competition_name'] = competition_name
        match_events_list.append(match_events)

# ignore_index=True renumbers the rows 0..n-1 across all matches.
events = pd.concat(match_events_list, ignore_index=True)
events

England   : 100%|███████████████████████████████████████████████████████████████████| 380/380 [00:01<00:00, 213.84it/s]
European_Championship : 100%|█████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 209.88it/s]
France    : 100%|███████████████████████████████████████████████████████████████████| 380/380 [00:01<00:00, 233.13it/s]
Germany   : 100%|███████████████████████████████████████████████████████████████████| 306/306 [00:01<00:00, 212.05it/s]
Italy     : 100%|███████████████████████████████████████████████████████████████████| 380/380 [00:01<00:00, 233.42it/s]
Spain     : 100%|███████████████████████████████████████████████████████████████████| 380/380 [00:01<00:00, 227.95it/s]
World_Cup : 100%|█████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 243.34it/s]


Unnamed: 0,match_id,event_id,period,time,team_id,team_name,player_id,player_name,event_type,sub_event_type,tags,start_x,start_y,end_x,end_y,competition_name
0,2499719,177959171,1H,2.759,1609,Arsenal,25413,A. Lacazette,Pass,Simple pass,[Accurate],50.96,34.68,32.24,14.96,England
1,2499719,177959172,1H,4.947,1609,Arsenal,370224,R. Holding,Pass,High pass,[Accurate],32.24,14.96,53.04,17.00,England
2,2499719,177959173,1H,6.542,1609,Arsenal,3319,M. Özil,Pass,Head pass,[Accurate],53.04,17.00,36.40,19.72,England
3,2499719,177959174,1H,8.143,1609,Arsenal,120339,Mohamed Elneny,Pass,Head pass,[Accurate],36.40,19.72,42.64,3.40,England
4,2499719,177959175,1H,10.302,1609,Arsenal,167145,Bellerín,Pass,Simple pass,[Accurate],42.64,3.40,74.88,8.16,England
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3273243,2058017,263885652,2H,2978.302,9598,Croatia,3476,I. Rakitić,Pass,Simple pass,[Accurate],47.84,54.40,66.56,63.92,World_Cup
3273244,2058017,263885653,2H,2979.085,9598,Croatia,14812,I. Perišić,Others on the ball,Touch,[],66.56,63.92,85.28,66.64,World_Cup
3273245,2058017,263885654,2H,2983.449,9598,Croatia,14812,I. Perišić,Pass,Cross,"[Left foot, High, Not accurate]",85.28,66.64,104.00,34.00,World_Cup
3273246,2058017,263885613,2H,2985.869,4418,France,25381,H. Lloris,Goalkeeper leaving line,Goalkeeper leaving line,[],0.00,34.00,14.56,38.76,World_Cup


##### (2) 슈팅 데이터 필터링

In [3]:
# Select shot attempts: events whose event_type is 'Shot', plus events whose
# sub_event_type is a free-kick shot or a penalty.
set_piece_shot_types = ['Free kick shot', 'Penalty']
shot_mask = events['event_type'].eq('Shot') | events['sub_event_type'].isin(set_piece_shot_types)

# Renumber the surviving rows from 0 so later feature tables share this index.
shots = events.loc[shot_mask].reset_index(drop=True)
shots

Unnamed: 0,match_id,event_id,period,time,team_id,team_name,player_id,player_name,event_type,sub_event_type,tags,start_x,start_y,end_x,end_y,competition_name
0,2499719,177959212,1H,94.596,1609,Arsenal,25413,A. Lacazette,Shot,Shot,"[Goal, Right foot, Opportunity, Position: Goal...",91.52,40.12,104.0,34.00,England
1,2499719,177959247,1H,179.855,1631,Leicester City,26150,R. Mahrez,Shot,Shot,"[Left foot, Opportunity, Position: Out center ...",88.40,32.64,104.0,34.00,England
2,2499719,177959280,1H,254.745,1631,Leicester City,14763,S. Okazaki,Shot,Shot,"[Goal, Head/body, Opportunity, Position: Goal ...",99.84,32.64,104.0,34.00,England
3,2499719,177959289,1H,425.824,1609,Arsenal,7868,A. Oxlade-Chamberlain,Shot,Shot,"[Left foot, Opportunity, Position: Out high le...",84.24,45.56,104.0,34.00,England
4,2499719,177959429,1H,815.462,1609,Arsenal,7868,A. Oxlade-Chamberlain,Shot,Shot,"[Right foot, Opportunity, Position: Goal low l...",78.00,47.60,104.0,34.00,England
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45940,2058017,263885208,2H,1389.183,9598,Croatia,14943,M. Mandžukić,Shot,Shot,"[Goal, Right foot, Opportunity, Position: Goal...",95.68,31.96,104.0,34.00,World_Cup
45941,2058017,263885299,2H,1802.299,9598,Croatia,69409,Š. Vrsaljko,Shot,Shot,"[Right foot, Opportunity, Position: Out center...",67.60,26.52,104.0,34.00,World_Cup
45942,2058017,263885348,2H,1933.996,9598,Croatia,3476,I. Rakitić,Shot,Shot,"[Left foot, Blocked, Not accurate]",84.24,45.56,93.6,43.52,World_Cup
45943,2058017,263885485,2H,2487.443,4418,France,28115,N. Fekir,Shot,Shot,"[Left foot, Opportunity, Position: Goal center...",81.12,16.32,104.0,34.00,World_Cup


### 슈팅별 특징 추출

##### (1) 슈팅 위치 및 거리 계산

In [4]:
# Shot location expressed relative to the point (104, 34).
# NOTE(review): presumably a 104 x 68 pitch with the attacked goal centred at
# (104, 34) — confirm against the data refinement step.
shot_features = pd.DataFrame(index=shots.index)
shot_features['x'] = 104 - shots['start_x']
shot_features['y'] = shots['start_y'] - 34

# Euclidean distance of each (x, y) pair, computed with one vectorized norm
# over the (n_shots, 2) array rather than one Python-level call per row.
shot_features['distance'] = np.linalg.norm(shot_features[['x', 'y']].to_numpy(), axis=1)
shot_features

Unnamed: 0,x,y,distance
0,12.48,6.12,13.899813
1,15.60,-1.36,15.659170
2,4.16,-1.36,4.376665
3,19.76,11.56,22.893038
4,26.00,13.60,29.342120
...,...,...,...
45940,8.32,-2.04,8.566446
45941,36.40,-7.48,37.160603
45942,19.76,11.56,22.893038
45943,22.88,-17.68,28.914993


##### (2) 슈팅 각도 계산

In [5]:
x = shot_features['x']
y = shot_features['y']
goal_width = 7.32

# Angle subtended by the two goalposts, located at (0, +/- goal_width / 2), as
# seen from (x, y):
#     tan(angle) = goal_width * x / (x^2 + y^2 - (goal_width / 2)^2)
# The denominator is the dot product of the two shooter-to-post vectors, so it
# is negative exactly when the angle is obtuse.
denominator = x ** 2 + y ** 2 - (goal_width / 2) ** 2
angles = np.arctan((goal_width * x) / denominator) * 180 / np.pi

# arctan only returns values in [-90, 90] degrees, so obtuse angles need +180.
# Branch on the sign of the denominator rather than on `angles >= 0`: for a
# shot on the goal line between the posts (x == 0) the ratio is -0.0, which
# compares as >= 0 and would be reported as 0 degrees instead of 180.
shot_features['angle'] = np.where(denominator >= 0, angles, angles + 180)
shot_features

Unnamed: 0,x,y,distance,angle
0,12.48,6.12,13.899813,26.933236
1,15.60,-1.36,15.659170,26.224941
2,4.16,-1.36,4.376665,79.289489
3,19.76,11.56,22.893038,15.813597
4,26.00,13.60,29.342120,12.655803
...,...,...,...,...
45940,8.32,-2.04,8.566446,45.433179
45941,36.40,-7.48,37.160603,11.025434
45942,19.76,11.56,22.893038,15.813597
45943,22.88,-17.68,28.914993,11.507036


##### (3) 슈팅 유형 및 득점 여부 추출

In [6]:
def _tag_flag(tag_name):
    """Return a 0/1 Series telling, per shot, whether `tag_name` is among its tags."""
    return shots['tags'].apply(lambda tag_list: tag_name in tag_list).astype(int)


# NOTE(review): the shot filter selects free kicks through
# sub_event_type == 'Free kick shot', whereas this flag compares event_type
# against 'Free kick' — confirm that this exact label occurs in event_type.
shot_features['freekick'] = shots['event_type'].eq('Free kick').astype(int)

# Binary indicators derived from each shot's tag collection.
shot_features['header'] = _tag_flag('Head/body')
shot_features['goal'] = _tag_flag('Goal')
shot_features

Unnamed: 0,x,y,distance,angle,freekick,header,goal
0,12.48,6.12,13.899813,26.933236,0,0,1
1,15.60,-1.36,15.659170,26.224941,0,0,0
2,4.16,-1.36,4.376665,79.289489,0,1,1
3,19.76,11.56,22.893038,15.813597,0,0,0
4,26.00,13.60,29.342120,12.655803,0,0,0
...,...,...,...,...,...,...,...
45940,8.32,-2.04,8.566446,45.433179,0,0,1
45941,36.40,-7.48,37.160603,11.025434,0,0,0
45942,19.76,11.56,22.893038,15.813597,0,0,0
45943,22.88,-17.68,28.914993,11.507036,0,0,0


##### (4) 슈팅 데이터 연결 및 저장

In [7]:
# Replace the raw start/end coordinates with the derived features and move
# `competition_name` to the front. Columns are dropped by name rather than by
# position, so re-running this cell raises a KeyError instead of silently
# dropping other columns and duplicating the feature columns.
shot_info = shots.drop(columns=['start_x', 'start_y', 'end_x', 'end_y'])
shot_info = shot_info[
    ['competition_name'] + [col for col in shot_info.columns if col != 'competition_name']
]

# Both frames share the same row index, so axis=1 lines them up row by row.
shots = pd.concat([shot_info, shot_features], axis=1)
shots

Unnamed: 0,competition_name,match_id,event_id,period,time,team_id,team_name,player_id,player_name,event_type,sub_event_type,tags,x,y,distance,angle,freekick,header,goal
0,England,2499719,177959212,1H,94.596,1609,Arsenal,25413,A. Lacazette,Shot,Shot,"[Goal, Right foot, Opportunity, Position: Goal...",12.48,6.12,13.899813,26.933236,0,0,1
1,England,2499719,177959247,1H,179.855,1631,Leicester City,26150,R. Mahrez,Shot,Shot,"[Left foot, Opportunity, Position: Out center ...",15.60,-1.36,15.659170,26.224941,0,0,0
2,England,2499719,177959280,1H,254.745,1631,Leicester City,14763,S. Okazaki,Shot,Shot,"[Goal, Head/body, Opportunity, Position: Goal ...",4.16,-1.36,4.376665,79.289489,0,1,1
3,England,2499719,177959289,1H,425.824,1609,Arsenal,7868,A. Oxlade-Chamberlain,Shot,Shot,"[Left foot, Opportunity, Position: Out high le...",19.76,11.56,22.893038,15.813597,0,0,0
4,England,2499719,177959429,1H,815.462,1609,Arsenal,7868,A. Oxlade-Chamberlain,Shot,Shot,"[Right foot, Opportunity, Position: Goal low l...",26.00,13.60,29.342120,12.655803,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45940,World_Cup,2058017,263885208,2H,1389.183,9598,Croatia,14943,M. Mandžukić,Shot,Shot,"[Goal, Right foot, Opportunity, Position: Goal...",8.32,-2.04,8.566446,45.433179,0,0,1
45941,World_Cup,2058017,263885299,2H,1802.299,9598,Croatia,69409,Š. Vrsaljko,Shot,Shot,"[Right foot, Opportunity, Position: Out center...",36.40,-7.48,37.160603,11.025434,0,0,0
45942,World_Cup,2058017,263885348,2H,1933.996,9598,Croatia,3476,I. Rakitić,Shot,Shot,"[Left foot, Blocked, Not accurate]",19.76,11.56,22.893038,15.813597,0,0,0
45943,World_Cup,2058017,263885485,2H,2487.443,4418,France,28115,N. Fekir,Shot,Shot,"[Left foot, Opportunity, Position: Goal center...",22.88,-17.68,28.914993,11.507036,0,0,0


In [8]:
# Write the combined shot table (event info + features) to disk as a pickle.
output_path = 'data/shots.pkl'
shots.to_pickle(output_path)