# Wyscout Data for Inter 2017-2018

In questo notebook vengono analizzati i tiri effettuati dai calciatori dell'Inter durante la stagione 2017-2018 <br>
In particolare: <br>
1) conteggio dei tiri per ogni calciatore durante la stagione <br>
2) conteggio dei goal per ogni calciatore durante la stagione <br>
3) rank dei calciatore per percentuale realizzativa o numero di tiri per goal 

## utilities

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import util
import warnings
warnings.simplefilter("ignore")
path = "C:/Users/Mauro/OneDrive/Documenti/Football/Inter_2017_2018"
inter_id = 3161

**COUNT OF TOTAL SHOTS FOR EACH PLAYER IN EACH MATCH**

In [2]:
inter_matches_events = pd.read_csv(os.path.join(path, "inter_matches_events.csv"))
inter_matches_events.head(3)

Unnamed: 0,eventId,eventName,eventSec,id,matchId,matchPeriod,playerId,subEventId,subEventName,teamId,...,post_high_center,post_high_left,post_high_right,through,fairplay,lost,neutral,won,accurate,not_accurate
0,8,Pass,0.924246,180460660,2575963,1H,269152,85.0,Simple pass,3176,...,no,no,no,no,no,no,no,no,yes,no
1,8,Pass,1.679327,180458825,2575963,1H,26518,85.0,Simple pass,3176,...,no,no,no,no,no,no,no,no,yes,no
2,8,Pass,2.980452,180458841,2575963,1H,20866,83.0,High pass,3176,...,no,no,no,no,no,no,no,no,yes,no


In [3]:
#filter by Inter shots (eventId == 10 and teamId == 3161) 
shots = inter_matches_events.query("eventId == 10 and teamId == " + str(inter_id))
shots.head(3)

Unnamed: 0,eventId,eventName,eventSec,id,matchId,matchPeriod,playerId,subEventId,subEventName,teamId,...,post_high_center,post_high_left,post_high_right,through,fairplay,lost,neutral,won,accurate,not_accurate
165,10,Shot,531.299507,180458918,2575963,1H,116349,100.0,Shot,3161,...,no,no,no,no,no,no,no,no,no,yes
204,10,Shot,642.466815,180458960,2575963,1H,116349,100.0,Shot,3161,...,no,no,no,no,no,no,no,no,no,yes
218,10,Shot,683.112315,180458970,2575963,1H,14812,100.0,Shot,3161,...,no,no,no,no,no,no,no,no,no,yes


In [4]:
#merge with match data and player data
inter_matches = pd.read_csv(os.path.join(path, "inter_matches.csv"))
inter_players = pd.read_csv(os.path.join(path, "inter_players.csv"))
shots = pd.merge(shots, inter_matches[["wyId", "gameweek", "label"]], left_on="matchId", right_on="wyId")
shots = pd.merge(shots, inter_players[["wyId", "shortName", "role_name"]], left_on="playerId", right_on="wyId")
shots.head(3)

Unnamed: 0,eventId,eventName,eventSec,id,matchId,matchPeriod,playerId,subEventId,subEventName,teamId,...,neutral,won,accurate,not_accurate,wyId_x,gameweek,label,wyId_y,shortName,role_name
0,10,Shot,531.299507,180458918,2575963,1H,116349,100.0,Shot,3161,...,no,no,no,yes,2575963,1,"Internazionale - Fiorentina, 3 - 0",116349,M. Vecino,Midfielder
1,10,Shot,642.466815,180458960,2575963,1H,116349,100.0,Shot,3161,...,no,no,no,yes,2575963,1,"Internazionale - Fiorentina, 3 - 0",116349,M. Vecino,Midfielder
2,10,Shot,1509.942769,182257924,2575976,2H,116349,100.0,Shot,3161,...,no,no,no,yes,2575976,2,"Roma - Internazionale, 1 - 3",116349,M. Vecino,Midfielder


In [19]:
#group by for match (matchId) and player (playerId)
total_shots = shots.groupby(["matchId", "gameweek", "label", "playerId", "shortName"]).size().reset_index()
total_shots.columns = ["matchId", "gameweek", "label", "playerId", "shortName", "#shots"]
total_shots.head(3)

Unnamed: 0,matchId,gameweek,label,playerId,shortName,#shots
0,2575963,1,"Internazionale - Fiorentina, 3 - 0",14812,I. Perišić,3
1,2575963,1,"Internazionale - Fiorentina, 3 - 0",20556,A. Candreva,2
2,2575963,1,"Internazionale - Fiorentina, 3 - 0",69968,M. Brozović,1


In [20]:
total_shots = pd.DataFrame(total_shots.groupby(["playerId", "shortName"])["#shots"].sum().sort_values(ascending=False))
total_shots

Unnamed: 0_level_0,Unnamed: 1_level_0,#shots
playerId,shortName,Unnamed: 2_level_1
14812,I. Perišić,112
206314,M. Icardi,87
20556,A. Candreva,70
69968,M. Brozović,41
116349,M. Vecino,37
20626,Eder,26
241676,R. Gagliardini,22
352993,Y. Karamoh,20
138408,M. Škriniar,19
3543,Borja Valero,17


In [18]:
goals = shots.query("goal == 'yes' and accurate == 'yes'").groupby(["matchId", "gameweek", "label", "playerId", "shortName"]).size().reset_index()
goals.columns = ["matchId", "gameweek", "label", "playerId", "shortName", "#goals"]
goals = pd.DataFrame(goals.groupby(["playerId", "shortName"])["#goals"].sum().sort_values(ascending=False))
goals

Unnamed: 0_level_0,Unnamed: 1_level_0,#goals
playerId,shortName,Unnamed: 2_level_1
206314,M. Icardi,23
14812,I. Perišić,11
138408,M. Škriniar,4
116349,M. Vecino,3
69968,M. Brozović,3
20626,Eder,3
21094,D. D'Ambrosio,2
20519,A. Ranocchia,2
3543,Borja Valero,2
3344,Rafinha,2


**CALCULATE SHOT PER GOAL AND SHOTS TO GOAL PERCENTAGE (MIN 10 SHOTS)**

In [33]:
df = pd.merge(total_shots, goals, left_index=True, right_index=True)
df = df[df["#shots"] > 10]
df["shots_per_goal"] = round(df["#shots"] / df["#goals"], 2)
df["shots_to_goal_percentage"] = round(df["#goals"] / df["#shots"] * 100, 2)
df.sort_values(by="shots_to_goal_percentage", ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,#shots,#goals,shots_per_goal,shots_to_goal_percentage
playerId,shortName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
206314,M. Icardi,87,23,3.78,26.44
138408,M. Škriniar,19,4,4.75,21.05
21094,D. D'Ambrosio,15,2,7.5,13.33
3344,Rafinha,16,2,8.0,12.5
3543,Borja Valero,17,2,8.5,11.76
20626,Eder,26,3,8.67,11.54
14812,I. Perišić,112,11,10.18,9.82
116349,M. Vecino,37,3,12.33,8.11
69968,M. Brozović,41,3,13.67,7.32
352993,Y. Karamoh,20,1,20.0,5.0
