In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import PyPizza, FontManager
from scipy import stats
import ast
pd.options.mode.chained_assignment = None 
import json
import pickle

### Radar Plots

Starting with radar plots to scout players.

I'll use variables from wyscout: assists, key passes, smart passes, aerial duels won, ground attacking duels won

And variables created by me: non penalty xg, passes in final third and receptions in final third

In [2]:
class Summary:
    def __init__(self, country):
        self.base_df = pd.read_csv(f"data_wyscout/{country}_17_18.csv")
        self.evs =  self.base_df[self.base_df.positions.apply(lambda x: len(ast.literal_eval(x)) == 2)]
        self.matches = pd.read_csv(f"data_wyscout/matches_{country}_17_18.csv")
        
    def calculate_angle(self, x, y, w = 7.32):
        up = w*x
        bottom = x**2 + y**2 - (w/2)**2

        tan_ang = up/bottom

        ang = np.arctan(tan_ang)
        if ang <= 0:
            ang = ang+np.pi

        return ang

    def xg_calc(self):
        full_df = self.evs
        with open('models/xg_headers.pkl', 'rb') as handle:
            da_lr_headers = pickle.load(handle)

        with open('models/xg_shots.pkl', 'rb') as handle:
            da_lr_shots = pickle.load(handle)

        with open('models/sc_headers.pkl', 'rb') as handle:
            sc_headers = pickle.load(handle)

        with open('models/sc_shots.pkl', 'rb') as handle:
            sc_shots = pickle.load(handle)
            
        df = full_df[full_df["eventName"] == "Shot"]

        df["loc_x"] = df.positions.apply(lambda x: (100 - ast.literal_eval(x)[0]["x"])* (105/100)) #100 - x = distance to goal. it helps to calc goal dist
        df["loc_y"] = df.positions.apply(lambda x: ast.literal_eval(x)[0]["y"] * 68/100)
        df["c"] = df.positions.apply(lambda x: abs(ast.literal_eval(x)[0]["y"] - 50) * 68/100) #distance to the center of the goal (50)

        df["dist"] = df.apply(lambda x: np.sqrt(x["loc_x"]**2 + x["c"]**2), axis=1) #pytaghoras

        df["angle"] = df.apply(lambda x: self.calculate_angle(x["loc_x"], x["c"]), axis=1)

        headers = df[df.tags.apply(lambda x: True in [True if (i["id"] == 403) else False for i in ast.literal_eval(x)])]
        shots = df.drop(headers.index)

        headers["xG"] = da_lr_headers.predict_proba(sc_headers.transform(headers[["dist", "angle"]]))[:,1]

        shots["xG"] = da_lr_shots.predict_proba(sc_shots.transform(shots[["dist", "angle"]]))[:,1]

        xg_full = pd.concat([shots, headers]).groupby("playerId").sum()["xG"].reset_index()
        return xg_full
    
    

    def FinalThirdPasses(self):
        final_third = 100/3
        aux = self.evs
        
        aux["matchPeriod"] = aux.matchPeriod.apply(lambda x: int (x[0]))
        
        aux["loc_x"] = aux.positions.apply(lambda x: (100 - ast.literal_eval(x)[0]["x"])* (105/100)) #100 - x = distance to goal. it helps to calc goal dist
        aux["loc_y"] = aux.positions.apply(lambda x: ast.literal_eval(x)[0]["y"] * 68/100)

        aux["end_loc_x"] = aux.positions.apply(lambda x: (100 - ast.literal_eval(x)[1]["x"])* (105/100)) #100 - x = distance to goal. it helps to calc goal dist
        aux["end_loc_y"] = aux.positions.apply(lambda x: ast.literal_eval(x)[1]["y"] * 68/100)
        
        matches = aux.matchId.drop_duplicates()
        full_df_pass = pd.DataFrame()

        for m in matches:
            df_m = aux[aux["matchId"] == m]
            #pass_maker = []
            #pass_receiver = []


            #periods = df_m.matchPeriod.drop_duplicates()

            #for p in periods:
            pass_maker = []
            pass_receiver = []

            df_p = df_m.sort_values(["matchPeriod", "eventSec"])

            df_p["nextPlayer"] = df_p["playerId"].shift(-1)

            df_pass = df_p[df_p["eventName"] == "Pass"]
            
            df_ft = df_pass[df_pass["end_loc_x"] <= final_third]

            #print(len(df_pass))

            for i, vals in df_ft.iterrows():

                tags = vals.tags
                #print(tags)
                #loc_rec = vals.end_loc_x#.values[0]
                kind = vals.eventName#.values[0]
                player = vals.playerId#.values[0]
                receiver = vals.nextPlayer#.values[0]


                #if loc_rec <= final_third:
                    #print(tags)
                if "{'id': 1801}" in tags:#True in [1801 == x["id"] for x in ast.literal_eval(tags)]:
                    #print(receiver)
                    pass_maker.append(player)
                    pass_receiver.append(receiver)
                else:
                    pass_maker.append("")
                    pass_receiver.append("")
                #else:
                #pass_maker.append("")
                 #   pass_receiver.append("")

            #print(len(pass_maker))
            df_ft["pass_maker"] = pass_maker
            df_ft["pass_receiver"] = pass_receiver
            full_df_pass = full_df_pass.append(df_ft)

        ft = full_df_pass[full_df_pass["pass_maker"] != ""]
        ft_recs = full_df_pass[full_df_pass["pass_receiver"] != ""]

        makers = ft.groupby("pass_maker").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "passes", "pass_maker": "playerId"})

        recs = ft_recs.groupby("pass_receiver").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "recs", "pass_receiver": "playerId"})

        pass_ft = pd.merge(makers, recs).fillna(0)

        return pass_ft
    
    def duels(self):
    
        ad = self.evs[self.evs["subEventName"] == "Air duel"]

        won_air_duels = ad[ad.tags.apply(lambda x: "{'id': 703}" in x)].groupby("playerId").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "won air duels"})

        gd = self.evs[self.evs["subEventName"] == "Ground attacking duel"]

        won_atk_duels = gd[gd.tags.apply(lambda x: "{'id': 703}" in x)].groupby("playerId").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "won atk ground duels"})

        duels = pd.merge(won_atk_duels, won_air_duels, how="outer").fillna(0)

        return duels

    def majorStats(self):
        df = self.evs
        shots = df.loc[df["subEventName"] == "Shot"]

        goals = shots[shots.tags.apply(lambda x: "{'id': 101}" in x)].groupby("playerId").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "goals"})

        passes = df.loc[df["eventName"] == "Pass"]
        assists = passes[passes.tags.apply(lambda x: "{'id': 301}" in x)].groupby("playerId").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "assists"})
        key_passes = passes[passes.tags.apply(lambda x: "{'id': 302}" in x)].groupby("playerId").end_loc_x.count().reset_index().sort_values("end_loc_x").rename(columns = {"end_loc_x": "key_passes"})

        pass_anl = pd.merge(assists, key_passes, how="outer").fillna(0)

        full_anl = pd.merge(goals, pass_anl, how="outer").fillna(0)

        return full_anl
    
    def minutes_played(self, df):
        ms = df.wyId.values

        #print(ms)
        player = []
        for m in ms:
            df_m = df[df["wyId"] == m]

            team_data = ast.literal_eval(df_m.teamsData.values[0])


            for team in team_data.keys():
                if team_data[team]["formation"]["substitutions"] != "null":
                    out_players = [a["playerOut"] for a in team_data[team]["formation"]["substitutions"]]
                    in_players = [a["playerIn"] for a in team_data[team]["formation"]["substitutions"]]
                    minute = [a["minute"] for a in team_data[team]["formation"]["substitutions"]]

                    for p in in_players:
                        time_played = minute[in_players.index(p)]
                        if time_played > 90:
                            player.append([p, 1])
                        else:
                            player.append([p, 90 - time_played])

                starting_lineup = team_data[team]["formation"]["lineup"]

                for s in starting_lineup:
                    p = s["playerId"]
                    if p in out_players:
                        time_played = minute[out_players.index(p)]
                        if time_played > 90:
                            player.append([s["playerId"], 90])
                        else:
                            player.append([s["playerId"], time_played])
                    else:
                        player.append([s["playerId"], 90])



        return player

    
    def createSummary(self, position, minutes):
        print("calculating xG")
        xg = self.xg_calc()
        print("calculating pass into Final Third")
        pass_ft = self.FinalThirdPasses()
        print("calculating duels")
        duels = self.duels()
        print("calculating major stats")
        ms = self.majorStats()
        
        print("making summary")
        summary = pd.merge(xg, pass_ft, how="outer").fillna(0)
        summary = pd.merge(summary, duels, how="outer").fillna(0)
        summary = pd.merge(summary, ms, how="outer").fillna(0)
        
        summ_name = pd.merge(summary, self.evs[["playerId", "shortName", "roleName"]].drop_duplicates(), left_on="playerId", right_on="playerId", how="left")
        z = self.minutes_played(self.matches)
        
        minP = pd.DataFrame(z, columns=["playerId", "MinutesPlayed"])

        mp = minP.groupby("playerId").sum().reset_index()

        summ_mp = pd.merge(summ_name, mp, left_on="playerId", right_on="playerId", how="left")

        summ_adj = summ_mp.copy()
        for col in summ_mp.columns[1:-3].values:
            summ_adj[col] = summ_mp.apply(lambda x: x[col]*90/x["MinutesPlayed"], axis=1)
        
        summ_pos = summ_adj[(summ_adj["roleName"] == position) & (summ_adj["MinutesPlayed"] >= minutes)]
        return summ_pos

In [3]:
s = Summary("England")

summ = s.createSummary("Forward", 400)

calculating xG
calculating pass into Final Third
calculating duels
calculating major stats
making summary


In [4]:
summ[summ["shortName"] == "Gabriel Jesus"]

Unnamed: 0,playerId,xG,passes,recs,won atk ground duels,won air duels,goals,assists,key_passes,shortName,roleName,MinutesPlayed
401,340386,0.552077,9.700599,14.550898,3.071856,1.077844,0.700599,0.107784,0.323353,Gabriel Jesus,Forward,1670


In [5]:
summ

Unnamed: 0,playerId,xG,passes,recs,won atk ground duels,won air duels,goals,assists,key_passes,shortName,roleName,MinutesPlayed
23,471,0.226927,4.474432,7.414773,4.730114,6.136364,0.255682,0.000000,0.127841,W. Bony,Forward,704
27,3324,0.452667,7.746615,13.534816,7.398453,2.088975,0.478723,0.174081,0.261122,Álvaro Morata,Forward,2068
28,3326,0.133727,5.792079,10.693069,4.900990,0.000000,0.148515,0.148515,0.445545,Jesé Rodríguez,Forward,606
29,3327,0.320795,6.031469,8.024476,2.622378,7.027972,0.209790,0.052448,0.209790,Joselu,Forward,1716
30,3348,0.148530,9.187935,11.693735,6.473318,0.000000,0.208817,0.000000,0.208817,Deulofeu,Forward,431
...,...,...,...,...,...,...,...,...,...,...,...,...
393,293687,0.260157,5.522946,11.670224,3.601921,7.107791,0.192102,0.192102,0.192102,D. Calvert-Lewin,Forward,1874
401,340386,0.552077,9.700599,14.550898,3.071856,1.077844,0.700599,0.107784,0.323353,Gabriel Jesus,Forward,1670
402,343951,0.338379,3.407105,7.181130,4.874782,2.725684,0.262085,0.052417,0.157251,T. Abraham,Forward,1717
410,377071,0.330892,7.136283,13.762832,7.773451,4.141593,0.159292,0.127434,0.159292,Richarlison,Forward,2825


In [6]:
summ.to_csv("summaries/summ_en_fwd_17_18.csv", index=False)

In [7]:
s = Summary("Spain")

summ = s.createSummary("Forward", 400)

calculating xG
calculating pass into Final Third
calculating duels
calculating major stats
making summary


In [8]:
summ.to_csv("summaries/summ_es_fwd_17_18.csv", index=False)

In [9]:
summ

Unnamed: 0,playerId,xG,passes,recs,won atk ground duels,won air duels,goals,assists,key_passes,shortName,roleName,MinutesPlayed
0,151,0.195644,8.737864,10.149365,3.226288,2.218073,0.201643,0.201643,0.268857,J. Guidetti,Forward,1339
3,1751,0.133740,12.700348,15.522648,6.271777,0.000000,0.000000,0.000000,0.313589,O. Tannane,Forward,574
11,3290,0.257270,5.496503,9.062937,2.937063,4.825175,0.251748,0.041958,0.083916,Aduriz,Forward,2145
12,3291,0.339999,7.521614,11.930836,2.723343,0.648415,0.518732,0.389049,0.389049,Paco Alcácer,Forward,694
13,3293,0.107930,9.159520,12.144082,5.660377,1.234991,0.051458,0.102916,0.411664,P. Piatti,Forward,1749
...,...,...,...,...,...,...,...,...,...,...,...,...
434,393881,0.035466,8.155340,11.941748,9.029126,0.000000,0.000000,0.145631,0.436893,H. Toledo,Forward,618
436,395636,0.369103,4.505852,7.636541,3.745124,2.604031,0.526658,0.058518,0.204811,Maxi Gómez,Forward,3076
445,424624,0.046086,8.215461,5.106908,5.773026,1.332237,0.000000,0.000000,0.074013,O. Etebo,Forward,1216
447,447821,0.218829,4.003759,10.093985,3.890977,5.018797,0.225564,0.000000,0.169173,Y. En-Nesyri,Forward,1596
