<a href="https://colab.research.google.com/github/sushant1985/CV/blob/main/Take_the_ball%2C_Pass_the_ball.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#This notebook explores passing motifs A->B->A or A->B->C involving two (A, B) or three (A, B, C) players taking part in quick pass exchanges. Both passes need to be successful to be considered at all. The time difference between B receiving the ball and releasing it again should be no more than 2 seconds (feel free to play around with this cutoff). We explore everything from the point of view of player B - his ability to combine quickly by receiving from and passing to other players. An A->B->A motif is something like a wall pass. A->B->C motifs are more general. In particular, we will plot heatmaps using the wonderful mplsoccer package, and also look at some clustering techniques to group similar-looking sequences. The data comes from StatsBomb's Open Data repository. Run all the cells sequentially every time. The code has been hidden to give this the look of an "explorable" automated app, but feel free to click on SHOW CODE at every cell to explore the code. If you notice anything off/incorrect/inefficient, please reach out to me at @Soumyaj15209314 on Twitter. If you haven't explored Google Colab notebooks before: to run a cell, click on the small black disc with a triangle inside, right next to where it says SHOW CODE. Wait for the cell to run completely before moving on to the next one.

#Let's start by importing a whole bunch of stuff

In [None]:
#@title

!pip install socceraction --q
%load_ext autoreload
%autoreload 2
import os;
import warnings
import pandas as pd
pd.set_option('display.max_columns', None)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import tqdm
import socceraction.spadl as spadl
import socceraction.spadl.statsbomb as statsbomb
import requests
import json
from pandas import json_normalize

import numpy as np
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup
import requests
import pandas as pd
import json
import matplotlib as mpl
!pip install gdown --quiet
import ipywidgets as widgets
import gdown
# Download the Slabo font file next to the notebook
gdown.download('https://drive.google.com/uc?id=13ztlAubCFZN6TT3k6KFbgxuRWJSMdkaD',
               'slabo.ttf',quiet=True)
import matplotlib.font_manager as fm
# Register the downloaded font with Matplotlib. ``createFontList`` is
# deprecated since Matplotlib 3.2 and slated for removal, and
# ``FontManager.addfont`` is the documented replacement; fall back to the old
# call only on releases that predate ``addfont``.
if hasattr(fm.fontManager, 'addfont'):
    fm.fontManager.addfont('slabo.ttf')
else:
    fm.fontManager.ttflist += fm.createFontList(['slabo.ttf'])
mpl.rc('font', family='Slabo 27px')
!pip install mplsoccer==0.0.21 --q
from mplsoccer.pitch import Pitch, add_image
import math
import matplotlib.image as mpimg
import matplotlib.patheffects as path_effects
import time
from PIL import Image
from urllib.request import urlopen
from matplotlib.collections import PatchCollection
from matplotlib import rcParams
# Global Matplotlib theme used by the figures below: Slabo font, a dark
# (#082630) figure/axes background and off-white (#edece9) labels, ticks and text.
mpl.rcParams['font.family'] = 'Slabo 27px'
# Name of a second font, kept as a module-level string (not used in this cell)
anotherfont = 'Oxygen'
mpl.rcParams['figure.facecolor'] = '#082630'
mpl.rcParams['axes.facecolor'] = '#082630'
mpl.rcParams['axes.labelcolor'] = '#edece9'
mpl.rcParams['xtick.color'] = '#edece9'
mpl.rcParams['ytick.color'] = '#edece9'
mpl.rcParams['text.color'] = '#edece9' 
!pip install highlight_text==0.0.5 --q
from highlight_text import ax_text, fig_text  
scattercolor='grey'
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import ipywidgets as widgets
from ipywidgets import interact, interactive, fixed, interact_manual

from sklearn.cluster import KMeans 
from sklearn import metrics 
from scipy.spatial.distance import cdist
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN
from sklearn.cluster import OPTICS
from scipy.spatial import distance
from sklearn.cluster import AffinityPropagation
!pip install fastdtw --q
from fastdtw import fastdtw 
import pylab
import scipy.spatial.distance as sd
from numpy import (array, dot, arccos, clip)
from numpy.linalg import norm
from scipy.ndimage import gaussian_filter
from scipy.interpolate import RegularGridInterpolator

The createFontList function was deprecated in Matplotlib 3.2 and will be removed two minor releases later. Use FontManager.addfont instead.


#Two functions to read season data and match data, given season id and match id

In [None]:
#@title

def load_file(season_id, getter="remote", path = None, competition_id=11):
    """Download the StatsBomb open-data match list for one season.

    Parameters
    ----------
    season_id : str or int
        Season identifier, interpolated into the request URL.
    getter, path :
        Accepted so existing calls keep working; this function does not use them.
    competition_id : str or int, optional
        Competition identifier in the URL. Defaults to 11, the value the URL
        previously hard-coded.

    Returns
    -------
    tuple
        ``(season_dict, season_df)``: the decoded JSON payload and the same
        payload flattened by ``json_normalize`` with nested keys joined by "_".

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. an unknown season).
    """
    url = ("https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
           f"matches/{competition_id}/{season_id}.json")
    # The timeout stops the notebook from hanging forever on a stalled connection
    resp = requests.get(url, timeout=60)
    # Fail here with a clear HTTP error instead of a JSON decoding error on an error page
    resp.raise_for_status()
    season_dict = json.loads(resp.text)
    season_df = json_normalize(season_dict, sep="_")
    return season_dict, season_df
def load_match_file(match_id, getter="remote", path = None):
    """Download the StatsBomb open-data event list for one match.

    Parameters
    ----------
    match_id : str or int
        Match identifier, interpolated into the request URL.
    getter, path :
        Accepted so existing calls keep working; this function does not use them.

    Returns
    -------
    tuple
        ``(match_dict, df)``: the decoded JSON payload and the same payload
        flattened by ``json_normalize`` with nested keys joined by "_".

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. an unknown match).
    """
    url = f"https://raw.githubusercontent.com/statsbomb/open-data/master/data/events/{match_id}.json"
    # The timeout stops the notebook from hanging forever on a stalled connection
    resp = requests.get(url, timeout=60)
    # Fail here with a clear HTTP error instead of a JSON decoding error on an error page
    resp.raise_for_status()
    match_dict = json.loads(resp.text)
    df = json_normalize(match_dict, sep="_")
    return match_dict, df

#Select the season

In [None]:
#@title

# Point the socceraction StatsBomb loader at the public open-data repository
free_open_data_remote = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter="remote")
competitions = SBL.competitions()

# Keep the La Liga rows only; complist holds the season names in table order,
# compdict maps each season name to its season id
selected_competitions = competitions.loc[competitions["competition_name"] == "La Liga"]
complist = selected_competitions["season_name"].tolist()
compdict = dict(zip(complist, selected_competitions["season_id"]))

# Dropdown for choosing the season; it starts on the first listed season
compselect = widgets.Dropdown(
    options=complist,
    value=complist[0],
    rows=5,
    description='Choose Season',
    layout={'width':'max-content'},
    style={'description_width': 'initial'},
    disabled=False,
)
compselect

Dropdown(description='Choose Season', layout=Layout(width='max-content'), options=('2019/2020', '2018/2019', '…

#Read in all the matches from the selected season; do some pre-processing, extract quick release passes - takes around 30-40 seconds. The quick release dataframe has start and end coordinates (x1,y1) and (x2,y2) of the first pass, and similarly start and end coordinates (x3,y3) and (x4,y4) of the second pass. x2 and x3 are typically pretty close to each other; similarly y2 and y3. We also add some more coordinate related columns - the averages of x2 and x3, the averages of y2 and y3, the angles between the vectors (x1,y1)->(x_avg,y_avg) and (x_avg,y_avg)->(x4,y4), and their respective lengths.

In [None]:
#@title
%%time

#Read match_id list corresponding to chosen season, loop over all match_ids and create a master dataframe mdf

# Resolve the season picked in the dropdown to its id and fetch that season's match list
selected_season = str(compdict[compselect.value])
season_dict, season_df = load_file(selected_season, getter="remote")
match_id_list = season_df.match_id.tolist()

# Download the events of every match, tag each frame with its match and
# season ids, then stack everything into the master dataframe mdf
matches = []
for i, match_id in enumerate(match_id_list):
    match_dict, df = load_match_file(match_id, getter="remote")
    df['match_id'] = match_id
    df['season_id'] = selected_season
    matches.append(df)
mdf = pd.concat(matches)

# Keep only Barcelona's events and drop every event without a location.
# The statements below rely on row adjacency (shift(-1)), so their order matters.

masterdf = mdf[(mdf.team_name=='Barcelona')&(mdf.location.notna())].reset_index(drop=True)

# Event clock in seconds, built from the minute and second columns

masterdf['time'] = masterdf['minute']*60 + masterdf['second']

# For each row: the time of the NEXT row if that row is a 'Ball Receipt*',
# otherwise the sentinel -1 (meaning "no receipt follows this event")

masterdf['receive_time'] = np.where(masterdf.type_name.shift(-1)=='Ball Receipt*',masterdf.time.shift(-1),-1)

# Now drop all the ball receipts, so that the next on-ball action (Carry, next Pass etc.)
# becomes the row directly after each event

masterdf = masterdf[masterdf.type_name!='Ball Receipt*'].reset_index(drop=True)

# Time of the next remaining event, and dT = gap between the receipt and that event.
# Rows without a receipt (receive_time == -1) get the sentinel dT = -10000.

masterdf['next_time'] = masterdf['time'].shift(-1)
masterdf['dT'] = np.where(masterdf.receive_time!=-1,masterdf['next_time'] - masterdf['receive_time'],-10000)

# StatsBomb successful passes aren't given any outcome names,
# so the json-to-dataframe step leaves NaN as the outcome of a successful pass.
# Binarize the column: NaN (successful) -> 1, any named outcome -> 0.
# Note that every row with a NaN outcome gets 1, whatever its event type.

masterdf['pass_outcome_name'] = np.where(masterdf.pass_outcome_name.notna(),0,1)

# Copy the next row's location, pass end location, action type, player name,
# pass recipient name and (binarized) pass outcome onto the current row.
# Poss_seq_diff = this row's possession id minus the next row's (0 means same
# possession); the last row has no successor and is filled with 0.

masterdf['next_location'] = masterdf['location'].shift(-1)
masterdf['next_end_location'] = masterdf['pass_end_location'].shift(-1)
masterdf['Poss_seq_diff'] = masterdf.possession.diff(-1).fillna(value=0)
masterdf['next_action'] = masterdf.type_name.shift(-1)
masterdf['next_player'] = masterdf.player_name.shift(-1)
masterdf['next_recipient'] = masterdf.pass_recipient_name.shift(-1)
masterdf['next_outcome'] = masterdf.pass_outcome_name.shift(-1)

# Here is how the quick release function is defined 
# Same possession sequence, not too much time elapsed, both actions are successful passes
# and the player who receives the first pass makes the second pass
# Some of the conditions are redundant and can potentially be dropped, but meh 
# Returns binary identifier for quick release

def quickrelease(args):
    """Return 1 if a row is the first pass of a quick release, else 0.

    ``args`` must unpack, in this order, into: dT, Poss_seq_diff, type_name,
    next_action, pass_recipient_name, next_player, pass_outcome_name and
    next_outcome. Every check below has to hold for the row to qualify.
    """
    (dT, Poss_seq_diff, type_name, next_action,
     pass_recipient_name, next_player,
     pass_outcome_name, next_outcome) = args

    checks = (
        Poss_seq_diff == 0,                    # same possession sequence
        (dT >= 0.0) & (dT <= 2.0),             # released within 0 to 2 seconds of receiving
        type_name == 'Pass',                   # first action is a pass
        next_action == 'Pass',                 # second action is a pass
        pass_outcome_name == 1,                # first pass is successful
        next_outcome == 1,                     # second pass is successful
        pass_recipient_name == next_player,    # the receiver plays the second pass
    )
    return 1 if all(checks) else 0
# Score every row with quickrelease: 1 marks the first pass of a quick release
qr_inputs = ['dT','Poss_seq_diff','type_name','next_action',
             'pass_recipient_name','next_player','pass_outcome_name',
             'next_outcome']
masterdf['QuickRelease'] = masterdf[qr_inputs].apply(quickrelease,axis=1)

# Dataframe holding only the quick releases

QR = masterdf.loc[masterdf['QuickRelease'] == 1].reset_index(drop=True)

# Split each [x, y] location list into two scalar columns:
# (x1,y1)->(x2,y2) is the first pass, (x3,y3)->(x4,y4) the second one

for x_col, y_col, source_col in (
        ('x1', 'y1', 'location'),
        ('x2', 'y2', 'pass_end_location'),
        ('x3', 'y3', 'next_location'),
        ('x4', 'y4', 'next_end_location')):
    QR[[x_col, y_col]] = np.array(QR[source_col].tolist())
QR = QR[['player_name','pass_recipient_name','next_recipient','x1','y1','x2','y2','x3','y3','x4','y4']]

# (x_avg, y_avg): midpoint between where the first pass ends and where the
# second pass starts

for axis in ('x', 'y'):
    QR[axis + '_avg'] = (QR[axis + '2'] + QR[axis + '3'])/2.0

# Displacement of each leg: start -> midpoint, then midpoint -> final end point

for axis in ('x', 'y'):
    QR['delta_' + axis + '_1'] = QR[axis + '_avg'] - QR[axis + '1']
for axis in ('x', 'y'):
    QR['delta_' + axis + '_2'] = QR[axis + '4'] - QR[axis + '_avg']

def angle_bw_vectors(coords):
    """Return the unsigned angle, in radians, between two 2-D vectors.

    Parameters
    ----------
    coords : array-like of numbers
        The first four entries are read as ``(dx1, dy1, dx2, dy2)``: the
        components of the first vector followed by those of the second.
        Lists, tuples, NumPy arrays and pandas Series rows are all accepted.

    Returns
    -------
    float
        The angle in ``[0, pi]``, or ``0.0`` when either vector has zero length.
    """
    # Convert to a plain array first so entries are read by position; indexing
    # a label-indexed pandas Series with integer keys is deprecated.
    dx1, dy1, dx2, dy2 = np.asarray(coords, dtype=float)[:4]
    vector_1 = np.array([dx1, dy1])
    vector_2 = np.array([dx2, dy2])
    norm_1 = np.linalg.norm(vector_1)
    norm_2 = np.linalg.norm(vector_2)
    # The angle is undefined for a zero-length vector; report 0 in that case
    if norm_1 == 0 or norm_2 == 0:
        return 0.0
    cosine = np.dot(vector_1 / norm_1, vector_2 / norm_2)
    # Clip guards against rounding pushing the cosine just outside [-1, 1]
    return np.arccos(np.clip(cosine, -1, 1))

# Angle between the two leg vectors, then the Euclidean length of each leg
leg_columns = ['delta_x_1','delta_y_1','delta_x_2','delta_y_2']
QR['angle'] = QR[leg_columns].apply(angle_bw_vectors,axis=1)
for leg in ('1', '2'):
    QR['len' + leg] = np.sqrt(QR['delta_x_' + leg]**2 + QR['delta_y_' + leg]**2)

CPU times: user 29.5 s, sys: 659 ms, total: 30.1 s
Wall time: 39.9 s


#Check what the quick release dataframe looks like

In [None]:
# Show the first rows of the quick-release table as a sanity check
QR.head()

Unnamed: 0,player_name,pass_recipient_name,next_recipient,x1,y1,x2,y2,x3,y3,x4,y4,x_avg,y_avg,delta_x_1,delta_y_1,delta_x_2,delta_y_2,angle,len1,len2
0,Luis Alberto Suárez Díaz,Sergio Busquets i Burgos,Clément Lenglet,60.0,40.0,43.2,40.7,43.8,40.4,36.4,31.4,43.5,40.55,-16.5,0.55,-7.1,-9.15,0.94421,16.509164,11.581559
1,Sergio Busquets i Burgos,Ricard Puig Martí,Jordi Alba Ramos,28.5,39.8,63.3,23.1,63.1,23.1,49.2,2.7,63.2,23.1,34.7,-16.7,-14.0,-20.4,1.723701,38.509479,24.741867
2,Ronald Federico Araújo da Silva,Sergio Busquets i Burgos,Jordi Alba Ramos,72.1,51.5,63.3,39.8,63.3,39.8,63.7,0.6,63.3,39.8,-8.8,-11.7,0.4,-39.2,0.655071,14.640014,39.202041
3,Ronald Federico Araújo da Silva,Sergi Roberto Carnicer,Ronald Federico Araújo da Silva,31.3,61.6,63.1,75.9,63.3,72.5,39.6,54.8,63.2,74.2,31.9,12.6,-23.6,-19.4,2.829735,34.298251,30.550286
4,Sergi Roberto Carnicer,Ronald Federico Araújo da Silva,Clément Lenglet,63.3,72.5,39.6,54.8,41.5,55.2,39.0,16.1,40.55,55.0,-22.75,-17.5,-1.55,-38.9,0.875276,28.702134,38.930868


#To begin, let's plot out a heatmap. The heatmap shows the location where the player B receives the first pass (x2,y2) and makes the second pass from (x3,y3). As you can see above, the two locations might not be exactly identical, so we use their average location given by (x_avg,y_avg). Select different players from the dropdown to explore their heatmaps.

In [None]:
#@title

def heatmapplotter(playername):
    """Draw a smoothed heatmap of where ``playername`` handles quick releases.

    Uses the rows of the global ``QR`` whose ``pass_recipient_name`` equals
    ``playername`` and bins their (x_avg, y_avg) positions on a vertical
    StatsBomb pitch. The title includes the season chosen in ``compselect``.
    """
    from matplotlib.colors import LinearSegmentedColormap

    background = '#f7e9ec'
    receptions = QR.loc[QR.pass_recipient_name == playername].reset_index(drop=True)

    pitch = Pitch(pitch_type='statsbomb', figsize=(8, 12), pitch_color=background,
                  orientation='vertical', stripe=False, line_zorder=2, view='full',
                  line_color='#626060', tight_layout=False, constrained_layout=True)
    fig, ax = pitch.draw()
    fig.set_facecolor(background)

    # Colour ramp from the pitch colour up to a dark red
    cmap = LinearSegmentedColormap.from_list("", [background, '#eec8d2', '#810323'])

    # Bin the positions into a 38 x 25 grid, then blur the counts (sigma = 1)
    bin_statistic = pitch.bin_statistic(receptions.x_avg, receptions.y_avg, bins=(38, 25))
    bin_statistic['statistic'] = gaussian_filter(bin_statistic['statistic'], 1)
    lowest = bin_statistic['statistic'].min()
    pitch.heatmap(bin_statistic, ax=ax, cmap=cmap, edgecolors=None, vmin=lowest)

    title = '\n'.join([playername, 'Quick Releases Heatmap', 'Season - ' + compselect.value])
    ax.set_title(title, fontsize=20, c='k')
    fig.text(0.1, 0.0, "Created by Soumyajit Bose / @Soumyaj15209314",
             fontstyle="italic", fontsize=15, color='k')
    
# Interactive heatmap: one dropdown entry per distinct pass recipient found in QR
int1 = interactive(heatmapplotter, playername = QR.pass_recipient_name.unique().tolist())
int1

interactive(children=(Dropdown(description='playername', options=('Sergio Busquets i Burgos', 'Ricard Puig Mar…

#Now we start the real bit - clustering.
#(i) First, choose some or all of these features. By default, x1, y1, x_avg, y_avg, x4, y4, angle, len1 and len2 are selected. Remember, there are no a priori right or wrong answers as to which features should be selected. Try out various combinations. Also select the clustering technique you want to explore. There are three presented here so far - K-Means, OPTICS and Affinity Propagation.
#(ii) Mckay Johns has introduced k-means in his wonderful YouTube videos (check out the link here: https://www.youtube.com/watch?v=rXHChgdXc0M&list=PL10a1_q15HwqVEcnqt3tXs1bgvawjsQNW&index=13. Also check out his work and visualizations on Twitter @mckayjohns). If you choose K-Means, first use the K-Means Elbow Check to get a rough idea of where the "elbow" lies (for more details, read about the elbow method). Then select K-Means Plot and set Nc = number of clusters suggested by the elbow of the elbow plot (too many elbows gaaah). If Nc>5, only the top 5 combinations will be shown. Now, what exactly are you doing in this elbow plot? You are plotting something called "inertia" - the sum of squared distances of samples to their closest cluster center. The more clusters there are, the lower the inertia. The "elbow" lies at the point where the rate of decrease becomes roughly linear. Also, let me take this opportunity to express my personal dissatisfaction with this elbow technique - it's not great for complicated data like this. Keep in mind that inertia is not the only quantity you can check; there are other metrics you can evaluate as well, but none of them really work well for me.
#(iii) Piotr Wawrzynów discussed the OPTICS clustering when talking about similar passing chains. Do check out his wonderful work that he keeps posting on twitter (@pwawrzynow). In OPTICS clustering, we are going to play around with two parameters, min sample and xi. Note that OPTICS clustering does not try to put every single sample into some cluster, in sharp contrast to K-Means. Any element that is not put into a cluster gets a cluster label of -1. When using this clustering, try to minimize the number of unclustered samples and at the same time, make sure that there are not too many small clusters with 2 or 3 elements only. Hitting that balance might be hard to do for some players, so go for something that looks decent(I guess?)
#(iv) Affinity Propagation with a DTW distance measure was used by Kuba Michalczyk (@KubaMichalczyk) in his wonderful article for OPTA, where he tried to cluster entire sequences. Now, DTW is overkill for something like this, so we are going to just use affinity propagation with the default affinity measure. This is the only technique treated here as parameter-less - that's not quite true, as you can supply different pre-computed affinity measures, and play around with damping etc. Let's skip that for now. Note that this technique will try to put everything in some cluster. We once again plot out the top 5 cluster representatives.
#(v) As mentioned before, only top 5 clusters are displayed if number of clusters formed is greater than 5. Only the median representatives of each cluster are shown.
#(vi)For more details about the techniques themselves, please read up on your own. This notebook purely focuses on the application side of clustering.   

In [None]:
#@title
# Multi-select list of the QR columns that can be used as clustering features;
# the `value` list is the initial selection
feature_selection = widgets.SelectMultiple(
    options=['x1','y1','x2','y2','x3','y3','x4','y4','x_avg','y_avg','delta_x_1','delta_y_1',
             'delta_x_2','delta_y_2','angle','len1','len2'],
    value=['x1','y1','x_avg','y_avg','x4','y4','len1','len2','angle'],
    rows=5,
    description='Choose features to do clustering',layout={'width':'max-content'},
    style={'description_width': 'initial'},
    disabled=False
)

# Dropdown choosing which clustering view to run; the option strings are
# matched literally by the clustering() dispatcher, so keep them in sync
technique = widgets.Dropdown(
    options=['K-Means Elbow Check','K-Means Plot','OPTICS','Affinity Propagation'],
    value='K-Means Elbow Check',
    description='Technique',
    disabled=False,
)

def kmeans_elbowcheck(dataframe,features,player):
    """Plot K-Means inertia against the number of clusters for one player.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Quick-release table; must contain ``pass_recipient_name`` and every
        column named in ``features``.
    features : list of str
        Columns used as clustering features (standardized before fitting).
    player : str
        Only rows whose ``pass_recipient_name`` equals this value are used.

    Returns
    -------
    matplotlib.figure.Figure
        The elbow plot. Cluster counts run from 2 up to, but not including, a
        quarter of the player's row count, so the curve is empty when the
        player has too few rows.
    """
    df_cluster = dataframe[dataframe.pass_recipient_name==player].reset_index(drop=True)
    seriesXY = StandardScaler().fit_transform(np.array(df_cluster[features]))
    Nmax = int(len(df_cluster)/4)
    cluster_counts = range(2,Nmax)

    # fit() is enough to obtain inertia_; no separate predict() pass is needed.
    # Other scores (silhouette, Calinski-Harabasz, Davies-Bouldin) could be
    # collected in the same loop if wanted.
    inertia_scores = [
        KMeans(n_clusters=nc, init='random', random_state=42).fit(seriesXY).inertia_
        for nc in cluster_counts
    ]

    fig,ax = plt.subplots(figsize=(10,6))
    ax.plot(cluster_counts,inertia_scores,color='gold')
    ax.set_title('Clustering '+player+'\n'+'Inertia scores',fontsize=25)
    for s in ['top','bottom','left','right']:
        ax.spines[s].set_color('w')
    ax.set_xlabel('Number of clusters',fontsize=15)
    fig.text(0.1, 0.0, "Created by Soumyajit Bose / @Soumyaj15209314",fontstyle="italic",
             fontsize=15,color='#edece9')
    return fig

def kmeans_clusterplot(dataframe,features,player,Nc):
    """Cluster one player's quick releases with K-Means and draw the largest clusters.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Quick-release table; must contain ``pass_recipient_name``, every column
        named in ``features`` and the plotting columns x1, y1, x_avg, y_avg,
        x4 and y4.
    features : list of str
        Columns used as clustering features (standardized before fitting).
    player : str
        Only rows whose ``pass_recipient_name`` equals this value are used.
    Nc : int
        Number of K-Means clusters.

    Each of the (at most five) largest clusters is drawn as two arrows,
    (x1, y1) -> (x_avg, y_avg) -> (x4, y4), using the mean of its members.
    The member mean is the cluster centroid expressed in pitch coordinates,
    and it stays correct whichever features, in whichever order, were chosen
    for the clustering itself.
    """
    df_cluster = dataframe[dataframe.pass_recipient_name==player].reset_index(drop=True)
    seriesXY = StandardScaler().fit_transform(np.array(df_cluster[features]))
    km = KMeans(n_clusters=Nc, init='random', random_state=42)
    df_cluster['Cluster'] = km.fit_predict(seriesXY)

    # Rank clusters by member count, largest first, and keep at most five
    cluster_groups = df_cluster.groupby('Cluster').count().reset_index().sort_values(by='x1',ascending=False)
    label_list = cluster_groups.Cluster.unique().tolist()[:5]

    # Read the arrow coordinates from the members by column name, not from the
    # first six entries of the feature-space centres
    path_cols = ['x1','y1','x_avg','y_avg','x4','y4']
    centroids = df_cluster.groupby('Cluster')[path_cols].mean()

    colordict = {0:'#a50026',1:'#d73027',2:'#f46d43',3:'#fdae61',4:'#fee090',
                 5:'#e0f3f8',6:'#abd9e9',7:'#74add1',8:'#4575b4',9:'#313695'}

    pitch = Pitch(pitch_type='statsbomb', figsize=(12, 8), pitch_color='#082630',orientation='horizontal',
            stripe=False, line_zorder=2, view='full',line_color='gray',tight_layout=False,
            constrained_layout=True)
    fig,ax = pitch.draw()

    for i,labs in enumerate(label_list):
        X1,Y1,X2,Y2,X3,Y3 = centroids.loc[labs, path_cols].tolist()

        pitch.arrows(X1, Y1, X2, Y2, width=2,
             headwidth=6, headlength=5, color=colordict[i], ax=ax,zorder=3)

        pitch.arrows(X2, Y2, X3, Y3, width=2,
             headwidth=6, headlength=5,
             color=colordict[i], ax=ax,zorder=3)

    # Count the drawn clusters directly instead of reading the loop variable after the loop
    ax.set_title('Top '+str(len(label_list))+' quick pass combo clusters for \n'+player+'\n'+'Season - '+compselect.value,fontsize=25)
    fig.text(0.1, 0.0, "Created by Soumyajit Bose / @Soumyaj15209314",fontstyle="italic",
             fontsize=15,color='#edece9')
        
def opticscluster(dataframe,features,player,xi,minsamples):
    """Cluster one player's quick releases with OPTICS and draw the largest clusters.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Quick-release table; must contain ``pass_recipient_name``, every column
        named in ``features`` and the plotting columns x1, y1, x_avg, y_avg,
        x4 and y4.
    features : list of str
        Columns used as clustering features (standardized before fitting).
    player : str
        Only rows whose ``pass_recipient_name`` equals this value are used.
    xi, minsamples :
        Passed to ``OPTICS`` as ``xi`` and ``min_samples``.

    Prints the member count of every cluster label (label -1 is excluded from
    the plot) and draws each of the (at most five) largest clusters as two
    arrows through the medians of its members. When no cluster is found, the
    pitch is drawn without arrows and the title reports 0 clusters.
    """
    df_cluster = dataframe[dataframe.pass_recipient_name==player].reset_index(drop=True)
    seriesXY = StandardScaler().fit_transform(np.array(df_cluster[features]))
    clustering = OPTICS(metric='canberra',xi=xi,min_samples=minsamples).fit(seriesXY)
    df_cluster['Cluster'] = clustering.labels_

    # Rank the real clusters (label != -1) by member count, largest first
    cluster_groups = df_cluster[df_cluster.Cluster!=-1].groupby('Cluster').count().reset_index()
    cluster_groups = cluster_groups.sort_values(by='x1',ascending=False)
    label_list = cluster_groups.Cluster.unique().tolist()[:5]
    print(df_cluster.groupby('Cluster').size().reset_index(name='Count'))

    pitch = Pitch(pitch_type='statsbomb', figsize=(12, 8), pitch_color='#082630',orientation='horizontal',
            stripe=False, line_zorder=2, view='full',line_color='gray',tight_layout=False,
            constrained_layout=True)
    fig,ax = pitch.draw()

    colordict = {0:'#a50026',1:'#d73027',2:'#f46d43',3:'#fdae61',4:'#fee090',
                 5:'#e0f3f8',6:'#abd9e9',7:'#74add1',8:'#4575b4',9:'#313695'}

    path_cols = ['x1','y1','x_avg','y_avg','x4','y4']
    for i,labs in enumerate(label_list):
        lab_df = df_cluster[df_cluster.Cluster==labs].reset_index(drop=True)
        # Median representative of the cluster, in path order
        X1,Y1,X2,Y2,X3,Y3 = lab_df[path_cols].median().tolist()

        pitch.arrows(X1, Y1, X2, Y2, width=2,
             headwidth=6, headlength=5, color=colordict[i], ax=ax,zorder=3)

        pitch.arrows(X2, Y2, X3, Y3, width=2,
             headwidth=6, headlength=5,
             color=colordict[i], ax=ax,zorder=3)

    # len(label_list) is defined even when the loop never ran (no clusters found),
    # whereas the loop variable would not be
    ax.set_title('Top '+str(len(label_list))+' quick pass combo clusters for \n'+player+'\n'+'Season - '+compselect.value,fontsize=25)
    fig.text(0.1, 0.0, "Created by Soumyajit Bose / @Soumyaj15209314",fontstyle="italic",
             fontsize=15,color='#edece9')

def my_fastdtw(s1, s2):
    """Return only the first element of ``fastdtw(s1, s2)`` (the distance)."""
    result = fastdtw(s1, s2)
    return result[0]

def affpropcluster(dataframe,features,player):
    """Cluster one player's quick releases with Affinity Propagation and draw the largest clusters.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Quick-release table; must contain ``pass_recipient_name``, every column
        named in ``features`` and the plotting columns x1, y1, x_avg, y_avg,
        x4 and y4.
    features : list of str
        Columns used as clustering features (standardized before fitting).
    player : str
        Only rows whose ``pass_recipient_name`` equals this value are used.

    Returns
    -------
    matplotlib.figure.Figure
        Pitch with each of the (at most five) largest clusters drawn as two
        arrows through the medians of its members.

    Notes
    -----
    The model is fitted on the standardized feature vectors with the
    estimator's default affinity, as the notebook text describes. A negated
    DTW distance matrix must not be passed to a default
    ``AffinityPropagation()``: without ``affinity='precomputed'`` the rows of
    such a matrix are treated as feature vectors, not as similarities.
    """
    df_cluster = dataframe[dataframe.pass_recipient_name==player].reset_index(drop=True)
    seriesXY = StandardScaler().fit_transform(np.array(df_cluster[features]))
    clustering = AffinityPropagation().fit(seriesXY)
    df_cluster['Cluster'] = clustering.labels_

    # Rank clusters by member count, largest first; label -1 (assigned when the
    # algorithm does not converge) is left out
    cluster_groups = df_cluster[df_cluster.Cluster!=-1].groupby('Cluster').count().reset_index()
    cluster_groups = cluster_groups.sort_values(by='x1',ascending=False)
    label_list = cluster_groups.Cluster.unique().tolist()[:5]

    pitch = Pitch(pitch_type='statsbomb', figsize=(12, 8), pitch_color='#082630',orientation='horizontal',
            stripe=False, line_zorder=2, view='full',line_color='gray',tight_layout=False,
            constrained_layout=True)
    fig,ax = pitch.draw()

    colordict = {0:'#a50026',1:'#d73027',2:'#f46d43',3:'#fdae61',4:'#fee090',
                 5:'#e0f3f8',6:'#abd9e9',7:'#74add1',8:'#4575b4',9:'#313695'}

    path_cols = ['x1','y1','x_avg','y_avg','x4','y4']
    for i,labs in enumerate(label_list):
        lab_df = df_cluster[df_cluster.Cluster==labs].reset_index(drop=True)
        # Median representative of the cluster, in path order
        X1,Y1,X2,Y2,X3,Y3 = lab_df[path_cols].median().tolist()

        pitch.arrows(X1, Y1, X2, Y2, width=2,
             headwidth=6, headlength=5, color=colordict[i], ax=ax,zorder=3)

        pitch.arrows(X2, Y2, X3, Y3, width=2,
             headwidth=6, headlength=5,
             color=colordict[i], ax=ax,zorder=3)

    # len(label_list) is defined even when the loop never ran (nothing clustered),
    # whereas the loop variable would not be
    ax.set_title('Top '+str(len(label_list))+' quick pass combo clusters for \n'+player+'\n'+'Season - '+compselect.value,fontsize=25)
    fig.text(0.1, 0.0, "Created by Soumyajit Bose / @Soumyaj15209314",fontstyle="italic",
             fontsize=15,color='#edece9')
    return fig

In [None]:
#@title

def clustering(player, dataframe, options, features):
    """Run the clustering view named by ``options`` for one player.

    ``options`` is matched against 'K-Means Elbow Check', 'K-Means Plot' and
    'OPTICS'; any other value runs Affinity Propagation. The K-Means plot and
    OPTICS views are wrapped in ``interact`` so their parameters get widgets
    of their own. Nothing is returned.
    """
    chosen = list(features)

    if options == 'K-Means Elbow Check':
        fig = kmeans_elbowcheck(dataframe, chosen, player)
        fig.set_facecolor('#082630')
        return

    if options == 'K-Means Plot':
        # Offer cluster counts from 2 up to a quarter of the player's rows
        player_rows = dataframe[dataframe.pass_recipient_name == player].reset_index(drop=True)
        Nmax = int(len(player_rows) / 4)
        interact(kmeans_clusterplot, dataframe=fixed(dataframe), features=fixed(chosen),
                 player=fixed(player), Nc=range(2, Nmax))
        return

    if options == 'OPTICS':
        xi_slider = widgets.FloatSlider(min=0.0, max=0.2, step=0.001)
        interact(opticscluster, dataframe=fixed(dataframe), features=fixed(chosen),
                 player=fixed(player), xi=xi_slider,
                 minsamples=[2, 3, 4, 5, 6, 7, 8, 9, 10])
        return

    affpropcluster(dataframe, chosen, player)
# Top-level clustering widget: a player dropdown (one entry per distinct pass
# recipient in QR), the technique dropdown and the feature multi-select; QR
# itself is passed through unchanged via fixed()
int2 = interactive(
                clustering, player = QR.pass_recipient_name.unique().tolist(),
                dataframe = fixed(QR), options = technique,
                features = feature_selection
                )
int2

interactive(children=(Dropdown(description='player', options=('Sergio Busquets i Burgos', 'Ricard Puig Martí',…