In [1]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('nbAgg')

%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns

# Objective

Pressing Rate is measured by the Passes Allowed per Defensive Action (PPDA) metric.  
This is measured by :

    - Analysing how many passes were made by the opposition team (Completed passes only)
    - Defensive actions of the team in question
    
 Defensive actions in question are:
 
    - Tackles
    - Challenges
    - Fouls
    - Interceptions
    
Note : Only completed passes should be taken into account, while ALL attempts at defensive actions should be taken into account
  



# Importing and Cleaning the Data

In [2]:
mufc_def = pd.read_excel('mufc_defense.xlsx',)
mufc_pas = pd.read_excel('mufc_passing.xlsx',)

mufc_def = mufc_def.dropna()
mufc_pas = mufc_pas.dropna()

META_DATA = [
    
    'Date',
    'Match',
    'Competition',
    'Duration',
    'Team'
    
]

Cleaning the passing data

In [3]:
PAS_COLS = {
    
    'Passes / accurate' : 'Attempted Passes',
    'Unnamed: 7' : 'Completed Passes',
    'Unnamed: 8' : 'Accuracy (Passing)',
    
}

mufc_pas = mufc_pas.rename(PAS_COLS, axis = 'columns')
pressing_pas = mufc_pas[META_DATA + list(PAS_COLS.values())]

Cleaning the defensive data

In [4]:
DEF_COLS = {
    
    'Sliding tackles / successful' : 'Attempted Tackles',
    'Unnamed: 17' : 'Completed Tackles',
    'Unnamed: 18' : 'Accuracy (Tackles)',
    'Interceptions' : 'Interception',
    'Fouls' : 'Fouls'
    
}
mufc_def = mufc_def.rename(DEF_COLS, axis = 'columns')
pressing_def = mufc_def[META_DATA + list(DEF_COLS.values())]

# Calculating the metric

## Function definition

In [None]:
def pressing_df(defense_file, passing_file, name):
    
    df_def = pd.read_excel(defense_file)
    df_pas = pd.read_excel(passing_file)

    df_def = df_def.dropna()
    df_pas = df_pas.dropna()
    
    #print(df_def.shape, df_pas.shape)
    
    df_pas = df_pas.rename(PAS_COLS, axis = 'columns')
    pressing_pas = df_pas[META_DATA + list(PAS_COLS.values())]

    df_def = df_def.rename(DEF_COLS, axis = 'columns')
    pressing_def = df_def[META_DATA + list(DEF_COLS.values())]
    
    pressing_def['Defensive Actions'] = pressing_def['Attempted Tackles']\
                                    + pressing_def['Interception']\
                                    + pressing_def['Fouls']
        
    main_df = pressing_def[META_DATA + ['Defensive Actions']]
    main_df = main_df[main_df['Team'] == name]

    cop = pressing_pas[~(pressing_pas['Team'] == name)]['Completed Passes'].values
    
    #print(main_df.shape)
    main_df['Completed Opposition Passes'] = cop

    main_df['PPDA'] = main_df['Completed Opposition Passes']/main_df['Defensive Actions']

    #Sort by Date
    main_df = main_df.sort_values(by = 'Date')
    
    return main_df

## Manual

In [5]:
pressing_def['Defensive Actions'] = pressing_def['Attempted Tackles']\
                                    + pressing_def['Interception']\
                                    + pressing_def['Fouls']
        
main_df = pressing_def[META_DATA + ['Defensive Actions']]
main_df = main_df[main_df['Team'] == 'Manchester United']

cop = pressing_pas[~(pressing_pas['Team'] == 'Manchester United')]['Completed Passes'].values
main_df['Completed Opposition Passes'] = cop

main_df['PPDA'] = main_df['Completed Opposition Passes']/main_df['Defensive Actions']

#Sort by Date
main_df = main_df.sort_values(by = 'Date')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


# Visualizations

Objectives :  
    - Pre Ole vs Post Ole Press rate Comparison
    - Highest Pressing match
    - Lowest Pressing match

## Plotting functions

In [128]:
FONT_TITLE = "Arial Rounded MT Bold"
FONT_LABEL = "Franklin Gothic Medium"
FONT_TICK_LABELS = "Courier New"

def stylize_plot(ax, x_label, y_label, title):
    
    #ax.set_xticklabels(ax.get_xticklabels(), rotation = 90, family = FONT_TICK_LABELS, fontsize = )
    #ax.set_yticklabels(ax.get_yticklabels())
    
    ax.set_xlabel(x_label, family = FONT_LABEL , fontsize = 30)
    ax.set_ylabel(y_label, rotation = 90, family = FONT_LABEL, fontsize = 30)
    ax.set_title(title, family = FONT_TITLE, fontsize = 31, color = 'white')
    
    plt.tight_layout()
    
def init_plot(figsize, back):
    sns.set(rc={'figure.figsize':figsize})
    sns.set_style("dark", {"axes.facecolor": back})
    #sns.set_palette(sns.dark_palette("purple"))
    
def get_color(df):
    pos = df[df['Name'] == 'Harry Maguire'].index.values[0]

    color = []

    for i in range(n):
        if i is int(pos):
            color += ['#d32f2f']
        else:
            color += ['#414141']
            
    return color

def set_colors(ax):
    #Coloring and Styling
    ax.set_facecolor('#212121')

    ax.spines['bottom'].set_color('white')
    ax.spines['top'].set_color('white')
    ax.spines['left'].set_color('white')
    ax.spines['right'].set_color('white')

    ax.yaxis.label.set_color('white')
    ax.tick_params(axis = 'y', colors = 'white', labelsize = 30)
    
    ax.xaxis.label.set_color('white')
    ax.tick_params(axis= 'x', colors = 'white', labelsize = 30)
    



## Plotting

Calculating post Mourinho press rate

In [34]:
#Date Mourinho Sacked
DATE = '2018-12-16'

main_df = main_df.reset_index(drop = True)
ole = main_df[main_df['Date']> DATE]
mou = main_df[main_df['Date']< DATE]

n = 10
rm = main_df['PPDA'].rolling(window= n).mean()


Plotting the pressing rate

In [35]:
fig = plt.figure()

init_plot((10, 10), "212121")

plt.plot(rm,'o-',label='line2', color = '#f44336')

ax = plt.gca()
set_colors(ax)

stylize_plot(ax, "Gameweek", "Pressing Rate", "MUFC Pressing rate over 2018/19 PL Season")
fig.savefig('seasonal_press_rate.jpg', facecolor = '#212121', dpi = 600)

plt.close()

Mean of Ole's first 5 matches vs Mou's Average

In [42]:
print(ole[:5]['PPDA'].mean(), mou['PPDA'].mean())

4.8411578653035034 5.490001534176787


In [47]:
#Highest and lowest pressing match under ole

#Highest
print(ole.sort_values(by = 'PPDA')['Match'].iloc[0])

#Lowest
print(ole.sort_values(by = 'PPDA')['Match'].iloc[-1])

Cardiff City - Manchester United 1:5
Manchester United - Manchester City 0:2


## Pressing rate of top 6 Teams in PL

Creating acronyms for the Top 6 teams

In [97]:
ACRO = {
    'mcfc' : 'Manchester City',
    'afc' : 'Arsenal',
    'lfc' : 'Liverpool',
    'cfc' : 'Chelsea',
    'thfc': 'Tottenham Hotspur',
    'mufc': 'Manchester United'
    
}

Reading and fetching the data

In [135]:
import glob

data = {}
for hsh in ACRO.keys():
    files = glob.glob('T6Data/{}*'.format(hsh))
    df = pressing_df(files[0], files[1], ACRO[hsh])
    data[hsh] = df.reset_index(drop = True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Calculating rolling averages

In [142]:
rolling_mean = {}
n = 10

minimum = 1000
maximum = 0

for hsh in ACRO.keys():
    rm = data[hsh]['PPDA'].rolling(window= n).mean()
    rolling_mean[ACRO[hsh]] = rm
    
    if rm.min() < minimum:
        minimum = rm.min()
    
    if rm.max() > maximum:
        maximum = rm.max()

### Plotting

In [162]:
fig = plt.figure()

init_plot((20, 20), "212121")


colors = [
    '#e040fb',
    '#e0e0e0',
    '#ffeb3b',
    '#2196f3',
    '#4caf50',
    '#b71c1c',
    
]

i = 0

for team in rolling_mean.keys():
    plt.plot(rolling_mean[team],'o-',label=team, color = colors[i])
    #plt.yticks(np.arange(np.floor(minimum), np.floor(maximum + 1.5), 0.25))
    i = i+1
    
ax = plt.gca()
set_colors(ax)


leg = ax.legend(loc = 'upper right', prop = {'size': 30, 'family' : FONT_TICK_LABELS})
for text in leg.get_texts():
    text.set_color("white")

stylize_plot(ax, "Gameweek", "Pressing Rate", "Top 6 PL Teams Pressing Rate (2018/19 PL)")

fig.savefig('top6_pressing_comparison.jpg', facecolor = '#212121', dpi = 600)

plt.close()

Calculating standard deviations

In [159]:
stds = {}

for team in rolling_mean.keys():
    std = rolling_mean[team].std()
    stds[team] = std
 
print(stds)

{'Manchester City': 0.28290307493233335, 'Arsenal': 0.46491779501973884, 'Liverpool': 0.383057066744916, 'Chelsea': 0.5290311436759015, 'Tottenham Hotspur': 0.2639726742124765, 'Manchester United': 0.5631823521509501}


Calculating the difference between max and min

In [160]:
diffs = {}

for team in rolling_mean.keys():
    mx = rolling_mean[team].max()
    mn = rolling_mean[team].min()
    diffs[team] = mx - mn
    
print(diffs)

{'Manchester City': 1.128401743068415, 'Arsenal': 1.5852942085840551, 'Liverpool': 1.368609519427066, 'Chelsea': 1.7287501615021519, 'Tottenham Hotspur': 1.0142145620922705, 'Manchester United': 2.3131684822192664}
