In [191]:
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('nbagg')

%matplotlib notebook

import matplotlib.pyplot as plt
import seaborn as sns

# Objective

Pressing Rate is measured by the Passes Allowed per Defensive Action (PPDA) metric.  
This is measured by :

    - Analysing how many passes were made by the opposition team (Completed passes only)
    - Defensive actions of the team in question
    
 Defensive actions in question are:
 
    - Tackles
    - Challenges
    - Fouls
    - Interceptions
    
Note : Only completed passes should be taken into account, while ALL attempts at defensive actions should be taken into account
  



# Importing and Cleaning the Data

In [192]:
mufc_def = pd.read_excel('mufc_defense.xlsx',)
mufc_pas = pd.read_excel('mufc_passing.xlsx',)

mufc_def = mufc_def.dropna()
mufc_pas = mufc_pas.dropna()

META_DATA = [
    
    'Date',
    'Match',
    'Competition',
    'Duration',
    'Team'
    
]

Cleaning the passing data

In [193]:
PAS_COLS = {
    
    'Passes / accurate' : 'Attempted Passes',
    'Unnamed: 7' : 'Completed Passes',
    'Unnamed: 8' : 'Accuracy (Passing)',
    
}

mufc_pas = mufc_pas.rename(PAS_COLS, axis = 'columns')
pressing_pas = mufc_pas[META_DATA + list(PAS_COLS.values())]

Cleaning the defensive data

In [194]:
DEF_COLS = {
    
    'Sliding tackles / successful' : 'Attempted Tackles',
    'Unnamed: 17' : 'Completed Tackles',
    'Unnamed: 18' : 'Accuracy (Tackles)',
    'Interceptions' : 'Interception',
    'Fouls' : 'Fouls'
    
}
mufc_def = mufc_def.rename(DEF_COLS, axis = 'columns')
pressing_def = mufc_def[META_DATA + list(DEF_COLS.values())]

# Calculating the metric

In [195]:
pressing_def['Defensive Actions'] = pressing_def['Attempted Tackles']\
                                    + pressing_def['Interception']\
                                    + pressing_def['Fouls']
        
main_df = pressing_def[META_DATA + ['Defensive Actions']]
main_df = main_df[main_df['Team'] == 'Manchester United']

cop = pressing_pas[~(pressing_pas['Team'] == 'Manchester United')]['Completed Passes'].values
main_df['Completed Opposition Passes'] = cop

main_df['PPDA'] = main_df['Completed Opposition Passes']/main_df['Defensive Actions']

#Sort by Date
main_df = main_df.sort_values(by = 'Date')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


# Visualizations

Objectives :  
    - Pre Ole vs Post Ole Press rate Comparison
    - Highest Pressing match
    - Lowest Pressing match

Calculating post Mourinho press rate

In [250]:
#Date Mourinho Sacked
DATE = '2018-12-16'

main_df = main_df.reset_index(drop = True)

#Seperating the df's
ole = main_df[main_df['Date']> DATE]
mou = main_df[main_df['Date']< DATE]

n = 10
rm = main_df['PPDA'].rolling(window= n).mean()

In [249]:
plt.plot(rm,'ro-',label='line2')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1b2297a48d0>]