In [15]:
import pandas as pd
import numpy as np
import matplotlib
import warnings

warnings.filterwarnings('ignore')
matplotlib.use('nbAgg')

import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.image as image
%matplotlib inline

# Preamble

<h2>Data points that need to be extracted out:</h2>

<h2>**1. Filtering**</h2>

    - Primary position is AMF.
    - Only top 5 European leagues.
    - 2018/19 Season
    - Minimum 1000 Minutes played.
   
<h2>**2. Player Specific**</h2>
   
><h3>a. General statistics</h3>
   
        - Name
        - Age
        - Team
        - Position
        - Foot
        - Minutes Played
        - Goals
        - Assists
        
 >  <h3>b. Attacking statistics</h3>
   
       - Non-penalty goals
       - Head goals total
       - Shots total
       - Shots on target %
       - Goal conv., %
       - Crosses per 90
       - Crosses from left per 90
       - Crosses from left, %
       - Crosses from right per 90
       - Crosses from right, %
       - Touches in box per 90
       
   ><h3>c. Passing statistics</h3>
   
       - Fwd passes per 90
       - Back passes per 90
       - Lat passes per 90
       - Fwd passes acc. %
       - Sh/m passes per 90
       - Lng passes per 90
       - Lng passes acc. %
       - Avg pass length, m
       - Avg lng pass length, m
       
   ><h3>d. Key Passing </h3>
   
       - Final 3rd passes per 90
       - Final 3rd passes acc. %
       - Passes to penalty area per 90
       - Passes to penalty area acc. %
       - Thru passes per 90
       - Thru passes acc. %
       
       
       

# 1. Data Cleaning

In [13]:
# Load the league-wide AMF export and Bruno Fernandes' own export.
league_df = pd.read_excel('AMF_top_5_leagues.xlsx')
bruno_df = pd.read_excel('bruno_fernandes.xlsx')

# Stack both tables row-wise, discard rows containing any missing value and
# renumber the rows from zero.
main_df = (
    pd.concat([league_df, bruno_df], axis = 'rows')
    .dropna()
    .reset_index(drop = True)
)

# Persist the cleaned table (index included) for the analysis section.
main_df.to_csv('FINAL_DATA.csv')

# 2. Objectives

<h3> I. Bruno Specific Data </h3>

    1. Name, age, current club and league he plays in.
    2. Goals scored and assisted.
        - Non Penalty, Headed goals
    3. Positions played this season, primary position, and goal contributions per position
    
<h3> II. Comparing Bruno to other AMFs from Europe's top 5 leagues.</h3>

   ><h4> A. Attacking Contributions </h4>
   
       1. Goals scored/ Non-Penalty goals scored. Bar 
       2. Assists. - Bar 
       3. Shots total/ Shots on-target/ % - Scatter
       4. Goal conv. vs Goals - Scatter
       5. Touches in box per 90 - Bar 
       6. Crosses per 90 - Bar
       
       
   ><h4> B. Passing metrics </h4>
    
        1. Forward passes per 90 vs Fwd passes acc %. - Scatter
        2. Lng passes per 90 vs Lng passes acc. % - Scatter
        3. Avg pass length, m - Bar 
        4. Avg lng pass length, m - Bar
        5. Final 3rd passes p90 vs accuracy - Scatter
        6. Passes to penalty area p90 vs accuracy - Scatter
        7. Thru passes p90 vs accuracy - Scatter
        
   ><h4> C. Match-by-Match analysis </h4>
    
        1. Passes to final third p90 - Line plot
        2. Passes to penalty box p90 - Line plot
    

# 3. Analysis

## Macros and function definitions

In [134]:
# ---- Colours ----
# Background used for the axes face and for saved figures.
BACKGROUND_COLOR = '#212121'

# Bar charts: "generic" is every other player, "target" is the highlighted one.
# Primary palette
BAR_GENERIC_COLOR = '#414141'
BAR_TARGET_COLOR = '#d32f2f'
# Secondary palette
BAR_GENERIC_SECONDARY_COLOR = '#bdbdbd'
BAR_TARGET_SECONDARY_COLOR = '#b71c1c'

# Scatter plots (point and target colours currently share the same value).
SCATTER_POINT_COLOR = '#f44336'
SCATTER_TARGET_COLOR = '#f44336'
SCATTER_TEXT_COLOR = '#fff59d'

# ---- Font families ----
FONT_TICKS = 'Franklin Gothic Medium'
FONT_LABEL = 'Franklin Gothic Medium'
FONT_TITLE = 'Arial Rounded MT Bold'
FONT_TEXT = 'Franklin Gothic Medium'

# ---- Output ----
IMAGES = 'Images/'   # folder prefix for saved figures
DPI = 600            # resolution passed to savefig

# ---- Font size presets, keyed by preset name ----
FONT_SIZES = {
    'Small':       {'FONT_TICKS': 13, 'FONT_LABEL': 14, 'FONT_TITLE': 16},
    'Medium':      {'FONT_TICKS': 15, 'FONT_LABEL': 16, 'FONT_TITLE': 18},
    'Large':       {'FONT_TICKS': 18, 'FONT_LABEL': 19, 'FONT_TITLE': 20},
    'Extra Large': {'FONT_TICKS': 18, 'FONT_LABEL': 20, 'FONT_TITLE': 22},
}

# Player highlighted throughout the analysis.
NAME = 'Bruno Fernandes'

<h3>**Function Definitions**</h3>

In [248]:
def annotate_plot(ax, x_label, y_label, title, fontsize, rotation_x = 90, rotation_y = 90, ):
    """Set the axis labels, title and x tick-label size of ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to annotate.
    x_label, y_label : str
        Text of the x / y axis labels.
    title : str
        Axes title (drawn in white).
    fontsize : dict
        Mapping with the keys 'FONT_TICKS', 'FONT_LABEL' and 'FONT_TITLE',
        i.e. one entry of FONT_SIZES.
    rotation_x, rotation_y : float
        Rotation in degrees applied to the x / y axis *label* text
        (not to the tick labels).
    """
    # `labelsize` is the tick-label font size. The previous `size` keyword is
    # the tick-mark length, so the FONT_TICKS value never reached the text.
    ax.tick_params(axis = 'x', color = 'white', labelsize = fontsize['FONT_TICKS'])

    ax.set_xlabel(x_label, rotation = rotation_x,
                  family = FONT_LABEL, fontsize = fontsize['FONT_LABEL'])
    ax.set_ylabel(y_label, rotation = rotation_y,
                  family = FONT_LABEL, fontsize = fontsize['FONT_LABEL'])

    ax.set_title(title, family = FONT_TITLE, fontsize = fontsize['FONT_TITLE'], color = 'white')

    
def init_plot(figsize):
    """Configure seaborn for the notebook's dark theme.

    ``figsize`` is stored as the 'figure.figsize' rc value; the "dark" style
    is then applied with BACKGROUND_COLOR as the axes face colour.
    """
    figure_rc = {'figure.figsize': figsize}
    axes_style = {"axes.facecolor": BACKGROUND_COLOR}

    sns.set(rc = figure_rc)
    sns.set_style("dark", axes_style)
    
def bar_get_color_list(df, name, feature = 'Player', prime_pallete = True):
    """Return one bar colour per row of ``df``, highlighting the target row.

    Parameters
    ----------
    df : pandas.DataFrame
        Data in the order the bars will be drawn.
    name : value looked up in ``df[feature]``; the first matching row is
        the highlighted one.
    feature : str
        Column holding the names.
    prime_pallete : bool
        True selects BAR_GENERIC_COLOR / BAR_TARGET_COLOR, False selects the
        *_SECONDARY_COLOR pair.

    Returns
    -------
    list of str
        ``len(df)`` colours; the target colour at the position of the first
        row matching ``name``, the generic colour everywhere else.

    Raises
    ------
    IndexError
        If no row of ``df[feature]`` equals ``name``.
    """
    # Locate the target by row *position* and compare with `==`. The earlier
    # `i is int(label)` test relied on CPython's small-int cache and on the
    # index labels happening to equal row positions.
    target_pos = next(
        (pos for pos, value in enumerate(df[feature]) if value == name),
        None,
    )
    if target_pos is None:
        raise IndexError("{!r} not found in column {!r}".format(name, feature))

    if prime_pallete:
        generic_color, target_color = BAR_GENERIC_COLOR, BAR_TARGET_COLOR
    else:
        generic_color, target_color = BAR_GENERIC_SECONDARY_COLOR, BAR_TARGET_SECONDARY_COLOR

    return [target_color if pos == target_pos else generic_color
            for pos in range(df.shape[0])]

def save_plot(fig, name, hideplot = False):
    """Save ``fig`` to the path ``name`` and optionally close it.

    The figure is written with BACKGROUND_COLOR as its face colour at DPI
    resolution. When ``hideplot`` is true the figure is closed afterwards.
    """
    fig.savefig(name, facecolor = BACKGROUND_COLOR, dpi = DPI)

    if hideplot:
        # Close the figure that was saved, not whichever figure happens to be
        # current in pyplot's global state.
        plt.close(fig)

def scatter_init_data(df, label, X, Y, sort_y = True, n = 15):
    """Pick the top ``n`` rows of ``df`` for a scatter plot.

    Rows are ranked in descending order of column ``Y`` (or of column ``X``
    when ``sort_y`` is false) and the first ``n`` are kept.

    Returns a tuple ``(targets, x, y)`` holding the values of the ``label``,
    ``X`` and ``Y`` columns of the kept rows, in ranked order.
    """
    rank_column = Y if sort_y else X

    top_rows = (
        df[[label, X, Y]]
        .sort_values(by = rank_column, ascending = False)
        .iloc[:n]
    )

    return top_rows[label].values, top_rows[X].values, top_rows[Y].values

def scatter_plot_data(targets, X, Y, sizes,  y_sorted = True, margin = 0, shift = 0):
    """Draw labelled scatter points on the current pyplot axes.

    Parameters
    ----------
    targets : sequence
        Label text for each point.
    X, Y : sequences
        Point coordinates, indexed in step with ``targets``.
    sizes : dict
        Font-size preset; its 'FONT_LABEL' entry sizes the point labels.
    y_sorted : bool
        Currently unused by the body.
    margin : float
        The first point is always drawn. Each later point is drawn only when
        ``Y[i-1] - Y[i]`` exceeds ``margin``; otherwise both the marker and
        its label are skipped.
    shift : float
        Vertical offset added to the label position.

    Returns
    -------
    The current axes (``plt.gca()``).
    """
    config = {
        'ha': 'center',
        'va': 'center',
    }

    for i, tgt in enumerate(targets):
        # Use the X / Y arguments. The body used to read the notebook-level
        # globals `x` and `y`, so it only worked when those happened to hold
        # the same data that was passed in.
        if i > 0 and not (Y[i-1] - Y[i] > margin):
            continue

        plt.scatter(X[i], Y[i], c = SCATTER_POINT_COLOR)
        plt.text(X[i], Y[i] + shift, tgt, config,
                 size = sizes['FONT_LABEL'], color = SCATTER_TEXT_COLOR)

    return plt.gca()

def bar_top_n_data(player, data, statistic, n = 10, feature = 'Player'):
    """Return the top ``n`` rows of ``data`` by ``statistic``, plus ``player``.

    ``data`` is ranked in descending order of ``statistic`` and the first
    ``n`` rows are kept. If no row whose ``feature`` equals ``player`` is
    among them, the player's row(s) are appended after the top ``n``.
    The result has a fresh 0..k-1 index; ``data`` itself is not modified.
    """
    ranked = data.copy().sort_values(statistic, ascending = False)

    top_labels = ranked.iloc[:n].index
    player_labels = ranked[ranked[feature] == player].index

    # Is any of the player's rows already inside the top n?
    player_in_top = np.isin(top_labels, player_labels).any()

    keep = top_labels if player_in_top else top_labels.append(player_labels)

    return ranked.loc[keep].reset_index(drop = True)

def clean_plot(ax):
    """Restyle ``ax`` for the dark theme.

    Colours all four spines, both axis labels and the ticks / tick labels of
    both axes white, and switches the tick labels to the FONT_TICKS family.
    """
    for side in ('bottom', 'top', 'left', 'right'):
        ax.spines[side].set_color('white')

    # Change only the font family. `set_fontproperties(<str>)` builds a fresh
    # FontProperties object, which would reset any tick-label size that was
    # configured before this function is called.
    for tick_labels in (ax.get_xticklabels(), ax.get_yticklabels()):
        for label in tick_labels:
            label.set_fontfamily(FONT_TICKS)

    ax.yaxis.label.set_color('white')
    ax.tick_params(axis = 'y', colors = 'white')

    ax.xaxis.label.set_color('white')
    ax.tick_params(axis = 'x', colors = 'white')

# Analysis

In [136]:
# Load the cleaned dataset saved as FINAL_DATA.csv by the data-cleaning cell;
# the first CSV column is the index that to_csv wrote out.
df = pd.read_csv('FINAL_DATA.csv', index_col= [0])

<h3> Part I. </h3>
      - 1. 

### Part II. Bruno Comparisons

`A. 1 - Stacked Bar`

In [204]:
# Top scorers by total goals (NAME is appended if he is outside the top n).
stat = df[['Player', 'Goals', 'Non-penalty goals']]
stat = bar_top_n_data(NAME, stat, 'Goals')

prim_colors = bar_get_color_list(stat, NAME)
sec_colors = bar_get_color_list(stat, NAME, prime_pallete=False)

# Apply the figure size / theme BEFORE creating the figure so the rc values
# take effect on a fresh kernel.
init_plot((10, 10))
fig, ax = plt.subplots()

# Stacked effect: draw the total-goals bar in the secondary colour first, then
# the non-penalty bar on top in the primary colour. The secondary segment left
# visible at the end of each bar is the player's penalty goals.
sns.barplot(x = 'Goals', y = 'Player', data = stat,
            palette = sec_colors, linewidth = 0, ax = ax)
sns.barplot(x = 'Non-penalty goals', y = 'Player', data = stat,
            palette = prim_colors, linewidth = 0, ax = ax)

annotate_plot(ax, "Goals Scored", "Player", "Goals Scored", FONT_SIZES['Large'],
                  rotation_x= 0, rotation_y= 90)
clean_plot(ax)

plt.tight_layout()
save_plot(fig, IMAGES + 'A.1.jpg', hideplot= True)

`A. 2 - Bar`

In [205]:
# Top assist providers (NAME is appended if he is outside the top n).
PARAM = 'Assists'
stat = df[['Player', PARAM]]
stat = bar_top_n_data(NAME, stat, PARAM)

prim_colors = bar_get_color_list(stat, NAME)

# Apply the figure size / theme BEFORE creating the figure so the rc values
# take effect on a fresh kernel.
init_plot((10, 10))
fig, ax = plt.subplots()

# `prim_colors` is the palette computed above; the previous `colors` name was
# never defined at notebook level.
sns.barplot(x = PARAM, y = 'Player', data = stat,
            palette = prim_colors, linewidth = 0, ax = ax)
annotate_plot(ax, "Assists", "Player", "Assists", FONT_SIZES['Large'],
                  rotation_x= 0, rotation_y= 90)
clean_plot(ax)

plt.tight_layout()
save_plot(fig, IMAGES + 'A.2.jpg', hideplot= True)

`A. 3 - Scatter`

In [250]:
# 30 players with the highest 'Shots on target %', with their shot totals.
targets, x, y = scatter_init_data(df, 'Player', 'Shots total', 'Shots on target %', n = 30, )

# Apply the figure size / theme BEFORE creating the figure so the rc values
# take effect on a fresh kernel.
init_plot((12, 12))
fig = plt.figure()

ax = scatter_plot_data(targets, x, y, FONT_SIZES['Medium'] , shift = 0.25,)

annotate_plot(ax, "Shots Taken", "Shots on Target %", "Shooting Habbits", 
              FONT_SIZES['Large'], rotation_x=0, rotation_y= 90)
clean_plot(ax)

plt.tight_layout()
save_plot(fig, IMAGES + 'A.3.jpg', hideplot = True)

`A. 4 - Scatter`

In [251]:
# Preview the analysis frame. `df` is the frame loaded from FINAL_DATA.csv in
# this section; show only the first rows instead of dumping the whole table.
df.head()

In [None]:
# A.4: goals vs. goal conversion. This cell was a verbatim copy of A.3, so it
# re-plotted the shooting data and overwrote 'A.3.jpg'.
# NOTE(review): the column names 'Goals' and 'Goal conv., %' are taken from the
# stat list in the preamble, and the choice of axes is an assumption -
# confirm both against FINAL_DATA.csv and the intended chart.
targets, x, y = scatter_init_data(df, 'Player', 'Goals', 'Goal conv., %', n = 30, )

# Apply the figure size / theme BEFORE creating the figure so the rc values
# take effect on a fresh kernel.
init_plot((12, 12))
fig = plt.figure()

ax = scatter_plot_data(targets, x, y, FONT_SIZES['Medium'] , shift = 0.25,)

annotate_plot(ax, "Goals", "Goal Conversion %", "Goal Conversion", 
              FONT_SIZES['Large'], rotation_x=0, rotation_y= 90)
clean_plot(ax)

plt.tight_layout()
save_plot(fig, IMAGES + 'A.4.jpg', hideplot = True)