In [None]:
!pip install azure-storage-file-datalake --pre

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os, uuid, sys, re
import statistics
!{sys.executable} -m pip uninstall pyarrow --yes
!{sys.executable} -m pip install pyarrow
import pyarrow.parquet as pq

from dotenv import load_dotenv
from pathlib import Path
from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

load_dotenv()

In [None]:
# ---------------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------------

# Date partition (exactly as it appears inside the lake path, slashes
# included) of the files to download.
AZGEN2READ = "/year_2020/month_12/day_17/"

# ---------------------------------------------------------------------
# Credentials, read from the process environment (None when unset)
# ---------------------------------------------------------------------
AZURE_STRING_CONNECT = os.environ.get('AZURE_STRING_CONNECT')
FILE_SYSTEM_NAME = os.environ.get('FILE_SYSTEM_NAME')

# Player surname -> player code; the code is the stem of each player's
# local parquet file.
PLAYERS = dict(
    Courtois='TC01',
    Carvajal='DC02',
    Militao='EM03',
    Ramos='SR04',
    Varane='RV05',
    Kroos='TK08',
    Modric='LM10',
    Casemiro='CC14',
    Hazard='EH07',
    Benzema='KB09',
    Vinicius='VJ20',
)

## Obtener datos de Azure Data Lake Gen2

In [None]:
## Download the parquet files of one date partition from the trusted zone of Azure Data Lake Gen2
def getDataAzureBlobGen2(file_system_client, date_path=None):
    """Copy the trusted-zone parquet files of one date partition to local disk.

    Lake paths are matched against
    ``trusted/<entity>/year_.../<directory>/<file>.parquet``. Every match whose
    date segment equals ``date_path`` is downloaded and written to
    ``<entity>/<name>.parquet`` (relative to the current working directory),
    where ``<name>`` is the part of ``<directory>`` before the first ``_``.
    Several parquet files in the same lake directory map to the same local
    file, so the last one downloaded wins.

    Args:
        file_system_client: object exposing ``get_paths()`` and
            ``get_directory_client(path)``, as built by
            ``DataLakeServiceClient.get_file_system_client``.
        date_path: date segment to download, slashes included (for example
            ``"/year_2020/month_12/day_17/"``). When omitted, the module-level
            ``AZGEN2READ`` is used.

    Returns:
        None. Progress is reported with ``print``.
    """
    if date_path is None:
        date_path = AZGEN2READ

    # Raw string: "\/" and "\." are regex escapes, not Python string escapes.
    # The trailing "$" keeps side files such as "x.parquet.crc" from matching.
    lake_path = re.compile(r"(trusted)\/(.*)(\/year_.*\/)(.*\/)(.*\.parquet)$")

    for path in file_system_client.get_paths():
        match = lake_path.search(path.name)
        if match is None:
            continue

        zone, entity, dateRead, fileNameRead, file = match.groups()
        if dateRead != date_path:
            continue

        # fileNameRead ends with "/"; drop it before taking the text in front
        # of "_", otherwise a directory without "_" yields a name ending in "/".
        fileNameWrite = fileNameRead.rstrip("/").split("_")[0]

        remote_directory = zone + "/" + entity + dateRead + fileNameRead
        print("Loading file: " + remote_directory + file)
        directory_client = file_system_client.get_directory_client(remote_directory)
        file_client = directory_client.get_file_client(file)
        downloaded_bytes = file_client.download_file().readall()

        print("Saving file: " + entity + "/" + fileNameWrite + ".parquet")
        Path(entity).mkdir(parents=True, exist_ok=True)  # create the directory if missing
        # The with-statement closes the file; no explicit close() is needed.
        with open(entity + "/" + fileNameWrite + ".parquet", "wb") as my_file:
            my_file.write(downloaded_bytes)

    print('Done!')

In [None]:
# Build the Data Lake service client from the connection string, then the
# client for the configured file system.
service_client = DataLakeServiceClient.from_connection_string(AZURE_STRING_CONNECT)
file_system_client = service_client.get_file_system_client(file_system=FILE_SYSTEM_NAME)

## Download the data
getDataAzureBlobGen2(file_system_client)

## Health

In [None]:
## Health report (printed summary + charts) of one player in one matchday
def getHealthPlayer(player, game):
    """Print the health summary of ``player`` in matchday ``game`` and plot the vitals.

    Reads ``health/<code>.parquet`` (``<code>`` looked up in ``PLAYERS``),
    prints name, position, weight, height and body-mass index, draws three
    stacked charts (blood pressure, heart rate, blood-oxygen saturation) and
    saves the figure to ``graphics/health/<code>.png``.

    Args:
        player: key of ``PLAYERS`` (for example ``'Ramos'``).
        game: matchday number, compared with the ``jornada`` column.

    Returns:
        None. An unknown player or a matchday without data is reported with a
        printed message instead of an exception.
    """
    if player not in PLAYERS:
        print('Error: el jugador ' + str(player) + ' no existe.')
        return

    code = PLAYERS[player]
    playerHealth = pq.read_table('health/' + code + '.parquet').to_pandas()

    # Filter first and test for emptiness: a min/max range check accepts a
    # matchday that lies inside the range but has no row, and crashes later.
    playerHealth = playerHealth[playerHealth['jornada'] == game]
    if playerHealth.empty:
        print('Error: la jornada ' + str(game) + ' no se encuentra en los datos.')
        return

    # Work with the scalars of the matching row: float(Series) is deprecated in
    # pandas, and Series.to_string() padding differs between pandas versions.
    row = playerHealth.iloc[0]
    fullName = str(row['Name']).strip() + ' ' + str(row['Surname']).strip()

    ## Body-mass index: weight / (height in metres) ** 2 (height is divided by 100)
    imc = "{:.2f}".format(float(row['Weight']) / ((float(row['Height']) / 100) ** 2))

    ## Printed summary
    print('--- Informe de salud ---')
    print('------------------------')
    print('* Nombre: ' + fullName)
    print('* Posición: ' + str(row['Position']).strip())
    print('* Peso: ' + str(row['Weight']) + 'Kg')
    print('* Altura: ' + str(row['Height']) + 'cm')
    print('* IMC: ' + imc)
    print('------------------------')
    print('------------------------')

    ## Charts: one sample per minute of a 90-minute match
    minutos = np.arange(start=1, stop=91, step=1)
    mmHg_H = row['mmHg_H']
    mmHg_L = row['mmHg_L']
    pulso = row['pulso']
    oxigeno = row['SpO2']

    fig, ax = plt.subplots(3, 1, figsize=(23.0, 18.0), dpi=80, constrained_layout=True)
    fig.suptitle(fullName + ' - Jornada ' + str(row['jornada']), fontsize=16)

    ## Figure 1: blood pressure
    # plot() returns a list of lines, so the labels are attached to the lines
    # themselves and legend() collects them.
    ax[0].plot(minutos, mmHg_H, label="Sistólica")
    ax[0].plot(minutos, mmHg_L, label="Diastólica")
    ax[0].set_xlabel('Minutos')
    ax[0].set_ylabel('mm Hg')
    ax[0].grid()
    ax[0].set_title('Presión arterial')
    ax[0].legend(loc="center right", borderaxespad=1, title="mm Hg")

    ## Figure 2: heart rate, coloured by range (<120 green, >=180 red, otherwise gold)
    clrs = ['green' if x < 120 else 'red' if x >= 180 else 'gold' for x in pulso]
    ax[1].bar(minutos, pulso, color=clrs, width=0.4)
    ax[1].set_xlabel('Minutos')
    ax[1].set_ylabel('Pulsaciones')
    ax[1].set_title('Frecuencia cardíaca')
    ax[1].grid()

    ## Figure 3: blood-oxygen saturation
    ax[2].plot(minutos, oxigeno, 'o')
    ax[2].set_xlabel('Minutos')
    ax[2].set_ylabel('SpO2')
    ax[2].grid()
    ax[2].set_title('Saturación de oxígeno en sangre')

    # savefig does not create missing directories.
    Path("graphics/health").mkdir(parents=True, exist_ok=True)
    fig.savefig("graphics/health/" + code + ".png")
    plt.show()
    


In [None]:
## Health comparison of two players in one matchday
def playersHealthCompare(playerOne, playerTwo, game):
    """Print a side-by-side health summary of two players and plot their heart rates.

    Reads ``health/<code>.parquet`` for both players (codes looked up in
    ``PLAYERS``), prints name, position, weight, height and body-mass index of
    each, draws a grouped bar chart of the per-minute heart rate and saves it
    to ``graphics/health/compare/<codeOne>_<codeTwo>.png``.

    Args:
        playerOne: key of ``PLAYERS`` for the first player.
        playerTwo: key of ``PLAYERS`` for the second player.
        game: matchday number, compared with the ``jornada`` column.

    Returns:
        None. An unknown player, or a matchday missing for either player, is
        reported with a printed message instead of an exception.
    """
    if playerOne not in PLAYERS or playerTwo not in PLAYERS:
        print('Error: el jugador seleccionado no existe.')
        return

    ## Load both players and keep the row of the requested matchday
    rows = []
    for player in (playerOne, playerTwo):
        health = pq.read_table('health/' + PLAYERS[player] + '.parquet').to_pandas()
        health = health[health['jornada'] == game]
        # The matchday must exist for BOTH players, not only for the first one.
        if health.empty:
            print('Error: la jornada ' + str(game) + ' no se encuentra en los datos.')
            return
        rows.append(health.iloc[0])
    rowOne, rowTwo = rows

    def describe(row):
        # Scalars of one row; avoids float(Series), which pandas deprecates.
        name = str(row['Name']).strip() + ' ' + str(row['Surname']).strip()
        position = str(row['Position']).strip()
        weight = str(row['Weight']) + 'Kg'
        height = str(row['Height']) + 'cm'
        # Body-mass index: weight / (height in metres) ** 2
        imc = "{:.2f}".format(float(row['Weight']) / ((float(row['Height']) / 100) ** 2))
        return name, position, weight, height, imc

    nameOne, positionOne, weightOne, heightOne, imcOne = describe(rowOne)
    nameTwo, positionTwo, weightTwo, heightTwo, imcTwo = describe(rowTwo)

    ## Printed summary
    print('---------------- Informe de salud ----------------')
    print('--------------------------------------------------')
    print('* Nombre:   ' + nameOne + '  |  ' + nameTwo)
    print('* Posición: ' + positionOne + '       |  ' + positionTwo)
    print('* Peso:     ' + weightOne + '          |  ' + weightTwo)
    print('* Altura:   ' + heightOne + '         |  ' + heightTwo)
    print('* IMC:      ' + imcOne + '         |  ' + imcTwo)
    print('--------------------------------------------------')
    print('--------------------------------------------------')

    ## Chart: one pair of bars per minute of a 90-minute match
    minutos = np.arange(start=1, stop=91, step=1)
    pulsoOne = rowOne['pulso']
    pulsoTwo = rowTwo['pulso']

    fig, ax = plt.subplots(figsize=(80.0, 5.0), dpi=65, constrained_layout=True)
    fig.suptitle('Comparativa de salud: ' + nameOne + ' | ' + nameTwo + ' - Jornada ' + str(rowOne['jornada']), fontsize=16)

    width = 0.35
    rects1 = ax.bar(minutos - width/2, pulsoOne, width, label=nameOne)
    rects2 = ax.bar(minutos + width/2, pulsoTwo, width, label=nameTwo)

    ax.set_ylabel('Pulsaciones')
    ax.set_title('Frecuencia cardíaca')
    ax.set_xticks(minutos)
    ax.set_xticklabels(minutos)
    ax.legend()

    def autolabel(rects):
        # Write each bar's value just above it.
        for rect in rects:
            height = rect.get_height()
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')

    autolabel(rects1)
    autolabel(rects2)

    # savefig does not create missing directories.
    Path("graphics/health/compare").mkdir(parents=True, exist_ok=True)
    fig.savefig("graphics/health/compare/" + PLAYERS[playerOne] + '_' + PLAYERS[playerTwo] + ".png")
    plt.show()

In [None]:
## Valid player keys: {'Courtois','Carvajal','Militao', 'Ramos','Varane','Kroos','Modric','Casemiro','Hazard','Benzema','Vinicius'}
## Health report of one player in one matchday.
getHealthPlayer('Ramos', 32)

## Health comparison between two players.
playersHealthCompare('Ramos', 'Vinicius', 33)

## Sport

In [None]:
## Sport statistics comparison of two players in one matchday
def playersSportComapare(playerOne, playerTwo, game):
    """Plot a grouped bar chart comparing the sport statistics of two players.

    Reads ``sport/<code>.parquet`` for both players (codes looked up in
    ``PLAYERS``), takes the row of matchday ``game`` and compares passes,
    shots, cards and goals. The figure is saved to
    ``graphics/sport/compare/<codeOne>_<codeTwo>.png``.

    Args:
        playerOne: key of ``PLAYERS`` for the first player.
        playerTwo: key of ``PLAYERS`` for the second player.
        game: matchday number, compared with the ``jornada`` column.

    Returns:
        None. An unknown player, or a matchday missing for either player, is
        reported with a printed message instead of an exception.
    """
    if playerOne not in PLAYERS or playerTwo not in PLAYERS:
        print('Error: el jugador seleccionado no existe.')
        return

    # Columns compared, in the same order as the x-axis labels below.
    columns = ['Pass_total', 'Pass_success', 'Shoot_total', 'Shoot_success',
               'Yellow_card', 'Red_card', 'Goals']
    labels = ['Pases totales', 'Pases completados', 'Disparos totales', 'Disparos a puerta', 'Tarjetas amarillas', 'Tarjetas rojas', 'Goles']

    ## Load both players and keep the row of the requested matchday
    rows = []
    for player in (playerOne, playerTwo):
        sport = pq.read_table('sport/' + PLAYERS[player] + '.parquet').to_pandas()
        sport = sport[sport['jornada'] == game]
        # The matchday must exist for BOTH players, not only for the first one.
        if sport.empty:
            print('Error: la jornada ' + str(game) + ' no se encuentra en los datos.')
            return
        rows.append(sport.iloc[0])
    rowOne, rowTwo = rows

    def fullName(row):
        return str(row['Name']).strip() + ' ' + str(row['Surname']).strip()

    nameOne, nameTwo = fullName(rowOne), fullName(rowTwo)
    positionOne = str(rowOne['Position']).strip()
    positionTwo = str(rowTwo['Position']).strip()

    # Read the numbers straight from the row instead of parsing the text that
    # Series.to_string() produces.
    playerOneTotal = [int(rowOne[column]) for column in columns]
    playerTwoTotal = [int(rowTwo[column]) for column in columns]

    ## Chart
    fig, ax = plt.subplots(figsize=(30.0, 15.0), dpi=60, constrained_layout=False)

    x = np.arange(len(labels))
    width = 0.35

    rects1 = ax.bar(x - width/2, playerOneTotal, width, label=nameOne + ' - ' + positionOne)
    rects2 = ax.bar(x + width/2, playerTwoTotal, width, label=nameTwo + ' - ' + positionTwo)

    # Axis label, title and custom x-axis tick labels.
    ax.set_ylabel('Estadísticas')
    ax.set_title('Comparativa deportiva: ' + nameOne + ' | ' + nameTwo + ' - Jornada ' + str(rowOne['jornada']), fontsize=16)
    ax.set_xticks(x)
    ax.set_xticklabels(labels)
    ax.legend()

    def autolabel(rects):
        # Write each bar's value just above it.
        for rect in rects:
            height = rect.get_height()
            ax.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),
                        textcoords="offset points",
                        ha='center', va='bottom')

    autolabel(rects1)
    autolabel(rects2)

    fig.tight_layout()

    # savefig does not create missing directories.
    Path("graphics/sport/compare").mkdir(parents=True, exist_ok=True)
    fig.savefig("graphics/sport/compare/" + PLAYERS[playerOne] + '_' + PLAYERS[playerTwo] + ".png")
    plt.show()

In [None]:
def getSportTeam(game):
    """Plot the team's sport statistics of one matchday, aggregated by position.

    Reads ``sport/<code>.parquet`` for every player in ``PLAYERS``, sums the
    statistics of matchday ``game`` per position and draws a 2x2 figure:
    passes and shots as nested pies, cards and saves/goals as grouped bars.
    The figure is saved to ``graphics/sport/team_<game>.png``.

    Args:
        game: matchday number, compared with the ``jornada`` column.

    Returns:
        None. When no player has a row for ``game`` a message is printed.

    Raises:
        KeyError: if a player's ``Position`` is not one of 'Portero',
            'Defensa', 'Centrocampista' or 'Delantero'.
    """
    positions = ('Portero', 'Defensa', 'Centrocampista', 'Delantero')
    stats = ('Pass_total', 'Pass_success', 'Shoot_total', 'Shoot_success',
             'Yellow_card', 'Red_card', 'Goalkeeper', 'Goals')

    # totals[stat][position] -> sum over the players in that position
    totals = {stat: dict.fromkeys(positions, 0) for stat in stats}

    # True as soon as ANY player has a row for this matchday. Testing only the
    # frame left over from the last loop iteration would make the outcome
    # depend on whether the last player in PLAYERS took part.
    anyData = False

    for code in PLAYERS.values():
        playerSport = pq.read_table('sport/' + code + '.parquet').to_pandas()
        playerSport = playerSport[playerSport['jornada'] == game]
        if playerSport.empty:
            continue

        anyData = True
        row = playerSport.iloc[0]
        position = str(row['Position']).strip()
        for stat in stats:
            # Read the number from the row instead of parsing to_string() text.
            totals[stat][position] += int(row[stat])

    if not anyData:
        print('Error: la jornada ' + str(game) + ' no se encuentra en los datos.')
        return

    fig, ax = plt.subplots(2, 2, figsize=(23.0, 18.0), dpi=65, constrained_layout=False)
    fig.suptitle('Estadísticas deportivas jornada ' + str(game), fontsize=16)

    size = 0.3
    labels = 'Porteros', 'Defensas', 'Centrocampistas', 'Delanteros'

    cmap = plt.get_cmap("tab20c")
    outer_colors = cmap(np.arange(4)*4)
    inner_colors = cmap(np.array([1, 2, 5, 6, 9, 10, 12, 13]))

    x = np.arange(len(labels))
    width = 0.35

    def byPosition(stat):
        # Totals of one statistic, ordered like `labels`.
        return [totals[stat][position] for position in positions]

    def nestedPie(axis, totalStat, successStat, title):
        # Outer ring: one wedge per position; inner ring: [total, success] pairs.
        # NOTE(review): the outer wedge is sized by total + success
        # (vals.sum(axis=1)), as in the original code - confirm this is intended.
        vals = np.array(list(zip(byPosition(totalStat), byPosition(successStat))))
        axis.pie(vals.sum(axis=1), radius=1, labels=labels, autopct='%1.1f%%', pctdistance=1.1, labeldistance=1.2, colors=outer_colors,
                 wedgeprops=dict(width=size, edgecolor='w'))
        axis.pie(vals.flatten(), radius=1-size, colors=inner_colors,
                 wedgeprops=dict(width=size, edgecolor='w'))
        axis.set(aspect="equal")
        axis.set_title(title, fontsize=14)
        axis.legend()

    def autolabel(axis, rects):
        # Annotate on the axis that owns the bars, so every chart gets its own labels.
        for rect in rects:
            height = rect.get_height()
            axis.annotate('{}'.format(height),
                          xy=(rect.get_x() + rect.get_width() / 2, height),
                          xytext=(0, 3),
                          textcoords="offset points",
                          ha='center', va='bottom')

    def groupedBars(axis, left, right, ylabel, title):
        # `left` / `right` are (stat, color, legend label) triples.
        rectsLeft = axis.bar(x - width/2, np.array(byPosition(left[0])), width, color=left[1], label=left[2])
        rectsRight = axis.bar(x + width/2, np.array(byPosition(right[0])), width, color=right[1], label=right[2])
        axis.set_ylabel(ylabel)
        axis.set_title(title, fontsize=14)
        axis.set_xticks(x)
        axis.set_xticklabels(labels)
        axis.legend()
        autolabel(axis, rectsLeft)
        autolabel(axis, rectsRight)

    ## Figure 1: passes
    nestedPie(ax[0][0], 'Pass_total', 'Pass_success', 'Pases (Pases completados)')

    ## Figure 2: shots
    nestedPie(ax[0][1], 'Shoot_total', 'Shoot_success', 'Disparos (Disparos a portería)')

    ## Figure 3: cards
    groupedBars(ax[1][0],
                ('Yellow_card', 'gold', 'Tarjetas Amarillas'),
                ('Red_card', 'orangered', 'Tarjetas Rojas'),
                'Nº Tarjetas', 'Tarjetas')

    ## Figure 4: saves and goals
    groupedBars(ax[1][1],
                ('Goalkeeper', 'gray', 'Paradas'),
                ('Goals', 'green', 'Goles'),
                'Estadísticas', 'Paradas y goles')

    ## Save and show the charts (savefig does not create missing directories)
    Path("graphics/sport").mkdir(parents=True, exist_ok=True)
    fig.savefig("graphics/sport/team_" + str(game) + ".png")
    plt.show()

In [None]:
## Valid player keys: {'Courtois','Carvajal','Militao', 'Ramos','Varane','Kroos','Modric','Casemiro','Hazard','Benzema','Vinicius'}
## Sport comparison between two players.
playersSportComapare('Benzema', 'Hazard', 30)

## Sport statistics of the whole team for one matchday.
getSportTeam(33)