In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import nfl_data_py as nfl
import numpy as np

In [4]:
def buscar_gsis_id(nombre: str, year: int):
    """
    Look up a player's gsis_it_id from a full or partial name.

    Parameters:
    - nombre: Full or partial player name. It is matched literally (not as a
      regular expression) and case-insensitively against
      "<first_name> <last_name>"; surrounding whitespace is ignored.
    - year: Season whose weekly rosters are searched.

    Returns:
    - The gsis_it_id when the name resolves to exactly one player.
    - None when nothing matches or when several different players match
      (the candidates are printed so the caller can refine the search).
    """
    rosters = nfl.import_weekly_rosters([year])

    # Build the full name without mutating the frame returned by the loader.
    full_name = rosters['first_name'] + ' ' + rosters['last_name']

    # regex=False: names such as "A.J." or "St. Brown" must be matched
    # literally; as a regex the dots would act as wildcards and characters
    # like "(" would raise.
    mask = full_name.str.contains(nombre.strip(), case=False, na=False, regex=False)

    coincidencias = (
        rosters.assign(full_name=full_name)
        .loc[mask, ['full_name', 'team', 'position', 'gsis_it_id']]
        .drop_duplicates()
    )

    if coincidencias.empty:
        print(f"No se encontraron coincidencias para '{nombre}'.")
        return None

    # A player who changed team or listed position during the season shows up
    # as several distinct rows that share one gsis_it_id; that is still a
    # single player, so uniqueness is decided by the id, not by the row count.
    ids = coincidencias['gsis_it_id']
    same_player = ids.notna().all() and ids.nunique() == 1

    if len(coincidencias) == 1 or same_player:
        # NOTE(review): when several rows exist the last one is shown,
        # presumably the most recent roster entry - confirm the loader's order.
        jugador = coincidencias.iloc[-1]
        print(f"Jugador encontrado: {jugador['full_name']} | {jugador['team']} | {jugador['position']}")
        return jugador['gsis_it_id']

    print(f"Se encontraron varias coincidencias para '{nombre}':")
    print(coincidencias.to_string(index=False))
    return None

In [5]:
def _plot_vs_defensive_difficulty(eff_vs_def: pd.DataFrame, y_col: str, title: str) -> None:
    """Scatter ``y_col`` against defensive difficulty, labelling points as 'OPP (W<week>)'."""
    # matplotlib is only needed for plotting, so it is imported where it is used.
    import matplotlib.pyplot as plt

    # Points with an undefined coordinate cannot be drawn or labelled.
    points = eff_vs_def.dropna(subset=['defensive_difficulty', y_col])

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(points['defensive_difficulty'], points[y_col], alpha=0.7)
    ax.set_xlabel('Defensive Difficulty')
    ax.set_ylabel(y_col)
    ax.set_title(title)
    ax.grid(True)
    fig.suptitle('ITAM SportsAnalyitcs | by Emilio Mtz', fontsize=7.5)

    for x, y, team, week in zip(
        points['defensive_difficulty'], points[y_col], points['opponent_team'], points['week']
    ):
        ax.text(x, y, f"{team} (W{week})", fontsize=8, alpha=0.7)

    fig.tight_layout()
    plt.show()


def weekly_opposition_statistics_eff(gsis_it_id: str, year: int) -> "pd.DataFrame | None":
    """
    Compare a player's weekly production with the yards each opposing defense allowed.

    For every week in which the player has weekly stats, the function computes:
    - defensive_difficulty: total yards the opposing defense allowed in that
      game on pass/run plays (passing yards for QB/WR/TE, rushing yards for RB).
    - yards_per_touch: (rushing + receiving yards) / (carries + receptions).
    - exploitation: the player's total yards divided by defensive_difficulty.
    Ratios with a zero denominator are left as NaN. Two scatter plots
    (exploitation and yards_per_touch versus defensive difficulty) are shown.

    Parameters:
    - gsis_it_id: Unique player ID.
    - year: Season year.

    Returns:
    - DataFrame with one row per week (player totals, opponent, defensive
      difficulty and the two ratios), or None when the player has no data for
      that season or their position group is not QB, WR, TE or RB.
    """
    weekly = nfl.import_weekly_data([year])
    rosters = nfl.import_weekly_rosters([year])

    # The roster frame presumably repeats each player once per week; collapse
    # it to unique id pairs first so the merge does not multiply weekly rows.
    id_map = rosters[['player_id', 'gsis_it_id']].drop_duplicates()
    df = pd.merge(weekly, id_map, on='player_id', how='left')
    player_df = df[df['gsis_it_id'] == gsis_it_id].copy().drop_duplicates()

    if player_df.empty:
        print(f"No se encontraron datos para el jugador {gsis_it_id} en {year}")
        return None

    # Decide which yards describe the defense before loading play-by-play
    # data, so unsupported positions exit without that download.
    posicion = player_df['position_group'].iloc[0]
    if posicion in ['QB', 'WR', 'TE']:
        yards_column = 'passing_yards'
    elif posicion == 'RB':
        yards_column = 'rushing_yards'
    else:
        print(f"Posición {posicion} no soportada para esta métrica.")
        return None

    plays = nfl.import_pbp_data([year])
    scrimmage_plays = plays[plays['play_type'].isin(['pass', 'run'])]

    # Yards allowed per game by each defense (missing yardage counts as 0).
    defense_difficulty = (
        scrimmage_plays.groupby(['week', 'defteam'])[yards_column]
        .sum()
        .reset_index()
        .rename(columns={'defteam': 'opponent_team', yards_column: 'defensive_difficulty'})
    )

    # Opponent faced by the player each week, with that defense's yards allowed.
    opponent_teams = (
        player_df[['week', 'opponent_team']]
        .drop_duplicates()
        .reset_index(drop=True)
    )
    merged_defense = pd.merge(opponent_teams, defense_difficulty, on=['opponent_team', 'week'], how='left')

    # Player totals per week.
    agg_dict = {
        'rushing_yards': 'sum',
        'receiving_yards': 'sum',
        'attempts': 'sum',
        'receptions': 'sum',
        'carries': 'sum'
    }
    player_efficiency = (
        player_df.groupby('week')
        .agg(agg_dict)
        .reset_index()
    )

    # Touches must match the yards in the numerator: rushing yards come from
    # carries and receiving yards from receptions. Pass attempts are not touches.
    # NOTE(review): for QBs total_yards excludes passing yards, so both metrics
    # only describe their rushing/receiving output - confirm this is intended.
    player_efficiency['total_yards'] = player_efficiency['rushing_yards'] + player_efficiency['receiving_yards']
    player_efficiency['total_touches'] = player_efficiency['carries'] + player_efficiency['receptions']
    touches = player_efficiency['total_touches']
    player_efficiency['yards_per_touch'] = player_efficiency['total_yards'] / touches.where(touches > 0)

    eff_vs_def = pd.merge(
        player_efficiency,
        merged_defense[['week', 'opponent_team', 'defensive_difficulty']],
        on='week',
        how='left'
    )

    # Share of the yards allowed by the defense that the player accounted for.
    allowed = eff_vs_def['defensive_difficulty']
    eff_vs_def['exploitation'] = eff_vs_def['total_yards'] / allowed.where(allowed > 0)

    _plot_vs_defensive_difficulty(eff_vs_def, 'exploitation', 'exploitation vs Defensive Difficulty')
    _plot_vs_defensive_difficulty(eff_vs_def, 'yards_per_touch', 'yards_per_touch vs defensive difficulty')

    return eff_vs_def

### yards_per_touch is the player's efficiency per play, i.e. how much he gets out of each touch.
### exploitation is the player's share of the production in each game, i.e.
    ## out of the whole pie of yards the defense allowed, how many slices he takes (is he used a lot or not?).