In [16]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import logging

# Configure the root logger once for the notebook: records at INFO level and
# above are sent both to the stream handler and to 'data_analysis.log'.
logging.basicConfig(
    level=logging.INFO,  
    format='%(asctime)s [简记] %(message)s',  # minimal layout: timestamp, fixed tag, message (non-essential fields dropped)
    handlers=[
        logging.StreamHandler(), 
        logging.FileHandler('data_analysis.log') 
    ]
)

In [5]:
## Analyse how long an attack has to be sustained before it produces a goal



# Unify the coordinate system, using the home team as the reference

def get_home_game_id(df):
    """
    Collect the distinct team ids found on home-game rows.

    :param df: event frame with ``ishomegame`` and ``teamid`` columns
    :return: array of the unique ``teamid`` values taken from the rows
             where ``ishomegame == 1`` (empty when there are no such rows)
    """
    home_rows = df['ishomegame'] == 1
    return df.loc[home_rows, 'teamid'].unique()


#home_team_id = data[data['ishomegame'] == 1]['teamid'].unique()

def puck_location(row):
    """
    Flag on which side of x = 0 an event lies, depending on the row's home/away status.

    :param row: mapping or Series providing ``ishomegame`` and ``xadjcoord``
    :return: 1 when a home-game row has ``xadjcoord >= 0`` or a non-home row has
             ``xadjcoord <= 0``; 0 when the row lies on the other side;
             NaN when ``xadjcoord`` cannot be ordered against 0 (e.g. it is NaN)
    """
    if row['ishomegame'] == 1:
        x = row['xadjcoord']
        if x >= 0:
            return 1
        if x < 0:
            return 0
        # neither comparison held, so x is not an orderable number
        return math.nan

    x = row['xadjcoord']
    if x <= 0:
        return 1
    if x > 0:
        return 0
    return math.nan


# Fill in missing values
def fill_team_possession_missing_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Fill gaps in ``teaminpossession`` / ``currentpossession`` from the previous row.

    A non-faceoff row whose ``teaminpossession`` is missing inherits both
    possession values from the row directly above it when either

    * it has the same ``compiledgametime`` as that row, or
    * the rows directly above and below report the same ``teaminpossession``.

    Every other row (including every faceoff) keeps its own values. Neighbour
    values are taken from the columns as they were before filling, so a fill
    does not propagate through a run of consecutive gaps.

    :param df: event frame, in the row order the neighbours should be read from;
               it is not modified
    :return: a copy of ``df`` (same columns) with the two possession columns filled
    """
    possession_cols = ['teaminpossession', 'currentpossession']
    # helper column -> (source column, shift); +1 reads the row above, -1 the row below
    neighbour_cols = {
        'pre_teaminpossession': ('teaminpossession', 1),
        'pre_currentpossession': ('currentpossession', 1),
        'pre_time': ('compiledgametime', 1),
        'after_teaminpossession': ('teaminpossession', -1),
        'after_currentpossession': ('currentpossession', -1),
    }

    def _resolve(row):
        """Return [teaminpossession, currentpossession] for one row."""
        if row['eventname'] != 'faceoff':
            # same timestamp as the previous event: treat as the same possession
            if row['compiledgametime'] == row['pre_time'] and pd.isna(row['teaminpossession']):
                return [row['pre_teaminpossession'], row['pre_currentpossession']]
            # gap sandwiched between two rows of the same team
            if pd.isna(row['teaminpossession']) and row['pre_teaminpossession'] == row['after_teaminpossession']:
                return [row['pre_teaminpossession'], row['pre_currentpossession']]
        return [row['teaminpossession'], row['currentpossession']]

    work = df.copy()
    for helper, (source, offset) in neighbour_cols.items():
        work[helper] = work[source].shift(offset)

    apply_cols = [
        'teaminpossession', 'currentpossession', 'eventname', 'pre_time',
        'compiledgametime', 'pre_teaminpossession', 'after_teaminpossession',
        'after_currentpossession', 'pre_currentpossession',
    ]
    filled = work[apply_cols].apply(_resolve, axis=1, result_type='expand')
    work[possession_cols] = filled
    return work.drop(columns=list(neighbour_cols))
   

def _spread_over_bins(buckets, start, end, bin_seconds):
    """Add the seconds of the interval [start, end) to every time bin it overlaps."""
    cursor = start
    while cursor < end:
        idx = int(cursor // bin_seconds)
        if idx < 0 or idx >= len(buckets):
            # timestamp outside the binned range; nothing sensible to credit
            break
        chunk_end = min(end, (idx + 1) * bin_seconds)
        buckets[idx] += chunk_end - cursor
        cursor = chunk_end


def calculate_control_rate(data: pd.DataFrame, gameid: int, team_id: int = 814,
                           bin_seconds: int = 10) -> None:
    """
    Plot, per time bin, the share of possession time held by ``team_id`` in one game.

    Rows of the game that carry a ``teaminpossession`` value are walked in
    ``compiledgametime`` order; the time between two consecutive such rows is
    credited to the team that was in possession at the earlier row, split across
    every bin the interval overlaps. Bins in which no possession time was
    recorded are drawn at 50 %.

    Fixes relative to the earlier version:
    * x and y passed to ``plt.plot`` now always have the same length,
    * the possessing team is read from ``teaminpossession`` rather than from
      ``teamid`` (the team that performed the event),
    * a possession spanning several bins is no longer credited entirely to the
      bin in which it started,
    * a game without usable possession rows logs a warning instead of raising.

    :param data: event frame with ``gameid``, ``compiledgametime`` and
                 ``teaminpossession`` columns; it is not modified
    :param gameid: value of ``gameid`` selecting the game to plot
    :param team_id: team whose possession share is plotted; all other teams are
                    pooled as "the opponent" (default 814, the previously
                    hard-coded team)
    :param bin_seconds: width of one time bin in seconds (default 10)
    :return: None; the figure is shown with ``plt.show()``
    :raises ValueError: if ``bin_seconds`` is not positive
    """
    if bin_seconds <= 0:
        raise ValueError("bin_seconds must be positive")

    # boolean filtering and sort_values both return new frames, so `data` stays untouched
    game = data[data['gameid'] == gameid].sort_values("compiledgametime")
    possession_rows = game.dropna(subset=["teaminpossession", "compiledgametime"])
    total_time = game["compiledgametime"].max()
    if possession_rows.empty or pd.isna(total_time):
        logging.warning("no possession data for game %s; nothing to plot", gameid)
        return

    times = possession_rows["compiledgametime"].to_numpy()
    teams = possession_rows["teaminpossession"].to_numpy().astype(int)

    # one bucket per started bin of the game, at least one so the plot is never empty
    n_bins = max(1, math.ceil(total_time / bin_seconds))
    bin_starts = [i * bin_seconds for i in range(n_bins)]
    team_time = [0.0] * n_bins
    opponent_time = [0.0] * n_bins

    current_team = None
    prev_time = 0
    for time, team in zip(times, teams):
        if current_team is not None:
            target = team_time if current_team == team_id else opponent_time
            _spread_over_bins(target, prev_time, time, bin_seconds)
        current_team = team
        prev_time = time

    # possession share in percent; 50 marks a bin with no recorded possession time
    possession_ratio = [
        (own / (own + other)) * 100 if (own + other) > 0 else 50
        for own, other in zip(team_time, opponent_time)
    ]

    plt.figure(figsize=(12, 6))
    plt.plot(bin_starts, possession_ratio, label=f"Team {team_id} Possession (%)")
    plt.xlabel("Game Time (seconds)")
    plt.ylabel("Possession Rate (%)")
    plt.title("Possession Ratio Over Time")
    plt.legend()
    plt.grid(True)
    plt.show()

In [23]:
# Query expressions (DataFrame.eval syntax) for the events that are counted as
# strength (attacking) events. Each entry is evaluated on its own by
# get_attack_index.
# NOTE(review): the notes below describe `inopponentarea == 0` as "the
# opponent's half"; confirm that against how puck_location builds the column.
STRENGTH_EVENTS = [
    "(eventname =='pass') & (outcome == 'successful') & (inopponentarea == 0)",
    "(eventname =='lpr') & (outcome == 'successful') & (inopponentarea == 0)",
    "(eventname =='check') & (inopponentarea == 0) & (outcome == 'successful')",
    "(eventname =='controlledentryagainst') & (outcome == 'successful')",
    "(eventname =='dumpin') & (outcome =='successful')",
    # every shot counts as attacking, whether it succeeded or not
    "(eventname =='shot')",
    # penalty drawn in the opponent's half
    "(eventname =='penaltydrawn') & (inopponentarea == 0)",
    # faceoff won in the opponent's half
    "(eventname =='faceoff') & (inopponentarea == 0) & (outcome == 'successful')",
    # successful puck protection in the opponent's half
    "(eventname =='puckprotection') & (inopponentarea == 0) & (outcome == 'successful')",
    "(eventname =='controlledentry')",
]

# Query expressions (DataFrame.eval syntax) for the events that are counted as
# defend events.
# This is a list (like STRENGTH_EVENTS) rather than a set: iterating a set of
# strings can yield a different order on every kernel start, which made the
# order of the log lines and the running totals printed by get_attack_index
# non-reproducible.
# NOTE(review): the notes below describe `inopponentarea == 1` as "the team's
# own half"; confirm that against how puck_location builds the column.
DEFEND_EVENTS = [
    # successful block in the own half (the original note said "puck protection")
    "(eventname =='block') & (outcome == 'successful')  & (inopponentarea == 1)",
    # successful dump-out (clearance) from the own half
    "(eventname =='dumpout')  & (inopponentarea == 1) & (outcome == 'successful')",
    # successful legal body check to win the puck in the own half
    "(eventname =='check') & (inopponentarea == 1) & (outcome == 'successful')",
    # penalty drawn in the own half
    "(eventname =='penaltydrawn') & (inopponentarea == 1)",
    # loose puck recovered in the own half
    "(eventname =='lpr') & (outcome == 'successful') & (inopponentarea == 1)",
    # original note: "successfully carried out"
    # NOTE(review): 'controlledentry' is also listed in STRENGTH_EVENTS; possibly
    # an exit event was intended here - confirm against the event vocabulary.
    "(eventname =='controlledentry')",
]

def get_attack_index(df,gameid,events):
    """
    Count the events of one game that match the given query expressions.

    Each expression is evaluated separately with ``DataFrame.eval`` on the rows
    of the selected game, and the per-expression match counts are added up; a
    row matching several expressions is therefore counted once per expression.
    One INFO line with the count and the running total is logged per expression.

    :param df: event frame with a ``gameid`` column plus every column the
               expressions refer to; it is not modified
    :param gameid: value of ``gameid`` selecting the game
    :param events: iterable of boolean ``DataFrame.eval`` expressions
    :return: the summed number of matches (0 when ``events`` is empty)
    """
    # filter first: only this game's rows are needed, and eval() does not mutate
    # them, so copying the whole dataset is unnecessary
    game_events = df[df['gameid'] == gameid]
    index = 0
    for action in events:
        matched = int(game_events.eval(action).sum())
        index = index + matched
        logging.info(f"the number of {action} is {matched}.total number of index is {index}")
    # previously the total was computed but never returned
    return index
        
        
        
# Load the raw event export; the path is relative to the notebook's working directory.
data = pd.read_csv("Linhac24-25_Sportlogiq.csv",
                   # Optional nullable-integer dtypes for the id columns (currently disabled):
                   # dtype={'opposingteamgoalieoniceid': 'Int64',
                   #         'teamgoalieoniceid':'Int64',
                   #         'eventid':'Int64',
                   #        'teaminpossession':'Int64',
                   #        'currentpossession':'Int64'}
                   )         
# Derive the 1 / 0 / NaN side flag row by row with puck_location
# (reads 'ishomegame' and 'xadjcoord' from each row).
data['inopponentarea'] = data.apply(puck_location, axis = 1)


# Persist the augmented frame. to_csv writes the DataFrame index as an extra
# first column by default.
data.to_csv('hockey.csv')

# Log the per-expression match counts for game 72393, first for the strength
# expressions, then for the defend expressions.
get_attack_index(data,72393,STRENGTH_EVENTS)

get_attack_index(data,72393,DEFEND_EVENTS)

2025-02-09 21:48:59,206 [简记] the number of (eventname =='pass') & (outcome == 'successful') & (inopponentarea == 0) is 238.total number of index is 238
2025-02-09 21:48:59,210 [简记] the number of (eventname =='lpr') & (outcome == 'successful') & (inopponentarea == 0) is 280.total number of index is 518
2025-02-09 21:48:59,213 [简记] the number of (eventname =='check') & (inopponentarea == 0) & (outcome == 'successful') is 38.total number of index is 556
2025-02-09 21:48:59,216 [简记] the number of (eventname =='controlledentryagainst') & (outcome == 'successful') is 45.total number of index is 601
2025-02-09 21:48:59,219 [简记] the number of (eventname =='dumpin') & (outcome =='successful') is 57.total number of index is 658
2025-02-09 21:48:59,221 [简记] the number of (eventname =='shot') is 106.total number of index is 764
2025-02-09 21:48:59,224 [简记] the number of (eventname =='penaltydrawn') & (inopponentarea == 0) is 2.total number of index is 766
2025-02-09 21:48:59,227 [简记] the number of

In [None]:
#after_prc_miss_df = fill_team_possession_missing_values(data)

