In [None]:
import json
import pandas as pd
import numpy as np
import os
import math

# Pitch geometry used to convert provider coordinates to meters.
FIELD_LENGTH = 105.0  # unit: meters
FIELD_WIDTH = 68.0  # unit: meters
GOAL_WIDTH = 7.32  # unit: meters
PENALTY_X = 105.0/2-16.5 # left point (unit: meters)
# NOTE(review): 40.32 equals 2 * 16.5 + 7.32 (a width), yet the comment calls it a point — confirm intent.
PENALTY_Y = 40.32 # upper point (unit: meters)

# Match identifiers and input files for the single match processed by this notebook.
statsbomb_match_id = 3894907
statsbomb_event_path = "/home/z_chen/workspace3/laliga/laliga_23/statsbomb/events/3894907.csv"
skillcorner_tracking_path = "/home/z_chen/workspace3/laliga/laliga_23/skillcorner_v2/tracking/1553748.json"
skillcorner_match_path = "/home/z_chen/workspace3/laliga/laliga_23/skillcorner_v2/match/1553748.json"


# Fail fast with a descriptive error before attempting any load.
for _label, _path in (
    ("Statsbomb event", statsbomb_event_path),
    ("Skillcorner tracking", skillcorner_tracking_path),
    ("Skillcorner match", skillcorner_match_path),
):
    if not os.path.exists(_path):
        raise FileNotFoundError(f"{_label} file not found: {_path}")

# load data
events = pd.read_csv(statsbomb_event_path)

# Both JSON files are read explicitly as UTF-8 so decoding does not depend on
# the machine's locale (the match file was already read this way).
with open(skillcorner_tracking_path, encoding='utf-8') as f:
    tracking = json.load(f)

with open(skillcorner_match_path, encoding='utf-8') as f:
    match = json.load(f)

# Maps the team names found in the SkillCorner match file to the names used
# in the StatsBomb event file (one entry per club).
team_name_dict = {
    'UD Almería': 'Almería',
    'Real Sociedad': 'Real Sociedad',
    'Athletic Club de Bilbao': 'Athletic Club',
    'Villarreal CF': 'Villarreal',
    'RC Celta de Vigo': 'Celta Vigo',
    'Getafe CF': 'Getafe',
    'UD Las Palmas': 'Las Palmas',
    'Sevilla FC': 'Sevilla',
    'Cadiz CF': 'Cádiz',
    'Atlético Madrid': 'Atlético Madrid',
    'RCD Mallorca': 'Mallorca',
    'Valencia CF': 'Valencia',
    'CA Osasuna': 'Osasuna',
    'Girona FC': 'Girona',
    'Real Betis Balompié': 'Real Betis',
    'FC Barcelona': 'Barcelona',
    'Deportivo Alavés': 'Deportivo Alavés',
    'Granada CF': 'Granada',
    'Rayo Vallecano': 'Rayo Vallecano',
    'Real Madrid CF': 'Real Madrid',
}

# A team missing from the table raises KeyError here.
home_team_name = team_name_dict[match['home_team']['name']]
away_team_name = team_name_dict[match['away_team']['name']]

# team id -> role ('home' / 'away') and translated name.
team_dict = {
    match[f'{side}_team']['id']: {'role': side, 'name': translated}
    for side, translated in (('home', home_team_name), ('away', away_team_name))
}

def rotate_around_center(x, y, center_x=105/2, center_y=34):
    """Rotate the point (x, y) by 180 degrees around (center_x, center_y).

    Returns the mirrored point as a ``(new_x, new_y)`` tuple.
    """
    # A half-turn negates the offset from the center on both axes.
    offset_x = x - center_x
    offset_y = y - center_y
    return center_x - offset_x, center_y - offset_y

# trackable_objects
# Registry keyed by trackable_object id. Each player gets a zero-based 'id'
# within his own team; the ball is registered last and has no 'id' key.
trackable_objects = {}
home_id_mapping = {}
away_id_mapping = {}
home_count = away_count = 0
for player in match['players']:
    role = team_dict[player['team_id']]['role']
    position = player['player_role']['name']

    # Pick the per-team slot and advance that team's counter.
    if role == 'home':
        home_id_mapping[home_count] = len(home_id_mapping) + 1
        team_index = home_count
        home_count += 1
    elif role == 'away':
        away_id_mapping[away_count] = len(away_id_mapping) + 1
        team_index = away_count
        away_count += 1
    else:
        continue

    trackable_objects[player['trackable_object']] = {
        'name': f"{player['first_name']} {player['last_name']}".strip(),
        'team': team_dict[player['team_id']]['name'],
        'role': role,
        'id': team_index,
        'position': position
    }

ball_id = match['ball']['trackable_object']
trackable_objects[ball_id] = {'name': 'ball', 'team': 'ball', 'role': 'ball', 'position': 'ball'}


# event DataFrame
# Flatten every raw event row into a fixed-order list; the column order must
# match the 22 names in `event_columns` used when the DataFrame is built.
event_list = []
for _, event in events.iterrows():
    match_id = statsbomb_match_id
    event_period = event['period']
    event_time = event['timestamp']
    event_minute = event['minute']
    event_second = event['second']
    event_type = event['type']
    # Pass-only fields default to None for every other event type.
    event_type_2 = None
    end_x = end_y = None
    pass_type = None
    pass_height = None 
    event_possession_team = event["possession_team"]
    # .get() returns None when the column is absent from the CSV.
    pass_outcome = event.get("pass_outcome")
    pass_possibility = event.get("pass_pass_success_probability")
    pass_cluster_label = event.get("pass_pass_cluster_label")
    pass_cluster_possibility = event.get("pass_pass_cluster_probability")


    if event_type == "Pass":
        end_location = event.get('pass_end_location')
        # Locations arrive as strings such as "[x, y]"; strip the brackets and parse.
        if isinstance(end_location, str):
            end_location = [float(x) for x in end_location[1:-1].split(",")]
            # Rescale by 105/120 and 68/80 into pitch meters.
            end_x = round(end_location[0] * (1.05 / 1.2), 2)
            end_y = round(end_location[1] * (68 / 80), 2)
            # end_x, end_y = rotate_around_center(end_x, end_y)
        cross = event.get('pass_cross')
        pass_height = event.get('pass_height')
        pass_type = event.get('pass_type')
        
        # Sub-type lookup keyed by pass_type.
        # NOTE(review): "High Pass" looks like a pass_height value rather than a
        # pass_type — confirm this key can ever match.
        event_type_2_mapping = {
            "Corner": "Corner",
            "Cross": "Cross",
            "High Pass": pass_height
        }
        
        # Priority: explicit pass_type mapping, then the cross flag, then pass height.
        if pass_type in event_type_2_mapping:
            event_type_2 = event_type_2_mapping[pass_type]
        elif cross and not pd.isna(cross):
            event_type_2 = "Cross"
        # NOTE(review): a NaN pass_height is truthy, so it would be stored as NaN here.
        elif pass_height:
            event_type_2 = pass_height

    elif event_type == "Shot":
        event_type_2 = event.get('shot_outcome')

    event_team = event['team']
    # 1 when the acting team is the home team, 0 otherwise.
    home_team = 1 if event_team == home_team_name else 0
    event_player = event['player']
    event_location = event.get('location')

    if isinstance(event_location, str):
        event_location = [float(x) for x in event_location[1:-1].split(",")]
        start_x, start_y = round(event_location[0] * (1.05 / 1.2), 2), round(event_location[1] * (68 / 80), 2)
        # start_x, start_y = rotate_around_center(start_x, start_y)
    else:
        start_x = start_y = None

    # Convert the "HH:MM:SS.fff" timestamp to seconds, then add a fixed offset
    # per period (45 min, 90 min, 105 min). Periods above 4 get no offset.
    time_components = event_time.split(':')
    event_seconds = round(float(time_components[0]) * 3600 + float(time_components[1]) * 60 + float(time_components[2]), 4) 
    if event_period == 2:
        event_seconds += 45 * 60
    elif event_period == 3:
        event_seconds += 90 * 60
    elif event_period == 4:
        event_seconds += (90 + 15) * 60

    event_list.append([match_id, event_period, event_time, event_minute, event_second, event_seconds, event_type, event_type_2, event_team, home_team, event_player, start_x, start_y, end_x, end_y, pass_type, pass_height, pass_outcome, pass_possibility, pass_cluster_label, pass_cluster_possibility,event_possession_team])

# Order events chronologically. Index 1 is the period and index 5 the absolute
# seconds. Seconds only add a fixed 45-minute offset per period, so first-half
# stoppage time (> 2700 s) would otherwise interleave with the start of the
# second half. Sorting by (period, seconds) keeps each period contiguous and
# matches the ordering applied to the tracking frames.
event_list_sorted = sorted(event_list, key=lambda x: (x[1], x[5]))

# Column names, in the same order as the values appended to each event row.
event_columns = [
    "match_id", "period", "time", "minute", "second", "seconds", "event_type", "event_type_2", "team", "home_team", 
    "player", "start_x", "start_y", "end_x", "end_y", "pass_type", "pass_height", "pass_outcome", 
    "pass_possibility", "pass_cluster_label", "pass_cluster_possibility", "possession_team"
]

df_event = pd.DataFrame(event_list_sorted, columns=event_columns)

# reverse the event location
# Collects the `seconds` of events followed by two consecutive possession changes.
reverse_events = []

# Iterate over every event from the first to the third-to-last, because the
# decision for row i looks at the possession team of rows i+1 and i+2.
for i in range(len(df_event) - 2):
    pt1 = df_event.loc[i, 'possession_team']
    pt2 = df_event.loc[i + 1, 'possession_team']
    pt3 = df_event.loc[i + 2, 'possession_team']
    
    # Possession changes right after row i.
    if pt1 != pt2:
        # NOTE(review): both branches set needs_reverse to True; they differ only
        # in whether the timestamp is recorded in reverse_events — confirm intent.
        if pt2 == pt3:
            df_event.loc[i, 'needs_reverse'] = True
        else:
            df_event.loc[i, 'needs_reverse'] = True
            reverse_events.append(df_event.loc[i, 'seconds'])
        # Perform the reversal: rotate start and end coordinates by 180 degrees
        # around the pitch center, skipping coordinates that are missing.
        if df_event.loc[i, 'needs_reverse']:

            if not pd.isna(df_event.loc[i, 'start_x']) and not pd.isna(df_event.loc[i, 'start_y']):
                start_x, start_y = rotate_around_center(
                    df_event.loc[i, 'start_x'], df_event.loc[i, 'start_y']
                )
                df_event.loc[i, 'start_x'] = start_x
                df_event.loc[i, 'start_y'] = start_y
            

            if not pd.isna(df_event.loc[i, 'end_x']) and not pd.isna(df_event.loc[i, 'end_y']):
                end_x, end_y = rotate_around_center(
                    df_event.loc[i, 'end_x'], df_event.loc[i, 'end_y']
                )
                df_event.loc[i, 'end_x'] = end_x
                df_event.loc[i, 'end_y'] = end_y
    else:

        df_event.loc[i, 'needs_reverse'] = False

# Mark the last two rows as not needing reversal (the loop never visits them).
df_event.loc[len(df_event) - 2:, 'needs_reverse'] = False



# Persist the (partially rotated) events for inspection.
event_output_csv_path = "/home/z_chen/workspace3/test/reverse_events.csv"
df_event.to_csv(event_output_csv_path, index=False)
print(f"事件 DataFrame 已保存为 CSV 文件：{event_output_csv_path}")


# tracking dataframe
# Build one flat row per tracking frame:
#   [seconds, period, possession group, 23 home (x, y) pairs, 23 away (x, y) pairs,
#    23 home trackable ids, 23 away trackable ids, home_side, ball x/y/z]
home_side = None
home_gk_x = away_gk_x = None
df_tracking_list = []


for frame in tracking:
    # 23 slots per team; each player's slot is the per-team 'id' from trackable_objects.
    home_tracking = [None] * 2 * 23
    away_tracking = [None] * 2 * 23
    home_trackable_ids = [None] * 23  # trackable_object of each home player
    away_trackable_ids = [None] * 23  # trackable_object of each away player
    ball_x = ball_y = ball_z = None
    # `or {}` also covers an explicit JSON null, not just a missing key.
    tracking_possession = frame.get("possession") or {}
    possession_time = frame.get("timestamp", "")
    possession_period = frame.get("period", "")
    # Parse "HH:MM:SS.fff" into seconds; missing or malformed timestamps become 0.
    if possession_time:
        try:
            time_parts = possession_time.split(':')
            possession_second = round(float(time_parts[0]) * 3600 + float(time_parts[1]) * 60 + float(time_parts[2]),4)
        except (ValueError, IndexError):
            possession_second = 0
    else:
        possession_second = 0
    possession_team = tracking_possession.get("group")
    tracking_data = frame["data"]

    if tracking_data:
        for obj in tracking_data:
            track_obj = trackable_objects.get(obj['trackable_object'])
            if not track_obj:
                # Object not registered as a player or the ball.
                continue

            obj_role = track_obj['role']
            # Shift x by half the pitch length and flip/shift y by half the width.
            x_m = round(obj['x'] + FIELD_LENGTH / 2, 2)
            y_m = round(-obj['y'] + FIELD_WIDTH / 2, 2)

            if obj_role in ('home', 'away'):
                # Named `slot` so the builtin `id` is not rebound at notebook scope.
                slot = track_obj['id']
                if obj_role == 'home':
                    team_coords, team_ids = home_tracking, home_trackable_ids
                else:
                    team_coords, team_ids = away_tracking, away_trackable_ids
                team_coords[2 * slot] = x_m
                team_coords[2 * slot + 1] = y_m
                team_ids[slot] = obj['trackable_object']

                # Remember the first raw x seen for each goalkeeper; it decides
                # which side the home team defends.
                if track_obj['position'] == "Goalkeeper":
                    if obj_role == 'home' and home_gk_x is None:
                        home_gk_x = obj['x']
                    elif obj_role == 'away' and away_gk_x is None:
                        away_gk_x = obj['x']
            elif obj_role == 'ball':
                ball_x = x_m
                ball_y = y_m
                ball_z = round(obj["z"], 2)

        if home_gk_x is not None and away_gk_x is not None:
            home_side = 'left' if home_gk_x < away_gk_x else 'right'


    df_tracking_list.append([possession_second, possession_period, possession_team, *home_tracking, *away_tracking, *home_trackable_ids, *away_trackable_ids, home_side, ball_x, ball_y, ball_z])



# Column names for the 23 player slots per team (1-based in the names).
slot_numbers = range(1, 24)
home_tracking_columns = [f"h{n}_{axis}" for n in slot_numbers for axis in ("x", "y")]
away_tracking_columns = [f"a{n}_{axis}" for n in slot_numbers for axis in ("x", "y")]
home_trackable_columns = [f"h{n}_trackable_id" for n in slot_numbers]
away_trackable_columns = [f"a{n}_trackable_id" for n in slot_numbers]

columns1 = (
    ["seconds", "period","possession_team"]
    + home_tracking_columns
    + away_tracking_columns
    + home_trackable_columns
    + away_trackable_columns
    + ["home_side", "ball_x", "ball_y", "ball_z"]
)

# Drop frames without a timestamp or period, then order by (period, seconds).
df_tracking_list_filtered = [
    row for row in df_tracking_list
    if not (row[0] is None or row[1] is None)
]
df_tracking_list_sorted = sorted(df_tracking_list_filtered, key=lambda row: (row[1], row[0]))

df_tracking = pd.DataFrame(df_tracking_list_sorted, columns=columns1)
df_tracking["seconds"] = df_tracking["seconds"].round(1)

# Persist the flattened tracking frames.
tracking_output_csv_path = "/home/z_chen/workspace3/test/tracking.csv"
df_tracking.to_csv(tracking_output_csv_path, index=False)
print(f"追踪数据 DataFrame 已保存为 CSV 文件：{tracking_output_csv_path}")


In [None]:
# Show the zero-based -> one-based slot mappings for both teams, one per line.
print(home_id_mapping, away_id_mapping, sep="\n")

In [None]:
from rapidfuzz import process, fuzz
import pandas as pd

# Drop middle names
def preprocess_name(name):
    """Keep only the first and last whitespace-separated tokens of a name.

    The two tokens are concatenated without a separator. Single-token strings
    and non-string values are returned unchanged.
    """
    if not isinstance(name, str):
        return name
    tokens = name.split()
    if len(tokens) <= 1:
        return name
    return tokens[0] + tokens[-1]

# Normalize names
def normalize_name(name):
    """Return a lowercase, separator-free, accent-free version of ``name``.

    Spaces, hyphens, underscores and dots are removed. Diacritics are stripped
    through Unicode decomposition, so accents outside a fixed table (e.g. "ã",
    "å", "č", "ğ") and already-decomposed input are handled as well. "ß" is
    expanded to "ss". Non-string values are returned unchanged.
    """
    import unicodedata

    if not isinstance(name, str):
        return name

    name = name.lower()
    for separator in (" ", "-", "_", "."):
        name = name.replace(separator, "")
    # "ß" has no Unicode decomposition, so it is expanded explicitly.
    name = name.replace("ß", "ss")
    # Split accented letters into base letter + combining mark, then drop the marks.
    decomposed = unicodedata.normalize("NFKD", name)
    name = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return name.strip()

# Normalize the event-side player names in two steps (drop middle names, then clean).
df_event["preprocessed_name"] = df_event["player"].apply(preprocess_name)
df_event["normalized_name"] = df_event["preprocessed_name"].apply(normalize_name)

# Apply the same two steps to every registered tracking name (the ball entry included).
trackable_names = [info["name"] for info in trackable_objects.values()]
processed_trackable_names = [
    normalize_name(preprocess_name(raw_name)) for raw_name in trackable_names
]

def match_names(event_name, trackable_names):
    """Return the best fuzzy match for ``event_name`` among ``trackable_names``.

    Uses ``fuzz.ratio`` as the scorer and accepts the best candidate only when
    its score is strictly greater than 80; otherwise returns None.
    """
    best = process.extractOne(event_name, trackable_names, scorer=fuzz.ratio)
    if not best:
        return None
    candidate, similarity = best[0], best[1]  # matched string and its score
    if similarity > 80:  # similarity threshold
        return candidate
    return None

# Fuzzy-match every normalized event name against the normalized tracking names.
df_event["matched_name"] = df_event["normalized_name"].apply(
    match_names, args=(processed_trackable_names,)
)


# One row per distinct (player, matched_name) pair, for eyeballing the matches.
unique_df = df_event[["player", "matched_name"]].drop_duplicates()
print(unique_df)


In [None]:
trackable_names_set = set(processed_trackable_names)
matched_names_set = set(df_event["matched_name"].dropna().unique())
# Check whether both name sets are exactly the same; otherwise show the differences.
if matched_names_set != trackable_names_set:
    print("仅在 matched_name 中的名字：", matched_names_set - trackable_names_set)
    print("仅在 trackable_names 中的名字：", trackable_names_set - matched_names_set)
    print(len(matched_names_set))
    print(len(trackable_names_set))
else:
    print("df_event 的 matched_name 和 trackable_names 完全一致！")



In [None]:
# Map each trackable_object id to its normalized player name.
trackable_name_map = dict(
    (tid, normalize_name(preprocess_name(info['name'])))
    for tid, info in trackable_objects.items()
)
print(trackable_name_map)
# Add a name column for every player slot: home first, then away, for each slot number.
for i in range(1, 24):
    for side in ("h", "a"):
        trackable_column = f"{side}{i}_trackable_id"
        name_column = f"{side}{i}_name"
        df_tracking[name_column] = df_tracking[trackable_column].map(trackable_name_map)

# Inspect a few rows of the updated DataFrame.
print(df_tracking.iloc[100:105])


In [None]:
# Per trackable object: raw and normalized name, team role, and the one-based
# slot number used in the h{n}_* / a{n}_* tracking columns (None when the
# entry has no 'id', as for the ball).
id_to_info = {
    tid: {
        "trackable_object": tid,
        "name": info["name"],
        "normalized_name": normalize_name(preprocess_name(info["name"])),
        "team": info["role"],
        "id": None if info.get("id") is None else info.get("id") + 1
    }
    for tid, info in trackable_objects.items()
}
# Inspect the relation dictionary.
print("id_to_info sample:", list(id_to_info.items())[:5])

matching_id111 = [entry["id"] for entry in id_to_info.values()]
print(matching_id111)


In [None]:
# Display the current tracking columns (coordinates, trackable ids and the name columns added so far).
df_tracking.columns

In [None]:
def calculate_acceleration_and_max_seconds(data, player_position, have_name):
    """
    Find the timestamp of the ball's maximum acceleration near a kick-off.

    The ball acceleration is estimated with second-order finite differences.
    The ball frames are then scanned in order; at the first frame where a
    player is within 2 units of the ball, the overall maximum acceleration and
    its timestamp are selected and the scan stops.

    Parameters:
    - data: List of lists whose first four items are [timestamp, x, y, z] of the ball.
    - player_position: List of [position, timestamp] entries. With ``have_name``
      True, ``position`` is a single (x, y) pair; otherwise it is a list of
      (x, y) pairs, where pairs containing None are skipped.
    - have_name: Whether ``player_position`` describes one known player (True)
      or every player of a team (False).

    Returns:
    - max_acceleration_seconds: The timestamp at which maximum acceleration
      occurs, or 0.0 when no frame qualifies.
    - max_acceleration: The maximum acceleration value, or 0.0 when no frame qualifies.
    - distances: Array of ball-player distances for the last frame examined
      (empty when ``data`` is empty).
    """
    # Extract seconds, x, y, z coordinates
    m_seconds = [entry[0] for entry in data]
    m_x = np.array([entry[1] for entry in data])
    m_y = np.array([entry[2] for entry in data])
    m_z = np.array([entry[3] for entry in data])

    # Calculate differences
    delta_x = np.diff(m_x)
    delta_y = np.diff(m_y)
    delta_z = np.diff(m_z)
    # Force float so the epsilon below is not truncated to 0 for integer timestamps.
    delta_t = np.diff(np.asarray(m_seconds, dtype=float))

    # Avoid division by zero
    delta_t[delta_t == 0] = 1e-6  # Use a small epsilon value to avoid NaN

    # Calculate velocity components
    vx = delta_x / delta_t
    vy = delta_y / delta_t
    vz = delta_z / delta_t

    # Calculate acceleration
    delta_vx = np.diff(vx) / delta_t[1:]  # Acceleration in x direction
    delta_vy = np.diff(vy) / delta_t[1:]  # Acceleration in y direction
    delta_vz = np.diff(vz) / delta_t[1:]  # Acceleration in z direction
    acceleration_magnitude = np.sqrt(delta_vx**2 + delta_vy**2 + delta_vz**2)

    max_acceleration = None
    max_acceleration_seconds = None
    # Defined up front so the return statement works even when `data` is empty.
    distances = np.array([])

    # Calculate distances between ball and player
    for index, m_second in enumerate(m_seconds):
        px = m_x[index]
        py = m_y[index]
        distances = []
        for p_frame in player_position:
            p_seconds = p_frame[1]
            p_position = p_frame[0]
            # Only player frames with exactly the same timestamp are compared.
            if p_seconds == m_second:
                if have_name:
                    kickoff_x = p_position[0]
                    kickoff_y = p_position[1]
                    distance = np.sqrt((px - kickoff_x)**2 + (py - kickoff_y)**2)
                    distances.append(distance)

                else:
                    for p in p_position:
                        if p[0] is not None and p[1] is not None and px is not None and py is not None:
                            kickoff_x = p[0]
                            kickoff_y = p[1]
                            distance = np.sqrt((px - kickoff_x)**2 + (py - kickoff_y)**2)
                            distances.append(distance)

        distances = np.array(distances)  # Convert to numpy array
        valid_indices = np.where((distances <= 2) & (~np.isnan(distances)))[0]
        if len(valid_indices) > 0 and len(acceleration_magnitude) > 0:
            min_index = valid_indices[np.argmin(distances[valid_indices])]  # index of the closest player
            min_distance = distances[min_index]
            print(f"min dinstance:{min_distance} for {m_second}")
            # The maximum is taken over the whole acceleration series, not only this frame.
            max_acceleration_index = np.argmax(acceleration_magnitude)
            max_acceleration = acceleration_magnitude[max_acceleration_index]
            max_acceleration_seconds = m_seconds[max_acceleration_index + 2]  # +2 due to second order difference
            break

    return max_acceleration_seconds if max_acceleration_seconds is not None else 0.0, max_acceleration if max_acceleration is not None else 0.0, distances


# Kick-off passes of each half. `.iloc[0]` raises IndexError when a half has no kick-off event.
filtered = df_event[df_event["pass_type"] == "Kick Off"]
filter1 = filtered[filtered["period"]== 1]
filter2 = filtered[filtered["period"]== 2]

# 1 when the home team takes the kick-off, 0 otherwise.
home_or_away1 = filter1["home_team"].iloc[0]
home_or_away2 = filter2["home_team"].iloc[0]
print(f"hometeam for first half : {home_or_away1}")
print(f"hometeam for second half : {home_or_away2}")
ball_frames = []

# Ball positions from the start of the tracking data up to just past 5 seconds.
# Relies on df_tracking being ordered by (period, seconds).
for _, ball_frame in df_tracking.iterrows():
    if np.isnan(ball_frame['ball_x']) or np.isnan(ball_frame['ball_y']):
        continue
    b_seconds = ball_frame["seconds"]
    b_x = ball_frame["ball_x"]
    b_y = ball_frame["ball_y"]
    b_z = ball_frame["ball_z"]
    ball_frames.append([b_seconds, b_x, b_y, b_z])
    if b_seconds > 5:
        break


ball_frames_secondhalf = []
# Select all rows of period 2
filtered_rows = df_tracking[df_tracking["period"]==2]

for _, second_frame in filtered_rows.iterrows():
    if np.isnan(second_frame['ball_x']) or np.isnan(second_frame['ball_y']):
        continue
    sb_seconds = second_frame["seconds"]
    sb_x = second_frame["ball_x"]
    sb_y = second_frame["ball_y"]
    sb_z = second_frame["ball_z"]
    # Take the possession team from this row, not from the global variable left
    # over by the tracking-frame loop (which holds the last frame's value).
    ball_frames_secondhalf.append([sb_seconds,sb_x,sb_y,sb_z,second_frame["possession_team"]])
    # if sb_seconds > 45*60+5:
    if sb_seconds > 45*60+10:
        break


print(f"ball frame first half: {ball_frames}")
print(f"ball frame second half: {ball_frames_secondhalf}")


# Normalized tracking name matched to the player who took each kick-off.
kickoff_names1 = filter1["matched_name"].iloc[0]
kickoff_names2= filter2["matched_name"].iloc[0]
print(f"event kifkoff player name :{kickoff_names1}")
print(f"event kifkoff player name :{kickoff_names2}")

def get_kickoff_data(kickoff_name, home_or_away, df_tracking, id_to_info, period):
    """Collect the tracked positions of the kick-off taker for one period.

    ``kickoff_name`` is looked up among the ``normalized_name`` values of
    ``id_to_info``; the first match's ``id`` selects the ``h{id}_x/_y`` columns
    (``home_or_away == 1``) or the ``a{id}_x/_y`` columns (otherwise).

    Returns a list of ``[(x, y), seconds]`` entries for the rows of ``period``
    with all three values present, or ``[]`` when the name is empty or unknown
    or the columns do not exist.
    """
    if not kickoff_name:
        return []

    candidates = [
        info for info in id_to_info.values()
        if info["normalized_name"] == kickoff_name
    ]
    if not candidates:
        return []

    # Debug output: every registry entry sharing the normalized name.
    matching_name = [info["normalized_name"] for info in candidates]
    matching_object = [info["trackable_object"] for info in candidates]
    matching_id = [info["id"] for info in candidates]
    print(f"matching_name: {matching_name}")
    print(f"matching_object: {matching_object}")
    print(f"matching_id: {matching_id}")

    # Column prefix depends on whether the home or the away team kicked off.
    slot = int(matching_id[0])
    side = "h" if home_or_away == 1 else "a"
    x_column = f"{side}{slot}_x"
    y_column = f"{side}{slot}_y"

    if x_column not in df_tracking.columns or y_column not in df_tracking.columns:
        return []

    in_period = df_tracking["period"] == period
    player_rows = df_tracking[in_period][[x_column, y_column, "seconds"]].dropna()
    return [
        [(row[x_column], row[y_column]), row["seconds"]]
        for _, row in player_rows.iterrows()
    ]
    
# Positions of the two kick-off takers (first and second half).
kickoff_data1 = get_kickoff_data(kickoff_names1, home_or_away1, df_tracking, id_to_info,1)
kickoff_data2 = get_kickoff_data(kickoff_names2, home_or_away2, df_tracking, id_to_info,2)

print(f" kick off data1 : {kickoff_data1}")
print(f" kick off data2 : {kickoff_data2}")


# First attempt: synchronise using only the named kick-off taker.
max_seconds1, max_acceleration1, distances_data1 = calculate_acceleration_and_max_seconds(
    ball_frames, kickoff_data1, True
)
max_seconds2, max_acceleration2, distances_data2 = calculate_acceleration_and_max_seconds(
    ball_frames_secondhalf, kickoff_data2, True
)

# Per-frame positions of every home player; frames where all home x values are missing are skipped.
h_columns_x = [col for col in df_tracking.columns if col.startswith("h") and col.endswith("_x")]
h_columns_y = [col.replace("_x", "_y") for col in h_columns_x]

h_players = [
    [[(h_row[x], h_row[y]) for x, y in zip(h_columns_x, h_columns_y)], h_row["seconds"]]
    for _, h_row in df_tracking.iterrows()
    if not all(pd.isna(h_row[x]) for x in h_columns_x)
]

# Same for the away players.
a_columns_x = [col for col in df_tracking.columns if col.startswith("a") and col.endswith("_x")]
a_columns_y = [col.replace("_x", "_y") for col in a_columns_x]

a_players = [
    [[(a_row[x], a_row[y]) for x, y in zip(a_columns_x, a_columns_y)], a_row["seconds"]]
    for _, a_row in df_tracking.iterrows()
    if not all(pd.isna(a_row[x]) for x in a_columns_x)
]

# Fall back to the whole kicking team when the named player was never within
# 10 units of the ball in the last examined frame, or was not found at all.
data1_flag = all(gap >= 10 for gap in distances_data1) or not kickoff_data1
data2_flag = all(gap >= 10 for gap in distances_data2) or not kickoff_data2

# Determine home or away and calculate max seconds
if data1_flag:
    team_players1 = h_players if home_or_away1 == 1 else a_players
    max_seconds1, max_acceleration1, distances_1 = calculate_acceleration_and_max_seconds(
        ball_frames, team_players1, False
    )
    print(f"distances_1 : {distances_1}")
if data2_flag:
    team_players2 = h_players if home_or_away2 == 1 else a_players
    max_seconds2, max_acceleration2, distances_2 = calculate_acceleration_and_max_seconds(
        ball_frames_secondhalf, team_players2, False
    )
    print(f"distances_2 : {distances_2}")

print(f"data1_flag: {data1_flag}")
print(f"data2_flag: {data2_flag}")
print(f" max seconds1 : {max_seconds1}")
print(f" max acceleration1 : {max_acceleration1}")
print(f" max seconds2 : {max_seconds2}")
print(f" max acceleration2 : {max_acceleration2}")
print(f" distances_data1 : {distances_data1}")
print(f" distances_data2 : {distances_data2}")



In [None]:
# Show the first second-half kick-off event row.
print(filter2.iloc[0])

In [None]:
# Tracking timestamp chosen as the second-half kick-off (before removing the 45-minute offset).
print(max_seconds2)

In [None]:
# Second-half kick-off delay with the 45-minute (2700 s) offset removed.
adjusted_max_seconds2 = round(max_seconds2 - 2700, 1)
print(adjusted_max_seconds2, df_tracking.columns, sep="\n")

In [None]:
# Keep the original seconds column as raw_seconds
df_tracking["raw_seconds"] = df_tracking["seconds"]

# Build adjusted_seconds: shift each half by its detected kick-off delay and
# round to one decimal. Rows of other periods keep their original value.
df_tracking["adjusted_seconds"] = df_tracking["seconds"]
for period_no, kickoff_delay in ((1, max_seconds1), (2, adjusted_max_seconds2)):
    in_period = df_tracking["period"] == period_no
    shifted = df_tracking.loc[in_period, "adjusted_seconds"] - kickoff_delay
    df_tracking.loc[in_period, "adjusted_seconds"] = shifted.round(1)
# # Overwrite seconds with adjusted_seconds
# df_tracking["seconds"] = df_tracking["adjusted_seconds"]
# # Drop the adjusted_seconds column (optional)
# df_tracking.drop(columns=["adjusted_seconds"], inplace=True)

# First adjusted and raw timestamp of each half, as a sanity check.
for period_no in (1, 2):
    for column_name in ("adjusted_seconds", "raw_seconds"):
        print(df_tracking[df_tracking["period"]==period_no][column_name].iloc[0])


In [15]:
# Split the tracking frames by half and write one CSV per half.
period_column = df_tracking['period']
firsthalf_data = df_tracking.loc[period_column.eq(1)]
second_data = df_tracking.loc[period_column.eq(2)]

for half_frames, csv_path in (
    (firsthalf_data, '/home/z_chen/workspace3/test/dftracking_period_1.csv'),
    (second_data, '/home/z_chen/workspace3/test/dftracking_period_2.csv'),
):
    half_frames.to_csv(csv_path, index=False)


In [16]:
def distance_score(pass_event, player_period, ball_velocity_period):
    """Score how well one tracked player sample matches a pass event.

    ``pass_event`` is indexed as (second, x, y, side), ``player_period`` as
    (second, x, y, player_id) and ``ball_velocity_period`` as (second, x, y).
    The score is the equally weighted sum of the inverse player-to-event
    distance and the inverse ball-to-event distance.

    Returns ``(tracking_second, player_id, score)``, or None when the tracking
    second or the player id does not compare equal to itself (e.g. NaN).
    """
    # The event second, event side and ball second are read but not used.
    _event_second, event_x, event_y, _event_side = (
        pass_event[0], pass_event[1], pass_event[2], pass_event[3]
    )
    _ball_second, ball_x, ball_y = (
        ball_velocity_period[0], ball_velocity_period[1], ball_velocity_period[2]
    )
    frame_second, player_x, player_y, player_id = (
        player_period[0], player_period[1], player_period[2], player_period[3]
    )

    # Distance from the event location to the player and to the ball.
    player_to_event = ((event_x - player_x) ** 2 + (event_y - player_y) ** 2) ** 0.5
    ball_to_event = ((ball_x - event_x) ** 2 + (ball_y - event_y) ** 2) ** 0.5

    # Values that are not equal to themselves yield no score.
    if frame_second != frame_second or player_id != player_id:
        return None

    weight_event = 0.5
    weight_ball = 0.5
    # 1e-6 keeps the inverse finite when a distance is exactly zero.
    combined_score = weight_event * (1 / (player_to_event + 1e-6)) + weight_ball * (1 / (ball_to_event + 1e-6))
    return (frame_second, player_id, combined_score)



In [17]:
from tqdm import tqdm

# Keep passes and ball receipts, then take their absolute event seconds.
is_pass_like = df_event['event_type'].isin(['Pass', 'Ball Receipt*'])
pass_events = df_event[is_pass_like]
pass_events_seconds = pass_events["seconds"]

In [None]:

def get_window_of_frames_around(action_second, df_tracking, ta):
    """
    Return the tracking rows whose ``adjusted_seconds`` lies within
    ``[action_second - ta, action_second + ta]`` (both ends inclusive) and
    whose ``period`` is not missing.
    """
    window_start = action_second - ta
    window_end = action_second + ta
    inside_window = df_tracking["adjusted_seconds"].between(window_start, window_end)
    has_period = df_tracking["period"].notna()
    return df_tracking[inside_window & has_period]

# Collect, for every pass-like event, the tracking frames within +/- ta seconds.
ta = 5
windows = []
for action_second in pass_events_seconds:
    if action_second is None:
        print(f"No window for the event second: {action_second}")
        continue
    if isinstance(action_second, pd.DataFrame):
        # Unwrap the first row when a DataFrame is encountered.
        action_second = action_second.iloc[0]
        print(f"df event second: {action_second}")
    window = get_window_of_frames_around(action_second, df_tracking, ta)
    windows.append([action_second, window])
print(windows)


In [None]:
# Print the event second of every collected window (element 0 of each [second, frames] pair).
for window in windows:
    print(window[0])

In [None]:
# Inspect one sample window (index 10): drop the columns that are entirely
# empty and report the adjusted_seconds range it covers.
sample_frames = windows[10][1]
non_empty_columns = sample_frames.dropna(axis=1, how="all")
print(non_empty_columns)
adjusted_seconds = sample_frames["adjusted_seconds"]
value_min, value_max = adjusted_seconds.min(), adjusted_seconds.max()
print(f"Range of adjusted_seconds: min={value_min}, max={value_max}")

In [None]:
def calculate_distance1(df, x_column, y_column, ref_x, ref_y):
    """Euclidean distance from (ref_x, ref_y) to each row's (x_column, y_column).

    Returns a float Series aligned with ``df.index``; rows where either
    coordinate is missing stay NaN.
    """
    has_xy = df[x_column].notna() & df[y_column].notna()
    dx = df.loc[has_xy, x_column] - ref_x
    dy = df.loc[has_xy, y_column] - ref_y

    # Start from an all-NaN series so rows without coordinates remain NaN.
    distances = pd.Series(index=df.index, dtype=float)
    distances[has_xy] = np.sqrt(dx ** 2 + dy ** 2)
    return distances

def distance_score1(event_pass_x, event_pass_y, df_window, event_hometeam, window_id_value):
    results = []  # 清空结果

    df_window["ball_event_distance"] = calculate_distance(df_window, "ball_x", "ball_y", event_pass_x, event_pass_y)
    print(f"ball distances : {df_window['ball_event_distance']}")

    if event_hometeam == 1:
        groups_prefix = "h"
    else:
        groups_prefix = "a"

    group_columns = [col for col in df_window.columns if col.startswith(groups_prefix) and ("_x" in col or "_y" in col)]
    groups = sorted(set(col.split("_")[0] for col in group_columns))

    for group in groups:
        x_column = f"{group}_x"
        y_column = f"{group}_y"
        if x_column in df_window.columns and y_column in df_window.columns:
            distance_column = f"{group}_distance"
            ball_column = f"{group}_ball_distance"
            score_column = f"{group}_score"
            # 创建布尔掩码，确保当前行所有涉及的列都非零
            non_zero_mask = (
                (df_window[x_column] != 0) &
                (df_window[y_column] != 0) &
                (df_window["ball_x"] != 0) &
                (df_window["ball_y"] != 0)
            )

            # 对满足条件的行计算
            df_window.loc[non_zero_mask, ball_column] = np.sqrt(
                (df_window.loc[non_zero_mask, x_column] - df_window.loc[non_zero_mask, "ball_x"]) ** 2 +
                (df_window.loc[non_zero_mask, y_column] - df_window.loc[non_zero_mask, "ball_y"]) ** 2
            )
            df_window[distance_column] = calculate_distance(df_window, x_column, y_column, event_pass_x, event_pass_y)

            valid_mask = (
                df_window["ball_event_distance"].notna() &
                df_window[distance_column].notna() &
                df_window[ball_column].notna()
            )

            # 仅对满足条件的行进行计算
            df_window.loc[valid_mask, score_column] = (
                (1 - (df_window.loc[valid_mask, "ball_event_distance"] / 5)) * 25 +
                (1 - (df_window.loc[valid_mask, distance_column] / 5)) * 25 +
                (1 - (df_window.loc[valid_mask, ball_column] / 5)) * 50
            )

            if not df_window[score_column].isnull().all():
                max_idx = df_window[score_column].idxmax()
                max_score = df_window.loc[max_idx, score_column]
                max_time = df_window.loc[max_idx, "adjusted_seconds"]

                results.append({
                    "group": group,
                    "adjusted_seconds": max_time,
                    "score": max_score
                })

    if results:
        best_result = max(results, key=lambda x: x["score"])
        return best_result
    else:
        return None

# Example usage: score the second window against the event it was built for.
dfff_window = windows[1]
e_second = dfff_window[0]  # event time the window is centred on
dff_window = pd.DataFrame(dfff_window[1])

# Event rows recorded at that second; the first one supplies the pass details.
event_rows = df_event[df_event["seconds"] == e_second]
pass_x = event_rows["start_x"].iloc[0]
pass_y = event_rows["start_y"].iloc[0]
e_team = event_rows["home_team"].iloc[0]

for label, value in (("e_second:", e_second), ("pass_x:", pass_x), ("pass_y:", pass_y)):
    print(label, value)
print(e_team)

window_score = distance_score1(pass_x, pass_y, dff_window, e_team, None)
print(window_score)


In [None]:

# Distance helper
def calculate_distance(df, x_column, y_column, ref_x, ref_y):
    """Euclidean distance from each row's (x, y) to a reference.

    Non-numeric entries are coerced to NaN. Missing coordinates stay NaN, so
    the distance for such rows is NaN as well; they are no longer replaced by
    0, which would place the object at the point (0, 0). ``df`` is not modified.

    Args:
        df: Frame holding the coordinate columns.
        x_column: Name of the x-coordinate column.
        y_column: Name of the y-coordinate column.
        ref_x: Reference x; a scalar, or a Series aligned with ``df``.
        ref_y: Reference y; a scalar, or a Series aligned with ``df``.

    Returns:
        A Series of distances indexed like ``df``.
    """
    x_values = pd.to_numeric(df[x_column], errors="coerce")
    y_values = pd.to_numeric(df[y_column], errors="coerce")

    # Vectorised distance; NaN propagates from any missing coordinate.
    return np.sqrt(
        (x_values - ref_x) ** 2 +
        (y_values - ref_y) ** 2
    )

# Main scoring function
def distance_score(event_pass_x, event_pass_y, df_window, event_hometeam):
    """Find the player and frame in a tracking window that best match a pass event.

    For every player column pair ``<group>_x`` / ``<group>_y`` of the event's
    team, each frame is scored from three distances: ball to event location
    (weight 25), player to event location (weight 25) and player to ball
    (weight 50). Each term is ``1 - distance / 5`` times its weight. Frames
    where any of the three distances is unknown are not scored.

    Args:
        event_pass_x: x of the event location.
        event_pass_y: y of the event location.
        df_window: Tracking frames to search; needs "ball_x", "ball_y",
            "adjusted_seconds" and the player coordinate columns. It is only
            read, never modified.
        event_hometeam: 1 selects the "h"-prefixed columns, anything else the
            "a"-prefixed ones.

    Returns:
        ``{"group", "adjusted_seconds", "score"}`` for the best-scoring player
        and frame, or None when no frame could be scored.
    """
    results = []

    # Ball coordinates, coerced once, and the ball's distance to the event.
    ball_x = pd.to_numeric(df_window["ball_x"], errors="coerce")
    ball_y = pd.to_numeric(df_window["ball_y"], errors="coerce")
    ball_event_distance = calculate_distance(df_window, "ball_x", "ball_y", event_pass_x, event_pass_y)

    # The column prefix depends on which team performed the event.
    groups_prefix = "h" if event_hometeam == 1 else "a"
    group_columns = [col for col in df_window.columns if col.startswith(groups_prefix) and ("_x" in col or "_y" in col)]
    groups = sorted(set(col.split("_")[0] for col in group_columns))

    for group in groups:
        x_column, y_column = f"{group}_x", f"{group}_y"
        if x_column not in df_window.columns or y_column not in df_window.columns:
            continue

        # Player-to-event and player-to-ball distances for every frame.
        player_event_distance = calculate_distance(df_window, x_column, y_column, event_pass_x, event_pass_y)
        player_ball_distance = calculate_distance(df_window, x_column, y_column, ball_x, ball_y)

        # Score only the frames where all three distances are known.
        valid_mask = (
            ball_event_distance.notna() &
            player_event_distance.notna() &
            player_ball_distance.notna()
        )
        if not valid_mask.any():
            continue

        score = (
            (1 - (ball_event_distance[valid_mask] / 5)) * 25 +
            (1 - (player_event_distance[valid_mask] / 5)) * 25 +
            (1 - (player_ball_distance[valid_mask] / 5)) * 50
        )

        # Remember this player's best frame.
        max_idx = score.idxmax()
        results.append({
            "group": group,
            "adjusted_seconds": df_window.loc[max_idx, "adjusted_seconds"],
            "score": score[max_idx]
        })

    # Best result over all players, if any frame could be scored.
    return max(results, key=lambda x: x["score"]) if results else None

# Example: score a single window against the event it was built for.
try:
    # Pick the window to inspect (change the index to look at another one).
    dfff_window = windows[2]
    e_second = dfff_window[0]  # event time the window is centred on
    dff_window = pd.DataFrame(dfff_window[1])

    # All event rows recorded at that second; stop if there are none.
    event_rows = df_event.loc[df_event["seconds"] == e_second]
    if event_rows.empty:
        raise ValueError(f"Event with seconds={e_second} not found in df_event.")

    # The first matching row supplies the pass location and the team flag.
    pass_x = event_rows["start_x"].iloc[0]
    pass_y = event_rows["start_y"].iloc[0]
    e_team = event_rows["home_team"].iloc[0]

    # Show what is being scored.
    print("e_second:", e_second)
    print("pass_x:", pass_x)
    print("pass_y:", pass_y)
    print("event team:", e_team)

    # Score the window.
    window_score = distance_score(pass_x, pass_y, dff_window, e_team)
    print("Window score:", window_score)

except IndexError as index_err:
    print("IndexError: Please ensure that 'windows' and 'df_event' contain the expected structure.", index_err)
except KeyError as key_err:
    print("KeyError: Ensure all required columns exist in 'df_event' and 'dff_window'.", key_err)
except ValueError as value_err:
    print("ValueError:", value_err)
except Exception as other_err:
    print("An unexpected error occurred:", other_err)



In [None]:
results = []  # one {"e_seconds", "score"} record per successfully processed window

for dfff_window in windows:
    # Errors are handled per window: a single bad window is reported and
    # skipped instead of silently ending the run for every window after it.
    try:
        e_second = dfff_window[0]  # event time the window is centred on
        dff_window = pd.DataFrame(dfff_window[1])

        # The event must exist in df_event to supply the pass details.
        event_rows = df_event.loc[df_event["seconds"] == e_second]
        if event_rows.empty:
            raise ValueError(f"Event with seconds={e_second} not found in df_event.")

        # The first matching row supplies the pass location and the team flag.
        pass_x = event_rows["start_x"].iloc[0]
        pass_y = event_rows["start_y"].iloc[0]
        e_team = event_rows["home_team"].iloc[0]

        print(f"Processing window with e_second: {e_second}")
        print("pass_x:", pass_x, "pass_y:", pass_y, "event team:", e_team)

        # Best-matching player/frame for this window (None if nothing scored).
        window_score = distance_score(pass_x, pass_y, dff_window, e_team)

    except IndexError as ie:
        print("IndexError: Please ensure that 'windows' and 'df_event' contain the expected structure.", ie)
    except KeyError as ke:
        print("KeyError: Ensure all required columns exist in 'df_event' and 'dff_window'.", ke)
    except ValueError as ve:
        print("ValueError:", ve)
    except Exception as e:
        print("An unexpected error occurred:", e)
    else:
        results.append({"e_seconds": e_second, "score": window_score})

# Show every collected result.
print("\nResults:")
for result in results:
    print(result)

In [None]:
# Tabulate the per-window results: one row per window.
results_df = pd.DataFrame(data=results)

# Show the whole table under a heading.
print("\nResults DataFrame:", results_df, sep="\n")

In [None]:
import matplotlib.pyplot as plt

# Pull the numeric 'score' out of each nested result dict
# (None when the window produced no result).
results_df["score_value"] = results_df["score"].apply(
    lambda entry: None if not isinstance(entry, dict) else entry["score"]
)

# Scatter the best score of every window against its event time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(results_df["e_seconds"], results_df["score_value"], marker='o', linestyle='None', label='Score Points')
ax.set_title("Score vs E_Seconds", fontsize=14)
ax.set_xlabel("E_Seconds", fontsize=12)
ax.set_ylabel("Score", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend()
fig.tight_layout()
plt.show()



In [None]:

# Pull the numeric 'score' out of each nested result dict
# (None when the window produced no result).
results_df["score_value"] = results_df["score"].apply(
    lambda entry: None if not isinstance(entry, dict) else entry["score"]
)

# Keep only the windows whose score is non-negative. The filter is on the
# score, not on e_seconds.
non_negative = results_df["score_value"] >= 0
filtered_df = results_df[non_negative]

# Scatter the remaining scores against event time.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(filtered_df["e_seconds"], filtered_df["score_value"], marker='o', linestyle="None", label='Score Points between 0-100')
ax.set_title("Score vs E_Seconds (Zoomed: 0-100)", fontsize=14)
ax.set_xlabel("E_Seconds", fontsize=12)
ax.set_ylabel("Score", fontsize=12)
ax.grid(True, linestyle='--', alpha=0.6)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Total number of rows in the results table, scored or not.
total_count = len(results_df)

# Number of rows whose score is above 60.
above_60_count = int((results_df["score_value"] > 60).sum())

# Share of rows above 60; defined as 0 when the table is empty.
if total_count > 0:
    proportion_above_60 = above_60_count / total_count
else:
    proportion_above_60 = 0

# Report the share as a percentage.
print(f"分数大于 60 分的比例: {proportion_above_60:.2%}")

In [None]:
# Keep only the windows whose best score is above 60.
filtered_results_1 = results_df[results_df["score_value"] > 60]

# One merged event + tracking record per confident match.
merged_rows = []

for _, row in filtered_results_1.iterrows():
    e_seconds = row["e_seconds"]
    adjusted_seconds = row["score"]["adjusted_seconds"] if isinstance(row["score"], dict) else None

    # Event recorded at e_seconds.
    event_row = df_event[df_event["seconds"] == e_seconds]

    # Tracking frame the score was taken from. The stored time was read from
    # the window's "adjusted_seconds" column, so it has to be looked up in that
    # same column; matching it against "seconds" compares two different clocks.
    tracking_row = df_tracking[df_tracking["adjusted_seconds"] == adjusted_seconds]

    # Merge only when both sides were found. On duplicate keys the tracking
    # value wins because it is unpacked last.
    if not event_row.empty and not tracking_row.empty:
        merged_row = {
            "e_seconds": e_seconds,
            "adjusted_seconds": adjusted_seconds,
            **event_row.iloc[0].to_dict(),  # event fields
            **tracking_row.iloc[0].to_dict(),  # tracking fields
        }
        merged_rows.append(merged_row)

# Build the table in one go from the collected records.
merged_df = pd.DataFrame(merged_rows)

# Show the result.
print("Merged DataFrame:")
print(merged_df)
print(merged_df.columns)

In [None]:
# Columns to keep in the simplified export.
columns_to_keep = [
    "e_seconds", "adjusted_seconds", "match_id", "period", "time", "minute", "second",
    "event_type", "event_type_2", "team", "home_team", "player", "start_x", "start_y",
    "end_x", "end_y", "pass_type", "pass_height", "pass_outcome", "possession_team",
    "needs_reverse", "preprocessed_name", "normalized_name", "matched_name",
    # x / y of every home (h1..h23) and away (a1..a23) player slot
    *[f"h{i}_x" for i in range(1, 24)], *[f"h{i}_y" for i in range(1, 24)],
    *[f"a{i}_x" for i in range(1, 24)], *[f"a{i}_y" for i in range(1, 24)],
    "home_side", "ball_x", "ball_y", "ball_z", "raw_seconds"
]
output_file = "/home/z_chen/workspace3/test/simplified_results.csv"

# Select the columns (raises KeyError if one is missing). The explicit copy
# makes the result an independent frame, so later cells can add columns to it
# without a SettingWithCopyWarning.
simplified_df = merged_df[columns_to_keep].copy()

# Write the file once.
simplified_df.to_csv(output_file, index=False)
print(f"simplified DataFrame saved to {output_file}")



In [None]:
def compute_offsets(df_m):
    """Add an 'offset' column: adjusted_seconds minus e_seconds, rounded to 0.1.

    The offset is computed from the frame that is passed in, not from a
    notebook-level variable.

    Args:
        df_m: Frame with 'adjusted_seconds' and 'e_seconds' columns. The
            'offset' column is added to it in place.

    Returns:
        The same frame, now carrying the 'offset' column.
    """
    df_m.loc[:, 'offset'] = (df_m['adjusted_seconds'] - df_m['e_seconds']).round(1)
    return df_m

def align_events(df_event11, df_tracking11, df_m):
    """Shift event times by the measured offsets and join them to tracking frames.

    Each event gets the offset of the first row of ``df_m`` (in row order)
    whose 'e_seconds' is at or after the event's 'seconds'; events later than
    every such row use the last row's offset. The shifted time is rounded to
    0.1 and left-joined to the tracking frame on its 'adjusted_seconds'.

    Args:
        df_event11: Event frame with a 'seconds' column. It is not modified.
        df_tracking11: Tracking frame with an 'adjusted_seconds' column.
        df_m: Matched rows with 'e_seconds' and 'adjusted_seconds'. It is not
            modified. Presumably ordered by 'e_seconds' - confirm with the
            code that builds it.

    Returns:
        The events with 'adjusted_seconds' / 'adjusted_seconds_rounded' added,
        left-joined to the tracking columns; clashing column names get the
        '_event' / '_tracking' suffixes.

    Raises:
        IndexError: If ``df_m`` has no rows while there are events to align.
    """
    # Work on copies so neither caller frame gains columns as a side effect.
    df_m = compute_offsets(df_m.copy())
    events = df_event11.copy()

    # Plain arrays are hoisted out of the per-event lookup.
    anchor_seconds = df_m['e_seconds'].to_numpy()
    anchor_offsets = df_m['offset'].to_numpy()

    def find_offset(event_time):
        # First anchor at or after the event, otherwise the last anchor.
        later = np.flatnonzero(anchor_seconds >= event_time)
        if later.size:
            return anchor_offsets[later[0]]
        return anchor_offsets[-1]

    # Apply the offset to every event.
    events['adjusted_seconds'] = events['seconds'].apply(
        lambda x: x + find_offset(x)
    )

    # Join on the rounded time.
    # NOTE(review): this is an exact float match, so it only finds a frame when
    # the tracking 'adjusted_seconds' values are themselves rounded to 0.1.
    events['adjusted_seconds_rounded'] = events['adjusted_seconds'].round(1)
    aligned_df = pd.merge(
        events,
        df_tracking11,
        left_on='adjusted_seconds_rounded',
        right_on='adjusted_seconds',
        how='left',
        suffixes=('_event', '_tracking')
    )

    return aligned_df


# Align every event with the tracking data, then persist the result as CSV.
aligned_df = align_events(
    df_event11=df_event,
    df_tracking11=df_tracking,
    df_m=simplified_df,
)

save_aligned = "/home/z_chen/workspace3/test/aligned_df.csv"
aligned_df.to_csv(path_or_buf=save_aligned, index=False)
print(f"aligned DataFrame saved to {save_aligned}")


In [None]:
# Spot-check the first aligned ball receipts: tracked ball position next to
# the event's own start location and its 'needs_reverse' flag.
# The filter does not depend on the loop, so it is computed once.
filtered_aligned_df = aligned_df[aligned_df["event_type"] == "Ball Receipt*"]

# Show at most 10 rows; fewer receipts no longer raise an IndexError.
for row in range(min(10, len(filtered_aligned_df))):
    receipt = filtered_aligned_df.iloc[row]
    print(receipt[["ball_x", "ball_y"]])
    print(receipt[["start_x", "start_y"]])
    print(receipt["needs_reverse"])