In [1]:
import pandas as pd
import numpy as np
import os
from datetime import timedelta
from correlate_event_to_game_schedule import correlate_event_to_game_schedule
from notes_filter import notes_filter

#--------------------------User Inputs---------------------------------
# Absolute paths to the two input CSVs: the cleaned player-movement log and
# the processed team schedules.
movement_data_file_path = '/Users/nya/NBA project/NBA Project/01/clean datasets/movement_data_cleaned.csv'
schedule_file_path = '/Users/nya/NBA project/NBA Project/02/processed datasets/teams_schedule_processed.csv'

#-------------------------Load Files-----------------------------------
# Read both inputs, converting each file's 'Date' column to datetimes on load.
inactive_list_df = pd.read_csv(
    movement_data_file_path,
    parse_dates=['Date'],
)
team_schedules_df = pd.read_csv(
    schedule_file_path,
    parse_dates=['Date'],
)

#-----Processing - Section 1: Count Number of Missed Games--------
print('Counting number of missed games')

# Length of a full regular season. The arithmetic below treats game numbers
# above this value as post-season games.
# NOTE(review): shortened seasons would need a per-season value -- confirm
# against the schedule data.
REGULAR_SEASON_GAMES = 82

# Rows with no 'Acquired' player are the "placed on inactive list" events.
placed_on_IL_events_df = inactive_list_df[inactive_list_df['Acquired'].isnull()].copy()

# Initialize new columns
placed_on_IL_events_df['Reg_games_missed'] = np.nan
placed_on_IL_events_df['Post_games_missed'] = np.nan
placed_on_IL_events_df['Season'] = None
placed_on_IL_events_df['Year'] = np.nan
placed_on_IL_events_df['Game_number'] = np.nan
placed_on_IL_events_df['Out_for_season'] = None

# Sort the full event log by date once, so that the first matching row found
# inside the loop is the earliest activation after the placement regardless of
# the row order in the CSV. mergesort is stable: same-day rows keep file order.
events_by_date_df = inactive_list_df.sort_values('Date', kind='mergesort')

# Iterate over placed_on_IL_events_df
for index, row in placed_on_IL_events_df.iterrows():
    results = correlate_event_to_game_schedule(team_schedules_df, row.Date, row.Team)
    if not results:
        continue  # Skip if the placement could not be matched to the schedule
    (game_number_placed_on_IL, game_date_placed_on_IL, season_placed_on_IL,
     year_placed_on_IL, team_total_num_games) = results

    # Later events in which this same player appears in the 'Acquired' column.
    activated_events_df = events_by_date_df.loc[
        (events_by_date_df['Acquired'] == row.Relinquished)
        & (events_by_date_df['Date'] > row.Date)
    ]

    if not activated_events_df.empty:
        activated_event = activated_events_df.iloc[0]
        activated_results = correlate_event_to_game_schedule(
            team_schedules_df, activated_event.Date, activated_event.Team
        )
        # The helper can return a falsy result (see the check above); unpacking
        # it directly would raise TypeError, so skip the row instead.
        if not activated_results:
            continue
        game_number_activated, _, season_activated, year_activated, _ = activated_results

        # Ensure 'game_number_activated' and 'game_number_placed_on_IL' are not None
        if game_number_activated is None or game_number_placed_on_IL is None:
            continue

        # NOTE(review): when the activation is in the regular season this
        # counts the games remaining AFTER the return
        # (REGULAR_SEASON_GAMES - game_number_activated), not the games between
        # placement and return, and the placement/activation years are never
        # compared. Formula kept as-is; confirm the intended definition.
        if season_activated == 'regular':
            regular_season_games_missed = REGULAR_SEASON_GAMES - game_number_activated
            post_season_games_missed = 0
        else:
            regular_season_games_missed = REGULAR_SEASON_GAMES - game_number_placed_on_IL
            post_season_games_missed = game_number_activated - REGULAR_SEASON_GAMES
        out_for_season = 'False'
    else:
        # No later activation found for this player.
        if game_number_placed_on_IL is not None:
            regular_season_games_missed = REGULAR_SEASON_GAMES - game_number_placed_on_IL
        else:
            regular_season_games_missed = 0

        # Guard against a missing total before comparing; None > int raises.
        if team_total_num_games is not None and team_total_num_games > REGULAR_SEASON_GAMES:
            post_season_games_missed = team_total_num_games - REGULAR_SEASON_GAMES
        else:
            post_season_games_missed = 0

        # NOTE(review): the flag is 'True' only when the placement has no game
        # number, not whenever no activation was found -- confirm intent.
        out_for_season = 'True' if game_number_placed_on_IL is None else 'False'

    placed_on_IL_events_df.loc[index, 'Reg_games_missed'] = regular_season_games_missed
    placed_on_IL_events_df.loc[index, 'Post_games_missed'] = post_season_games_missed
    placed_on_IL_events_df.loc[index, 'Out_for_season'] = out_for_season
    placed_on_IL_events_df.loc[index, 'Season'] = season_placed_on_IL
    placed_on_IL_events_df.loc[index, 'Year'] = year_placed_on_IL
    placed_on_IL_events_df.loc[index, 'Game_number'] = game_number_placed_on_IL

# Ensure 'Year' is treated as an integer where possible; rows skipped above
# keep a missing value, which the nullable Int64 dtype can represent.
placed_on_IL_events_df['Year'] = pd.to_numeric(placed_on_IL_events_df['Year'], errors='coerce').astype('Int64')
#------------Processing - Section 2: Filter Notes --------------------

# Run notes_filter over every row; result_type='expand' spreads each returned
# sequence across the positional columns (0, 1, ...) of notes_results.
notes_results = placed_on_IL_events_df.apply(notes_filter, axis=1, result_type='expand')

# Column 0 is stored as the note keyword and column 1 as its category.
placed_on_IL_events_df['note_keyword'] = notes_results[0]
placed_on_IL_events_df['category'] = notes_results[1]

# Save processed data
# The processed events are written as CSV without the DataFrame index.
save_directory = '/Users/nya/NBA project/NBA Project/02/processed datasets'
cleaned_data_path = os.path.join(save_directory, 'movement_data_processed.csv')
placed_on_IL_events_df.to_csv(
    cleaned_data_path,
    index=False,
)

# Report where the output file was written.
print(f"Processed dataset saved to {cleaned_data_path}")

Counting number of missed games
Processed dataset saved to /Users/nya/NBA project/NBA Project/02/processed datasets/movement_data_processed.csv
