In [28]:
import pandas as pd
import numpy as np

# Load Data

In [44]:
# Processed NBA schedule grid: a `team_name` column followed by one column per calendar date.
schedule_csv_path = '../data/processed/nba_game_schedule.csv'
schedule = pd.read_csv(schedule_csv_path)

In [45]:
# Inspect the schedule headers: apart from `team_name`, these are reused later
# as the date columns of the merged player schedule.
schedule.columns

Index(['team_name', '2024-10-22 00:00:00', '2024-10-23 00:00:00',
       '2024-10-24 00:00:00', '2024-10-25 00:00:00', '2024-10-26 00:00:00',
       '2024-10-27 00:00:00', '2024-10-28 00:00:00', '2024-10-29 00:00:00',
       '2024-10-30 00:00:00',
       ...
       '2025-04-04 00:00:00', '2025-04-05 00:00:00', '2025-04-06 00:00:00',
       '2025-04-07 00:00:00', '2025-04-08 00:00:00', '2025-04-09 00:00:00',
       '2025-04-10 00:00:00', '2025-04-11 00:00:00', '2025-04-12 00:00:00',
       '2025-04-13 00:00:00'],
      dtype='object', length=175)

In [47]:
# Raw Fantrax player export. The filename is hard-coded to one download;
# point this path at the newest export whenever fresh data is pulled.
fantrax_export_path = '../data/raw/fantrax/fantrax_10_14_24.csv'
fantrax = pd.read_csv(fantrax_export_path)

# Clean Fantrax Data

In [49]:
# Normalise the raw Fantrax headers: lower-case them, then swap spaces for underscores.
normalized_headers = fantrax.columns.str.lower().str.replace(' ', '_')
fantrax.columns = normalized_headers

# Expand Fantrax's terse abbreviations into descriptive names.
# Each pair is (normalised Fantrax header, descriptive replacement).
column_mapping = dict([
    ('rkov', 'rank_overall'),
    ('status', 'fantasy_team'),
    ('fpts', 'fantasy_points'),
    ('fp/g', 'fantasy_points_per_game'),
    ('%d', 'percent_drafted'),
    ('adp', 'average_draft_position'),
    ('3ptm', 'three_pointers_made'),
    ('pts', 'points'),
    ('reb', 'rebounds'),
    ('ast', 'assists'),
    ('st', 'steals'),
    ('blk', 'blocks'),
    ('to', 'turnovers'),
    ('3d', 'three_doubles'),
    ('2d', 'double_doubles'),
])

# Headers missing from the mapping (e.g. 'player', 'team') pass through unchanged.
fantrax = fantrax.rename(columns=column_mapping)

# Show the resulting header set for a quick visual check
print(fantrax.columns)


Index(['id', 'player', 'team', 'position', 'rank_overall', 'fantasy_team',
       'age', 'opponent', 'fantasy_points', 'fantasy_points_per_game',
       'percent_drafted', 'average_draft_position', 'fgm', 'fga',
       'three_pointers_made', 'ftm', 'fta', 'points', 'rebounds', 'assists',
       'steals', 'blocks', 'turnovers', 'three_doubles', 'double_doubles'],
      dtype='object')


In [50]:
# Keep only players rostered on a fantasy team.
# `fantasy_team` (Fantrax's "Status" column) holds either the owning fantasy
# team or an availability marker: 'FA' for free agents, 'W (<day>)' for players
# on waivers. Matching the 'W (' prefix instead of listing individual days means
# a refreshed export with a different waiver day (e.g. 'W (Thu)') is still
# filtered out rather than being treated as a fantasy team.
roster_status = fantrax['fantasy_team']
is_unrostered = roster_status.eq('FA') | roster_status.str.startswith('W (', na=False)

# .copy() makes the result an independent frame, so later column assignments on
# `fantrax_clean` cannot trigger SettingWithCopyWarning.
fantrax_clean = fantrax[~is_unrostered].copy()

In [51]:
# Sanity check: only real fantasy-team names should remain after the filter.
pd.unique(fantrax_clean['fantasy_team'])


array(['CCC', '$¢$', 'BBB', 'HBC', 'Jmarr237', 'Teacups', 'STARKS',
       'BIGFOOTS', 'GBRAYERS', 'Orcas', 'SERP', 'SDP'], dtype=object)

In [52]:
# NBA team codes on the Fantrax side of the upcoming join (left key: `team`).
pd.unique(fantrax_clean['team'])

array(['DEN', 'DAL', 'MIL', 'OKC', 'LAL', 'SA', 'SAC', 'BOS', 'NY', 'PHI',
       'IND', 'ATL', 'PHO', 'MIN', 'NO', 'CLE', 'TOR', 'HOU', 'LAC',
       'ORL', 'MIA', 'CHI', 'GS', 'CHA', 'WAS', 'MEM', 'UTA', 'DET',
       'BKN', 'POR'], dtype=object)

In [54]:
# NBA team codes on the schedule side of the upcoming join (right key: `team_name`).
pd.unique(schedule['team_name'])

array(['SAC', 'GS', 'NY', 'CHA', 'SA', 'PHO', 'LAL', 'BKN', 'ATL', 'MIA',
       'DET', 'DAL', 'ORL', 'UTA', 'MIL', 'PHI', 'CLE', 'NO', 'CHI',
       'DEN', 'MEM', 'TOR', 'BOS', 'MIN', 'HOU', 'OKC', 'POR', 'WAS',
       'LAC', 'IND'], dtype=object)

# Merge Data

In [59]:
# Build the per-player schedule: attach each rostered player's NBA team
# schedule, keep the descriptive columns plus every date column, rank by
# fantasy points per game, and export the result to CSV.
import os  # NOTE: ideally belongs in the notebook's top import cell

# Left join keeps every rostered player. `validate` makes pandas raise a
# MergeError if the schedule ever contains more than one row per team, which
# would otherwise silently duplicate players in the output.
merged_df = fantrax_clean.merge(
    schedule,
    left_on='team',
    right_on='team_name',
    how='left',
    validate='many_to_one',
)

# Players whose team code has no schedule row survive the left join with NaN
# in every date column; surface them instead of exporting them unnoticed.
unmatched_teams = merged_df.loc[merged_df['team_name'].isna(), 'team'].unique()
if len(unmatched_teams) > 0:
    print("Warning: no schedule row found for team code(s):", list(unmatched_teams))

# Every schedule column except the join key is a game-date column
date_columns = [col for col in schedule.columns if col != 'team_name']
selected_columns = ['player', 'fantasy_team', 'fantasy_points_per_game', 'position', 'team'] + date_columns

final_df = (
    merged_df[selected_columns]
    # 'team' is the NBA team; rename it so it is not confused with 'fantasy_team'
    .rename(columns={'team': 'nba_team'})
    # Highest fantasy points per game first
    .sort_values('fantasy_points_per_game', ascending=False)
    .reset_index(drop=True)
)

# Display the first few rows and shape of the final dataframe
print(final_df.head())
print("\nDataframe shape:", final_df.shape)

# Save the final dataframe, creating the target directory if it is missing
output_path = '../data/processed/player_schedule.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
final_df.to_csv(output_path, index=False)

# Report the location the file was actually written to
print(f"CSV file 'player_schedule.csv' has been saved to '{output_path}'.")



                    player fantasy_team  fantasy_points_per_game     position  \
0              Luka Doncic          $¢$                    64.07     PG,G,Flx   
1             Nikola Jokic          CCC                    61.72        C,Flx   
2    Giannis Antetokounmpo          BBB                    57.79   PF,F,C,Flx   
3              Joel Embiid     Jmarr237                    57.20        C,Flx   
4  Shai Gilgeous-Alexander          HBC                    55.08  PG,SG,G,Flx   

  nba_team  2024-10-22 00:00:00  2024-10-23 00:00:00  2024-10-24 00:00:00  \
0      DAL                    0                    0                    1   
1      DEN                    0                    0                    1   
2      MIL                    0                    1                    0   
3      PHI                    0                    1                    0   
4      OKC                    0                    0                    1   

   2024-10-25 00:00:00  2024-10-26 00:00:00  ...  