In [1]:
import pandas as pd

In [2]:
player_schedule = pd.read_csv('../data/processed/player_schedule.csv')

In [24]:
# Convert date columns to datetime type
date_columns = player_schedule.columns[player_schedule.columns.str.match(r'\d{4}-\d{2}-\d{2}')]
player_schedule[date_columns] = player_schedule[date_columns].apply(pd.to_datetime)

# Display updated data types
player_schedule.dtypes

player                             object
fantasy_team                       object
fantasy_points_per_game           float64
position                           object
nba_team                           object
                                ...      
2025-04-09 00:00:00        datetime64[ns]
2025-04-10 00:00:00        datetime64[ns]
2025-04-11 00:00:00        datetime64[ns]
2025-04-12 00:00:00        datetime64[ns]
2025-04-13 00:00:00        datetime64[ns]
Length: 179, dtype: object

In [13]:
player_schedule.columns

Index(['player', 'fantasy_team', 'fantasy_points_per_game', 'position',
       'nba_team', '2024-10-22 00:00:00', '2024-10-23 00:00:00',
       '2024-10-24 00:00:00', '2024-10-25 00:00:00', '2024-10-26 00:00:00',
       ...
       '2025-04-04 00:00:00', '2025-04-05 00:00:00', '2025-04-06 00:00:00',
       '2025-04-07 00:00:00', '2025-04-08 00:00:00', '2025-04-09 00:00:00',
       '2025-04-10 00:00:00', '2025-04-11 00:00:00', '2025-04-12 00:00:00',
       '2025-04-13 00:00:00'],
      dtype='object', length=179)

In [23]:
# Define playoff dates
playoff_weeks = [
    ('2025-03-24', '2025-03-30'),
    ('2025-03-31', '2025-04-06'),
    ('2025-04-07', '2025-04-13')
]

# Create a list to store the playoff dates
playoff_dates = []

# Generate all dates within each playoff week
for start_date, end_date in playoff_weeks:
    week_dates = pd.date_range(start=start_date, end=end_date, freq='D')
    playoff_dates.extend(week_dates)

# Convert playoff_dates to the same format as the column names
playoff_date_strings = [date.strftime('%Y-%m-%d 00:00:00') for date in playoff_dates]

# Get the date columns that match the playoff dates
playoff_columns = [col for col in player_schedule.columns if col in playoff_date_strings]

# Create a new dataframe with only the playoff dates
playoff_schedule = player_schedule[playoff_columns]

# Convert date columns to binary values (1 for a game, 0 for no game)
playoff_schedule = playoff_schedule.notna().astype(int)

# Add player information columns
player_info_columns = ['player', 'fantasy_team', 'fantasy_points_per_game', 'position', 'nba_team']
playoff_schedule = pd.concat([player_schedule[player_info_columns], playoff_schedule], axis=1)

# Display the first few rows of the new dataframe
print(playoff_schedule.head())

# Display the column names to verify
print("\nColumn names:")
print(playoff_schedule.columns.tolist())

# Check if all player info columns are present
missing_columns = set(player_info_columns) - set(playoff_schedule.columns)
if missing_columns:
    print(f"\nWarning: The following columns are missing: {missing_columns}")
else:
    print("\nAll player info columns are present.")

# Display data types of columns
print("\nColumn data types:")
print(playoff_schedule.dtypes)

# Print the number of date columns
date_columns = [col for col in playoff_schedule.columns if col not in player_info_columns]
print(f"\nNumber of date columns: {len(date_columns)}")
print("Date columns:")
print(date_columns)

                    player fantasy_team  fantasy_points_per_game     position  \
0              Luka Doncic          $¢$                    64.07     PG,G,Flx   
1             Nikola Jokic          CCC                    61.72        C,Flx   
2    Giannis Antetokounmpo          BBB                    57.79   PF,F,C,Flx   
3              Joel Embiid     Jmarr237                    57.20        C,Flx   
4  Shai Gilgeous-Alexander          HBC                    55.08  PG,SG,G,Flx   

  nba_team  2025-03-24 00:00:00  2025-03-25 00:00:00  2025-03-26 00:00:00  \
0      DAL                    1                    1                    1   
1      DEN                    1                    1                    1   
2      MIL                    1                    1                    1   
3      PHI                    1                    1                    1   
4      OKC                    1                    1                    1   

   2025-03-27 00:00:00  2025-03-28 00:00:00  ...  

In [22]:
playoff_schedule.head()

Unnamed: 0,player,fantasy_team,fantasy_points_per_game,position,nba_team,2025-03-24 00:00:00,2025-03-25 00:00:00,2025-03-26 00:00:00,2025-03-27 00:00:00,2025-03-28 00:00:00,...,2025-04-04 00:00:00,2025-04-05 00:00:00,2025-04-06 00:00:00,2025-04-07 00:00:00,2025-04-08 00:00:00,2025-04-09 00:00:00,2025-04-10 00:00:00,2025-04-11 00:00:00,2025-04-12 00:00:00,2025-04-13 00:00:00
0,Luka Doncic,$¢$,64.07,"PG,G,Flx",DAL,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,...,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001
1,Nikola Jokic,CCC,61.72,"C,Flx",DEN,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,...,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001
2,Giannis Antetokounmpo,BBB,57.79,"PF,F,C,Flx",MIL,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,...,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001
3,Joel Embiid,Jmarr237,57.2,"C,Flx",PHI,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000000,...,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001
4,Shai Gilgeous-Alexander,HBC,55.08,"PG,SG,G,Flx",OKC,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,...,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000001,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001,1970-01-01,1970-01-01 00:00:00.000000001


In [5]:
playoff_schedule.columns

Index(['player', 'fantasy_team', 'fantasy_points_per_game', 'position',
       'nba_team'],
      dtype='object')

# Set Lineups

In [20]:
import pandas as pd
import numpy as np

def set_lineups(playoff_schedule):
    # Define the lineup slots
    lineup_slots = ['PG', 'SG', 'G', 'SF', 'PF', 'F', 'C', 'Flx', 'Flx', 'Flx']
    
    # Get unique fantasy teams
    teams = playoff_schedule['fantasy_team'].unique()
    
    # Get date columns
    date_columns = [col for col in playoff_schedule.columns if col not in ['player', 'fantasy_team', 'fantasy_points_per_game', 'position', 'nba_team']]
    
    # Create a dictionary to store lineups
    lineups = {team: {date: {} for date in date_columns} for team in teams}
    
    for team in teams:
        team_players = playoff_schedule[playoff_schedule['fantasy_team'] == team]
        
        for date in date_columns:
            available_players = team_players[team_players[date] == 1]
            lineup = []
            
            for slot in lineup_slots:
                eligible_players = available_players[available_players['position'].str.contains(slot)]
                if not eligible_players.empty:
                    player = eligible_players.sort_values('fantasy_points_per_game', ascending=False).iloc[0]
                    lineup.append((player['player'], slot))
                    available_players = available_players[available_players['player'] != player['player']]
                else:
                    lineup.append(('Empty', slot))
            
            lineups[team][date] = lineup
    
    return lineups

# Identify date columns
date_columns = [col for col in playoff_schedule.columns if col not in ['player', 'fantasy_team', 'fantasy_points_per_game', 'position', 'nba_team']]

# Convert date columns to integers (1 for a game, 0 for no game)
for col in date_columns:
    playoff_schedule[col] = playoff_schedule[col].notna().astype(int)

# Verify the conversion
print("Sample of converted data:")
print(playoff_schedule.iloc[:5, :10])  # Print first 5 rows and 10 columns

# Set lineups
team_lineups = set_lineups(playoff_schedule)

# Print example lineup for the first team and first date
first_team = list(team_lineups.keys())[0]
first_date = list(team_lineups[first_team].keys())[0]
print(f"\nLineup for {first_team} on {first_date}:")
for player, position in team_lineups[first_team][first_date]:
    print(f"{position}: {player}")

# Calculate the number of games played by each player
games_played = playoff_schedule[date_columns].sum(axis=1)
playoff_schedule['games_played'] = games_played

# Display the players with the most games during the playoff period
print("\nPlayers with the most games during playoffs:")
print(playoff_schedule.sort_values('games_played', ascending=False)[['player', 'fantasy_team', 'games_played']].head(10))

# Print the total number of game days
print(f"\nTotal number of game days: {len(date_columns)}")

# Print the maximum number of games played
print(f"Maximum number of games played: {games_played.max()}")

# Verify data for a player with max games
max_games_player = playoff_schedule.loc[games_played.idxmax(), 'player']
print(f"\nGame schedule for {max_games_player}:")
print(playoff_schedule.loc[games_played.idxmax(), date_columns].to_string())

Sample of converted data:
                    player fantasy_team  fantasy_points_per_game     position  \
0              Luka Doncic          $¢$                    64.07     PG,G,Flx   
1             Nikola Jokic          CCC                    61.72        C,Flx   
2    Giannis Antetokounmpo          BBB                    57.79   PF,F,C,Flx   
3              Joel Embiid     Jmarr237                    57.20        C,Flx   
4  Shai Gilgeous-Alexander          HBC                    55.08  PG,SG,G,Flx   

  nba_team  2025-03-24 00:00:00  2025-03-25 00:00:00  2025-03-26 00:00:00  \
0      DAL                    1                    1                    1   
1      DEN                    1                    1                    1   
2      MIL                    1                    1                    1   
3      PHI                    1                    1                    1   
4      OKC                    1                    1                    1   

   2025-03-27 00:00:00  