In [138]:
import sqlite3
import pandas as pd
import os
import sys
import datetime as dt
import matplotlib.pyplot as plt

sys.path.append("/Users/maxwellpatterson/Desktop/personal/bball")

from src.data_processing.get_fantrax_data import get_most_recent_fantrax_data


In [75]:
def get_schedule_matrix():
    conn = sqlite3.connect('data/player_data.db')
    schedule_matrix = pd.read_sql_query('SELECT * FROM schedule', conn)
    conn.close()
    return schedule_matrix

schedule_matrix = get_schedule_matrix()

# Remove the times from the date columns and convert to date objects
date_columns = [col for col in schedule_matrix.columns if isinstance(col, str) and col.startswith('20')]
schedule_matrix = schedule_matrix.rename(columns={col: pd.to_datetime(col).date() for col in date_columns})

# Update the schedule_matrix variable
schedule_matrix = schedule_matrix.copy()

print("Date columns updated. New column names:")
print(schedule_matrix.columns)



Date columns updated. New column names:
Index(['team_name', 'team_abbr',  2024-10-22,  2024-10-23,  2024-10-24,
        2024-10-25,  2024-10-26,  2024-10-27,  2024-10-28,  2024-10-29,
       ...
        2025-04-03,  2025-04-04,  2025-04-05,  2025-04-06,  2025-04-07,
        2025-04-08,  2025-04-09,  2025-04-10,  2025-04-11,  2025-04-13],
      dtype='object', length=157)


In [76]:
schedule_matrix.head()

Unnamed: 0,team_name,team_abbr,2024-10-22,2024-10-23,2024-10-24,2024-10-25,2024-10-26,2024-10-27,2024-10-28,2024-10-29,...,2025-04-03,2025-04-04,2025-04-05,2025-04-06,2025-04-07,2025-04-08,2025-04-09,2025-04-10,2025-04-11,2025-04-13
0,New York Knicks,NY,1,0,0,1,0,0,1,0,...,0,0,1,1,0,1,0,1,1,1
1,Minnesota Timberwolves,MIN,1,0,1,0,1,0,0,1,...,1,0,1,0,0,1,0,1,1,1
2,Indiana Pacers,IND,0,1,0,1,0,1,1,0,...,0,1,0,1,0,1,0,1,1,1
3,Brooklyn Nets,BKN,0,1,0,1,0,1,0,1,...,1,0,0,1,0,1,0,1,1,1
4,Orlando Magic,ORL,0,1,0,1,1,0,1,0,...,1,0,0,1,0,1,1,0,1,1


In [105]:
fantrax_data = get_most_recent_fantrax_data()

def remove_free_agents(df):
    return df[df['fantasy_team'] != 'FA']

fantrax_data = remove_free_agents(fantrax_data)

Loading most recent Fantrax data: fantrax_8_25_24


In [112]:
fantrax_data.columns

Index(['id', 'player', 'team', 'position', 'rkov', 'fantasy_team', 'age',
       'opponent', 'fpts', 'fp_per_g', 'pctd', 'adp', 'fgm', 'fga', '3ptm',
       'ftm', 'fta', 'pts', 'reb', 'ast', 'st', 'blk', 'to', '3d', '2d'],
      dtype='object')

In [113]:
# Join the schedule for the players team to the fantrax data
merged_data = fantrax_data.merge(schedule_matrix, left_on='team', right_on='team_abbr', how='left')

# Create a new dataframe with player name, team, fantasy_team, and date columns
player_schedules = merged_data[['player', 'team', 'fp_per_g', 'fantasy_team'] + [col for col in merged_data.columns if isinstance(col, dt.date)]]

In [114]:
player_schedules.head()

Unnamed: 0,player,team,fp_per_g,fantasy_team,2024-10-22,2024-10-23,2024-10-24,2024-10-25,2024-10-26,2024-10-27,...,2025-04-03,2025-04-04,2025-04-05,2025-04-06,2025-04-07,2025-04-08,2025-04-09,2025-04-10,2025-04-11,2025-04-13
0,Nikola Jokic,DEN,65.24,CCC,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0
1,Luka Doncic,DAL,66.34,$¢$,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0
2,Giannis Antetokounmpo,MIL,60.19,BBB,0.0,1.0,0.0,1.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
3,Domantas Sabonis,SAC,52.99,BBB,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0
4,Anthony Davis,LAL,57.08,Jmarr237,1.0,0.0,0.0,1.0,1.0,0.0,...,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0


In [143]:
# playoff weeks:
# 1: March 24 - March 30
#2: March 31 - April 6
#3: April 7 - April 13

# Define the playoff weeks
playoff_weeks = [
    (dt.date(2025, 3, 24), dt.date(2025, 3, 30)),
    (dt.date(2025, 3, 31), dt.date(2025, 4, 6)),
    (dt.date(2025, 4, 7), dt.date(2025, 4, 13))
]

In [119]:
# Create a list of all playoff dates
playoff_dates = [date for start, end in playoff_weeks for date in pd.date_range(start, end).date]

# Filter the player_schedules for only the playoff dates
player_playoff_schedule = player_schedules[['player', 'team', 'fp_per_g', 'fantasy_team'] + [col for col in player_schedules.columns if isinstance(col, dt.date) and col in playoff_dates]]

# Ensure all 21 date columns are present, even if they're not in player_schedules
all_playoff_dates = pd.date_range(playoff_weeks[0][0], playoff_weeks[-1][1]).date
for date in all_playoff_dates:
    if date not in player_playoff_schedule.columns:
        player_playoff_schedule[date] = 0  # or pd.NA if you prefer

# Sort the columns to ensure dates are in order
date_columns = [col for col in player_playoff_schedule.columns if isinstance(col, dt.date)]
player_playoff_schedule = player_playoff_schedule[['player', 'team', 'fp_per_g', 'fantasy_team'] + sorted(date_columns)]

print("Player playoff schedule shape:", player_playoff_schedule.shape)
print("\nFirst few rows of player_playoff_schedule:")


Player playoff schedule shape: (321, 25)

First few rows of player_playoff_schedule:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  player_playoff_schedule[date] = 0  # or pd.NA if you prefer


In [120]:
player_playoff_schedule.head()

Unnamed: 0,player,team,fp_per_g,fantasy_team,2025-03-24,2025-03-25,2025-03-26,2025-03-27,2025-03-28,2025-03-29,...,2025-04-04,2025-04-05,2025-04-06,2025-04-07,2025-04-08,2025-04-09,2025-04-10,2025-04-11,2025-04-12,2025-04-13
0,Nikola Jokic,DEN,65.24,CCC,1.0,0.0,1.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0,1.0
1,Luka Doncic,DAL,66.34,$¢$,1.0,1.0,0.0,1.0,0.0,1.0,...,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0,1.0
2,Giannis Antetokounmpo,MIL,60.19,BBB,1.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,0,1.0
3,Domantas Sabonis,SAC,52.99,BBB,1.0,1.0,0.0,1.0,0.0,1.0,...,1.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0,1.0
4,Anthony Davis,LAL,57.08,Jmarr237,1.0,0.0,1.0,1.0,0.0,1.0,...,1.0,0.0,1.0,0.0,1.0,1.0,0.0,1.0,0,1.0


In [131]:
# Define scoring groups based on fp_per_g
scoring_groups = [(0, 10), (10, 20), (20, 30), (30, 40), (40, 50), (50, float('inf'))]

# Function to get scoring group based on fp_per_g
def get_scoring_group(fp_per_g):
    for low, high in scoring_groups:
        if low <= fp_per_g < high:
            return f"{low}-{high if high != float('inf') else '+'}"
    return "0-10"  # For players with less than 10 fp_per_g

# Assign scoring group to each player based on their fp_per_g
player_playoff_schedule['Scoring_Group'] = player_playoff_schedule['fp_per_g'].apply(get_scoring_group)

# Calculate games per week for each player
for week_num, (start_date, end_date) in enumerate(playoff_weeks, 1):
    week_dates = [date for date in pd.date_range(start_date, end_date).date if date in player_playoff_schedule.columns]
    player_playoff_schedule[f'Week_{week_num}_Games'] = player_playoff_schedule[week_dates].sum(axis=1)

# Group by fantasy team and scoring group for each week
fantasy_team_groups = []
for week_num in range(1, 4):
    week_group = player_playoff_schedule.groupby(['fantasy_team', 'Scoring_Group'])[f'Week_{week_num}_Games'].sum().unstack(fill_value=0)
    week_group.columns = [f'{col} (Week {week_num})' for col in week_group.columns]
    fantasy_team_groups.append(week_group)

# Combine all weeks
fantasy_team_summary = pd.concat(fantasy_team_groups, axis=1)

# Sort columns to group weeks together
fantasy_team_summary = fantasy_team_summary.reindex(sorted(fantasy_team_summary.columns), axis=1)

print("Fantasy Team Playoff Schedule Summary:")
print(fantasy_team_summary)


Fantasy Team Playoff Schedule Summary:
              0-10 (Week 1)  0-10 (Week 2)  0-10 (Week 3)  10-20 (Week 1)  \
fantasy_team                                                                
$¢$                    20.0           22.0           23.0            23.0   
BBB                     9.0           12.0           10.0             7.0   
BIGFOOTS               16.0           19.0           20.0            23.0   
CCC                     8.0            6.0            8.0            15.0   
GBRAYERS               17.0           18.0           18.0            19.0   
HBC                    11.0           11.0           11.0             7.0   
Jmarr237               21.0           23.0           23.0             6.0   
Orcas                  10.0           11.0           12.0             9.0   
SDP                    35.0           36.0           40.0            35.0   
SERP                   20.0           21.0           22.0            24.0   
STARKS                 25.0          

In [184]:
fantasy_team_daily_scores = pd.DataFrame(index=player_playoff_schedule['fantasy_team'].unique(), columns=player_playoff_schedule.columns[4:25])

# Function to get top 10 scoring players for a team on a given day
def get_top_10_score(team_data, date):
    playing_players = team_data[team_data[date] == 1].nlargest(10, 'fp_per_g')
    return playing_players['fp_per_g'].sum()

# Iterate through each fantasy team and date
for team in fantasy_team_daily_scores.index:
    team_data = player_playoff_schedule[player_playoff_schedule['fantasy_team'] == team]
    for date in fantasy_team_daily_scores.columns:
        fantasy_team_daily_scores.at[team, date] = get_top_10_score(team_data, date)

# Create a DataFrame to store players playing for each fantasy team on each date
fantasy_team_daily_players = pd.DataFrame(index=player_playoff_schedule['fantasy_team'].unique(), columns=player_playoff_schedule.columns[4:25])

# Function to get players playing for a team on a given day
def get_playing_players(team_data, date):
    playing_players = team_data[team_data[date] == 1]['player'].tolist()
    return ', '.join(playing_players)

# Iterate through each fantasy team and date
for team in fantasy_team_daily_players.index:
    team_data = player_playoff_schedule[player_playoff_schedule['fantasy_team'] == team]
    for date in fantasy_team_daily_players.columns:
        fantasy_team_daily_players.at[team, date] = get_playing_players(team_data, date)

# Create a DataFrame to store players playing for each fantasy team on each date
fantasy_team_daily_players = pd.DataFrame(index=player_playoff_schedule['fantasy_team'].unique(), columns=player_playoff_schedule.columns[4:25])

# Function to get players playing for a team on a given day
def get_playing_players(team_data, date):
    playing_players = team_data[team_data[date] == 1]['player'].tolist()
    return ', '.join(playing_players)

# Iterate through each fantasy team and date
for team in fantasy_team_daily_players.index:
    team_data = player_playoff_schedule[player_playoff_schedule['fantasy_team'] == team]
    for date in fantasy_team_daily_players.columns:
        fantasy_team_daily_players.at[team, date] = get_playing_players(team_data, date)

# Display the first few rows of the DataFrame
print(fantasy_team_daily_players.head())

# Save the DataFrame to a CSV file
fantasy_team_daily_players.to_csv('fantasy_team_daily_players.csv')

print("DataFrame saved to 'fantasy_team_daily_players.csv'")



                                                 2025-03-24  \
CCC       Nikola Jokic, Derrick White, Jusuf Nurkic, Dan...   
$¢$       Luka Doncic, Paul George, Myles Turner, Coby W...   
BBB       Giannis Antetokounmpo, Domantas Sabonis, LeBro...   
Jmarr237  Anthony Davis, Kyrie Irving, Joel Embiid, Bran...   
HBC       DAngelo Russell, Michael Porter, Dennis Schrod...   

                                                 2025-03-25  \
CCC       Stephen Curry, Donovan Mitchell, Daniel Gaffor...   
$¢$       Luka Doncic, Franz Wagner, Lauri Markkanen, Ev...   
BBB       Domantas Sabonis, DeMar DeRozan, Jimmy Butler,...   
Jmarr237  Jarrett Allen, Kyrie Irving, Miles Bridges, Bo...   
HBC       Shai Gilgeous-Alexander, Jalen Williams, Tre J...   

                                                 2025-03-26  \
CCC       Nikola Jokic, Derrick White, Jusuf Nurkic, Jam...   
$¢$       Paul George, Myles Turner, Aaron Nesmith, Boja...   
BBB       Giannis Antetokounmpo, LeBron James, Kevin 

In [183]:
fantasy_team_daily_scores.head(25)

Unnamed: 0,2025-03-24,2025-03-25,2025-03-26,2025-03-27,2025-03-28,2025-03-29,2025-03-30,2025-03-31,2025-04-01,2025-04-02,...,2025-04-04,2025-04-05,2025-04-06,2025-04-07,2025-04-08,2025-04-09,2025-04-10,2025-04-11,2025-04-12,2025-04-13
CCC,342.23,317.42,306.66,299.6,364.92,279.99,308.28,262.55,286.39,373.77,...,396.16,154.24,389.92,49.65,333.89,375.16,205.0,396.16,0.0,396.16
$¢$,309.09,359.6,168.04,383.79,192.8,365.43,198.43,377.24,224.12,313.54,...,334.24,241.82,304.75,65.37,330.31,329.0,238.49,411.01,0.0,411.01
BBB,437.1,232.76,378.38,328.85,360.5,323.77,365.58,375.44,303.7,342.97,...,401.74,289.72,376.57,133.79,409.56,461.82,69.18,461.82,0.0,461.82
Jmarr237,337.94,340.83,239.91,335.96,338.88,299.98,382.08,371.68,201.29,372.78,...,381.75,349.57,349.55,140.23,378.94,432.17,204.32,440.36,0.0,440.36
HBC,278.57,304.63,207.1,319.62,199.42,309.91,260.31,302.88,265.64,310.79,...,341.21,114.07,345.14,68.24,319.34,335.74,104.69,345.14,0.0,345.14
Teacups,315.86,343.25,309.69,338.81,297.37,327.41,362.18,316.69,293.73,376.81,...,361.95,299.06,371.48,69.24,367.85,351.6,302.34,389.48,0.0,389.48
GBRAYERS,326.79,273.51,339.73,307.02,246.83,247.9,353.85,231.06,325.01,269.94,...,367.58,122.1,365.96,66.02,294.15,360.81,106.27,384.3,0.0,384.3
BIGFOOTS,274.49,315.41,174.85,272.36,304.84,187.57,312.22,233.19,228.72,344.8,...,299.79,215.86,356.06,121.52,311.76,311.46,188.49,366.98,0.0,366.98
STARKS,319.55,304.96,248.31,306.3,186.44,342.39,196.27,342.39,191.99,355.69,...,321.88,158.13,347.98,111.64,355.17,350.06,136.29,369.13,0.0,369.13
Orcas,312.0,269.73,286.05,146.39,316.77,229.48,321.47,224.87,309.6,264.47,...,279.72,278.37,329.49,90.33,325.75,277.76,276.26,350.43,0.0,350.43


In [None]:
fantasy_team_daily_scores.head()

In [180]:
# Ensure column indexes are datetime
fantasy_team_daily_scores.columns = pd.to_datetime(fantasy_team_daily_scores.columns)

# Convert the cell values to integers
fantasy_team_daily_scores = fantasy_team_daily_scores.astype(int)

# Define the date ranges for each week
week_ranges = [
    (pd.to_datetime('2025-03-24'), pd.to_datetime('2025-03-30')),  # Week 1
    (pd.to_datetime('2025-03-31'), pd.to_datetime('2025-04-06')),  # Week 2
    (pd.to_datetime('2025-04-07'), pd.to_datetime('2025-04-13'))   # Week 3
]

# Create a new DataFrame to store the weekly summaries
fantasy_team_weekly_summary = pd.DataFrame()

for week_num, (start_date, end_date) in enumerate(week_ranges, 1):
    week_data = fantasy_team_daily_scores.loc[:, start_date:end_date]
    fantasy_team_weekly_summary[f'Week_{week_num}_Total'] = week_data.sum(axis=1)

# Sort the DataFrame by the total score in descending order for each week
for week in range(1, 4):
    fantasy_team_weekly_summary = fantasy_team_weekly_summary.sort_values(f'Week_{week}_Total', ascending=False)

    # Create a bar plot for each week
    plt.figure(figsize=(12, 6))
    ax = fantasy_team_weekly_summary[f'Week_{week}_Total'].plot(kind='bar')
    plt.title(f'Fantasy Team Totals - Week {week}')
    plt.xlabel('Teams')
    plt.ylabel('Total Score')
    plt.xticks(rotation=45, ha='right')
    
    # Add value labels on top of each bar
    for i, v in enumerate(fantasy_team_weekly_summary[f'Week_{week}_Total']):
        ax.text(i, v, str(v), ha='center', va='bottom')
    
    plt.tight_layout()
    
    # Create the images folder if it doesn't exist
    images_folder = 'images'
    os.makedirs(images_folder, exist_ok=True)

    # Save the plot as an image file
    plt.savefig(os.path.join(images_folder, f'fantasy_team_totals_week_{week}.png'), dpi=300, bbox_inches='tight')
    plt.close()  # Close the figure to free up memory
