In [3]:
# Cell 1: Django Setup and Direct Model Check (Async Safe - Corrected Game Fetch)
import os
import django
import logging
from asgiref.sync import sync_to_async
import asyncio

# Configure logging (basicConfig leaves an already-configured root logger untouched)
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger(__name__)

# Resolve the project root.
# The original absolute path remains the default so the notebook behaves exactly as
# before on the author's machine; set the NBA_PROJECT_ROOT environment variable to
# point at the checkout when running anywhere else.
expected_cwd = os.environ.get('NBA_PROJECT_ROOT') or '/Users/sg44574/Dropbox/Coding/nba-analytics-dashboard2'
if os.getcwd() == expected_cwd:
    print(f"Current working directory: {os.getcwd()}")
elif os.path.isdir(expected_cwd):
    os.chdir(expected_cwd)
    print(f"Changed working directory to: {os.getcwd()}")
else:
    # A missing directory used to abort the cell with FileNotFoundError from os.chdir.
    # Stay where we are instead: if the kernel was started from the project root the
    # rest of the notebook still works, and otherwise the Django import below reports
    # the real problem.
    logger.warning(
        "Project root %r does not exist; staying in %s. "
        "Set NBA_PROJECT_ROOT to the project directory.",
        expected_cwd,
        os.getcwd(),
    )

# Set the DJANGO_SETTINGS_MODULE environment variable
# (setdefault keeps any value that is already present in the environment)
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'nba_analytics_project.settings')
print(f"DJANGO_SETTINGS_MODULE set to: {os.getenv('DJANGO_SETTINGS_MODULE')}")

# Set up Django
try:
    # Check if Django is already setup to avoid reinitialization errors
    if not hasattr(django, '_setup_called'):
        django.setup()
        django._setup_called = True # Mark that setup has been called
        print("Django setup successful.")
    else:
        print("Django already set up.")

    # --- Direct Model Checks (Async Safe) ---
    print("\n--- Checking Database Directly via Models (Async Safe) ---")
    from nba_data.models import Team, Player, Game

    # Define async functions to wrap ORM calls
    @sync_to_async
    def get_team_count():
        return Team.objects.count()

    @sync_to_async
    def get_first_team():
        # Select related might not be strictly needed here if Team.__str__ is simple,
        # but it's good practice if __str__ could access relations.
        return Team.objects.first()

    @sync_to_async
    def get_player_count():
        return Player.objects.count()

    @sync_to_async
    def get_first_player():
        # Select related for player's team if Player.__str__ uses it
        return Player.objects.select_related('team').first()

    @sync_to_async
    def get_game_count():
        return Game.objects.count()

    @sync_to_async
    def get_first_game():
        # *** CORRECTED: Use select_related to prevent lazy loading in __str__ ***
        return Game.objects.select_related('home_team', 'visitor_team').first()

    # Run the async functions
    async def run_checks():
        team_count = await get_team_count()
        print(f"Team count: {team_count}")
        if team_count > 0:
            first_team = await get_first_team()
            print(f"First team: {first_team}")

        player_count = await get_player_count()
        print(f"\nPlayer count: {player_count}")
        if player_count > 0:
            first_player = await get_first_player()
            print(f"First player: {first_player}") # This implicitly calls __str__

        game_count = await get_game_count()
        print(f"\nGame count: {game_count}")
        if game_count > 0:
            first_game = await get_first_game()
            print(f"First game: {first_game}") # This implicitly calls __str__

    # Execute the async checks
    # In Jupyter, top-level await works, otherwise use asyncio.run()
    await run_checks()

except Exception as e:
    print(f"An error occurred during setup or direct model check: {e}")

print("\n--- Direct Model Check Finished ---")


Current working directory: /Users/sg44574/Dropbox/Coding/nba-analytics-dashboard2
DJANGO_SETTINGS_MODULE set to: nba_analytics_project.settings
Django already set up.

--- Checking Database Directly via Models (Async Safe) ---
Team count: 45
First team: Atlanta Hawks

Player count: 725
First player: Alex Abrines

Game count: 1217
First game: Phoenix Suns @ Golden State Warriors (2023-10-24)

--- Direct Model Check Finished ---


In [5]:
# Cell 2: Data Processing Pipeline (Async Safe Calls to Dataframe Functions)
import pandas as pd
import logging
from asgiref.sync import sync_to_async
import asyncio

# Repeat the logging setup so this cell can also be run on its own
logging.basicConfig(
    format='%(levelname)s:%(name)s:%(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Import analytics functions
from nba_data.analytics import dataframes, data_prep, stats

# --- Step 1: Load data (using sync_to_async) ---
print("\n--- Loading Initial DataFrames (Async Safe) ---")

# Wrap dataframe functions
get_teams_dataframe_async = sync_to_async(dataframes.get_teams_dataframe)
get_players_dataframe_async = sync_to_async(dataframes.get_players_dataframe)
get_games_dataframe_async = sync_to_async(dataframes.get_games_dataframe)

async def load_data_async():
    try:
        teams_df = await get_teams_dataframe_async()
        players_df = await get_players_dataframe_async()
        games_df = await get_games_dataframe_async()
        return teams_df, players_df, games_df
    except Exception as e:
        print(f"Error loading initial dataframes async: {e}")
        return pd.DataFrame(), pd.DataFrame(), pd.DataFrame()

# Run the async loading
teams_df, players_df, games_df = await load_data_async()

# Preview each freshly loaded frame: banner, then head + shape, or a note when empty
for _banner, _frame, _empty_note in (
    ("\n=== Teams DataFrame Head ===", teams_df, "Teams DataFrame is empty."),
    ("\n=== Players DataFrame Head ===", players_df, "Players DataFrame is empty."),
    ("\n=== Games DataFrame Head ===", games_df, "Games DataFrame is empty."),
):
    print(_banner)
    if _frame.empty:
        print(_empty_note)
        continue
    print(_frame.head())
    print(f"Shape: {_frame.shape}")


# --- Step 2: Clean data ---
# The cleaning helpers are called synchronously (no sync_to_async wrapper); the
# notebook's working assumption is that they are quick pandas operations with no DB I/O.
print("\n--- Cleaning Data ---")
# Start from empty placeholders so later steps can rely on these names existing
cleaned_players_df = pd.DataFrame()
cleaned_teams_df = pd.DataFrame()
cleaned_games_df = pd.DataFrame()
try:
    # Clean only the frames that actually loaded; the rest keep their empty placeholder
    if not players_df.empty:
        cleaned_players_df = data_prep.clean_player_data(players_df)
    if not teams_df.empty:
        cleaned_teams_df = data_prep.clean_teams_data(teams_df)
    if not games_df.empty:
        cleaned_games_df = data_prep.clean_games_data(games_df)

    # Preview every cleaned frame: banner, then head + shape, or a note when empty
    for _banner, _frame, _empty_note in (
        (
            "\n=== Cleaned Players DataFrame Head ===",
            cleaned_players_df,
            "Cleaned Players DataFrame is empty (or initial was empty).",
        ),
        (
            "\n=== Cleaned Teams DataFrame Head ===",
            cleaned_teams_df,
            "Cleaned Teams DataFrame is empty (or initial was empty).",
        ),
        (
            "\n=== Cleaned Games DataFrame Head ===",
            cleaned_games_df,
            "Cleaned Games DataFrame is empty (or initial was empty).",
        ),
    ):
        print(_banner)
        if _frame.empty:
            print(_empty_note)
        else:
            print(_frame.head())
            print(f"Shape: {_frame.shape}")

except Exception as err:
    print(f"Error cleaning data: {err}")


# --- Step 3: Prepare and Enhance Data ---
print("\n--- Preparing and Enhancing Data ---")
# Empty placeholders keep the names defined even when this stage is skipped or fails
prepared_games_df = pd.DataFrame()
team_stats_df = pd.DataFrame()
try:
    if cleaned_games_df.empty:
        print("Cleaned games data was empty, skipping enhancement and team stats prep.")
    else:
        # Enhancement runs first; its result feeds the home-vs-away preparation
        prepared_games_df = data_prep.enhance_game_data(cleaned_games_df)
        print("\n=== Prepared (Enhanced) Games DataFrame Head ===")
        print(prepared_games_df.head())
        print(f"Shape: {prepared_games_df.shape}")

        team_stats_df = data_prep.prepare_home_vs_away(prepared_games_df)
        print("\n=== Team Stats (Home vs Away) DataFrame Head ===")
        print(team_stats_df.head())
        print(f"Shape: {team_stats_df.shape}")

except ValueError as value_err:
    # Reported separately with a hint to check the required columns
    print(f"ValueError during data prep/enhancement: {value_err}. Check required columns.")
except Exception as err:
    print(f"Error preparing/enhancing data: {err}")


# --- Step 4: Calculate Stats and Rankings ---
print("\n--- Calculating Stats and Rankings ---")
# Empty placeholders keep the names defined even when this stage is skipped or fails
team_metrics_df = pd.DataFrame()
team_rankings_df = pd.DataFrame()
try:
    if team_stats_df.empty:
        print("Team stats DataFrame was empty, skipping metrics and rankings calculation.")
    else:
        # Metrics are derived from the team stats; rankings are derived from the metrics
        team_metrics_df = stats.calculate_team_performance_metrics(team_stats_df)
        print("\n=== Team Metrics DataFrame Head ===")
        print(team_metrics_df.head())
        print(f"Shape: {team_metrics_df.shape}")

        team_rankings_df = stats.calculate_team_rankings(team_metrics_df)
        print("\n=== Team Rankings DataFrame Head ===")
        print(team_rankings_df.head())
        print(f"Shape: {team_rankings_df.shape}")

except Exception as err:
    print(f"Error calculating stats/rankings: {err}")

print("\n--- Script Finished ---")

INFO:nba_data.analytics.data_prep:Initial game DataFrame shape: (1217, 16)
INFO:nba_data.analytics.data_prep:Enhanced game DataFrame shape: (1217, 23)
INFO:nba_data.analytics.data_prep:Enhanced game DataFrame summary:
            game_id                           date datetime  season  \
count  1.217000e+03                           1217        0  1217.0   
mean   2.347865e+06  2024-01-26 09:23:13.262119936      NaT  2023.0   
min    1.037593e+06            2023-10-24 00:00:00      NaT  2023.0   
25%    1.037975e+06            2023-12-08 00:00:00      NaT  2023.0   
50%    1.038305e+06            2024-01-24 00:00:00      NaT  2023.0   
75%    1.038638e+06            2024-03-15 00:00:00      NaT  2023.0   
max    1.590507e+07            2024-06-17 00:00:00      NaT  2023.0   
std    3.901796e+06                            NaN      NaN     0.0   

            period  home_team_score  visitor_team_score  home_team_id  \
count  1217.000000      1217.000000         1217.000000   1217.000000


--- Loading Initial DataFrames (Async Safe) ---

=== Teams DataFrame Head ===
   team_id abbreviation       city conference   division          full_name  \
0        1          ATL    Atlanta       East  Southeast      Atlanta Hawks   
1        2          BOS     Boston       East   Atlantic     Boston Celtics   
2        3          BKN   Brooklyn       East   Atlantic      Brooklyn Nets   
3        4          CHA  Charlotte       East  Southeast  Charlotte Hornets   
4        5          CHI    Chicago       East    Central      Chicago Bulls   

      name  
0    Hawks  
1  Celtics  
2     Nets  
3  Hornets  
4    Bulls  
Shape: (45, 7)

=== Players DataFrame Head ===
   player_id first_name      last_name position  height_feet  height_inches  \
0          1       Alex        Abrines        G          6.0            6.0   
1          2     Jaylen          Adams        G          6.0            0.0   
2          3     Steven          Adams        C          6.0           11.0   
3    

In [21]:
# Export pipeline results as CSV files (relative paths, i.e. the current working directory).
# Only the first rows of prepared_games_df are written; the team-level frames are written in full.
prepared_games_df.head().to_csv('prepared_games_df_header.csv', index=False)
for _frame, _csv_name in (
    (team_metrics_df, 'team_metrics.csv'),
    (team_rankings_df, 'team_rankings.csv'),
    (team_stats_df, 'team_stats.csv'),
):
    _frame.to_csv(_csv_name, index=False)


# Print shapes, in this order: team_stats_df, team_metrics_df, prepared_games_df
# (team_rankings_df is exported above but its shape is not printed here)
for _frame in (team_stats_df, team_metrics_df, prepared_games_df):
    print(_frame.shape)


(30, 19)
(30, 30)
(1217, 23)
