# Backfill season data

In [None]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import time
import csv
import os

def fetch_nba_games_and_stats(season_year):
    """Download every game and per-player stat line for one NBA season.

    Games are requested from the balldontlie ``/games`` endpoint for the
    window ``season_year``-10-24 through ``season_year + 1``-07-01, then the
    ``/stats`` endpoint is queried once per game (following pagination).

    Args:
        season_year: Calendar year in which the season starts (e.g. ``2019``
            for the 2019-20 season).

    Returns:
        A list with one dict per stat line whose game was fetched and whose
        ``player`` is not ``None``. Each dict contains the game fields
        prefixed with ``game_`` (excluding the nested team dicts), the player
        fields prefixed with ``player_``, the player's team fields prefixed
        with ``player_team_``, the home/visitor team fields prefixed with
        ``home_team_`` / ``visitor_team_``, and finally every raw key of the
        stat record itself (which wins on any key collision).

    Raises:
        requests.HTTPError: If an endpoint keeps answering with a 4xx/5xx
            status after all retry attempts.
        RuntimeError: If an endpoint keeps answering with a non-200,
            non-error status after all retry attempts.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): these are the v1 endpoints; they may since have been
    # retired or moved behind an API key -- confirm against the provider docs.
    games_url = "https://www.balldontlie.io/api/v1/games"
    stats_url = "https://www.balldontlie.io/api/v1/stats"

    # Season window, formatted as YYYY-MM-DD for the API.
    # The July 1st end date assumes the season (incl. playoffs) is over by then.
    start_date_str = datetime(season_year, 10, 24).strftime("%Y-%m-%d")
    end_date_str = datetime(season_year + 1, 7, 1).strftime("%Y-%m-%d")

    per_page = 100  # Maximum value as per the API documentation
    max_attempts = 5  # Upper bound on tries per page before giving up
    request_timeout = 30  # Seconds; prevents a stalled connection from hanging forever
    request_count = 0

    def make_request(url, params):
        """GET ``url`` with bounded retries and return the 200 response."""
        nonlocal request_count
        response = None
        for attempt in range(1, max_attempts + 1):
            response = requests.get(url, params=params, timeout=request_timeout)
            request_count += 1
            # Crude client-side throttle: pause after every 100 requests.
            if request_count >= 100:
                time.sleep(10)
                request_count = 0

            if response.status_code == 200:
                return response

            # Back off before retrying so a rate-limited or failing server
            # is not hammered in a tight loop.
            if attempt < max_attempts:
                time.sleep(min(2 ** attempt, 60))

        # Out of attempts: surface the failure instead of looping forever.
        response.raise_for_status()
        raise RuntimeError(
            f"Unexpected status {response.status_code} from {url} "
            f"after {max_attempts} attempts"
        )

    def fetch_all_pages(url, base_params):
        """Collect the ``data`` records from every page of a paginated endpoint."""
        records = []
        page = 1
        total_pages = 1  # Placeholder, updated from the first response
        while page <= total_pages:
            params = {**base_params, "per_page": per_page, "page": page}
            data = make_request(url, params).json()
            records.extend(data['data'])

            if page == 1:
                total_pages = data['meta']['total_pages']

            page += 1
        return records

    # Fetch games
    all_games = fetch_all_pages(
        games_url,
        {"start_date": start_date_str, "end_date": end_date_str},
    )

    # Fetch stats for each game
    all_stats = []
    for game in all_games:
        all_stats.extend(fetch_all_pages(stats_url, {"game_ids[]": game['id']}))

    # Index games by id once so the merge below is a constant-time lookup per
    # stat line. setdefault keeps the first occurrence if an id repeats.
    games_by_id = {}
    for game in all_games:
        games_by_id.setdefault(game['id'], game)

    # Merge games and stats with flattened structure
    merged_data = []
    for stat in all_stats:
        game_info = games_by_id.get(stat['game']['id'])
        # Skip stat lines without a matching game or without player data.
        if not game_info or stat['player'] is None:
            continue

        # Flatten the game, player, team, home_team, and visitor_team data
        flattened_game_info = {
            f'game_{k}': v
            for k, v in game_info.items()
            if k not in ('home_team', 'visitor_team')
        }
        flattened_player_info = {f'player_{k}': v for k, v in stat['player'].items()}
        flattened_team_info = {f'player_team_{k}': v for k, v in stat['team'].items()}
        flattened_home_team_info = {
            f'home_team_{k}': v for k, v in game_info['home_team'].items()
        }
        flattened_visitor_team_info = {
            f'visitor_team_{k}': v for k, v in game_info['visitor_team'].items()
        }

        # Merge all flattened data; the raw stat goes last so its keys win.
        merged_data.append({
            **flattened_game_info,
            **flattened_player_info,
            **flattened_team_info,
            **flattened_home_team_info,
            **flattened_visitor_team_info,
            **stat,
        })

    return merged_data


def write_to_csv(season_data, file_name, write_headers=True):
    """Append a list of row dicts to a CSV file.

    Args:
        season_data: Sequence of dicts, one per CSV row. May be empty, in
            which case nothing is written and the file is not touched.
        file_name: Path of the CSV file; opened in append mode, so existing
            content is preserved.
        write_headers: When true, a header row is written before the data.

    The column set is the union of the keys of all rows, in first-seen order,
    so a row carrying a key the first row lacks no longer aborts the write;
    rows missing a column get an empty cell.
    """
    # Nothing to write: avoid indexing into an empty list and avoid
    # creating an empty file as a side effect.
    if not season_data:
        return

    # dict.fromkeys de-duplicates while preserving insertion order.
    fieldnames = list(dict.fromkeys(key for row in season_data for key in row))

    # NOTE(review): when appending to an existing file, the columns are
    # assumed to match that file's header -- this is not verified here.
    with open(file_name, 'a', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        if write_headers:
            writer.writeheader()

        writer.writerows(season_data)

# Fetch and write data for a specific season.
# To target the season currently in progress instead of a fixed year, use:
# season_year = datetime.now().year if datetime.now().month >= 10 else datetime.now().year - 1
season_year = 2019
season_data = fetch_nba_games_and_stats(season_year)

# Check if file exists and has content to determine if headers should be written
# (the file is opened in append mode, so a header is only wanted the first time).
file_name = 'nba_games_historical.csv'
write_headers = not (os.path.exists(file_name) and os.path.getsize(file_name) > 0)

# Write data to CSV; skip the write when the fetch produced no rows, since
# there is no first row to derive the column names from.
if season_data:
    write_to_csv(season_data, file_name, write_headers)
else:
    print(f"No stats returned for season {season_year}; nothing written to {file_name}.")