In [1]:
import logging
import os
import re
from datetime import datetime, timedelta

import pandas as pd
from nba_api.stats.endpoints import playbyplayv2, leaguegamefinder, playbyplayv3
from nba_api.stats.static.teams import get_teams
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

from nba_betting_ai.consts import proj_paths
from nba_betting_ai.data.ingest import scrape_everything
from nba_betting_ai.data.storage import export_postgres_db, import_postgres_db

In [2]:
# --- Logging ---------------------------------------------------------------
# Root logger at INFO, with an additional file handler writing to
# <proj_paths.logs>/ingest.log.
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger()
proj_paths.logs.mkdir(parents=True, exist_ok=True)
ingest_log_path = os.path.abspath(proj_paths.logs / 'ingest.log')

# Re-running this cell must not stack another FileHandler on the root logger,
# otherwise every record ends up in ingest.log once per execution of the cell.
file_handler = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and handler.baseFilename == ingest_log_path
    ),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler(ingest_log_path)
    logger.addHandler(file_handler)

# --- Database connection ---------------------------------------------------
# Connection settings come from the environment; nothing is hard-coded here.
postgres_user = os.environ.get('POSTGRES_USER')
postgres_password = os.environ.get('POSTGRES_PASSWORD')
postgres_host = os.environ.get('POSTGRES_HOST')
postgres_port = os.environ.get('POSTGRES_PORT')
postgres_db = os.environ.get('POSTGRES_DB')

# Report unset variables by name only (never log the values themselves).
missing_postgres_vars = [
    var_name
    for var_name, var_value in (
        ('POSTGRES_USER', postgres_user),
        ('POSTGRES_PASSWORD', postgres_password),
        ('POSTGRES_HOST', postgres_host),
        ('POSTGRES_PORT', postgres_port),
        ('POSTGRES_DB', postgres_db),
    )
    if not var_value
]
if missing_postgres_vars:
    logger.warning(
        'Unset PostgreSQL environment variables: %s',
        ', '.join(missing_postgres_vars),
    )

# Build the URL from its components instead of an f-string so that special
# characters in the credentials (e.g. '@', '/', ':') are escaped correctly and
# unset values are omitted rather than embedded as the literal text "None".
postgres_url = URL.create(
    drivername='postgresql',
    username=postgres_user,
    password=postgres_password,
    host=postgres_host,
    port=int(postgres_port) if postgres_port else None,
    database=postgres_db,
)
# String form of the URL, kept under its original name. It contains the
# password in clear text, so do not display or log it.
postgres_conn = postgres_url.render_as_string(hide_password=False)
engine = create_engine(postgres_url)

In [3]:
import asyncio
import json
import time
from playwright.async_api import async_playwright

def _parse_loader_config(script_content):
    """Extract the ``NREUM.loader_config`` object literal from a script as a dict.

    Returns an empty dict (after printing a diagnostic) when the assignment is
    not present or the object literal cannot be parsed.
    """
    marker = 'NREUM.loader_config='
    text = script_content or ''
    marker_pos = text.find(marker + '{')
    if marker_pos == -1:
        # str.find returned -1: there is nothing to slice out.
        print("New Relic loader config not found in script!")
        return {}

    config_start = marker_pos + len(marker)
    config_end = text.find(';', config_start)
    if config_end == -1:
        # No terminating ';' -- take everything up to the end of the script.
        config_end = len(text)
    loader_config_raw = text[config_start:config_end]

    # The literal uses bare JavaScript keys ({accountID:"1"}); JSON needs them
    # quoted. Only identifiers directly following '{' or ',' are quoted, so
    # ':' inside values and keys that are already quoted are left untouched.
    # Limitation: a string value containing ',<identifier>:' would still be
    # mangled, in which case parsing fails and an empty dict is returned.
    loader_config_fixed = re.sub(
        r'([{,]\s*)([A-Za-z_$][\w$]*)(\s*:)',
        r'\1"\2"\3',
        loader_config_raw,
    )
    try:
        return json.loads(loader_config_fixed)
    except json.JSONDecodeError as e:
        print(f"Failed to parse loader config: {e}")
        return {}


async def get_nba_headers(headless: bool = False, page_load_wait: float = 3):
    """
    Open a Chromium session on nba.com/stats and build request headers.

    The page is searched for a <script> whose src contains
    'newrelic-prod.js'; that script is loaded and its ``NREUM.loader_config``
    object is parsed.

    Args:
        headless: Passed to ``chromium.launch``. The default (False) keeps the
            original behaviour of opening a visible browser window.
        page_load_wait: Seconds to sleep after navigating to the stats page
            before inspecting its <script> tags.

    Returns:
        None when no matching script tag is found; otherwise a tuple
        ``(headers, loader_config)`` where ``headers`` is a dict of HTTP
        headers and ``loader_config`` is the parsed config ({} if it could not
        be parsed).
    """
    user_agent = (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    )

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        # try/finally: the browser is closed exactly once on every path,
        # including navigation errors and the early return below.
        try:
            context = await browser.new_context(user_agent=user_agent)

            # Create new page and navigate to NBA stats
            page = await context.new_page()
            await page.goto('https://nba.com/stats')

            # Wait for the page to load
            await asyncio.sleep(page_load_wait)

            # Extract the New Relic script URL
            new_relic_script_url = await page.evaluate('''() => {
            const scripts = document.getElementsByTagName('script');
            for (const script of scripts) {
                if (script.src && script.src.includes('newrelic-prod.js')) {
                    return script.src;
                }
            }
            return null;
        }''')

            if not new_relic_script_url:
                print("New Relic script not found!")
                return None

            # Navigate to the script URL and extract its content
            await page.goto(new_relic_script_url)
            script_content = await page.evaluate('''() => {
            return document.body.innerText;
        }''')
        finally:
            await browser.close()

    loader_config = _parse_loader_config(script_content)

    # Build headers
    # NOTE(review): 'Origin' normally carries scheme+host only (no path);
    # the value is kept as in the original -- confirm what the API expects.
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9',
        'Connection': 'keep-alive',
        'Origin': 'https://nba.com/stats',
        'Referer': 'https://nba.com/stats',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': user_agent,
    }

    # NOTE(review): it is unclear whether the loader config ever contains an
    # 'X-NewRelic-ID' key -- confirm against a real script; if it does not,
    # this header is never set.
    if 'X-NewRelic-ID' in loader_config:
        headers['X-NewRelic-ID'] = loader_config['X-NewRelic-ID']

    return headers, loader_config

# In a Jupyter cell, just do:
# result = await get_nba_headers()   # None if the New Relic script was not found
# headers, loader_config = result
# print(headers)


In [4]:
# Static request headers imitating a desktop Firefox 118 session on Linux.
headers = dict([
    ("User-Agent", "Mozilla/5.0 (X11; Linux x86_64; rv:118.0) Gecko/20100101 Firefox/118.0"),
    ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"),
    ("Accept-Encoding", "gzip, deflate, br"),
    ("Accept-Language", "en-US,en;q=0.5"),
    ("Connection", "keep-alive"),
])

In [13]:
%timeit

scrape_everything(engine=engine, season='2024-25', start_date=None, end_date=None, headers=None)

INFO:nba_betting_ai.data.ingest:Ingesting teams.
INFO:nba_betting_ai.data.ingest:Sleeping for 0.7855212313878497 sec.
INFO:nba_betting_ai.data.ingest:Ingesting games.
INFO:nba_betting_ai.data.ingest:Found 12 unfinished games from today. Removing them.
INFO:nba_betting_ai.data.ingest:Ingesting new gameflows.
INFO:nba_betting_ai.data.ingest:Sleeping for 0.7208739436622635 sec.
INFO:nba_betting_ai.data.ingest:Ingesting gameflow for game 0032100003 (1/1).
INFO:nba_betting_ai.data.ingest:No gameflow found for game 0032100003.
INFO:nba_betting_ai.data.ingest:Done ingesting games and gameflows.


In [14]:
# Dump the database using the connection settings read from the environment
# and show the value returned by the export helper.
backup_file = export_postgres_db(
    # where to connect
    host=postgres_host,
    port=postgres_port,
    db_name=postgres_db,
    # credentials
    username=postgres_user,
    password=postgres_password,
)
print(backup_file)

pg_dump: last built-in OID is 16383
pg_dump: reading extensions
pg_dump: identifying extension members
pg_dump: reading schemas
pg_dump: reading user-defined tables
pg_dump: reading user-defined functions
pg_dump: reading user-defined types
pg_dump: reading procedural languages
pg_dump: reading user-defined aggregate functions
pg_dump: reading user-defined operators
pg_dump: reading user-defined access methods
pg_dump: reading user-defined operator classes
pg_dump: reading user-defined operator families
pg_dump: reading user-defined text search parsers
pg_dump: reading user-defined text search templates
pg_dump: reading user-defined text search dictionaries
pg_dump: reading user-defined text search configurations
pg_dump: reading user-defined foreign-data wrappers
pg_dump: reading user-defined foreign servers
pg_dump: reading default privileges
pg_dump: reading user-defined collations
pg_dump: reading user-defined conversions
pg_dump: reading type casts
pg_dump: reading transforms
pg_d

/workspaces/nba-betting-ai/pg_dump/nba_betting_20250111_032514.sql


In [None]:
# Restoring from a dump is opt-in: nothing below runs unless this flag is
# flipped to True.
to_import = False

if to_import:
    # Dump file to restore, located in the project's pg_dump directory.
    backup_file = proj_paths.pg_dump / 'nba_betting_20250106_185845.sql'
    import_postgres_db(
        backup_file=backup_file,
        # where to connect
        host=postgres_host,
        port=postgres_port,
        db_name=postgres_db,
        # credentials
        username=postgres_user,
        password=postgres_password,
        # NOTE(review): presumably allows importing into a non-empty
        # database -- confirm against import_postgres_db.
        nonempty_proceed=True,
    )