In [1]:
import os
from dotenv import load_dotenv
import psycopg2
import requests
from typing import Dict, List
# Read settings from the project's .env file. override=True is passed so the
# file's values are applied even for variables already set in the environment.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# find its settings on another machine.
load_dotenv('/home/wjones/CC/Capstone/tbd2/LACCTiC/.env', override=True)

# Database credentials, one module-level name per environment variable.
# os.getenv returns None for any variable that is not set.
db_name, db_user, db_password, db_host, db_port = (
    os.getenv(env_key)
    for env_key in ('DB_NAME', 'DB_USER', 'DB_PASS', 'DB_HOST', 'DB_PORT')
)

# Keyword arguments later unpacked into psycopg2.connect(); no connection is
# opened here.
db_params = dict(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port,
)

In [2]:
def connect_db(db_params):
    """Open a new PostgreSQL connection and return it.

    Args:
        db_params: Mapping of keyword arguments that is unpacked into
            ``psycopg2.connect``.

    Returns:
        The connection object produced by ``psycopg2.connect``. The caller is
        responsible for closing it.
    """
    return psycopg2.connect(**db_params)

In [3]:
def insert_runner(conn, runner: Dict):
    """Insert one runner row into the ``runners`` table and commit it.

    Args:
        conn: Open database connection providing ``cursor()``, ``commit()``
            and ``rollback()``.
        runner: Runner record. The keys read are ``id``, ``tfrrs_id``,
            ``year_in_school``, ``firstname``, ``lastname``, ``status`` and
            ``team`` (itself a mapping with an ``id`` key).

    Raises:
        KeyError: If ``runner`` lacks one of the keys above (raised before the
            database is touched).
        Exception: Any error raised while executing or committing the INSERT
            is re-raised after the transaction has been rolled back.
    """
    insert_sql = """
            INSERT INTO runners (lacctic_id, tfrrs_id, year, firstname, lastname, team_id, status)
            VALUES (%s, %s, %s, %s, %s, %s, %s); 
        """
    # Build the parameter tuple first so a malformed record fails before any
    # cursor is opened.
    values = (
        runner['id'],
        runner['tfrrs_id'],
        runner['year_in_school'],  # stored in the `year` column
        runner['firstname'],
        runner['lastname'],
        runner['team']['id'],
        runner['status'],
    )

    try:
        with conn.cursor() as cur:
            cur.execute(insert_sql, values)
        conn.commit()
    except Exception:
        # Without a rollback the connection stays in an aborted transaction
        # and every later statement on it fails as well.
        conn.rollback()
        raise

In [4]:
def process_page(conn, page: Dict):
    """Insert every runner listed on one API page.

    Args:
        conn: Open database connection, passed through to ``insert_runner``.
        page: Decoded page payload; its ``results`` entry is iterated and each
            item is handed to ``insert_runner``.

    Raises:
        KeyError: If ``page`` has no ``results`` key.
    """
    # NOTE(review): a one-argument `process_page(page)` is defined further
    # down in this notebook; if that cell runs later it replaces this function
    # and callers passing `conn` will fail.
    for runner in page['results']:
        # insert_runner returns nothing, so its result is not kept (the former
        # `id = ...` assignment was unused and shadowed the builtin `id`).
        insert_runner(conn, runner)

In [5]:
def main():
    """Walk the paginated runner API and store every page in the database.

    Starts at the first ``runner_page`` URL and follows each response's
    ``next`` link, handing every decoded page to ``process_page`` together
    with one shared database connection. The loop ends when a page has no
    ``next`` link, when ``max_pages`` pages have been processed, or on the
    first non-200 response, which is logged as an error.

    Raises:
        Any exception raised by ``requests.get``, ``response.json`` or
        ``process_page`` propagates to the caller; the database connection is
        closed first.
    """
    # Imported locally: the error branch below uses `logging`, which the
    # notebook's import cell does not provide.
    import logging

    next_page_url = 'https://c03mmwsf5i.execute-api.us-east-2.amazonaws.com/production/api_ranking/runner_page/'
    page_count = 0
    max_pages = 100000  # upper bound on the number of pages fetched in one run
    request_timeout = 30  # seconds; keeps a stalled request from hanging the run

    conn = connect_db(db_params)
    try:
        while next_page_url and page_count < max_pages:
            response = requests.get(next_page_url, timeout=request_timeout)
            if response.status_code != 200:
                logging.error(f'Error with page {page_count + 1}: {response.status_code}')
                break
            data = response.json()
            process_page(conn, data)
            next_page_url = data.get('next')  # None/empty on the last page ends the loop
            page_count += 1
    finally:
        # Release the connection even when a request or an insert fails.
        conn.close()

In [79]:
def process_page(page: Dict):
    """Store everything contained in one API page.

    For each entry in ``page['results']`` the team is inserted first, then the
    runner, then each of the runner's season ratings. Within a rating, the
    cross-country performances are stored before the track performances, and
    the race belonging to a performance is inserted immediately before the
    performance itself.

    Args:
        page: Decoded page payload with a ``results`` list of runner records.
    """
    # NOTE(review): `insert_runner` is called here with a single argument,
    # while the `insert_runner(conn, runner)` defined earlier in this notebook
    # requires a connection — confirm which helper versions this cell expects.
    for athlete in page['results']:
        insert_team(athlete['team'])
        insert_runner(athlete)

        ratings = athlete.get('season_ratings', [])
        for rating in ratings:
            # The value returned by insert_season_rating links each
            # performance row to its rating.
            rating_id = insert_season_rating(rating, athlete['id'])

            xc_results = rating.get('season_xc_performances', [])
            for xc_result in xc_results:
                insert_race(xc_result['race'])
                insert_season_xc_performance(xc_result, rating_id)

            track_results = rating.get('season_track_performances', [])
            for track_result in track_results:
                insert_race(track_result['race'])
                insert_season_track_performance(track_result, rating_id)

In [80]:
def test(page: Dict):
    for runner in page['results']:
        #print(runner)
        for season_rating in runner.get('season_ratings', []):
            print(season_rating)
            


In [81]:
# Fetch up to `max_pages` pages from the runner API, following each response's
# `next` link, and hand every decoded page to process_page().
# NOTE(review): this cell calls the one-argument `process_page(page)`; the
# `process_page(conn, page)` variant defined earlier in this notebook would
# reject this call.
next_page_url = 'https://c03mmwsf5i.execute-api.us-east-2.amazonaws.com/production/api_ranking/runner_page/'
page_count = 0
max_pages = 10

while page_count < max_pages and next_page_url:
    response = requests.get(next_page_url)
    if response.status_code != 200:
        # Report the failing page (1-based) and stop paginating.
        print(f'Error with page {page_count + 1}: {response.status_code}')
        break

    data = response.json()
    process_page(data)
    next_page_url = data.get('next')  # missing or empty on the last page
    page_count += 1