In [2]:
import os
from dotenv import load_dotenv
import psycopg2
import requests
# Load environment variables from the project's .env file
load_dotenv('/home/wjones/CC/Capstone/tbd2/LACCTiC/.env', override=True)

# Read the five database settings in one pass; os.getenv yields None for any
# variable that is not set.
db_name, db_user, db_password, db_host, db_port = (
    os.getenv(variable)
    for variable in ('DB_NAME', 'DB_USER', 'DB_PASS', 'DB_HOST', 'DB_PORT')
)

# Open the database connection and the cursor shared by the insert helpers
connection_settings = {
    'dbname': db_name,
    'user': db_user,
    'password': db_password,
    'host': db_host,
    'port': db_port,
}
conn = psycopg2.connect(**connection_settings)
cursor = conn.cursor()

In [3]:
def insert_team(team_data):
    """Insert one team row; an existing row with the same id is left untouched.

    Args:
        team_data: Mapping that provides the 'id', 'name', 'sex', 'color' and
            'logo' values for the row.

    Raises:
        KeyError: If any of those keys is missing from ``team_data``.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    columns = ('id', 'name', 'sex', 'color', 'logo')
    values = tuple(team_data[column] for column in columns)
    query = """INSERT INTO teams (id, name, sex, color, logo) VALUES (%s, %s, %s, %s, %s)
               ON CONFLICT (id) DO NOTHING;"""
    cursor.execute(query, values)
    conn.commit()

In [4]:
def insert_runner(runner_data):
    """Insert one runner row; an existing row with the same id is left untouched.

    Args:
        runner_data: Mapping that provides 'id', 'tfrrs_id', 'year_in_school',
            'firstname', 'lastname', 'ability', 'ability_std' and 'status',
            plus a nested 'team' mapping whose 'id' is stored as team_id.

    Raises:
        KeyError: If any of those keys is missing.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    query = """INSERT INTO runners (id, tfrrs_id, year_in_school, firstname, lastname, team_id, ability, ability_std, status)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;"""
    # Values are gathered in column order so a missing key surfaces in the
    # same order the columns are listed.
    identity = tuple(
        runner_data[field]
        for field in ('id', 'tfrrs_id', 'year_in_school', 'firstname', 'lastname')
    )
    team_id = runner_data['team']['id']
    rating = tuple(runner_data[field] for field in ('ability', 'ability_std', 'status'))
    cursor.execute(query, identity + (team_id,) + rating)
    conn.commit()

In [5]:
def insert_race(race_data):
    """Insert one race row; an existing row with the same id is left untouched.

    Args:
        race_data: Mapping that provides the 'id', 'meet_name', 'section',
            'importance' and 'date' values for the row.

    Raises:
        KeyError: If any of those keys is missing from ``race_data``.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    fields = ('id', 'meet_name', 'section', 'importance', 'date')
    row = tuple(race_data[field] for field in fields)
    query = """INSERT INTO races (id, meet_name, section, importance, date) VALUES (%s, %s, %s, %s, %s)
               ON CONFLICT (id) DO NOTHING;"""
    cursor.execute(query, row)
    conn.commit()

In [6]:
def insert_season_rating(season_rating, runner_id):
    """Insert one season rating for a runner and return the generated row id.

    Args:
        season_rating: Mapping read with ``.get`` for 'race_weight_sig',
            'significant', 'sig_tic' and 'sig_var', plus an optional nested
            'season' mapping whose 'year' is stored as season_year. Missing
            values are inserted as None.
        runner_id: Value stored in the runner_id column.

    Returns:
        The id produced by the statement's ``RETURNING id`` clause.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    query = """
    INSERT INTO season_ratings (runner_id, season_year, race_weight_sig, significant, sig_tic, sig_var)
    VALUES (%s, %s, %s, %s, %s, %s)
    RETURNING id;  -- This will return the generated id
    """
    # 'season' can be absent or explicitly null in the payload; `or {}` covers
    # both, whereas .get('season', {}) would hand back None for a null value
    # and the chained .get would then raise AttributeError.
    season = season_rating.get('season') or {}
    cursor.execute(query, (
        runner_id,
        season.get('year'),
        season_rating.get('race_weight_sig'),
        season_rating.get('significant'),
        season_rating.get('sig_tic'),
        season_rating.get('sig_var')
    ))
    season_rating_id = cursor.fetchone()[0]  # Fetch the generated id
    conn.commit()
    return season_rating_id


In [7]:
def insert_season_xc_performance(xc_performance, season_rating_id):
    """Insert one cross-country performance row linked to a season rating.

    An existing row with the same id is left untouched.

    Args:
        xc_performance: Mapping that provides 'id', 'time', 'modern_tic',
            'race_weight_sig', 'significant', 'date', 'url' and 'meet_name',
            plus a nested 'race' mapping whose 'id' is stored as race_id.
        season_rating_id: Value stored in the season_rating_id column.

    Raises:
        KeyError: If any of those keys is missing.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    query = """INSERT INTO season_xc_performances (id, season_rating_id, time, modern_tic, race_weight_sig, significant, race_id, date, url, meet_name)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;"""
    perf = xc_performance
    # Assembled in column order; season_rating_id comes from the caller and
    # race_id from the nested race mapping.
    row = (
        perf['id'],
        season_rating_id,
        *(perf[field] for field in ('time', 'modern_tic', 'race_weight_sig', 'significant')),
        perf['race']['id'],
        *(perf[field] for field in ('date', 'url', 'meet_name')),
    )
    cursor.execute(query, row)
    conn.commit()

In [8]:
def insert_season_track_performance(track_performance, season_rating_id):
    """Insert one track performance row linked to a season rating.

    An existing row with the same id is left untouched.

    Args:
        track_performance: Mapping that provides 'id', 'time', 'modern_tic',
            'race_weight_sig', 'significant', 'date', 'url' and 'meet_name',
            plus a nested 'race' mapping whose 'id' is stored as race_id.
        season_rating_id: Value stored in the season_rating_id column.

    Raises:
        KeyError: If any of those keys is missing.

    Executes on the module-level ``cursor`` and commits on ``conn``.
    """
    query = """INSERT INTO season_track_performances (id, season_rating_id, time, modern_tic, race_weight_sig, significant, race_id, date, url, meet_name)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON CONFLICT (id) DO NOTHING;"""
    perf = track_performance
    # Assembled in column order; season_rating_id comes from the caller and
    # race_id from the nested race mapping.
    row = (
        perf['id'],
        season_rating_id,
        *(perf[field] for field in ('time', 'modern_tic', 'race_weight_sig', 'significant')),
        perf['race']['id'],
        *(perf[field] for field in ('date', 'url', 'meet_name')),
    )
    cursor.execute(query, row)
    conn.commit()

In [9]:
# Function to process a single page of runner data
def process_page(data):
    """Store every runner on one API page together with its related records.

    For each entry in ``data['results']`` this inserts the runner's team, the
    runner, each season rating, and each cross-country / track performance of
    that rating along with the race the performance refers to.

    Args:
        data: Decoded page payload with a 'results' list of runner mappings.
            Each runner carries 'id', 'team' and 'season_ratings'; each season
            rating carries 'season_xc_performances' and
            'season_track_performances'.

    Raises:
        KeyError: If one of the keys read here or in the insert helpers is
            missing from the payload.
    """
    for runner in data['results']:
        insert_team(runner['team'])
        insert_runner(runner)
        for season_rating in runner['season_ratings']:
            # Link performances to the id the database generated for this
            # rating; the previous code read season_rating['id'] instead and
            # discarded the value returned by the insert.
            season_rating_id = insert_season_rating(season_rating, runner['id'])
            for xc_performance in season_rating['season_xc_performances']:
                # Store the race before the performance that references it.
                # NOTE(review): assumes XC race payloads carry the same fields
                # as track race payloads -- confirm against the API.
                insert_race(xc_performance['race'])
                insert_season_xc_performance(xc_performance, season_rating_id)
            for track_performance in season_rating['season_track_performances']:
                # Race first, so the row the performance's race_id points at
                # already exists when the performance is inserted.
                insert_race(track_performance['race'])
                insert_season_track_performance(track_performance, season_rating_id)

In [10]:
# Main script to retrieve and process data
next_page_url = 'https://c03mmwsf5i.execute-api.us-east-2.amazonaws.com/production/api_ranking/runner_page/'
page_count = 0
max_pages = 10  # upper bound on the number of pages fetched in one run
request_timeout = 30  # seconds; without a timeout requests.get can block indefinitely

try:
    while next_page_url and page_count < max_pages:
        response = requests.get(next_page_url, timeout=request_timeout)
        if response.status_code == 200:
            data = response.json()
            process_page(data)
            next_page_url = data.get('next')  # Get the next page URL
            page_count += 1
        else:
            print(f'Error with page {page_count + 1}: {response.status_code}')
            break
finally:
    # Close the database connection even when a page fails part-way through,
    # so an exception does not leave the connection open.
    cursor.close()
    conn.close()

KeyError: 'id'