In [1]:
import os
from dotenv import load_dotenv
import psycopg2
import requests
from typing import Dict, List
import logging 
# Pull settings in from the project's .env file. override=True is passed so
# the file's values take precedence over variables already in the environment.
# NOTE(review): the path is absolute and machine-specific - consider making it configurable.
load_dotenv('/home/wjones/CC/Capstone/tbd2/Track/.env', override=True)

# Send log records to db_insert.log; filemode='w' starts a fresh file each run.
logging.basicConfig(
    level=logging.INFO,
    format='%(name)s - %(levelname)s - %(message)s',
    filename='db_insert.log',
    filemode='w',
)

# Database credentials as found in the environment (None when a variable is unset)
db_name, db_user, db_password, db_host, db_port = (
    os.getenv(key) for key in ('DB_NAME', 'DB_USER', 'DB_PASS', 'DB_HOST', 'DB_PORT')
)

# Keyword arguments later handed to connect_db(); nothing is connected yet.
db_params = dict(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port,
)

In [2]:
def connect_db(db_params):
    """Open a connection to the PostgreSQL database server.

    Args:
        db_params: Mapping of keyword arguments that is unpacked into
            ``psycopg2.connect``.

    Returns:
        The connection object produced by ``psycopg2.connect``.
    """
    return psycopg2.connect(**db_params)

In [3]:
def time_to_seconds(time_str):
    """Convert a race time string into a number of seconds.

    Accepted shapes are ``M:SS``, ``M:SS.f`` (any number of fractional
    digits), ``SS`` and ``SS.f``. Anything else - including ``'DNF'``, the
    empty string, times with more than one colon (hours) and non-string
    input - yields ``None`` rather than raising.

    Args:
        time_str: The time as text, e.g. ``'25:30.45'`` or ``'10.52'``.

    Returns:
        The time in seconds as a float, or ``None`` if it cannot be parsed.
    """
    if not isinstance(time_str, str):
        return None

    # Split from the right so that a second colon ends up in minutes_text
    # and is rejected by the digit check below.
    minutes_text, colon, seconds_text = time_str.rpartition(':')
    whole_text, dot, fraction_text = seconds_text.partition('.')

    # isdecimal() is False for '' and accepts exactly the digit characters
    # that int() can convert (unlike isdigit(), which also accepts e.g. '²').
    if colon and not minutes_text.isdecimal():
        return None
    if not whole_text.isdecimal():
        return None
    if dot and not fraction_text.isdecimal():
        return None

    minutes = int(minutes_text) if colon else 0
    seconds = int(whole_text)
    # Scale by the actual number of fractional digits: '.5' is half a second
    # and '.123' is 123 thousandths, not 5 or 123 hundredths.
    fraction = int(fraction_text) / 10 ** len(fraction_text) if dot else 0.0
    return minutes * 60 + seconds + fraction

In [4]:
def insert_race_result(conn, race_data):
    """Insert one race result, or update the existing one for the same meet and athlete.

    The statement is committed immediately. If executing or committing fails,
    the transaction is rolled back before the error is re-raised, so the
    connection stays usable for further statements.

    Args:
        conn: Open database connection (as returned by ``connect_db``).
        race_data: Mapping with the keys ``'tfrrs_meet_id'``, ``'tfrrs_id'``,
            ``'result'`` and ``'time'``.

    Raises:
        KeyError: If ``race_data`` lacks one of the required keys.
        Exception: Whatever the driver raises when the statement or the
            commit fails.
    """
    upsert_sql = """
            INSERT INTO raceResults (tfrrs_meet_id, tfrrs_id, result, time)
            VALUES (%s, %s, %s, %s)
            ON CONFLICT (tfrrs_meet_id, tfrrs_id)
            DO UPDATE SET result = EXCLUDED.result, time = EXCLUDED.time;
        """
    # Build the parameters first so a missing key fails before touching the database.
    params = (
        race_data['tfrrs_meet_id'],
        race_data['tfrrs_id'],
        race_data['result'],
        race_data['time'],
    )
    try:
        with conn.cursor() as cur:
            cur.execute(upsert_sql, params)
        conn.commit()
    except Exception:
        # Without a rollback the failed transaction stays open and the
        # server rejects every later statement on this connection.
        conn.rollback()
        raise

In [9]:
import csv
# Load csv/raceResults.csv into the raceResults table, one upsert per row.
# NOTE(review): `conn` is intentionally left open here in case other cells
# reuse it - close it explicitly once all inserts are finished.
conn = connect_db(db_params)

# newline='' is what the csv module asks for so that line breaks inside
# quoted fields are parsed correctly.
with open('csv/raceResults.csv', mode='r', newline='') as input_file:
    reader = csv.reader(input_file)
    next(reader, None)  # Skip the header row (an empty file simply yields no rows)
    for row in reader:
        # Blank lines come back as [] and malformed lines have the wrong
        # number of fields; log and skip them instead of failing on unpacking.
        if len(row) != 4:
            logging.warning(f"Skipping malformed row {row}")
            continue
        # `race_time` rather than `time`, so a `time` module imported in
        # another cell is not overwritten by a string.
        meet_id, athlete_url, place, race_time = row

        tffrs_id = None
        if 'tfrrs' in athlete_url and 'athletes' in athlete_url:
            tffrs_parts = athlete_url.split('/')
            # The TFFRS ID is expected to be the fifth element; shorter URLs
            # leave the ID unset so the row is skipped below.
            if len(tffrs_parts) > 4:
                tffrs_id = tffrs_parts[4]
        time_in_seconds = time_to_seconds(race_time)

        if tffrs_id is not None and time_in_seconds is not None:
            race_data = {
                'tfrrs_meet_id': meet_id,
                'tfrrs_id': tffrs_id,
                'result': place,
                'time': time_in_seconds
            }
            insert_race_result(conn, race_data)
        else:
            logging.info(f"Skipping {athlete_url} with time {race_time}")