In [None]:
import logging
from db import DB
from utils.os import load_csv, load_json
from utils.pytorch import seed_everything
from models.pytorch.us_gaap_alignment.ingest import upsert_us_gaap_concepts, insert_ofss_data, upsert_ofss_concept_mappings, upsert_us_gaap_description_variations


# Shared DB handle; passed to reset_schema and to each ingest call below.
db = DB()

In [None]:
import mysql.connector

# IMPORTANT: This script will drop and recreate the given schema and will DELETE ALL DATA!
def reset_schema(db, schema_name: str) -> None:
    """
    Drops and recreates the given schema. WARNING: This will delete all data.

    Parameters:
        db: Object exposing a ``conn`` attribute whose ``cursor()`` method
            returns a cursor with ``execute`` and ``close``.
        schema_name (str): The name of the schema (database) to reset.

    Raises:
        mysql.connector.Error: Re-raised after being logged when obtaining
            the cursor or running either statement fails.
    """
    # Identifiers cannot be bound as query parameters, so quote the name
    # manually; a backtick inside a backtick-quoted identifier is escaped by
    # doubling it.
    quoted_name = "`" + schema_name.replace("`", "``") + "`"

    try:
        cursor = db.conn.cursor()
        try:
            # Drop the schema
            cursor.execute(f"DROP DATABASE IF EXISTS {quoted_name}")

            # Recreate the schema
            cursor.execute(f"CREATE DATABASE {quoted_name}")
        finally:
            # Release the cursor even when one of the statements fails.
            cursor.close()
    except mysql.connector.Error as e:
        logging.error("Failed to reset schema '%s': %s", schema_name, e)
        raise

    logging.info("Schema '%s' has been reset successfully.", schema_name)

# Destructive step: only proceed when the operator explicitly types "yes"
# (surrounding whitespace and letter case are ignored).
confirm = input("⚠️ This will DROP and RECREATE the schema 'us_gaap'. Type 'yes' to proceed: ")

if confirm.strip().lower() != "yes":
    print("❌ Cancelled.")
else:
    reset_schema(db, "us_gaap")

    # The schema was just dropped and recreated, so build a fresh DB handle.
    db = DB()

In [None]:
# Load the US GAAP concepts CSV and hand the loaded data to
# `upsert_us_gaap_concepts`.
csv_file = "data/2025_GAAP_Concepts.csv"

logging.info("Ingesting US GAAP concepts")
csv_data = load_csv(csv_file)
upsert_us_gaap_concepts(db, csv_data)

In [None]:
# Load the OFSS JSON schema from the shared directory and hand the loaded
# data to `insert_ofss_data`.
schema_file = "../shared/open_financial_statement_schema.json"

logging.info("Ingesting OFSS schema")
schema_data = load_json(schema_file)
insert_ofss_data(db, schema_data)

In [None]:
# Load the CSV of concepts annotated with OFSS ids and hand the loaded data
# to `upsert_ofss_concept_mappings`.
csv_file = "data/with_ofss_ids.csv"

logging.info("Ingesting OFSS concept mappings")
csv_data = load_csv(csv_file)
upsert_ofss_concept_mappings(db, csv_data)

In [None]:
# Load the description-variations CSV and hand the loaded data to
# `upsert_us_gaap_description_variations`.
csv_file = "data/us_gaap_2025_description_variations.csv"

logging.info("Ingesting US GAAP description variations")
csv_data = load_csv(csv_file)
upsert_us_gaap_description_variations(db, csv_data)