# In [None]:
import os
import re
import datetime
import psycopg2
import psycopg2.extras
import lxml.etree as ET

# -----------------------------------------------------------------------
# Global Data Lists
# -----------------------------------------------------------------------
# Module-level accumulators, one list per target table.
# NOTE(review): nothing in the visible code appends to these lists; rows are
# written straight to the database by process_npx_files. Confirm whether they
# are still needed before relying on them.
FORM_NPX_ROWS = []
INSTITUTIONAL_MANAGER_ROWS = []
SERIES_ROWS = []
PROXY_VOTING_RECORD_ROWS = []
MATTER_CATEGORY_ROWS = []
PROXY_VOTING_RECORD_CATEGORY_ROWS = []
VOTING_RECORD_MANAGER_ROWS = []
VOTING_RECORD_SERIES_ROWS = []

# Session-wide lookup cache filled by process_npx_files so that a category
# discovered once is not inserted again. Nothing visible here ever clears it,
# so its contents outlive any single database transaction.
KNOWN_CATEGORIES = {}

# -----------------------------------------------------------------------
# Helper Functions
# -----------------------------------------------------------------------

def parse_date(date_string):
    """
    Convert a date string into a datetime.date, trying several layouts in turn.

    Layouts are tried in this order: MM/DD/YYYY, YYYY-MM-DD, MM-DD-YYYY, YYYYMMDD.
    Returns None for empty input, for text that matches no layout, and for a
    date that parses but whose year lies outside 1800..2200.
    """
    if not date_string:
        return None

    cleaned = date_string.strip()
    for layout in ("%m/%d/%Y", "%Y-%m-%d", "%m-%d-%Y", "%Y%m%d"):
        try:
            candidate = datetime.datetime.strptime(cleaned, layout).date()
        except ValueError:
            continue
        # The first layout that parses decides the outcome; an implausible
        # year is rejected outright rather than retried with another layout.
        return candidate if 1800 <= candidate.year <= 2200 else None

    return None

def get_text(node, xpath_expr):
    """
    Evaluate xpath_expr on node and return the stripped .text of the first hit.

    An empty string is returned when nothing matches, when the first hit is
    None, or when its text is missing or empty.
    """
    matches = node.xpath(xpath_expr)
    if not matches:
        return ""

    first = matches[0]
    if first is None or not first.text:
        return ""

    return first.text.strip()

def get_decimal(node, xpath_expr):
    """
    Read the first matching element's text and return it as a float.

    Thousands separators (commas) are removed before conversion. Returns None
    when there is no text or the text is not a valid number.
    """
    raw = get_text(node, xpath_expr)
    if raw:
        try:
            return float(raw.replace(",", ""))
        except ValueError:
            pass
    return None

def extract_sec_header_info(file_path):
    """
    Pull the accession number and filing date out of a filing's header text.

    The whole file is read as UTF-8 (undecodable bytes are replaced) and
    searched case-insensitively for "ACCESSION NUMBER:" and
    "FILED AS OF DATE:" (the latter followed by eight digits).

    Returns a dict with:
      'accession_number' -- str, '' when the label is absent
      'date_filed'       -- result of parse_date on the 8-digit value, or None
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
        contents = handle.read()

    accession = re.search(r"ACCESSION\s+NUMBER:\s*([^\r\n]+)", contents, re.IGNORECASE)
    filed = re.search(r"FILED\s+AS\s+OF\s+DATE:\s*(\d{8})", contents, re.IGNORECASE)

    return {
        "accession_number": accession.group(1).strip() if accession else "",
        "date_filed": parse_date(filed.group(1).strip()) if filed else None,
    }

def parse_edgar_submission(root, sec_header):
    """
    Build the 'form_npx' row for one <edgarSubmission> element.

    root       -- the <edgarSubmission> element (queried namespace-agnostically
                  through local-name() XPath expressions)
    sec_header -- dict supplying 'accession_number' and 'date_filed'

    Returns a dict holding every form_npx column except the primary key,
    which is left for the database to generate. Missing text fields come back
    as '', missing optional fields as None, and yes/no style flags are
    normalised to 'Y'/'N' or True/False.
    """
    yes_tokens = ("Y", "YES", "TRUE", "1")

    def field(expr):
        # Stripped text of the first match under root, '' when absent.
        return get_text(root, expr)

    def answered_yes(expr):
        # True when the element's text is one of the accepted "yes" spellings.
        return field(expr).upper() in yes_tokens

    report_type = field(".//*[local-name()='reportInfo']/*[local-name()='reportType']") or "FUND VOTING REPORT"
    managers_count = field(".//*[local-name()='summaryPage']/*[local-name()='otherIncludedManagersCount']")
    amendment_no = field(".//*[local-name()='amendmentInfo']/*[local-name()='amendmentNo']")

    return {
        "reporting_person_name": field(".//*[local-name()='reportingPerson']/*[local-name()='name']"),
        "phone_number": field(".//*[local-name()='reportingPerson']/*[local-name()='phoneNumber']"),
        "address_street1": field(".//*[local-name()='reportingPerson']/*[local-name()='address']/*[local-name()='street1']"),
        "address_street2": field(".//*[local-name()='reportingPerson']/*[local-name()='address']/*[local-name()='street2']"),
        "address_city": field(".//*[local-name()='reportingPerson']/*[local-name()='address']/*[local-name()='city']"),
        # <stateOrCountry> is stored in the address_state column.
        "address_state": field(".//*[local-name()='reportingPerson']/*[local-name()='address']/*[local-name()='stateOrCountry']"),
        "address_zip": field(".//*[local-name()='reportingPerson']/*[local-name()='address']/*[local-name()='zipCode']"),
        "accession_number": sec_header["accession_number"],
        "cik": field(".//*[local-name()='issuerCredentials']/*[local-name()='cik']"),
        "conformed_period": parse_date(field(".//*[local-name()='periodOfReport']")),
        "date_filed": sec_header["date_filed"],
        "report_type": report_type,
        "form_type": field(".//*[local-name()='submissionType']") or "N-PX",
        "sec_file_number": field(".//*[local-name()='fileNumber']"),
        "crd_number": field(".//*[local-name()='reportingCrdNumber']"),
        "sec_file_number_other": field(".//*[local-name()='reportingSecFileNumber']"),
        "lei_number": field(".//*[local-name()='leiNumber']"),
        "investment_company_type": field(".//*[local-name()='investmentCompanyType']"),
        "confidential_treatment": (
            "Y" if answered_yes(".//*[local-name()='reportInfo']/*[local-name()='confidentialTreatment']") else "N"
        ),
        # Any report type mentioning NOTICE is treated as a notice report.
        "is_notice_report": "NOTICE" in report_type.upper(),
        "explanatory_choice": (
            "Y" if answered_yes(".//*[local-name()='explanatoryInformation']/*[local-name()='explanatoryChoice']") else "N"
        ),
        "other_included_managers_count": int(managers_count) if managers_count.isdigit() else 0,
        # Placeholder; the caller fills in the real number of series.
        "series_count": 0,
        "is_amendment": answered_yes(".//*[local-name()='amendmentInfo']/*[local-name()='isAmendment']"),
        "amendment_no": int(amendment_no) if amendment_no.isdigit() else None,
        "amendment_type": field(".//*[local-name()='amendmentInfo']/*[local-name()='amendmentType']") or None,
        # <explanatoryNotes> text, when present, is stored as the notice explanation.
        "notice_explanation": (
            field(".//*[local-name()='explanatoryInformation']/*[local-name()='explanatoryNotes']") or None
        ),
        "signatory_name": field(".//*[local-name()='signaturePage']/*[local-name()='txSignature']"),
        "signatory_name_printed": field(".//*[local-name()='signaturePage']/*[local-name()='txPrintedSignature']"),
        "signatory_title": field(".//*[local-name()='signaturePage']/*[local-name()='txTitle']"),
        "signatory_date": parse_date(field(".//*[local-name()='signaturePage']/*[local-name()='txAsOfDate']")),
    }

def parse_institutional_managers(root):
    """
    Extract the institutional managers listed under a submission.

    Manager nodes are taken from <otherManagers2>//<investmentManagers>; when
    none exist, every <otherManager> descendant is used instead.

    Returns a list of dicts (no manager_id; that is the DB-generated key):
      'serial_no'       -- int, or None when absent or not a plain decimal number
      'name', 'form13f_number', 'crd_number', 'sec_file_number', 'lei_number'
                        -- stripped text, '' when absent
    """
    manager_nodes = root.xpath(".//*[local-name()='otherManagers2']//*[local-name()='investmentManagers']")
    if not manager_nodes:
        manager_nodes = root.xpath(".//*[local-name()='otherManager']")

    # (output key, XML element name) for the plain-text columns.
    text_fields = (
        ("name", "name"),
        ("form13f_number", "form13FFileNumber"),
        ("crd_number", "crdNumber"),
        ("sec_file_number", "secFileNumber"),
        ("lei_number", "leiNumber"),
    )

    def first_text(node, tag):
        # Stripped first text node of the first matching descendant, '' if none.
        hits = node.xpath(f".//*[local-name()='{tag}']/text()")
        return hits[0].strip() if hits else ""

    results = []
    for mn in manager_nodes:
        row_im = {"serial_no": None}

        # Strip before testing, like every other field, so padded values such
        # as " 3 " are not silently dropped. isdecimal() only accepts
        # characters int() can convert (isdigit() also accepts e.g. "²",
        # which makes int() raise).
        serial = first_text(mn, "serialNo")
        if serial.isdecimal():
            row_im["serial_no"] = int(serial)

        for key, tag in text_fields:
            row_im[key] = first_text(mn, tag)

        results.append(row_im)

    return results

def parse_series_info(root):
    """
    Collect one dict per <seriesReports> element found beneath root.

    Each dict carries 'series_code' (<idOfSeries>), 'series_name'
    (<nameOfSeries>) and 'series_lei' (<leiOfSeries>). The owning form_id is
    attached later, at insertion time.
    """
    return [
        {
            "series_code": get_text(report, ".//*[local-name()='idOfSeries']"),
            "series_name": get_text(report, ".//*[local-name()='nameOfSeries']"),
            "series_lei": get_text(report, ".//*[local-name()='leiOfSeries']"),
        }
        for report in root.xpath(".//*[local-name()='seriesReports']")
    ]

def parse_proxy_vote_table(proxy_vote_node):
    """
    Turn every <proxyTable> under a <proxyVoteTable> into a vote record.

    Returns a 4-tuple:
      records       -- list of dicts ready for 'proxy_voting_record'
      category_refs -- list of (record_index, category_string)
      manager_refs  -- list of (record_index, manager serial text)
      series_refs   -- list of (record_index, series_code)

    record_index is the position of the vote inside `records`; the caller
    maps it to a database vote_id after insertion.
    """
    records = []
    category_refs = []
    manager_refs = []
    series_refs = []

    tables = proxy_vote_node.xpath(".//*[local-name()='proxyTable']")
    for position, table in enumerate(tables):
        # <voteRecord> is optional; only the first one supplies the vote
        # columns, and any extras are merely counted in other_notes.
        ballots = table.xpath(".//*[local-name()='voteRecord']")
        first_ballot = ballots[0] if ballots else None
        has_ballot = first_ballot is not None

        records.append({
            # vote_id is generated by the database, so it is not part of the row.
            "issuer_name": get_text(table, ".//*[local-name()='issuerName']"),
            "cusip": get_text(table, ".//*[local-name()='cusip']"),
            "isin": get_text(table, ".//*[local-name()='isin']"),
            "figi": get_text(table, ".//*[local-name()='figi']"),
            "meeting_date": parse_date(get_text(table, ".//*[local-name()='meetingDate']")),
            "vote_description": get_text(table, ".//*[local-name()='voteDescription']"),
            "proposed_by": get_text(table, ".//*[local-name()='voteSource']"),
            "shares_voted": get_decimal(table, ".//*[local-name()='sharesVoted'][1]"),
            "shares_on_loan": get_decimal(table, ".//*[local-name()='sharesOnLoan'][1]"),
            "vote_cast": (
                get_text(first_ballot, ".//*[local-name()='howVoted']") if has_ballot else None
            ),
            "vote_cast_shares": (
                get_decimal(first_ballot, ".//*[local-name()='sharesVoted']") if has_ballot else None
            ),
            "management_rec": (
                get_text(first_ballot, ".//*[local-name()='managementRecommendation']") if has_ballot else None
            ),
            "other_notes": (
                f"{len(ballots)} total <voteRecord> items found." if len(ballots) > 1 else None
            ),
        })

        # Category strings attached to this vote.
        category_refs.extend(
            (position, raw.strip())
            for raw in table.xpath(".//*[local-name()='voteCategories']//*[local-name()='categoryType']/text()")
        )

        # Manager references attached to this vote.
        manager_refs.extend(
            (position, raw.strip())
            for raw in table.xpath(".//*[local-name()='voteManager']//*[local-name()='otherManager']/text()")
        )

        # Optional series reference.
        series_code = get_text(table, ".//*[local-name()='voteSeries']")
        if series_code:
            series_refs.append((position, series_code))

    return records, category_refs, manager_refs, series_refs

def extract_xml_blocks(file_path):
    """
    Read file_path and return the inner text of every <XML>...</XML> section.

    Tag matching is case-insensitive and non-greedy, and a section may span
    multiple lines. The file is decoded as UTF-8 with undecodable bytes replaced.
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
        contents = handle.read()
    return re.findall(r"<XML>(.*?)</XML>", contents, flags=re.IGNORECASE | re.DOTALL)

def parse_xml_fragment(xml_string):
    """
    Parse an XML fragment with lxml.etree using a recovering parser.

    The text is encoded to UTF-8 bytes and handed to a parser created with
    recover=True. Returns whatever ET.fromstring yields, or None when it
    raises XMLSyntaxError.
    """
    payload = xml_string.encode("utf-8")
    lenient_parser = ET.XMLParser(recover=True, encoding="utf-8")
    try:
        return ET.fromstring(payload, parser=lenient_parser)
    except ET.XMLSyntaxError:
        return None

# Columns written to form_npx, in insert order. The primary key (form_id) is
# generated by the database and returned by the INSERT.
_FORM_NPX_COLUMNS = (
    "reporting_person_name",
    "phone_number",
    "address_street1",
    "address_street2",
    "address_city",
    "address_state",
    "address_zip",
    "accession_number",
    "cik",
    "conformed_period",
    "date_filed",
    "report_type",
    "form_type",
    "sec_file_number",
    "crd_number",
    "sec_file_number_other",
    "lei_number",
    "investment_company_type",
    "confidential_treatment",
    "is_notice_report",
    "explanatory_choice",
    "other_included_managers_count",
    "series_count",
    "is_amendment",
    "amendment_no",
    "amendment_type",
    "notice_explanation",
    "signatory_name",
    "signatory_name_printed",
    "signatory_title",
    "signatory_date",
)

# Built once from the constant column tuple above (no external input is
# interpolated); values are still bound through named %(col)s parameters.
_FORM_NPX_INSERT_SQL = "INSERT INTO form_npx ({columns}) VALUES ({placeholders}) RETURNING form_id;".format(
    columns=", ".join(_FORM_NPX_COLUMNS),
    placeholders=", ".join(f"%({col})s" for col in _FORM_NPX_COLUMNS),
)


def _insert_form_npx(cursor, form_row):
    """Insert one form_npx row and return the database-generated form_id."""
    cursor.execute(_FORM_NPX_INSERT_SQL, form_row)
    return cursor.fetchone()[0]


def _minimal_form_row(header_info):
    """Build a placeholder form_npx row for a filing that has votes but no <edgarSubmission>."""
    row = dict.fromkeys(_FORM_NPX_COLUMNS, "")
    row.update({
        "accession_number": header_info["accession_number"],
        "conformed_period": None,
        "date_filed": header_info["date_filed"],
        "report_type": "FUND VOTING REPORT",
        "form_type": "N-PX",
        "confidential_treatment": "N",
        "is_notice_report": False,
        "explanatory_choice": "N",
        "other_included_managers_count": 0,
        "series_count": 0,
        "is_amendment": False,
        "amendment_no": None,
        "amendment_type": None,
        "notice_explanation": None,
        "signatory_date": None,
    })
    return row


def _resolve_category_id(cursor, category, cache):
    """Return the matter_category id for `category`, inserting the row only if the table lacks it."""
    if category in cache:
        return cache[category]

    # Ask the database first: an in-memory cache alone is empty in a fresh
    # session and stale after a rolled-back transaction.
    cursor.execute(
        "SELECT category_id FROM matter_category WHERE category_type = %s LIMIT 1",
        (category,),
    )
    found = cursor.fetchone()
    if found is None:
        cursor.execute(
            "INSERT INTO matter_category (category_type) VALUES (%s) RETURNING category_id",
            (category,),
        )
        found = cursor.fetchone()

    cache[category] = found[0]
    # Keep the module-level cache in sync for any other code that reads it.
    KNOWN_CATEGORIES[category] = found[0]
    return found[0]


def process_npx_files(folder_path, cursor):
    """
    Parse every .txt filing in folder_path and load it through `cursor`.

    For each file (processed in sorted name order):
      1) read the accession number / filing date from the header text
      2) extract the <XML> blocks and parse all of them *before* touching the DB
      3) insert exactly one 'form_npx' row: the first <edgarSubmission> found,
         or a minimal placeholder when only a <proxyVoteTable> is present
      4) insert managers, series and proxy voting records under that form_id
      5) insert the vote -> category / manager / series link rows

    Files without <XML> blocks, or whose blocks contain neither an
    <edgarSubmission> nor a <proxyVoteTable>, are reported and skipped.

    folder_path -- directory containing the .txt filings
    cursor      -- open DB-API cursor (psycopg2 style: %s / %(name)s parameters,
                   RETURNING support). No commit or rollback is performed here;
                   transaction control is left to the caller.

    Returns None. Progress is printed to stdout.
    """
    txt_files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(".txt"))

    # category string -> category_id, valid for this call (one transaction).
    category_ids = {}

    for fname in txt_files:
        file_path = os.path.join(folder_path, fname)
        print(f"\nProcessing: {file_path}")

        # 1) SEC header info
        header_info = extract_sec_header_info(file_path)

        # 2) <XML> blocks
        xml_fragments = extract_xml_blocks(file_path)
        if not xml_fragments:
            print("  No <XML> blocks found.")
            continue

        form_row = None
        found_vote_table = False
        managers_for_form = []
        series_for_form = []
        voting_records_for_form = []
        cat_links_for_form = []
        mgr_links_for_form = []
        series_links_for_form = []

        # ---- Parse phase: no database writes, so fragment order is irrelevant ----
        for frag in xml_fragments:
            root = parse_xml_fragment(frag)
            if root is None:
                continue

            es_nodes = root.xpath("//*[local-name()='edgarSubmission']")
            if es_nodes:
                es = es_nodes[0]
                if form_row is None:
                    form_row = parse_edgar_submission(es, header_info)
                else:
                    # One filing maps to one form_npx row; extra submissions
                    # would otherwise create rows that nothing links to.
                    print("  Warning: additional <edgarSubmission> found; keeping the first for form_npx.")
                managers_for_form.extend(parse_institutional_managers(es))
                series_for_form.extend(parse_series_info(es))

            for pvt in root.xpath("//*[local-name()='proxyVoteTable']"):
                found_vote_table = True
                votes, cat_links, mgr_links_local, series_links_local = parse_proxy_vote_table(pvt)
                # Links carry indexes relative to `votes`; shift them so they
                # index into the per-file list instead.
                start_idx = len(voting_records_for_form)
                voting_records_for_form.extend(votes)
                cat_links_for_form.extend((start_idx + rel_i, val) for rel_i, val in cat_links)
                mgr_links_for_form.extend((start_idx + rel_i, val) for rel_i, val in mgr_links_local)
                series_links_for_form.extend((start_idx + rel_i, val) for rel_i, val in series_links_local)

        if form_row is None:
            if not found_vote_table:
                print("  No <edgarSubmission> or <proxyVoteTable> found; nothing inserted.")
                continue
            form_row = _minimal_form_row(header_info)

        # ---- Insert phase ----
        form_row["series_count"] = len(series_for_form)
        form_id = _insert_form_npx(cursor, form_row)

        # Managers: remember serial_no -> manager_id for the vote links below.
        manager_ids_by_serial = {}
        for im in managers_for_form:
            insert_mgr = """
                INSERT INTO institutional_manager (
                    form_id,
                    serial_no,
                    name,
                    form13f_number,
                    crd_number,
                    sec_file_number,
                    lei_number
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s)
                RETURNING manager_id;
            """
            cursor.execute(insert_mgr, (
                form_id,
                im["serial_no"],
                im["name"],
                im["form13f_number"],
                im["crd_number"],
                im["sec_file_number"],
                im["lei_number"],
            ))
            new_manager_id = cursor.fetchone()[0]
            # Managers without a serial number cannot be referenced by votes.
            if im["serial_no"] is not None:
                manager_ids_by_serial[im["serial_no"]] = new_manager_id

        # Series: remember series_code -> series_id for the vote links below.
        series_ids_by_code = {}
        for s in series_for_form:
            insert_series = """
                INSERT INTO series (
                    form_id,
                    series_code,
                    series_name,
                    series_lei
                )
                VALUES (%s, %s, %s, %s)
                RETURNING series_id;
            """
            cursor.execute(insert_series, (
                form_id,
                s["series_code"],
                s["series_name"],
                s["series_lei"],
            ))
            new_series_id = cursor.fetchone()[0]
            if s["series_code"]:
                series_ids_by_code[s["series_code"]] = new_series_id

        # Voting records: vote_id_map[i] is the vote_id of voting_records_for_form[i].
        vote_id_map = []
        for vrow in voting_records_for_form:
            insert_vote = """
                INSERT INTO proxy_voting_record (
                    form_id,
                    issuer_name,
                    cusip,
                    isin,
                    figi,
                    meeting_date,
                    vote_description,
                    proposed_by,
                    shares_voted,
                    shares_on_loan,
                    vote_cast,
                    vote_cast_shares,
                    management_rec,
                    other_notes
                )
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                RETURNING vote_id;
            """
            cursor.execute(insert_vote, (
                form_id,
                vrow["issuer_name"],
                vrow["cusip"],
                vrow["isin"],
                vrow["figi"],
                vrow["meeting_date"],
                vrow["vote_description"],
                vrow["proposed_by"],
                vrow["shares_voted"],
                vrow["shares_on_loan"],
                vrow["vote_cast"],
                vrow["vote_cast_shares"],
                vrow["management_rec"],
                vrow["other_notes"],
            ))
            vote_id_map.append(cursor.fetchone()[0])

        # Vote -> category links (categories are created on first sight).
        for vote_idx, cat_str in cat_links_for_form:
            cat_id = _resolve_category_id(cursor, cat_str, category_ids)
            insert_link = """
                INSERT INTO proxy_voting_record_category (vote_id, category_id)
                VALUES (%s, %s);
            """
            cursor.execute(insert_link, (vote_id_map[vote_idx], cat_id))

        # Vote -> manager links; only numeric references to a manager parsed
        # for this form can be resolved, anything else is skipped.
        for vote_idx, mgr_val in mgr_links_for_form:
            try:
                serial = int(mgr_val)
            except ValueError:
                continue
            if serial in manager_ids_by_serial:
                ins_vrm = """
                    INSERT INTO voting_record_manager (vote_id, manager_id) VALUES (%s, %s)
                """
                cursor.execute(ins_vrm, (vote_id_map[vote_idx], manager_ids_by_serial[serial]))

        # Vote -> series links; unknown series codes are skipped.
        for vote_idx, ser_code in series_links_for_form:
            if ser_code in series_ids_by_code:
                ins_vrs = """
                    INSERT INTO voting_record_series (vote_id, series_id) VALUES (%s, %s)
                """
                cursor.execute(ins_vrs, (vote_id_map[vote_idx], series_ids_by_code[ser_code]))

        print(f"  Inserted form_id {form_id} with {len(managers_for_form)} managers, {len(series_for_form)} series, and {len(voting_records_for_form)} votes.")

def run_all(folder_path, db_host, db_name, db_user, db_password, db_port=5432):
    """
    Convenience entry point: connect, load every filing, then commit.

      1) Open a psycopg2 connection to the given PostgreSQL server
      2) Run process_npx_files over folder_path using a single cursor
      3) Commit on success; on any exception roll back and print the error

    The connection is always closed before returning. Exceptions are reported
    on stdout and not re-raised.
    """
    print("Connecting to the database...")
    connection_settings = {
        "host": db_host,
        "port": db_port,
        "dbname": db_name,
        "user": db_user,
        "password": db_password,
    }
    connection = psycopg2.connect(**connection_settings)
    # Explicit transaction control: everything is committed or rolled back together.
    connection.autocommit = False

    try:
        with connection.cursor() as db_cursor:
            process_npx_files(folder_path, db_cursor)
        connection.commit()
    except Exception as exc:
        connection.rollback()
        print("Error occurred, rolled back transaction:", exc)
    finally:
        connection.close()
        print("Connection closed.")