## Imports

In [None]:
from DataGenClasses import (
    FacilityDataClass,
    ComprehensiveEncounterDataClass,
    PatientVisitDataClass,
    ComprehensiveEncounterMapDataClass,
    PatientLanguageDataClass,
    PatientDisabilityDataClass,
    PatientMaritalDataClass,
    PatientRaceDataClass,
    PatientEthnicityDataClass,
    PatientDiagnosisDataClass,
    PatientVisitPDSCareProviderDataClass,
    PatientIdentDataClass,
    PatientVisitDetailsDataClass,
    FacilityIdentifierDataClass,
    Hl7MappingDataClass,
    DxCodeDataClass,
    )
import pandas as pd
from random import randint
from faker import Faker
from dataclasses import asdict
import os
import duckdb

## Create Fake Data

In [None]:
# One accumulator per output table; each holds plain-dict records that are
# later turned into DataFrames.
facility_list = []
comprehensive_encounter_list = []
patient_visit_list = []
comprehensive_encounter_map_list = []
patient_diagnosis_list = []
patient_visit_pds_care_provider_list = []
patient_visit_details_list = []
PatientLanguage_list = []
PatientDisability_list = []
PatientMarital_list = []
PatientRace_list = []
PatientEthnicity_list = []

# 20 facilities x 1000 encounters each. Every encounter produces exactly one
# row in each dependent table, tied together by the visit / patient ids.
for facility_id_num in range(20):
    facility = FacilityDataClass(id=facility_id_num)
    facility_list.append(vars(facility))

    for comprehensive_encounter_id in range(1000):
        # The encounter is created first; the visit reuses its patient_id.
        comprehensive_encounter = ComprehensiveEncounterDataClass(facility_id=facility_id_num)
        comprehensive_encounter_list.append(vars(comprehensive_encounter))

        patient_visit = PatientVisitDataClass(
            patient_id=comprehensive_encounter.patient_id,
            facility_id=facility_id_num,
        )
        patient_visit_list.append(vars(patient_visit))

        # Bridge row linking the encounter to its visit.
        comprehensive_encounter_map = ComprehensiveEncounterMapDataClass(
            comprehensive_encounter_id=comprehensive_encounter.id,
            patient_visit_id=patient_visit.id,
        )
        comprehensive_encounter_map_list.append(vars(comprehensive_encounter_map))

        # Per-visit child records, all keyed on the visit id.
        patient_diagnosis = PatientDiagnosisDataClass(patient_visit_id=patient_visit.id)
        patient_diagnosis_list.append(vars(patient_diagnosis))

        patient_visit_pds_care_provider = PatientVisitPDSCareProviderDataClass(
            patient_visit_id=patient_visit.id)
        patient_visit_pds_care_provider_list.append(vars(patient_visit_pds_care_provider))

        patient_visit_details = PatientVisitDetailsDataClass(id=patient_visit.id)
        patient_visit_details_list.append(vars(patient_visit_details))

        # The five demographic records share the same (patient, facility) key.
        patient_key = {
            "patient_id": patient_visit.patient_id,
            "facility_id": patient_visit.facility_id,
        }

        PatientLanguage = PatientLanguageDataClass(**patient_key)
        PatientLanguage_list.append(vars(PatientLanguage))

        PatientDisability = PatientDisabilityDataClass(**patient_key)
        PatientDisability_list.append(vars(PatientDisability))

        PatientMarital = PatientMaritalDataClass(**patient_key)
        PatientMarital_list.append(vars(PatientMarital))

        PatientRace = PatientRaceDataClass(**patient_key)
        PatientRace_list.append(vars(PatientRace))

        PatientEthnicity = PatientEthnicityDataClass(**patient_key)
        PatientEthnicity_list.append(vars(PatientEthnicity))

## Convert to DF

In [None]:
# Turn each list of record dicts into a DataFrame (one frame per table).
_records_to_frame = pd.DataFrame.from_records

# Facility / encounter / visit tables.
facility_df = _records_to_frame(facility_list)
comprehensive_encounter_df = _records_to_frame(comprehensive_encounter_list)
patient_visit_df = _records_to_frame(patient_visit_list)
comprehensive_encounter_map_df = _records_to_frame(comprehensive_encounter_map_list)

# Per-visit child tables.
patient_diagnosis_df = _records_to_frame(patient_diagnosis_list)
patient_visit_pds_care_provider_df = _records_to_frame(patient_visit_pds_care_provider_list)
patient_visit_details_df = _records_to_frame(patient_visit_details_list)

# Patient demographic tables.
PatientLanguage_df = _records_to_frame(PatientLanguage_list)
PatientDisability_df = _records_to_frame(PatientDisability_list)
PatientMarital_df = _records_to_frame(PatientMarital_list)
PatientRace_df = _records_to_frame(PatientRace_list)
PatientEthnicity_df = _records_to_frame(PatientEthnicity_list)

## Write data to file

In [None]:
# Make sure the output directory exists, then persist every DataFrame as its
# own parquet file under ./data/.
os.makedirs("./data/", exist_ok=True)

for frame, parquet_path in (
    (facility_df, "./data/facility.parquet"),
    (comprehensive_encounter_df, "./data/comprehensive_encounter.parquet"),
    (patient_visit_df, "./data/patient_visit.parquet"),
    (comprehensive_encounter_map_df, "./data/comprehensive_encounter_map.parquet"),
    (patient_diagnosis_df, "./data/patient_diagnosis.parquet"),
    (patient_visit_pds_care_provider_df, "./data/patient_visit_pds_care_provider.parquet"),
    (patient_visit_details_df, "./data/patient_visit_details.parquet"),
    (PatientLanguage_df, "./data/PatientLanguage.parquet"),
    (PatientDisability_df, "./data/PatientDisability.parquet"),
    (PatientMarital_df, "./data/PatientMarital.parquet"),
    (PatientRace_df, "./data/PatientRace.parquet"),
    (PatientEthnicity_df, "./data/PatientEthnicity.parquet"),
):
    frame.to_parquet(parquet_path)

## Create DuckDB file

In [None]:
# Maps a short view name to the parquet file that backs it.
table_storage = dict(
    f_df="./data/facility.parquet",
    ce_df="./data/comprehensive_encounter.parquet",
    pv_df="./data/patient_visit.parquet",
    cem_df="./data/comprehensive_encounter_map.parquet",
    p_dx_df="./data/patient_diagnosis.parquet",
    p_v_pcp_df="./data/patient_visit_pds_care_provider.parquet",
    pvd_df="./data/patient_visit_details.parquet",
    p_lang_df="./data/PatientLanguage.parquet",
    p_dis_df="./data/PatientDisability.parquet",
    p_mar_df="./data/PatientMarital.parquet",
    p_rac_df="./data/PatientRace.parquet",
    p_eth_df="./data/PatientEthnicity.parquet",
)

In [None]:
# Register every parquet file listed in table_storage as a view inside the
# on-disk DuckDB database "database.duckdb".
#
# CREATE OR REPLACE keeps this cell re-runnable: the database file persists
# between runs, so a plain CREATE VIEW raises as soon as the views already
# exist from an earlier execution.
#
# NOTE: view names and paths are interpolated straight into the SQL text;
# they come from the table_storage mapping and are not escaped here.
with duckdb.connect("database.duckdb") as con:
    for table, path in table_storage.items():
        sql = f"CREATE OR REPLACE VIEW {table} AS SELECT * FROM parquet_scan('{path}')"
        con.execute(sql)