In [221]:
import pandas as pd
import numpy as np
import json

In [222]:
data_path = "./data/HealthCare_Gold_datasets/Context/full_role_based_metric_access.json"
with open(data_path, "r") as f:
    data = json.load(f)
df = pd.DataFrame(data)

In [223]:
non_clinical_staff_df = df['non_clinical_staff']['metrics']
manager_df = df['managers']['metrics']
admin_df = df['administrative_staff']['metrics']
## Join non_clinical staff with manager and admin metrics
non_clinical_staff_df = pd.DataFrame(non_clinical_staff_df)
manager_df = pd.DataFrame(manager_df)
admin_df = pd.DataFrame(admin_df)
# Merging non-clinical staff metrics with manager and admin metrics
merged_df = pd.concat([non_clinical_staff_df, manager_df, admin_df], ignore_index=True)
# Display the merged DataFrame
merged_df.head()

Unnamed: 0,Column Name,Data Type,Description,Granularity,Calculation,source_sheet
0,patient_arrival_count_monthly,int64,Total number of patient arrivals in the month,Monthly,Sum of patient arrivals for the month,emergency_department_metrics
1,patient_arrival_count_daily,object,Daily count of patient arrivals (JSON format),Daily (nested),"Aggregated from daily patient logs, may includ...",emergency_department_metrics
2,bed_utilization_rate,float64,Emergency bed utilization rate (%),Monthly,(Beds in Use / Total Beds) * 100,emergency_department_metrics
3,staff_utilization_rate,float64,ED staff utilization rate (%),Monthly,(Staff in Use / Total Staff) * 100,emergency_department_metrics
4,outpatient_visit_count_monthly,int,Total number of outpatient visits in the month,Monthly,Sum of daily outpatient visit counts,outpatient_department_metrics


In [224]:
non_clinical_staff_df['role'] = 'non_clinical_staff'
manager_df['role'] = 'manager'
admin_df['role'] = 'administrative_staff'
# Concatenate the DataFrames
all_roles_df = pd.concat([non_clinical_staff_df, manager_df, admin_df], ignore_index=True)

In [225]:
# add records with table name and "manager" role
new_rows = pd.DataFrame([{'source_sheet': 'departments', 'role': ['manager','administrative_staff']},
                        {'source_sheet': 'staff', 'role': ['manager','administrative_staff']},
                        {'source_sheet': 'hospitals', 'role': ['manager','administrative_staff']}])

# Append the new row to the original DataFrame
all_roles_df = pd.concat([all_roles_df, new_rows], ignore_index=True)

In [226]:
# If any 'role' is a list, explode it so each role is in its own row
all_roles_exploded = all_roles_df.explode('role')

# Now group by 'source_sheet' and get unique roles as a list
unique_combination = all_roles_exploded.groupby('source_sheet')['role'].unique().reset_index()
unique_combination.columns = ['source_sheet', 'roles']

# Display the unique combinations DataFrame
unique_combination

Unnamed: 0,source_sheet,roles
0,administration_metrics,"[manager, administrative_staff]"
1,icu_metrics,"[manager, administrative_staff]"
2,departments,"[manager, administrative_staff]"
3,emergency_department_metrics,"[non_clinical_staff, manager, administrative_s..."
4,finance_billing_metrics,"[non_clinical_staff, manager]"
5,hospitals,"[manager, administrative_staff]"
6,inpatient_ward_metrics,"[manager, administrative_staff]"
7,laboratory_department_metrics,"[non_clinical_staff, manager]"
8,outpatient_department_metrics,"[non_clinical_staff, manager, administrative_s..."
9,patient_experience_metrics,"[manager, administrative_staff]"


In [227]:
keys_req = ["table_name","description","joins"]

table_name_mapping = {
 'departments':'departments',
 'staff':'staff',
 'hospitals':'hospitals',
 'emergency_department_metrics':'emergency_metrics',
 'inpatient_ward_metrics': 'inpatient_metrics',
 'outpatient_department_metrics': 'outpatient_metrics',
 'surgery_department_metrics': 'surgery_metrics',
 'radiology_department_metrics': 'radiology_metrics',
 'laboratory_department_metrics': 'lab_metrics',
 'pharmacy_department_metrics': 'pharmacy_metrics',
 'icu_metrics': 'icu_metrics',
 'administration_metrics': 'hospital_administration_metrics',
 'quality_safety_metrics': 'patient_safety_metrics',
 'finance_billing_metrics' : 'financial_metrics',
 'patient_experience_metrics': 'patient_experience_metrics'}

table_name_mapping_reversed = {
    'departments': 'departments',
    'staff': 'staff',
    'hospitals': 'hospitals',
    'emergency_metrics': 'emergency_department_metrics',
    'inpatient_metrics': 'inpatient_ward_metrics',
    'outpatient_metrics': 'outpatient_department_metrics',
    'surgery_metrics': 'surgery_department_metrics',
    'radiology_metrics': 'radiology_department_metrics',
    'lab_metrics': 'laboratory_department_metrics',
    'pharmacy_metrics': 'pharmacy_department_metrics',
    'icu_metrics': 'icu_metrics',
    'hospital_administration_metrics': 'administration_metrics',
    'patient_safety_metrics': 'quality_safety_metrics',
    'financial_metrics': 'finance_billing_metrics',
    'patient_experience_metrics': 'patient_experience_metrics'
}


table_description_mapping = {
    'departments': 'Information about various departments within the healthcare facility.',
    'staff': 'Details about the healthcare staff including roles and responsibilities.',
    'hospitals': 'Information about different hospitals and their facilities.',
    'emergency_department_metrics': 'Metrics related to the emergency department operations and performance.',
    'inpatient_ward_metrics': 'Metrics related to inpatient ward operations and performance.',
    'outpatient_department_metrics': 'Metrics related to outpatient department operations and performance.',
    'surgery_department_metrics': 'Metrics related to surgery department operations and performance.',
    'radiology_department_metrics': 'Metrics related to radiology department operations and performance.',
    'laboratory_department_metrics': 'Metrics related to laboratory department operations and performance.', 
    'pharmacy_department_metrics': 'Metrics related to pharmacy department operations and performance.',
    'icu_metrics': 'Metrics related to intensive care unit operations and performance.',
    'administration_metrics': 'Metrics related to hospital administration operations and performance.',
    'quality_safety_metrics': 'Metrics related to patient safety and quality of care.',
    'finance_billing_metrics': 'Metrics related to financial operations and billing processes.',
    'patient_experience_metrics': 'Metrics related to patient experience and satisfaction.'
}

In [228]:
non_clinical_meta_path = "./data/HealthCare_Gold_datasets/Context/non_clinical_metrics_metadata.json"
with open(non_clinical_meta_path, "r") as f:
    non_clinical_meta = json.load(f)

In [229]:
departments_df = pd.read_csv("./data/HealthCare_Gold_datasets/Hospital info/Departments.csv")
hospital_df = pd.read_csv("./data/HealthCare_Gold_datasets/Hospital info/Hospitals.csv")
staff_df = pd.read_csv("./data/HealthCare_Gold_datasets/Hospital info/Staff.csv")


# create a dictionary of table name and description
departments_df['role'] = [['manager', 'administrative_staff'] for _ in range(len(departments_df))]
hospital_df['role'] = [['manager', 'administrative_staff'] for _ in range(len(hospital_df))]
staff_df['role'] = [['manager', 'administrative_staff'] for _ in range(len(staff_df))]

# concatenate the three dataframes and convert to a dictionary
dim_tables = {
    "departments": departments_df.to_dict(orient='records'),
    "hospitals": hospital_df.to_dict(orient='records'),
    "staff": staff_df.to_dict(orient='records')
}

# copy all the key values from non_clinical_meta to dim_tables
for key in non_clinical_meta.keys():
    if key not in dim_tables:
        dim_tables[key] = non_clinical_meta[key]

dim_tables


{'departments': [{'department_id': 'DEPT001',
   'hospital_id': 'HOSP001',
   'department_name': 'Emergency Department',
   'floor_number': '1',
   'head_of_department': 'Aaryahi Chad',
   'active_status': True,
   'role': ['manager', 'administrative_staff']},
  {'department_id': 'DEPT002',
   'hospital_id': 'HOSP001',
   'department_name': 'Inpatient Ward',
   'floor_number': '3',
   'head_of_department': 'Shray Korpal',
   'active_status': True,
   'role': ['manager', 'administrative_staff']},
  {'department_id': 'DEPT051',
   'hospital_id': 'HOSP001',
   'department_name': 'Outpatient Department',
   'floor_number': '2',
   'head_of_department': 'Neelofar Kaur',
   'active_status': True,
   'role': ['manager', 'administrative_staff']},
  {'department_id': 'DEPT052',
   'hospital_id': 'HOSP001',
   'department_name': 'Surgery Department',
   'floor_number': '4',
   'head_of_department': 'Ela Master',
   'active_status': True,
   'role': ['manager', 'administrative_staff']},
  {'depar

In [230]:
# dep_cols = departments_df.columns.tolist()
# hosp_cols = hospital_df.columns.tolist()
# staff_cols = staff_df.columns.tolist()
# # concatenate the columns of the three dataframes
# all_cols = dep_cols + hosp_cols + staff_cols
# all_cols

In [231]:
# emergency_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/emergency_metrics.csv")
# inpatient_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/inpatient_metrics.csv")
# outpatient_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/outpatient_metrics.csv")
# surgery_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/surgery_metrics.csv")
# radiology_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/radiology_metrics.csv")
# lab_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/lab_metrics.csv")
# pharmacy_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/pharmacy_metrics.csv") 
# icu_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/icu_metrics.csv")
# hospital_administration_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/hospital_administration_metrics.csv")
# patient_safety_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/patient_safety_metrics.csv")
# finance_billing_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/financial_metrics.csv")
# patient_experience_metrics_df = pd.read_csv("./data/HealthCare_Gold_datasets/Non-Clinical KPIs/patient_experience_metrics.csv")

# # get the columns of each dataframe
# emergency_cols = emergency_metrics_df.columns.tolist()
# inpatient_cols = inpatient_metrics_df.columns.tolist()      
# outpatient_cols = outpatient_metrics_df.columns.tolist()
# surgery_cols = surgery_metrics_df.columns.tolist()
# radiology_cols = radiology_metrics_df.columns.tolist()
# lab_cols = lab_metrics_df.columns.tolist()
# pharmacy_cols = pharmacy_metrics_df.columns.tolist()
# icu_cols = icu_metrics_df.columns.tolist()
# hospital_administration_cols = hospital_administration_metrics_df.columns.tolist()
# quality_safety_cols = patient_safety_metrics_df.columns.tolist()
# finance_billing_cols = finance_billing_metrics_df.columns.tolist()
# patient_experience_cols = patient_experience_metrics_df.columns.tolist()
# # concatenate the columns of each dataframe
# all_non_clinical_cols = (emergency_cols + inpatient_cols + outpatient_cols + surgery_cols + 
#                          radiology_cols + lab_cols + pharmacy_cols + icu_cols + 
#                          hospital_administration_cols + quality_safety_cols + 
#                          finance_billing_cols + patient_experience_cols)
# all_non_clinical_cols

In [232]:
# # get the columns from departments
# departments_columns = departments_df.columns.tolist()
# hospital_columns = hospital_df.columns.tolist()
# staff_columns = staff_df.columns.tolist()
# # map the department columns with the columns in the all_roles_df to get the roles allowed
# def map_roles_to_columns(columns, all_roles_df):
#     roles_allowed = []
#     for column in columns:
#         roles = all_roles_df[all_roles_df['Column Name'] == column]['role'].values
#         if len(roles) > 0:
#             roles_allowed.append(roles[0])
#         else:
#             roles_allowed.append([])
#     return roles_allowed
# dep_mapped_roles = map_roles_to_columns(departments_columns, all_roles_df)
# hospital_mapped_roles = map_roles_to_columns(hospital_columns, all_roles_df)
# staff_mapped_roles = map_roles_to_columns(staff_columns, all_roles_df)

# print("dep_mapped_roles:",dep_mapped_roles)
# print("hospital_mapped_roles:",hospital_mapped_roles)
# print("staff_mapped_roles:",staff_mapped_roles)


In [233]:
# join_metadata = {
#     "emergency_department_metrics": "departments.department_id",
#     "inpatient_ward_metrics": "departments.department_id → inpatient_metrics.ward_id",
#     "outpatient_department_metrics": "departments.department_id → outpatient_metrics.clinic_id",
#     "surgery_department_metrics": "departments.department_id → surgery_metrics.surgery_type_id",
#     "radiology_department_metrics": None,
#     "laboratory_department_metrics": None,
#     "pharmacy_department_metrics": None,
#     "icu_metrics": "departments.department_id → icu_metrics.icu_id",
#     "administration_metrics": "hospitals.hospital_id",
#     "quality_safety_metrics": "departments.department_id",
#     "finance_billing_metrics": "hospitals.hospital_id",
#     "patient_experience_metrics": "hospitals.hospital_id"
# }

In [234]:
join_metadata = {
    "emergency_department_metrics": [{"departments.department_id": "emergency_metrics.department_id"}],
    "inpatient_ward_metrics": [{"departments.department_id": "inpatient_metrics.ward_id"}],
    "outpatient_department_metrics": [{"departments.department_id": "outpatient_metrics.clinic_id"}],
    "surgery_department_metrics": [],
    "radiology_department_metrics": [],
    "laboratory_department_metrics": [],
    "pharmacy_department_metrics": [],
    "icu_metrics": [{"departments.department_id": "icu_metrics.icu_id"}],
    "administration_metrics": [{"departments.department_id": "hospital_administration_metrics.hospital_id"}],
    "quality_safety_metrics": [{"departments.department_id": "patient_safety_metrics.department_id"}],
    "finance_billing_metrics": [{"hospitals.hospital_id": "financial_metrics.hospital_id"}],
    "patient_experience_metrics": [{"hospitals.hospital_id": "patient_experience_metrics.hospital_id"}],

    "departments": [{"departments.department_id": "emergency_metrics.department_id"},
                    {"departments.department_id": "inpatient_metrics.ward_id"},
                    {"departments.department_id": "outpatient_metrics.clinic_id"},
                    {"departments.department_id": "icu_metrics.icu_id"},
                    {"departments.department_id": "hospital_administration_metrics.hospital_id"},
                    {"departments.department_id": "patient_safety_metrics.department_id"},
                    {"departments.hospital_id": "hospitals.hospital_id"}],
    "hospitals": [{"hospitals.hospital_id": "financial_metrics.hospital_id"},
                   {"hospitals.hospital_id": "patient_experience_metrics.hospital_id"},
                   {"hospitals.hospital_id": "departments.hospital_id"}],
    "staff": [{"staff.hospital_id": "hospitals.hospital_id"}]
}


In [235]:
primary_key_metadata = {
    "emergency_department_metrics": [{"primary_key": ["department_id", "date"]}],
    "inpatient_ward_metrics": [{"primary_key": ["ward_id", "date"]}],
    "outpatient_department_metrics": [{"primary_key": ["clinic_id", "date"]}],
    "surgery_department_metrics": [{"primary_key": ["surgery_type_id", "date"]}],
    "radiology_department_metrics": [{"primary_key": ["radiology_lab_id", "date"]}],
    "laboratory_department_metrics": [{"primary_key": ["lab_id", "date"]}],
    "pharmacy_department_metrics": [{"primary_key": ["pharmacy_id", "date"]}],
    "icu_metrics": [{"primary_key": ["icu_id", "date"]}],
    "administration_metrics": [{"primary_key": ["hospital_id", "date"]}],
    "quality_safety_metrics": [{"primary_key": ["department_id", "date"]}],
    "finance_billing_metrics": [{"primary_key": ["hospital_id", "date"]}],
    "patient_experience_metrics": [{"primary_key": ["hospital_id", "date"]}],

    "departments": [{"primary_key": "department_id"}],
    "hospitals": [{"primary_key": "hospital_id"}],
    "staff": [{"primary_key": "staff_id"}]  # Assumed based on naming
}


In [236]:
foreign_key_metadata = {
    "emergency_department_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "department_id"}}
    ],
    "inpatient_ward_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "ward_id"}}
    ],
    "outpatient_department_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "clinic_id"}}
    ],
    "surgery_department_metrics": [],
    "radiology_department_metrics": [],
    "laboratory_department_metrics": [],
    "pharmacy_department_metrics": [],
    "icu_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "icu_id"}}
    ],
    "administration_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "hospital_id"}}
    ],
    "quality_safety_metrics": [
        {"foreign_key": {"table_name": "departments", "column_name": "department_id"}}
    ],
    "finance_billing_metrics": [
        {"foreign_key": {"table_name": "hospitals", "column_name": "hospital_id"}}
    ],
    "patient_experience_metrics": [
        {"foreign_key": {"table_name": "hospitals", "column_name": "hospital_id"}}
    ],
    "departments": [
        {"foreign_key": {"table_name": "emergency_metrics", "column_name": "department_id"}},
        {"foreign_key": {"table_name": "inpatient_metrics", "column_name": "ward_id"}},
        {"foreign_key": {"table_name": "outpatient_metrics", "column_name": "clinic_id"}},
        {"foreign_key": {"table_name": "icu_metrics", "column_name": "icu_id"}},
        {"foreign_key": {"table_name": "hospital_administration_metrics", "column_name": "hospital_id"}},
        {"foreign_key": {"table_name": "patient_safety_metrics", "column_name": "department_id"}},
        {"foreign_key": {"table_name": "hospitals", "column_name": "hospital_id"}}
    ],
    "hospitals": [
        {"foreign_key": {"table_name": "financial_metrics", "column_name": "hospital_id"}},
        {"foreign_key": {"table_name": "patient_experience_metrics", "column_name": "hospital_id"}},
        {"foreign_key": {"table_name": "departments", "column_name": "hospital_id"}},
        {"foreign_key": {"table_name": "staff", "column_name": "hospital_id"}}
    ],
    "staff": [
        {"foreign_key": {"table_name": "hospitals", "column_name": "hospital_id"}}
    ]
}


In [237]:
table_metadata = []
for table_key, cols in dim_tables.items():
    table_name_cleaned = table_key.strip()
    meta_info = {key: None for key in keys_req}
    meta_info['table_name'] = table_name_mapping.get(table_name_cleaned)
    meta_info['description'] = table_description_mapping.get(table_name_cleaned)
    joins = join_metadata.get(table_name_cleaned, [])
    if joins:
        join_list = [f"{list(join.keys())[0]} → {list(join.values())[0]}" for join in joins]
        meta_info["joins"] = [", ".join(join_list)]
    else:
        meta_info["joins"] = []
    pk_info = primary_key_metadata.get(table_name_cleaned, [])

    meta_info['primary_key'] = [
        col
        for d in pk_info
        for col in (d["primary_key"] if isinstance(d["primary_key"], list) else [d["primary_key"]])
    ] if pk_info else []


    fk_info = foreign_key_metadata.get(table_name_cleaned, [])
    meta_info['foreign_key'] = [
        {
            "table_name": fk["foreign_key"]["table_name"],
            "column_name": fk["foreign_key"]["column_name"],
            "description": table_description_mapping.get(table_name_mapping_reversed.get(fk["foreign_key"]["table_name"]))
        }
        for fk in fk_info
    ] if fk_info else []
    roles_allowed = unique_combination[(unique_combination['source_sheet'] == table_key)]['roles'].tolist()
    print(f"Roles allowed for {table_key}: {roles_allowed}")
    #make it a simple list
    roles_allowed = [role for sublist in roles_allowed for role in sublist]
    meta_info['managers_allowed'] = "No"
    meta_info['administrative_staff_allowed'] = "No"
    meta_info['non_clinical_staff_allowed'] = "No"
    for role in roles_allowed:
        if role == "non_clinical_staff":
            meta_info['non_clinical_staff_allowed'] = "Yes"
        elif role == "manager":
            meta_info['managers_allowed'] = "Yes"
        elif role == "administrative_staff":
            meta_info['administrative_staff_allowed'] = "Yes"
    
    # if isinstance(meta_info['roles_allowed'], list) and len(meta_info['roles_allowed']) == 1:
    #     meta_info['roles_allowed'] = meta_info['roles_allowed'][0]
    table_metadata.append(meta_info)

print(table_metadata)

Roles allowed for departments: [array(['manager', 'administrative_staff'], dtype=object)]
Roles allowed for hospitals: [array(['manager', 'administrative_staff'], dtype=object)]
Roles allowed for staff: [array(['manager', 'administrative_staff'], dtype=object)]
Roles allowed for emergency_department_metrics: [array(['non_clinical_staff', 'manager', 'administrative_staff'],
      dtype=object)]
Roles allowed for inpatient_ward_metrics: [array(['manager', 'administrative_staff'], dtype=object)]
Roles allowed for outpatient_department_metrics: [array(['non_clinical_staff', 'manager', 'administrative_staff'],
      dtype=object)]
Roles allowed for surgery_department_metrics: [array(['non_clinical_staff', 'manager', 'administrative_staff'],
      dtype=object)]
Roles allowed for radiology_department_metrics: [array(['non_clinical_staff', 'manager', 'administrative_staff'],
      dtype=object)]
Roles allowed for laboratory_department_metrics: [array(['non_clinical_staff', 'manager'], dtype=o

In [238]:
df= pd.DataFrame(table_metadata)
df

Unnamed: 0,table_name,description,joins,primary_key,foreign_key,managers_allowed,administrative_staff_allowed,non_clinical_staff_allowed
0,departments,Information about various departments within t...,[departments.department_id → emergency_metrics...,[department_id],"[{'table_name': 'emergency_metrics', 'column_n...",Yes,Yes,No
1,hospitals,Information about different hospitals and thei...,[hospitals.hospital_id → financial_metrics.hos...,[hospital_id],"[{'table_name': 'financial_metrics', 'column_n...",Yes,Yes,No
2,staff,Details about the healthcare staff including r...,[staff.hospital_id → hospitals.hospital_id],[staff_id],"[{'table_name': 'hospitals', 'column_name': 'h...",Yes,Yes,No
3,emergency_metrics,Metrics related to the emergency department op...,[departments.department_id → emergency_metrics...,"[department_id, date]","[{'table_name': 'departments', 'column_name': ...",Yes,Yes,Yes
4,inpatient_metrics,Metrics related to inpatient ward operations a...,[departments.department_id → inpatient_metrics...,"[ward_id, date]","[{'table_name': 'departments', 'column_name': ...",Yes,Yes,No
5,outpatient_metrics,Metrics related to outpatient department opera...,[departments.department_id → outpatient_metric...,"[clinic_id, date]","[{'table_name': 'departments', 'column_name': ...",Yes,Yes,Yes
6,surgery_metrics,Metrics related to surgery department operatio...,[],"[surgery_type_id, date]",[],Yes,Yes,Yes
7,radiology_metrics,Metrics related to radiology department operat...,[],"[radiology_lab_id, date]",[],Yes,Yes,Yes
8,lab_metrics,Metrics related to laboratory department opera...,[],"[lab_id, date]",[],Yes,No,Yes
9,pharmacy_metrics,Metrics related to pharmacy department operati...,[],"[pharmacy_id, date]",[],Yes,Yes,No


In [239]:
for i, item in enumerate(table_metadata):
    if isinstance(item, dict):
        for key, value in item.items():
            if isinstance(value, np.ndarray):
                item[key] = value.tolist()
    elif isinstance(item, np.ndarray):
        table_metadata[i] = item.tolist()

# Convert any numpy arrays in the DataFrame to lists before dumping to JSON
def convert_ndarray_to_list(x):
    if isinstance(x, np.ndarray):
        return x.tolist()
    return x

df_serializable = df.applymap(convert_ndarray_to_list)

with open("table_level_metadata.json", "w", encoding="utf-8") as f:
    json.dump(df_serializable.to_dict(orient="records"), f, indent=4, ensure_ascii=False)

  df_serializable = df.applymap(convert_ndarray_to_list)
