# Examples of Reading Excel Files

References:
- [`pandas.read_excel`](https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html#pandas-read-excel)

In [None]:
import os

import pandas as pd
import numpy as np
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

In [None]:
%%bash

# List the Excel workbooks in the data directory (long format, including dotfiles).
ls -al data/*.xlsx

## Read Single Tab

In [None]:
# Load the '2021 Codes List' sheet using pandas' default header handling, so
# the column names shown are the ones stored in the workbook.
consolidated_codes = pd.read_excel(
    io='data/ReferenceData_MSSPQMs_PY2021.xlsx',
    sheet_name='2021 Codes List',
)

# Preview the first rows of the sheet.
consolidated_codes.head()

In [None]:
# Re-read the same sheet, this time supplying explicit snake_case column
# names, and cast the two identifier columns to str in the same expression.
# NOTE(review): astype(str) also turns missing cells into the literal string
# 'nan' -- confirm that is acceptable for these two columns.
consolidated_codes = pd.read_excel(
    io='data/ReferenceData_MSSPQMs_PY2021.xlsx',
    sheet_name='2021 Codes List',
    names=(
        'measure_type',
        'measure_indicator_cms_wi',
        'variable_name',
        'orig_code_system_name',
        'code',
        'code_description',
        'tab',
    ),
).astype({'measure_indicator_cms_wi': str, 'code': str})

# Preview the first rows with the new column names.
consolidated_codes.head()

In [None]:
# Show the (row count, column count) of the loaded codes table.
consolidated_codes.shape

In [None]:
"""
Derive: orig_code_system_name to normalized_code_system_name:

LN to LOINC
I9 to ICD-9
SNM to SNOMED CT
I10 to ICD-10
"""
# Lookup table from the abbreviated code-system identifiers to their
# normalized names.
_CODE_SYSTEM_NAMES = {
    "LN": "LOINC",
    "I9": "ICD-9",
    "I10": "ICD-10",
    "SNM": "SNOMED CT",
}


def _normalize_code_system(code):
    """Return the normalized name for a single code-system abbreviation.

    Values that are not a known abbreviation (including missing values such
    as None or NaN) are returned unchanged.
    """
    try:
        return _CODE_SYSTEM_NAMES.get(code, code)
    except TypeError:
        # Unhashable values cannot be a known abbreviation; pass them through.
        return code


# Element-wise version of _normalize_code_system.
#
# otypes=[object] is given explicitly because, without it, np.vectorize infers
# the output dtype from the result for the first element only. That inference
# fails on empty input, and it coerces later results to the inferred dtype
# (e.g. a missing value after a string becomes the text 'None'/'nan', and a
# leading NaN makes subsequent string results fail to convert to float).
transform_code = np.vectorize(
    _normalize_code_system,
    otypes=[object],
    doc=(
        "Map code-system abbreviations to normalized names, element-wise.\n\n"
        "LN -> LOINC, I9 -> ICD-9, I10 -> ICD-10, SNM -> SNOMED CT; any other\n"
        "value is returned unchanged. Accepts a scalar or array-like and\n"
        "returns an object-dtype ndarray of the same shape."
    ),
)
    
# Add a normalized_code_system_name column by applying transform_code to every orig_code_system_name value.
consolidated_codes['normalized_code_system_name'] = transform_code(consolidated_codes['orig_code_system_name'])

In [None]:
# Show the first rows whose orig_code_system_name is 'SNM'
# (presumably to spot-check the derived normalized column).
consolidated_codes[consolidated_codes['orig_code_system_name'].eq('SNM')].head()

## Read Single Tab (Drug Codes)

In [None]:
# Load the '2021 Drug Codes DM_HTN_PREV' sheet with explicit snake_case column
# names, casting the two identifier columns to str in the same expression.
drug_codes = pd.read_excel(
    io='data/ReferenceData_MSSPQMs_PY2021.xlsx',
    sheet_name='2021 Drug Codes DM_HTN_PREV',
    names=(
        'measure_type',
        'measure_indicator_cms_wi',
        'variable_name',
        'code_system_name',
        'code',
        'drug_description',
        'drug_category',
        'drug_exclusion',
        'tab',
    ),
).astype({'measure_indicator_cms_wi': str, 'code': str})

# Preview the first rows of the drug-codes table.
drug_codes.head()

In [None]:
# Show the (row count, column count) of the drug-codes table.
drug_codes.shape

In [None]:
# Show the dtype pandas holds for each drug-codes column.
drug_codes.dtypes

## Write to SQL

In [None]:
from urllib.parse import quote

# Connection settings are read from the environment; a missing variable raises
# KeyError here rather than failing later at connect time.
host = os.environ['PG_SERVER']
db = os.environ['PG_DATABASE']
user = os.environ['PG_UID']
pw = os.environ['PG_PASSWORD']

# Percent-encode the credentials so characters such as '@', ':', '/' or '%' in
# the user name or password are not misparsed as URL delimiters. safe='' makes
# quote() escape '/' as well.
# NOTE(review): if PG_PASSWORD is already stored percent-encoded, drop the
# quoting to avoid double-encoding.
con_str = f"{quote(user, safe='')}:{quote(pw, safe='')}@{host}/{db}"
engine = create_engine(f"postgresql+psycopg2://{con_str}", echo=False)

In [None]:
# Write the codes table to ccda_validation.consolidated_codes_reference_2021.
# if_exists='replace' replaces any existing table of that name; the DataFrame
# index is not written.
consolidated_codes.to_sql(
    name='consolidated_codes_reference_2021',
    con=engine,
    schema='ccda_validation',
    if_exists='replace',
    index=False,
)

In [None]:
# Write the drug-codes table to ccda_validation.drug_codes_reference_2021.
# if_exists='replace' replaces any existing table of that name; the DataFrame
# index is not written.
drug_codes.to_sql(
    name='drug_codes_reference_2021',
    con=engine,
    schema='ccda_validation',
    if_exists='replace',
    index=False,
)