# Create Mapper Datasets
## 00_create_mappers

| Date | User | Change Type | Remarks |  
| ---- | ---- | ----------- | ------- |
| 02/10/2025   | Adrienne | Created | Created mapper datasets | 

# Content

* [Introduction](#introduction)
* [Load Data](#load-data)
* [Create Mapper Functions](#create-mapper-functions)
* [Create Maps](#create-maps)
* [Save Datasets](#save-datasets)

## Introduction

The mapper combines the various HCPCS encodings (compiled by Adrienne) into a single dataframe. It expands each code range, from `code_range_start` to `code_range_end` inclusive, into its individual codes and maps every code to the description of its range.

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

## Load Data

In [16]:
# Load the three code-range sheets with a single pass over the workbook.
# Passing a list to `sheet_name` makes read_excel return a dict keyed by sheet
# name, so the file is opened once instead of once per sheet.
path = "../data/raw"
code_range_sheets = pd.read_excel(
  f"{path}/HCPCS Code Ranges.xlsx",
  sheet_name=['HCPCS Level I', 'HCPCS Level II', 'CPT Category II'],
  dtype=str,  # every column is read as text; create_mapper parses the code strings itself
)
hcpcs_1_map = code_range_sheets['HCPCS Level I']
hcpcs_2_map = code_range_sheets['HCPCS Level II']
cpt_map = code_range_sheets['CPT Category II']

## Create Mapper Functions

In [14]:
def extend_values(val: int, maxlen):
  """Return `val` as text, left-padded with "0" up to `maxlen` characters.

  A value whose text form already has `maxlen` characters or more is returned
  as-is (it is never truncated).
  """
  # rjust only ever adds fill characters on the left, so longer values pass through unchanged.
  return str(val).rjust(maxlen, "0")

In [15]:
def _expand_code_range(first: int, last: int, width: int, prefix: str = '', suffix: str = '') -> list:
  """Return the codes from `first` to `last` (inclusive) as zero-padded strings.

  Each number is left-padded with zeros to `width` digits and wrapped in
  `prefix` / `suffix`. An inverted range (`first > last`) yields an empty list.
  """
  return [f"{prefix}{str(number).zfill(width)}{suffix}" for number in range(first, last + 1)]


def create_mapper(raw: pd.DataFrame, dset: str = '') -> pd.DataFrame:
  """Expand a table of code ranges into one row per individual code.

  Args:
    raw: Frame with the columns `code_range_start`, `code_range_end` and
      `range_description`. The code columns are expected to hold strings
      (the letter-based branches index into them).
    dset: Pass 'hcpcs_1' to treat every row as a purely numeric range that is
      padded to 5 digits and labelled "HCPCS_level_1". With any other value
      the row's own `code_range_start` decides how it is handled:
        * first character is a letter -> letter + 4 digits, "HCPCS_level_2"
        * last character is a letter  -> 4 digits + letter, "CPT"

  Returns:
    DataFrame with the columns `code`, `category` and `description`, one row
    per expanded code, in the order the ranges appear in `raw`.

  Raises:
    KeyError: if one of the three expected columns is missing.
    ValueError: in 'hcpcs_1' mode, if a range bound is not an integer.

  Notes:
    * A missing `code_range_end` means "single code" in every branch.
    * Rows that cannot be interpreted (missing start, no letter to classify
      by, non-numeric remainder) are skipped with a warning instead of being
      dropped silently.
  """
  import warnings  # local import so the function does not rely on a file-level import

  codes, categories, descriptions = [], [], []

  def add(values, category, description):
    # Keep the three output columns aligned: one category/description per code.
    codes.extend(values)
    categories.extend([category] * len(values))
    descriptions.extend([description] * len(values))

  for idx, row in raw.iterrows():
    start, end = row['code_range_start'], row['code_range_end']
    description = row['range_description']

    if pd.isna(start) or start == '':
      warnings.warn(f"Row {idx}: missing code_range_start; row skipped.", stacklevel=2)
      continue
    single_code = pd.isna(end)

    if dset == 'hcpcs_1':
      # Purely numeric codes; malformed bounds raise so bad input is not hidden.
      first = int(start)
      last = first if single_code else int(end)
      add(_expand_code_range(first, last, 5), "HCPCS_level_1", description)
      continue

    if start[0].isalpha():
      # Leading letter: the digits are everything after it.
      category, digits, affix = "HCPCS_level_2", slice(1, None), {'prefix': start[0]}
    elif start[-1].isalpha():
      # Trailing letter: the digits are everything before it.
      category, digits, affix = "CPT", slice(None, -1), {'suffix': start[-1]}
    else:
      warnings.warn(
        f"Row {idx}: code_range_start {start!r} has no leading or trailing letter; row skipped.",
        stacklevel=2,
      )
      continue

    if single_code:
      # No end of range: the start value is the only code, kept verbatim.
      add([start], category, description)
      continue

    try:
      first, last = int(start[digits]), int(end[digits])
    except (TypeError, ValueError):
      # Best effort: one malformed range must not abort the whole mapper.
      warnings.warn(
        f"Row {idx}: cannot parse code range {start!r} - {end!r}; row skipped.",
        stacklevel=2,
      )
      continue
    # The letter of the start code is applied to the whole range.
    add(_expand_code_range(first, last, 4, **affix), category, description)

  return pd.DataFrame({'code': codes, 'category': categories, 'description': descriptions})

## Create Maps

In [17]:
# Turn each raw range table into a per-code mapper. Only the Level I sheet is
# tagged 'hcpcs_1'; the other two pass an empty tag.
# Each name is rebound from the raw sheet to its expanded mapper.
hcpcs_1_map = create_mapper(raw=hcpcs_1_map, dset='hcpcs_1')
hcpcs_2_map = create_mapper(raw=hcpcs_2_map, dset='')
cpt_map = create_mapper(raw=cpt_map, dset='')

In [18]:
# Stack the three mappers into one lookup table. ignore_index=True gives the
# result a fresh 0..n-1 index; without it every input frame contributes its
# own 0-based labels, so index values repeat across the combined frame.
combined_mapper = pd.concat([hcpcs_1_map, hcpcs_2_map, cpt_map], axis=0, ignore_index=True)
combined_mapper.head()

Unnamed: 0,code,category,description
0,99201,HCPCS_level_1,Evaluation and Management (E/M) Codes
1,99202,HCPCS_level_1,Evaluation and Management (E/M) Codes
2,99203,HCPCS_level_1,Evaluation and Management (E/M) Codes
3,99204,HCPCS_level_1,Evaluation and Management (E/M) Codes
4,99205,HCPCS_level_1,Evaluation and Management (E/M) Codes


## Save Datasets

In [21]:
from pathlib import Path

mapper_path = "../data/mappers"

# to_pickle does not create missing folders, so make sure the target exists
# before writing (a no-op when the directory is already there).
Path(mapper_path).mkdir(parents=True, exist_ok=True)

# One pickle per mapper, named after the variable it was built from.
mappers_to_save = {
  'hcpcs_1_map': hcpcs_1_map,
  'hcpcs_2_map': hcpcs_2_map,
  'cpt_map': cpt_map,
  'combined_mapper': combined_mapper,
}
for mapper_name, mapper_frame in mappers_to_save.items():
  mapper_frame.to_pickle(f"{mapper_path}/{mapper_name}.pkl")