# ICD-10 and CUI

### 2023-06-29

ICD-10 codes downloaded from https://www.cms.gov/medicare/coordination-benefits-recovery-overview/icd-code-lists

In [None]:
# import data
import pandas as pd

# Locations of the source Excel workbook and of the CSV copy to write
excel_file = r'C:\Users\sungh\icd10.xlsx'
icd10file = r'C:\Users\sungh\icd10.csv'

# Load the workbook into a DataFrame
icd10 = pd.read_excel(io=excel_file)

# Write the same table back out as CSV, leaving out the index column
icd10.to_csv(path_or_buf=icd10file, index=False)

In [None]:
# Map the original spreadsheet headers onto the shorter names used below
new_column_names = {
    'CODE': 'ICD10',
    'SHORT DESCRIPTION (VALID ICD-10 FY2023)': 'Description',
}

# Rebind icd10 to the DataFrame carrying the renamed columns
icd10 = icd10.rename(columns=new_column_names)

#### Drop unnecessary columns:

In [None]:
# Specify the columns to keep
columns_to_keep = ['ICD10', 'Description']

# Create a new, independent DataFrame with only the desired columns.
# .copy() detaches the selection from icd10, so adding columns to
# icd10_updated later does not raise pandas' SettingWithCopyWarning.
icd10_updated = icd10[columns_to_keep].copy()

# Print the first 11 rows of the trimmed DataFrame
print(icd10_updated.head(11))

### Number of ICD-10-CM Codes: 

In [None]:
# Each row holds one code, so the row count is the number of codes
num_rows = icd10_updated.shape[0]
print("Number of ICD-10 Codes:", num_rows)

#### Add the decimal point to format ICD-10-CM codes:

In [None]:
def add_decimal(icd_code):
    """Insert a decimal point after the third character of an ICD-10 code.

    Args:
        icd_code: The code without a decimal point, e.g. ``"A000"``.

    Returns:
        The code with ``'.'`` inserted after the third character when it is
        longer than three characters (``"A000"`` -> ``"A00.0"``). The input
        is returned unchanged when it is three characters or shorter, when
        it already contains a ``'.'``, or when it is not a string (for
        example ``None`` or a NaN produced by a blank spreadsheet cell).
    """
    # Non-string values have no len()/slicing; pass them through untouched
    # instead of raising TypeError inside Series.apply.
    if not isinstance(icd_code, str):
        return icd_code

    # Already formatted: inserting a second dot would corrupt the code.
    if '.' in icd_code:
        return icd_code

    if len(icd_code) > 3:
        return icd_code[:3] + '.' + icd_code[3:]
    return icd_code

# Apply add_decimal to every value of the 'ICD10' column and store the result
# in a new 'ICD10_WITH_DECIMAL' column. assign() returns a new DataFrame, so
# nothing is written into a DataFrame that was sliced out of icd10 (in-place
# column assignment on such a slice raises SettingWithCopyWarning).
icd10_updated = icd10_updated.assign(
    ICD10_WITH_DECIMAL=icd10_updated['ICD10'].apply(add_decimal)
)

In [None]:
# Build the final table by leaving out the undotted 'ICD10' column
final_icd10 = icd10_updated.drop(columns=['ICD10'])

# Preview the first rows of the result
print(final_icd10.head())

### From https://documentation.uts.nlm.nih.gov/scripts/crosswalk.py

### 06-27 Update: The code below works (from: https://documentation.uts.nlm.nih.gov/scripts/search-terms.py)

### This step processes the entire dataset, querying the UMLS search API with each code in the "ICD10_WITH_DECIMAL" column, and produces a CSV file that maps each ICD-10 code to its matching CUIs:
#### (will take a while)

In [None]:
import requests
import pandas as pd
from math import ceil

def _iter_cui_rows(full_url, apikey, icd_code, timeout):
    """Yield one ``{'ICD-10', 'CUI'}`` row per search hit for *icd_code*, page by page."""
    page = 0  # Page counter starts fresh for each ICD code
    while True:
        page += 1
        query = {'string': icd_code, 'apiKey': apikey, 'pageNumber': page}
        # A timeout keeps one stalled connection from hanging the whole run.
        r = requests.get(full_url, params=query, timeout=timeout)
        r.raise_for_status()
        r.encoding = 'utf-8'
        items = r.json()['result']['results']

        if not items:
            # An empty page marks the end of the results for this code.
            if page == 1:
                print(f'No results found for ICD code: {icd_code}\n')
            return

        print(f"Results for ICD code: {icd_code}, page {page}\n")

        for result in items:
            cui = result['ui']
            print('ICD-10: ' + icd_code)
            print('CUI: ' + cui)
            print('\n')
            yield {'ICD-10': icd_code, 'CUI': cui}

        print('*********')


def _save_results(result_list, output_file):
    """Write the collected rows to *output_file* as CSV and return them as a DataFrame."""
    result_df = pd.DataFrame(result_list, columns=['ICD-10', 'CUI'])
    result_df.to_csv(output_file, index=False)
    return result_df


def retrieve_cui(apikey, version, icd_codes, batch_size,
                 output_file='ICD10_CUI_FINAL.csv', timeout=30):
    """Query the UMLS search endpoint for each ICD code and save the CUIs found.

    Every code is sent as the ``string`` parameter of
    ``https://uts-ws.nlm.nih.gov/rest/search/<version>`` and all result pages
    are read until an empty page is returned. Each hit's ``ui`` value is
    recorded as the CUI for that code.

    Args:
        apikey: UMLS API key, sent as the ``apiKey`` query parameter.
        version: UMLS release to search, appended to the endpoint URL
            (e.g. ``"current"``).
        icd_codes: Iterable of ICD code strings (a list or pandas Series).
            Values that are not non-blank strings are skipped with a message.
        batch_size: Number of codes processed between checkpoints. After each
            batch the rows collected so far are written to ``output_file`` so
            an interrupted run keeps its partial results.
        output_file: Path of the CSV file to write.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        A DataFrame with columns ``'ICD-10'`` and ``'CUI'``, one row per hit.
        The same table is written to ``output_file``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    uri = "https://uts-ws.nlm.nih.gov"
    content_endpoint = "/rest/search/" + version
    full_url = uri + content_endpoint
    result_list = []

    # Materialize once so lists and pandas Series are sliced the same way.
    codes = list(icd_codes)

    for start_index in range(0, len(codes), batch_size):
        for icd_code in codes[start_index:start_index + batch_size]:
            if not isinstance(icd_code, str) or not icd_code.strip():
                print(f'Skipping invalid ICD code: {icd_code!r}')
                continue

            # Errors are handled per code: a failed lookup is reported and the
            # loop moves on, instead of dropping the rest of the batch. Rows
            # already yielded for this code are kept.
            try:
                for row in _iter_cui_rows(full_url, apikey, icd_code, timeout):
                    result_list.append(row)
            except (requests.RequestException, KeyError, TypeError, ValueError) as except_error:
                print(f'Error while processing ICD code {icd_code}: {except_error}')

        # Checkpoint after every batch so a long run can be interrupted
        # without losing what has been collected so far.
        _save_results(result_list, output_file)

    # Final save (also covers the case of no codes at all)
    result_df = _save_results(result_list, output_file)
    print("CSV file saved successfully.")
    return result_df

import os

# Set UMLS API key and version.
# SECURITY NOTE(review): a literal API key is committed in this notebook.
# Prefer supplying the key through the UMLS_API_KEY environment variable; the
# literal below is kept only as a fallback so existing runs keep working and
# should be rotated and removed from source control.
apikey = os.environ.get("UMLS_API_KEY") or "49182007-9896-41a5-aae6-f2cb70d0ca5e"
version = "current"

# ICD codes to look up: the decimal-formatted codes from final_icd10
icd_codes = final_icd10['ICD10_WITH_DECIMAL']
batch_size = 200  # Number of codes to process in each batch

# Call retrieve_cui function
retrieve_cui(apikey, version, icd_codes, batch_size)