In [1]:
import pandas as pd

In [2]:
# Load the CPT reference workbook into a DataFrame
df = pd.read_excel(io='cptupdated.xlsx')

In [3]:
# Show every column header of the loaded sheet, one per line
column_names = df.columns
print("Column Names:", *column_names, sep="\n")

Column Names:
Procedure Code Category
CPT Codes
Procedure Code Descriptions
Code Status


In [4]:
# Preview the two columns the recommender relies on: each heading is printed,
# followed by the matching column.
column_previews = (
    ("\nData in 'CPT Codes' column:", 'CPT Codes'),
    ("\nData in 'Procedure Code Descriptions' column:", 'Procedure Code Descriptions'),
)
for heading, column_label in column_previews:
    print(heading)
    print(df[column_label])


Data in 'CPT Codes' column:
0       34830
1       34831
2       34832
3       35081
4       35082
        ...  
1163    49426
1164    49900
1165    49905
1166    49906
1167    58960
Name: CPT Codes, Length: 1168, dtype: object

Data in 'Procedure Code Descriptions' column:
0       Open repair of infrarenal aortic aneurysm or d...
1       Open repair of infrarenal aortic aneurysm or d...
2       Open repair of infrarenal aortic aneurysm or d...
3       Direct repair of aneurysm, pseudoaneurysm, or ...
4       Direct repair of aneurysm, pseudoaneurysm, or ...
                              ...                        
1163                  Revision of peritoneal-venous shunt
1164    Suture, secondary, of abdominal wall for evisc...
1165    Omental flap, intra-abdominal (List separately...
1166     Free omental flap with microvascular anastomosis
1167    Laparotomy, for staging or restaging of ovaria...
Name: Procedure Code Descriptions, Length: 1168, dtype: object


In [5]:
# Normalise the two columns used downstream:
#   - missing descriptions become '' so every row holds a string
#   - codes are cast to str so they can be compared with string input
code_column, text_column = 'CPT Codes', 'Procedure Code Descriptions'
df[text_column] = df[text_column].fillna('')
df[code_column] = df[code_column].astype(str)

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
# Turn each procedure description into a TF-IDF vector (one row per DataFrame row)
description_corpus = df['Procedure Code Descriptions']
vectorizer = TfidfVectorizer()
description_vectors = vectorizer.fit_transform(description_corpus)

In [8]:
# Precompute the pairwise cosine similarity between all description vectors.
# Nothing is trained here: entry [i, j] is simply the similarity of
# description i to description j, returned as a dense array.
cosine_sim_matrix = cosine_similarity(description_vectors, dense_output=True)

In [9]:
# Function to recommend CPT codes based on input CPT code
def recommend_cpt(input_cpt, top_n=2):
    """Recommend the CPT codes whose descriptions are most similar to ``input_cpt``.

    Reads the module-level ``df`` and ``cosine_sim_matrix``. Row ``i`` of the
    matrix is expected to describe the ``i``-th row (by position) of ``df``.

    Args:
        input_cpt: CPT code to look up. It is converted with ``str()`` before
            being compared with the 'CPT Codes' column.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of ``(cpt_code, description, similarity_score)`` tuples ordered
        from most to least similar. The list is empty when the code is not
        present in ``df`` or when ``top_n`` is not positive. Rows carrying the
        input code itself are never recommended.
    """
    if top_n <= 0:
        return []

    # Boolean mask over row *positions*; independent of the DataFrame's index labels.
    same_code = (df['CPT Codes'] == str(input_cpt)).to_numpy()
    if not same_code.any():
        return []

    # First matching row, as a position, so it addresses the matrix correctly
    # even when df does not have a default 0..n-1 index.
    query_position = int(same_code.argmax())
    similarities = cosine_sim_matrix[query_position]

    # Descending by score; the stable sort keeps tied rows in DataFrame order so
    # results are reproducible.
    ranked = (-similarities).argsort(kind='stable')

    # Drop the query explicitly instead of assuming it sorts last: ties (duplicate
    # descriptions) or an all-zero vector (empty description) break that assumption.
    ranked = ranked[~same_code[ranked]][:top_n]

    picked = df.iloc[ranked]
    return list(zip(
        picked['CPT Codes'].values,
        picked['Procedure Code Descriptions'].values,
        similarities[ranked],
    ))

In [10]:
# Demo: look up one CPT code, then print its description and closest matches
input_cpt = '34830'
is_input_row = df['CPT Codes'] == input_cpt
input_cpt_description = df.loc[is_input_row, 'Procedure Code Descriptions'].values[0]
recommendations = recommend_cpt(input_cpt)

print(f"Given CPT: {input_cpt} CPT code: {input_cpt_description}\n")
print("Recommended CPTs:")
for cpt_code, cpt_name, similarity_score in recommendations:
    # A single call emits the code line, the score line and a trailing blank line
    print(f"{cpt_code}: {cpt_name}", f"Similarity Score: {similarity_score}", "", sep="\n")

Given CPT: 34830 CPT code: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; tube prosthesis  

Recommended CPTs:
34832: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bifemoral prosthesis  
Similarity Score: 0.8943709370003284

34831: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bi-iliac prosthesis 
Similarity Score: 0.8675368098373953



In [11]:
# Function to recommend CPT codes based on input CPT codes
def recommend_cpts_multiple_input(input_cpts, top_n=2):
    """Recommend similar CPT codes for each code in ``input_cpts``.

    Delegates to ``recommend_cpt`` for every code so both entry points share
    one implementation of the lookup and ranking.

    Args:
        input_cpts: Iterable of CPT codes. A single string is treated as one
            code rather than being iterated character by character.
        top_n: Maximum number of recommendations per input code.

    Returns:
        A list with exactly one entry per input code, in input order. Each
        entry is the list of ``(cpt_code, description, similarity_score)``
        tuples returned by ``recommend_cpt``; it is empty for a code that has
        no recommendations (for example, a code that is not in the data), so
        the result can always be zipped with ``input_cpts``.
    """
    if isinstance(input_cpts, str):
        input_cpts = [input_cpts]
    return [recommend_cpt(input_cpt, top_n) for input_cpt in input_cpts]

In [12]:
# Example recommending CPTs for several given CPTs
input_cpts = ['34830', '35082']

# First description recorded for each code, addressable by the code itself.
# Looking descriptions up per code keeps them in *input* order; a boolean
# `isin` filter would return them in DataFrame row order instead.
description_by_code = (
    df.drop_duplicates(subset='CPT Codes')
      .set_index('CPT Codes')['Procedure Code Descriptions']
)

# Only codes present in the data are processed, so codes, descriptions and
# recommendations stay aligned when zipped below.
known_cpts = []
for code in input_cpts:
    if code in description_by_code.index:
        known_cpts.append(code)
    else:
        print(f"CPT code {code} not found; skipping")
input_cpt_descriptions = description_by_code.loc[known_cpts].values

recommendations = recommend_cpts_multiple_input(known_cpts)

for input_cpt, input_cpt_description, cpt_recommendations in zip(known_cpts, input_cpt_descriptions, recommendations):
    print(f"\nGiven CPT: {input_cpt} CPT code: {input_cpt_description}\n")
    print("Recommended CPTs:")
    for cpt_code, cpt_name, similarity_score in cpt_recommendations:
        print(f"{cpt_code}: {cpt_name}")
        print(f"Similarity Score: {similarity_score}")
        print()


Given CPT: 34830 CPT code: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; tube prosthesis  

Recommended CPTs:
34832: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bifemoral prosthesis  
Similarity Score: 0.8943709370003284

34831: Open repair of infrarenal aortic aneurysm or dissection, plus repair of associated arterial trauma, following unsuccessful endovascular repair; aorto-bi-iliac prosthesis 
Similarity Score: 0.8675368098373953


Given CPT: 35082 CPT code: Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for ruptured aneurysm, abdominal aorta  

Recommended CPTs:
35081: Direct repair of aneurysm, pseudoaneurysm, or excision (partial or total) and graft insertion, with or without patch graft; for aneurysm, 