In [1]:
import pandas as pd
from collections import defaultdict
import csv
from itertools import combinations

In [2]:
# Loading the dataset
df = pd.read_csv('claims_synthetic_dataset(100000).csv')

In [3]:
# Defining the number of CPT codes per claim
num_cpt_codes = 6

In [4]:
# Function to suggest the most commonly occurring CPT codes in the dataset
def suggest_common_cpts(num_suggestions):
    cpt_counts = defaultdict(int)
    for _, row in df.iterrows():
        cpt_codes = [str(row[f'CPT Code {i+1}']) for i in range(num_cpt_codes)]
        for cpt in cpt_codes:
            cpt_counts[cpt] += 1
    
    sorted_cpt_counts = sorted(cpt_counts.items(), key=lambda x: x[1], reverse=True)
    suggested_cpts = [cpt for cpt, _ in sorted_cpt_counts[:num_suggestions]]
    suggested_cpt_occurrences = [cpt_counts[cpt] for cpt in suggested_cpts]

    return suggested_cpts, suggested_cpt_occurrences

In [5]:
# Suggesting the most commonly occurring CPT codes in the dataset
num_common_suggestions = 10
common_cpts, common_cpt_occurrences = suggest_common_cpts(num_common_suggestions)

In [6]:
# Displaying the results
print(f"\nThe most commonly occurring CPT codes in the dataset are:")
for i, (cpt, occurrences) in enumerate(zip(common_cpts, common_cpt_occurrences), 1):
    print(f" {i}: CPT {cpt} (Occurrences: {occurrences})")


The most commonly occurring CPT codes in the dataset are:
 1: CPT 89579 (Occurrences: 53)
 2: CPT 80386 (Occurrences: 53)
 3: CPT 95564 (Occurrences: 53)
 4: CPT 84314 (Occurrences: 51)
 5: CPT 84179 (Occurrences: 51)
 6: CPT 83860 (Occurrences: 50)
 7: CPT 88551 (Occurrences: 50)
 8: CPT 93402 (Occurrences: 50)
 9: CPT 84140 (Occurrences: 49)
 10: CPT 99565 (Occurrences: 49)


In [7]:
# Function to find the most common CPT codes that occur within the same claim
def find_common_cpt_pairs():
    cpt_pairs = defaultdict(int)
    for _, row in df.iterrows():
        cpt_codes = [str(row[f'CPT Code {i+1}']) for i in range(num_cpt_codes)]
        for i in range(num_cpt_codes):
            for j in range(i+1, num_cpt_codes):
                pair = (cpt_codes[i], cpt_codes[j])
                cpt_pairs[pair] += 1

    sorted_cpt_pairs = sorted(cpt_pairs.items(), key=lambda x: x[1], reverse=True)
    most_common_pairs = sorted_cpt_pairs[:8]  
    return most_common_pairs

In [8]:
# Finding the most common CPT code pairs
most_common_cpt_pairs = find_common_cpt_pairs()

In [9]:
# Displaying the results
print("\nThe most common CPT code pairs that occur within the same claim are:")
for i, ((cpt1, cpt2), occurrences) in enumerate(most_common_cpt_pairs, 1):
    print(f"Pair {i}: CPT {cpt1} and CPT {cpt2} (Occurrences: {occurrences})")


The most common CPT code pairs that occur within the same claim are:
Pair 1: CPT 85245 and CPT 95540 (Occurrences: 3)
Pair 2: CPT 81147 and CPT 85769 (Occurrences: 3)
Pair 3: CPT 85528 and CPT 90108 (Occurrences: 3)
Pair 4: CPT 86050 and CPT 92352 (Occurrences: 3)
Pair 5: CPT 95207 and CPT 92888 (Occurrences: 3)
Pair 6: CPT 91786 and CPT 90964 (Occurrences: 3)
Pair 7: CPT 87474 and CPT 96773 (Occurrences: 3)
Pair 8: CPT 97548 and CPT 85559 (Occurrences: 3)


In [10]:
# Function to suggest multiple sets of three CPT codes that occur within the same claim
def suggest_common_cooccurring_cpts(num_suggestions):
    cpt_counts = defaultdict(int)
    for _, row in df.iterrows():
        cpt_codes = [str(row[f'CPT Code {i+1}']) for i in range(num_cpt_codes)]
        for combo in combinations(cpt_codes, 3):
            cpt_counts[combo] += 1
    
    sorted_cpt_counts = sorted(cpt_counts.items(), key=lambda x: x[1], reverse=True)
    suggested_cpts = [cpt for cpt, _ in sorted_cpt_counts[:num_suggestions]]
    suggested_cpt_occurrences = [cpt_counts[cpt] for cpt in suggested_cpts]

    return suggested_cpts, suggested_cpt_occurrences

In [11]:
# Suggesting multiple sets of three CPT codes that occur in the same row
num_common_suggestions = 5
common_cpts, common_cpt_occurrences = suggest_common_cooccurring_cpts(num_common_suggestions)

In [12]:
# Displaying the results
print(f"\nThe most commonly co-occuring sets of three CPT codes are:")
for i, (cpts, occurrences) in enumerate(zip(common_cpts, common_cpt_occurrences), 1):
    cpt1, cpt2, cpt3 = cpts
    print(f"Suggestion {i}: CPT {cpt1}, CPT {cpt2}, CPT {cpt3} (Occurrences: {occurrences})")


The most commonly co-occuring sets of three CPT codes are:
Suggestion 1: CPT 81147, CPT 84903, CPT 85769 (Occurrences: 2)
Suggestion 2: CPT 83527, CPT 94503, CPT 81097 (Occurrences: 1)
Suggestion 3: CPT 83527, CPT 94503, CPT 82987 (Occurrences: 1)
Suggestion 4: CPT 83527, CPT 94503, CPT 92384 (Occurrences: 1)
Suggestion 5: CPT 83527, CPT 94503, CPT 94392 (Occurrences: 1)


In [13]:
# Function to suggest the most co-occurring next CPT codes based on selected CPT codes
def suggest_next_cpt(selected_cpt_codes, num_suggestions):
    cooccurrence_row_counts = defaultdict(int) 

    for _, row in df.iterrows():
        row_cpt_codes = [str(row[f'CPT Code {i+1}']) for i in range(num_cpt_codes)]
        if set(selected_cpt_codes).issubset(row_cpt_codes):
            for cpt in row_cpt_codes:
                if cpt not in selected_cpt_codes:
                    cooccurrence_row_counts[cpt] += 1

    sorted_next_cpts = sorted(cooccurrence_row_counts.items(), key=lambda x: x[1], reverse=True)
    suggested_cpts = [cpt for cpt, _ in sorted_next_cpts[:num_suggestions]]
    suggested_cpt_occurrences = [cooccurrence_row_counts[cpt] for cpt in suggested_cpts]

    return suggested_cpts, suggested_cpt_occurrences

In [14]:
# Function to check if the selected CPT codes are valid
def is_valid_cpt(cpt_codes):
    for cpt in cpt_codes:
        if not (80000 <= int(cpt) <= 99999):
            return False
    return True

In [15]:
# Example usage
selected_cpt_codes = ['81147','84903']
if not is_valid_cpt(selected_cpt_codes):
    print("Invalid CPT code(s) entered. CPT codes should be in the range of 80000 to 99999.")
else:
    num_suggestions = 3
    suggested_cpts, suggested_cpt_occurrences = suggest_next_cpt(selected_cpt_codes, num_suggestions)

In [16]:
# Displaying the results
print(f"The most co-occurring next CPT codes for {selected_cpt_codes} on the basis of cooccurrences are:")
for i, (cpt, occurrences) in enumerate(zip(suggested_cpts, suggested_cpt_occurrences), 1):
    print(f"Suggestion {i}: CPT {cpt} (Occurrences: {occurrences})")

The most co-occurring next CPT codes for ['81147', '84903'] on the basis of cooccurrences are:
Suggestion 1: CPT 85769 (Occurrences: 2)
Suggestion 2: CPT 91607 (Occurrences: 1)
Suggestion 3: CPT 88311 (Occurrences: 1)


In [17]:
#Verifying the above results
import csv

filename ='claims_synthetic_dataset(100000).csv'

cpt_codes = ['81147','84903','88311']


matching_row_count = 0

with open(filename, 'r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        if all(cpt_code in row.values() for cpt_code in cpt_codes):
            matching_row_count += 1

print(f"Number of rows containing CPT codes {', '.join(cpt_codes)}: {matching_row_count}")

Number of rows containing CPT codes 81147, 84903, 88311: 1
