In [1]:
# Importing necessary libraries
import random
import csv
import pandas as pd
from itertools import combinations
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Function to generate synthetic dataset
def generate_dataset(num_transactions, num_cpt_codes):
    dataset = []
    cpt_codes = list(range(99980, 99999))
    for _ in range(num_transactions):
        transaction = random.sample(cpt_codes, num_cpt_codes)
        dataset.append(transaction)
    return dataset

In [3]:
# Function to save dataset as csv file
def save_dataset_to_csv(dataset, file_path):
    with open(file_path, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        for transaction in dataset:
            writer.writerow(transaction)

In [4]:
#Generating synthetic dataset
num_transactions = 200000
num_cpt_codes = 8
dataset = generate_dataset(num_transactions, num_cpt_codes)

In [5]:
#Displaying dataset
dataset

[[99987, 99985, 99994, 99993, 99990, 99997, 99982, 99991],
 [99997, 99992, 99986, 99996, 99980, 99982, 99987, 99985],
 [99983, 99985, 99980, 99982, 99986, 99987, 99988, 99981],
 [99997, 99980, 99993, 99992, 99996, 99986, 99983, 99990],
 [99984, 99992, 99996, 99988, 99982, 99991, 99994, 99997],
 [99995, 99983, 99997, 99998, 99992, 99989, 99984, 99994],
 [99996, 99991, 99981, 99987, 99986, 99997, 99994, 99992],
 [99981, 99989, 99982, 99985, 99997, 99995, 99987, 99988],
 [99998, 99992, 99993, 99984, 99991, 99986, 99994, 99985],
 [99993, 99985, 99983, 99996, 99998, 99984, 99982, 99981],
 [99981, 99994, 99997, 99988, 99984, 99998, 99993, 99992],
 [99992, 99991, 99986, 99988, 99983, 99990, 99998, 99985],
 [99990, 99986, 99995, 99989, 99981, 99998, 99988, 99980],
 [99985, 99997, 99986, 99990, 99988, 99992, 99995, 99982],
 [99981, 99992, 99993, 99983, 99987, 99994, 99988, 99985],
 [99991, 99988, 99992, 99997, 99980, 99996, 99987, 99983],
 [99996, 99997, 99990, 99988, 99982, 99987, 99993, 99989

In [6]:
# Saving dataset to CSV file
file_path = 'dataset.csv'
save_dataset_to_csv(dataset, file_path)

In [7]:
# Performing market basket analysis
def market_basket_analysis(dataset, min_support, min_confidence):
    te = TransactionEncoder()
    te_ary = te.fit(dataset).transform(dataset)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
    
    return df, rules

In [8]:
min_support = 0.02
min_confidence = 0.02
df, rules = market_basket_analysis(dataset, min_support, min_confidence)

In [9]:
#Displaying binary transaction data
df

Unnamed: 0,99980,99981,99982,99983,99984,99985,99986,99987,99988,99989,99990,99991,99992,99993,99994,99995,99996,99997,99998
0,False,False,True,False,False,True,False,True,False,False,True,True,False,True,True,False,False,True,False
1,True,False,True,False,False,True,True,True,False,False,False,False,True,False,False,False,True,True,False
2,True,True,True,True,False,True,True,True,True,False,False,False,False,False,False,False,False,False,False
3,True,False,False,True,False,False,True,False,False,False,True,False,True,True,False,False,True,True,False
4,False,False,True,False,True,False,False,False,True,False,False,True,True,False,True,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,True,False,True,False,True,False,False,True,True,True,False,False,True,False,False,False,False,False,True
199996,True,False,True,False,False,False,False,True,False,False,False,True,True,False,True,False,True,False,True
199997,False,True,False,True,False,True,False,False,False,False,False,False,True,True,True,False,False,True,True
199998,False,False,False,True,False,True,True,False,True,True,False,False,False,True,False,True,False,True,False


In [10]:
# Displaying association rules
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(99980),(99981),0.419880,0.420565,0.162025,0.385884,0.917537,-0.014562,0.943527,-0.134141
1,(99981),(99980),0.420565,0.419880,0.162025,0.385256,0.917537,-0.014562,0.943677,-0.134279
2,(99980),(99982),0.419880,0.422485,0.163780,0.390064,0.923261,-0.013613,0.946845,-0.125321
3,(99982),(99980),0.422485,0.419880,0.163780,0.387659,0.923261,-0.013613,0.947380,-0.125815
4,(99980),(99983),0.419880,0.420580,0.162345,0.386646,0.919317,-0.014248,0.944675,-0.131407
...,...,...,...,...,...,...,...,...,...,...
6151,"(99996, 99998)",(99997),0.163800,0.421085,0.057730,0.352442,0.836985,-0.011244,0.893997,-0.188914
6152,"(99997, 99998)",(99996),0.163780,0.421175,0.057730,0.352485,0.836909,-0.011250,0.893918,-0.188997
6153,(99996),"(99997, 99998)",0.421175,0.163780,0.057730,0.137069,0.836909,-0.011250,0.969046,-0.251873
6154,(99997),"(99996, 99998)",0.421085,0.163800,0.057730,0.137098,0.836985,-0.011244,0.969056,-0.251737


In [11]:
#Function to recommend CPT codes based on co-occurrences
def recommend_co_occurring_cpt_codes(input_cpt_codes, rules, top_n=5):
    if isinstance(input_cpt_codes, int):
        input_cpt_codes = [input_cpt_codes]
    
    co_occurring_cpt_codes = {}
    for index, row in rules.iterrows():
        antecedents = set(row['antecedents'])
        consequents = set(row['consequents'])
        
        if set(input_cpt_codes).issubset(antecedents):
            co_occurring = consequents - set(input_cpt_codes)
            for code in co_occurring:
                if code in co_occurring_cpt_codes:
                    co_occurring_cpt_codes[code] = max(co_occurring_cpt_codes[code], row['support'])
                else:
                    co_occurring_cpt_codes[code] = row['support']
    
    sorted_cpt_codes = sorted(co_occurring_cpt_codes.items(), key=lambda x: x[1], reverse=True)
    recommended_cpt_codes = [(code, support) for code, support in sorted_cpt_codes[:top_n]]
    
    return recommended_cpt_codes

In [12]:
#Usage Example
input_cpt_code = 99982, 99994
recommended_codes = recommend_co_occurring_cpt_codes(input_cpt_code, rules)
print("Recommended CPT Codes with Support:")
for code, support in recommended_codes:
    print("CPT Code:", code, "| Support:", support)

Recommended CPT Codes with Support:
CPT Code: 99996 | Support: 0.05835
CPT Code: 99991 | Support: 0.058265
CPT Code: 99984 | Support: 0.058165
CPT Code: 99983 | Support: 0.058045
CPT Code: 99997 | Support: 0.058025
