In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [2]:
# Step 1: Reading the dataset from the CSV file
data = pd.read_csv('claims_synthetic_dataset(200000try).csv')

In [3]:
# Step 2: Converting the dataset into a list of transactions
transactions = []
for _, row in data.iterrows():
    cpt_codes = row[1:].tolist() 
    transactions.append([str(cpt_code) for cpt_code in cpt_codes if not pd.isnull(cpt_code)])

In [4]:
# Step 3: Transforming the data into a one-hot encoded format suitable for the Apriori algorithm
te = TransactionEncoder()
one_hot_data = te.fit(transactions).transform(transactions)

In [5]:
# Step 4: Converting the one-hot encoded data into a pandas DataFrame
df = pd.DataFrame(one_hot_data, columns=te.columns_)

In [6]:
print(df)

        99900  99901  99902  99903  99904  99905  99906  99907  99908  99909  \
0       False  False  False  False  False  False  False   True  False  False   
1       False  False  False  False  False  False  False  False   True  False   
2       False  False  False  False  False  False  False  False  False  False   
3       False  False  False  False  False  False  False  False  False  False   
4       False  False  False  False  False  False  False  False   True  False   
...       ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
199995  False  False  False  False  False  False  False  False  False  False   
199996  False  False  False   True  False  False  False  False  False  False   
199997  False  False  False  False  False   True  False  False  False   True   
199998  False  False  False  False  False   True  False  False  False  False   
199999  False  False  False  False  False  False  False  False  False  False   

        ...  99990  99991  99992  99993

In [8]:
# Step 5: Applying the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(df, min_support=0.01, use_colnames=True)

In [9]:
# Step 6: Generating association rules from the frequent itemsets
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=0.01)

In [10]:
# Step 7: Sorting the association rules by confidence in descending order
rules = rules.sort_values(by='confidence', ascending=False)

In [11]:
def get_suggested_procedures(selected_cpt_codes, rules, num_suggestions):
    # Filter the association rules to include only the selected CPT codes in the antecedents
    filtered_rules = rules[rules['antecedents'].apply(lambda x: set(selected_cpt_codes).issubset(set(x)))]

    # Sort the filtered rules by support in descending order
    filtered_rules = filtered_rules.sort_values(by='support', ascending=False)

    # Retrieve the top N suggested procedures from the consequents
    suggested_procedures = filtered_rules['consequents'].head(num_suggestions).values.tolist()

    return suggested_procedures


In [12]:
print(rules.head())
print(rules.columns)

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction, zhangs_metric]
Index: []
Index(['antecedents', 'consequents', 'antecedent support',
       'consequent support', 'support', 'confidence', 'lift', 'leverage',
       'conviction', 'zhangs_metric'],
      dtype='object')


In [13]:
# Example usage
selected_cpt_codes = ['99913']
num_suggestions = 5
suggested_procedures = get_suggested_procedures(selected_cpt_codes, rules, num_suggestions)
print(suggested_procedures)

KeyError: 'support'