In [1]:
import json
import pandas as pd
import ast

In [2]:
# Load the association rules produced by the rule-mining step.
# Each rule is a dict with 'lhs', 'rhs', 'confidence' and 'itemset' keys.
# The encoding is pinned because JSON is UTF-8 by specification, whereas open()
# would otherwise fall back to the platform locale (e.g. cp1252 on Windows).
with open('C:/Users/USER/Desktop/FYP/fyp-sandbox-2/Notebooks/Rule_Mining/matched_rules.json', 'r', encoding='utf-8') as f:
    rules = json.load(f)

# Preview only the first few rules; the full list holds thousands of entries.
rules[:10]

[{'lhs': ['drug_triamcinolone_acetonide'],
  'rhs': ['demo_origin_us'],
  'confidence': 0.9151515151515152,
  'itemset': ['drug_triamcinolone_acetonide', 'demo_origin_us']},
 {'lhs': ['reaction_skin_haemorrhage'],
  'rhs': ['demo_origin_us'],
  'confidence': 0.9437751004016064,
  'itemset': ['reaction_skin_haemorrhage', 'demo_origin_us']},
 {'lhs': ['reaction_exposure_via_skin_contact'],
  'rhs': ['reaction_accidental_exposure_to_product'],
  'confidence': 0.9171597633136095,
  'itemset': ['reaction_exposure_via_skin_contact',
   'reaction_accidental_exposure_to_product']},
 {'lhs': ['reaction_haemarthrosis'],
  'rhs': ['demo_sex_m'],
  'confidence': 0.9576107899807321,
  'itemset': ['reaction_haemarthrosis', 'demo_sex_m']},
 {'lhs': ['reaction_product_dose_omission_in_error'],
  'rhs': ['demo_origin_us'],
  'confidence': 0.9514925373134329,
  'itemset': ['reaction_product_dose_omission_in_error', 'demo_origin_us']},
 {'lhs': ['drug_nexplanon', 'demo_origin_us'],
  'rhs': ['demo_age_bi

In [4]:
# Report how many rules were loaded from matched_rules.json.
print(f'Number of rules mined: {len(rules)}')

Number of rules mined: 5733


First, we need to find the rules that match our specific requirements, which are:

✅ At least 1 drug

✅ At least 1 reaction

✅ Demographic keys: demo_sex_, demo_age_bin_, demo_origin_

❌ demo_weight_bin_ is optional

In [5]:
def filter_rules(rules, required_prefixes=(
        "drug_", "reaction_", "demo_sex_", "demo_age_bin_", "demo_origin_")):
    """Keep only the rules whose itemset covers every required item category.

    A rule is kept when, for each prefix in ``required_prefixes``, at least
    one item of its ``"itemset"`` starts with that prefix. With the default
    prefixes this means at least one drug, one reaction, and the sex,
    age-bin and origin demographics; weight is not required.

    Parameters
    ----------
    rules : iterable of dict
        Mined rules. Each rule must provide an ``"itemset"`` list of strings;
        rules that pass the filter must also provide ``"confidence"``,
        ``"lhs"`` and ``"rhs"``.
    required_prefixes : iterable of str, optional
        Item-name prefixes that must each be matched by at least one item.
        Defaults to the five categories listed above.

    Returns
    -------
    list of dict
        One new dict per kept rule, in input order, holding only the keys
        ``"itemset"``, ``"confidence"``, ``"lhs"`` and ``"rhs"``.

    Raises
    ------
    KeyError
        If a rule has no ``"itemset"`` key, or a kept rule lacks one of the
        copied keys.
    """
    # Materialise once so a generator argument can be reused for every rule.
    required_prefixes = tuple(required_prefixes)
    filtered_rules = []

    for rule in rules:
        itemset = rule["itemset"]

        # The default prefixes are mutually exclusive, so matching all of them
        # already guarantees at least five items; no separate length test is
        # needed.
        covers_all_categories = all(
            any(item.startswith(prefix) for item in itemset)
            for prefix in required_prefixes
        )
        if covers_all_categories:
            filtered_rules.append({
                "itemset": itemset,
                "confidence": rule["confidence"],
                "lhs": rule["lhs"],
                "rhs": rule["rhs"],
            })

    return filtered_rules

In [6]:
# Apply the component filter to every mined rule and display the rules that pass.
filtered = filter_rules(rules)
filtered

[{'itemset': ['drug_proactiv_md_deep_cleansing_face_wash',
   'drug_proactiv_md_adapalene_acne_treatment',
   'demo_sex_f',
   'reaction_acne',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'demo_age_bin_teenager',
   'demo_origin_us'],
  'confidence': 0.9532908704883227,
  'lhs': ['drug_proactiv_md_deep_cleansing_face_wash',
   'demo_sex_f',
   'demo_age_bin_teenager',
   'reaction_acne'],
  'rhs': ['drug_proactiv_md_adapalene_acne_treatment',
   'demo_origin_us',
   'drug_proactiv_md_daily_oil_control_spf_30']},
 {'itemset': ['drug_proactiv_md_deep_cleansing_face_wash',
   'drug_proactiv_md_adapalene_acne_treatment',
   'demo_sex_f',
   'reaction_acne',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'demo_age_bin_teenager',
   'demo_origin_us'],
  'confidence': 0.905241935483871,
  'lhs': ['drug_proactiv_md_daily_oil_control_spf_30',
   'demo_sex_f',
   'demo_age_bin_teenager',
   'reaction_acne'],
  'rhs': ['drug_proactiv_md_deep_cleansing_face_wash',
   'drug_proactiv_md_a

In [7]:
# Report how many rules passed filter_rules.
print(f'{len(filtered)} number of rules were filtered from the mined rules')

107 number of rules were filtered from the mined rules


Several rules share the same itemset and differ only in how it is split into LHS (antecedent) and RHS (consequent). The 107 filtered rules therefore contain repeated itemsets, so we keep only the unique itemsets. This does not affect the final output, because every such rule describes the same itemset, and each has a confidence above 90%.

In [20]:
# Label each filtered rule's itemset as 'item_<n>' (1-based) and store the
# itemset as its string representation, one single-key dict per rule.
filtered_itemsets = [
    {f'item_{position}': str(entry['itemset'])}
    for position, entry in enumerate(filtered, start=1)
]

print(filtered_itemsets)

[{'item_1': "['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatment', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']"}, {'item_2': "['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatment', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']"}, {'item_3': "['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatment', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']"}, {'item_4': "['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatment', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']"}, {'item_5': "['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatmen

In [22]:
# Sanity check: the list holds one entry per filtered rule.
print(len(filtered_itemsets))

107


In [24]:
# Flatten the single-key dicts into records with the label under 'id' and the
# stringified itemset under 'outcomes'.
flattened = [
    {'id': item_id, 'outcomes': itemset_repr}
    for entry in filtered_itemsets
    for item_id, itemset_repr in entry.items()
]

# One row per filtered rule.
filtered_rules_df = pd.DataFrame(flattened)
filtered_rules_df

Unnamed: 0,id,outcomes
0,item_1,"['drug_proactiv_md_deep_cleansing_face_wash', ..."
1,item_2,"['drug_proactiv_md_deep_cleansing_face_wash', ..."
2,item_3,"['drug_proactiv_md_deep_cleansing_face_wash', ..."
3,item_4,"['drug_proactiv_md_deep_cleansing_face_wash', ..."
4,item_5,"['drug_proactiv_md_deep_cleansing_face_wash', ..."
...,...,...
102,item_103,"['reaction_drug_dose_omission_by_device', 'dem..."
103,item_104,"['reaction_drug_dose_omission_by_device', 'dem..."
104,item_105,"['reaction_drug_dose_omission_by_device', 'dru..."
105,item_106,"['reaction_drug_dose_omission_by_device', 'dru..."


In [31]:
# Keep the first occurrence of every distinct itemset string, as a one-column frame.
unq_rules = filtered_rules_df['outcomes'].drop_duplicates().to_frame()
len(unq_rules)

38

In [35]:
# Parse every unique itemset string back into a Python list of item names.
uniq_rules_ls = [ast.literal_eval(itemset_repr) for itemset_repr in unq_rules['outcomes']]
uniq_rules_ls

[['drug_proactiv_md_deep_cleansing_face_wash',
  'drug_proactiv_md_adapalene_acne_treatment',
  'demo_sex_f',
  'reaction_acne',
  'drug_proactiv_md_daily_oil_control_spf_30',
  'demo_age_bin_teenager',
  'demo_origin_us'],
 ['drug_proactiv_md_deep_cleansing_face_wash',
  'demo_sex_f',
  'reaction_acne',
  'drug_proactiv_md_daily_oil_control_spf_30',
  'demo_age_bin_teenager',
  'demo_origin_us'],
 ['drug_proactiv_md_deep_cleansing_face_wash',
  'drug_proactiv_md_adapalene_acne_treatment',
  'demo_sex_f',
  'reaction_acne',
  'demo_age_bin_teenager',
  'demo_origin_us'],
 ['drug_proactiv_md_deep_cleansing_face_wash',
  'demo_sex_f',
  'reaction_acne',
  'demo_age_bin_teenager',
  'demo_origin_us'],
 ['drug_proactiv_md_adapalene_acne_treatment',
  'demo_sex_f',
  'reaction_acne',
  'drug_proactiv_md_daily_oil_control_spf_30',
  'demo_age_bin_teenager',
  'demo_origin_us'],
 ['demo_sex_f',
  'reaction_acne',
  'drug_proactiv_md_daily_oil_control_spf_30',
  'demo_age_bin_teenager',
  'dem

In [36]:
# Number of unique itemsets after parsing; should equal len(unq_rules).
len(uniq_rules_ls)

38

In [37]:
# Read the case transactions: one row per primaryid, with the case's items
# stored as a stringified list in 'combined_case_ls'.
outcomes_csv_path = 'C:\\Users\\USER\\Desktop\\FYP\\fyp-sandbox-2\\Data\\ARM_DATA/arm_landing_w_outcomes.csv'
outc_tx = pd.read_csv(outcomes_csv_path)
outc_tx.head()

Unnamed: 0,primaryid,combined_case_ls
0,193578573,"['drug_canakinumab', 'demo_weight_10-20 kg', '..."
1,186972702,"['demo_weight_20-40 kg', 'demo_sex_f', 'reacti..."
2,186976496,"['reaction_incorrect_dose_administered', 'reac..."
3,187294742,"['drug_lamotrigine', 'demo_weight_20-40 kg', '..."
4,188004601,"['reaction_bone_disorder', 'demo_weight_20-40 ..."


In [38]:
# Drop rows that are identical across all columns, if there are any.
outc_tx_dedup = outc_tx.drop_duplicates()
outc_tx_dedup

Unnamed: 0,primaryid,combined_case_ls
0,193578573,"['drug_canakinumab', 'demo_weight_10-20 kg', '..."
1,186972702,"['demo_weight_20-40 kg', 'demo_sex_f', 'reacti..."
2,186976496,"['reaction_incorrect_dose_administered', 'reac..."
3,187294742,"['drug_lamotrigine', 'demo_weight_20-40 kg', '..."
4,188004601,"['reaction_bone_disorder', 'demo_weight_20-40 ..."
...,...,...
184031,192122501,"['demo_origin_us', 'drug_nexplanon', 'demo_sex..."
184032,237422431,"['drug_doxorubicin', 'reaction_off_label_use',..."
184033,220202801,"['demo_origin_us', 'reaction_aggression', 'dru..."
184034,213026962,"['drug_linezolid', 'reaction_off_label_use', '..."


In [39]:
# Parse the stringified item list of every case back into a Python list.
outc_tx_ls = outc_tx_dedup['combined_case_ls'].apply(ast.literal_eval).tolist()

# Preview a few transactions only; echoing the whole list would write every
# transaction into the notebook output.
outc_tx_ls[:3]

[['drug_canakinumab',
  'demo_weight_10-20 kg',
  'drug_prednisolone',
  'reaction_necrosis',
  'demo_sex_f',
  'reaction_otitis_media',
  'reaction_circulatory_collapse',
  'reaction_pleural_effusion',
  'reaction_renal_failure',
  'reaction_general_physical_condition_abnormal',
  'reaction_sinus_tachycardia',
  'reaction_general_physical_health_deterioration',
  'reaction_serum_ferritin_increased',
  'reaction_haemorrhage',
  'reaction_haemophagocytic_lymphohistiocytosis',
  'reaction_somnolence',
  'outc_de',
  'outc_ho',
  'drug_amoxicillinclavulanic_acid',
  'demo_origin_de',
  'reaction_pyrexia',
  'reaction_bronchospasm',
  'reaction_aspartate_aminotransferase_abnormal',
  'reaction_hypocalcaemia',
  'reaction_blood_lactate_dehydrogenase_increased',
  'drug_cotrim',
  'outc_ot',
  'reaction_hyponatraemia',
  'reaction_blood_uric_acid_increased',
  'reaction_haemoglobin_decreased',
  'demo_age_bin_child',
  'reaction_cardiac_arrest'],
 ['demo_weight_20-40 kg',
  'demo_sex_f',
  '

In [40]:
# Number of parsed transactions (one per row of outc_tx_dedup).
len(outc_tx_ls)

184036

In [43]:
# For every unique itemset, print each transaction that contains all of its
# items and also carries at least one 'outc_' item. Both counters are 1-based.
for rule_no, item_set in enumerate(uniq_rules_ls, start=1):
    for tx_no, transaction in enumerate(outc_tx_ls, start=1):
        contains_itemset = all(item in transaction for item in item_set)
        if not contains_itemset:
            continue

        has_outcome = any(item.startswith('outc_') for item in transaction)
        if not has_outcome:
            continue

        print(f'itemset_{rule_no}: {item_set}')
        print(f'Outcome_{tx_no}: {transaction}')

itemset_1: ['drug_proactiv_md_deep_cleansing_face_wash', 'drug_proactiv_md_adapalene_acne_treatment', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']
Outcome_64448: ['demo_origin_us', 'reaction_wheezing', 'demo_weight_above 60 kg', 'reaction_oral_mucosal_blistering', 'drug_proactiv_md_daily_oil_control_spf_30', 'drug_proactiv_redness_relief_serum', 'demo_sex_f', 'drug_proactiv_deep_cleansing_wash', 'reaction_anaphylactic_reaction', 'drug_proactiv_md_deep_cleansing_face_wash', 'reaction_acne', 'reaction_pruritus', 'demo_age_bin_teenager', 'drug_proactiv_green_tea_moisturizer', 'reaction_lip_swelling', 'outc_ot', 'drug_proactiv_md_adapalene_acne_treatment', 'reaction_hypoaesthesia', 'reaction_throat_tightness']
itemset_2: ['drug_proactiv_md_deep_cleansing_face_wash', 'demo_sex_f', 'reaction_acne', 'drug_proactiv_md_daily_oil_control_spf_30', 'demo_age_bin_teenager', 'demo_origin_us']
Outcome_64448: ['demo_origin_us', 

In [None]:
# Collect the 'outc_' codes of every transaction, one list per transaction.
# The comprehension has to iterate over the transactions: iterating over
# `item_set` (a flat list of item names) walks each name character by
# character and therefore always yields empty lists.
outcomes = [[item for item in transaction if item.startswith('outc_')] for transaction in outc_tx_ls]


In [47]:
# Index the transactions once instead of once per itemset: keep only those
# carrying at least one 'outc_' item, together with a set of their items (for
# fast subset tests) and their outcome codes in original order. tx_no is the
# 1-based position in outc_tx_ls and is used in the 'Outcome_<n>' key.
tx_with_outcomes = []
for tx_no, outc_tx_i in enumerate(outc_tx_ls, start=1):
    outcome_type = [item for item in outc_tx_i if item.startswith('outc_')]
    if outcome_type:
        tx_with_outcomes.append((tx_no, outc_tx_i, set(outc_tx_i), outcome_type))

master_outc_extract = []

# Iterate over every unique itemset; idx_m is its 1-based position.
for idx_m, rule in enumerate(uniq_rules_ls, start=1):
    item_set = rule
    required_items = set(item_set)

    for tx_no, outc_tx_i, tx_items, outcome_type in tx_with_outcomes:
        # A transaction matches when it contains every item of the itemset.
        if required_items.issubset(tx_items):
            master_outc_extract.append({
                f'itemset_{idx_m}': item_set,
                f'Outcome_{tx_no}': outc_tx_i,
                # Copy so each record owns its own outcome list.
                f'item_case_{idx_m}': list(outcome_type),
            })

master_outc_extract

[{'itemset_1': ['drug_proactiv_md_deep_cleansing_face_wash',
   'drug_proactiv_md_adapalene_acne_treatment',
   'demo_sex_f',
   'reaction_acne',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'demo_age_bin_teenager',
   'demo_origin_us'],
  'Outcome_64448': ['demo_origin_us',
   'reaction_wheezing',
   'demo_weight_above 60 kg',
   'reaction_oral_mucosal_blistering',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'drug_proactiv_redness_relief_serum',
   'demo_sex_f',
   'drug_proactiv_deep_cleansing_wash',
   'reaction_anaphylactic_reaction',
   'drug_proactiv_md_deep_cleansing_face_wash',
   'reaction_acne',
   'reaction_pruritus',
   'demo_age_bin_teenager',
   'drug_proactiv_green_tea_moisturizer',
   'reaction_lip_swelling',
   'outc_ot',
   'drug_proactiv_md_adapalene_acne_treatment',
   'reaction_hypoaesthesia',
   'reaction_throat_tightness'],
  'item_case_1': ['outc_ot']},
 {'itemset_2': ['drug_proactiv_md_deep_cleansing_face_wash',
   'demo_sex_f',
   'reaction_acne',

In [48]:
# Number of (itemset, matching transaction) records that were extracted.
len(master_outc_extract)

238

In [52]:
# Display the current contents of master_outc_extract before saving it.
master_outc_extract

[{'itemset_1': ['drug_proactiv_md_deep_cleansing_face_wash',
   'drug_proactiv_md_adapalene_acne_treatment',
   'demo_sex_f',
   'reaction_acne',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'demo_age_bin_teenager',
   'demo_origin_us'],
  'Outcome_64448': ['demo_origin_us',
   'reaction_wheezing',
   'demo_weight_above 60 kg',
   'reaction_oral_mucosal_blistering',
   'drug_proactiv_md_daily_oil_control_spf_30',
   'drug_proactiv_redness_relief_serum',
   'demo_sex_f',
   'drug_proactiv_deep_cleansing_wash',
   'reaction_anaphylactic_reaction',
   'drug_proactiv_md_deep_cleansing_face_wash',
   'reaction_acne',
   'reaction_pruritus',
   'demo_age_bin_teenager',
   'drug_proactiv_green_tea_moisturizer',
   'reaction_lip_swelling',
   'outc_ot',
   'drug_proactiv_md_adapalene_acne_treatment',
   'reaction_hypoaesthesia',
   'reaction_throat_tightness'],
  'item_case_1': ['outc_ot']},
 {'itemset_2': ['drug_proactiv_md_deep_cleansing_face_wash',
   'demo_sex_f',
   'reaction_acne',

In [53]:
# Persist the extracted matches as JSON (json is already imported in the first cell).
# UTF-8 is set explicitly so the written file does not depend on the platform locale.
with open("outc_extract_data.json", "w", encoding="utf-8") as f:
    json.dump(master_outc_extract, f, indent=2)