In [1]:
import pandas as pd
from collections import defaultdict
import re

# Convert to DataFrame for easier processing
df = pd.read_csv('C:\\Users\\17675\\Downloads\\medicine_details.csv')

# Step 1: Define a regex pattern for splitting side effects
def split_side_effects(side_effects):
    # Regular expression to match words starting with uppercase letters (and followed by lowercase letters)
    return re.findall(r'[A-Z][a-z]*\s?[a-z]*', side_effects)

# Step 2: Prepare transactions (compositions and side effects)
transactions = defaultdict(set)  # To hold compositions
side_effects_dict = defaultdict(list)  # To hold side effects

for _, row in df.iterrows():
    # Each medicine is a transaction of components
    composition = row["Composition"].split(' + ')  # Split multiple ingredients
    transactions[row["Medicine Name"]] = composition
    # Store side effects associated with each medicine (using the regex split function)
    side_effects = split_side_effects(row["Side_effects"])
    side_effects_dict[row["Medicine Name"]] = side_effects

In [2]:
import re
from collections import Counter


def count_compositions(composition):

    components = composition.split('+')
    return len(components)


df['Composition Count'] = df['Composition'].apply(count_compositions)


composition_count_stats = df['Composition Count'].value_counts().sort_index()


print("Composition Count Statistics:")
print(composition_count_stats)


Composition Count Statistics:
1    7069
2    3596
3     933
4     150
5      51
6      16
7       7
8       2
9       1
Name: Composition Count, dtype: int64


In [3]:
from itertools import combinations
# Apply A-Priori algorithm
def a_priori(transactions, min_support):
    item_counts = defaultdict(int)
    for transaction in transactions.values():
        for item in transaction:
            item_counts[item] += 1

    num_transactions = len(transactions)
    frequent_itemsets = {item: count / num_transactions for item, count in item_counts.items() if count / num_transactions >= min_support}

    current_itemsets = list(frequent_itemsets.keys())
    all_frequent_itemsets = dict(frequent_itemsets)

    k = 2
    while current_itemsets:
        next_itemsets = defaultdict(int)
        for transaction in transactions.values():
            for itemset in combinations(transaction, k):
                if all(item in transaction for item in itemset):
                    next_itemsets[itemset] += 1
        
        frequent_itemsets_k = {itemset: count / num_transactions for itemset, count in next_itemsets.items() if count / num_transactions >= min_support}
        
        if not frequent_itemsets_k:
            break
        
        all_frequent_itemsets.update(frequent_itemsets_k)
        current_itemsets = list(frequent_itemsets_k.keys())
        k += 1

    return all_frequent_itemsets

# Apply the A-Priori algorithm on the composition data
min_support = 0.001  #  support threshold

frequent_itemsets = a_priori(transactions, min_support)

# Show some of the frequent itemsets
for itemset, support in list(frequent_itemsets.items())[:5]:
    print(f"Itemset: {itemset} -> Support: {support:.4f}")

Itemset: Amoxycillin  (500mg) -> Support: 0.0034
Itemset:  Clavulanic Acid (125mg) -> Support: 0.0047
Itemset: Azithromycin (500mg) -> Support: 0.0025
Itemset: Ambroxol (30mg/5ml) -> Support: 0.0038
Itemset: Levosalbutamol (1mg/5ml) -> Support: 0.0027


In [None]:
from collections import defaultdict

itemset_side_effects = defaultdict(set)  

for medicine_name, components in transactions.items():
    side_effects = side_effects_dict[medicine_name]

    for itemset in frequent_itemsets:

        sorted_itemset = tuple(sorted(itemset)) 

        if all(item in components for item in itemset):
            itemset_side_effects[sorted_itemset].update(side_effects)  


for itemset, side_effects in itemset_side_effects.items():
    print(f"Itemset: {itemset} -> Associated Side Effects: {side_effects}")

Itemset: (' Clavulanic Acid (125mg)', 'Amoxycillin  (500mg)') -> Associated Side Effects: {'Nausea ', 'Diarrhea ', 'Mucocutaneous candidiasis', 'Vomiting '}
Itemset: ('Ambroxol (30mg/5ml)', 'Levosalbutamol (1mg/5ml)') -> Associated Side Effects: {'Stomach pain', 'Bronchitis inflammation', 'Diarrhea ', 'Inflammation of', 'Palpitations ', 'Tremors ', 'Hives ', 'Pharyngitis', 'Rash ', 'Nausea ', 'Allergic reaction', 'Pain ', 'Dizziness ', 'Asthma ', 'Headache ', 'Upset stomach', 'Increased heart', 'Vomiting ', 'Muscle cramp'}
Itemset: ('Ambroxol (30mg/5ml)', 'Guaifenesin (50mg/5ml)') -> Associated Side Effects: {'Palpitations', 'Stomach pain', 'Bronchitis inflammation', 'Diarrhea ', 'Inflammation of', 'Stomach discomfort', 'Palpitations ', 'Tremors ', 'Hives ', 'Pharyngitis', 'Rash ', 'Nausea ', 'Sleepiness', 'Allergic reaction', 'Pain ', 'Dizziness ', 'Asthma ', 'Headache ', 'Upset stomach', 'Increased heart', 'Vomiting ', 'Muscle cramp'}
Itemset: ('Guaifenesin (50mg/5ml)', 'Levosalbutam