Lydia Myla
00001612801
CSCI 185
HW 1-2: Programming

In [6]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from tabulate import tabulate

order_items_df = pd.read_csv('/olist_order_items_dataset.csv')
products_df = pd.read_csv('/olist_products_dataset.csv')
category_translation_df = pd.read_csv('/product_category_name_translation.csv')

order_items_with_products = order_items_df.merge(products_df, on='product_id', how='left')
order_items_with_products = order_items_with_products.merge(category_translation_df,
                                                           on='product_category_name',
                                                           how='left')

order_items_with_products = order_items_with_products.dropna(subset=['product_category_name_english'])

transactions = order_items_with_products.groupby('order_id')['product_category_name_english'].apply(list).values

te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
transaction_df = pd.DataFrame(te_ary, columns=te.columns_)

minsup_values = [0.00001, 0.00002, 0.000015]
minconf_values = [0.05, 0.1, 0.5]

def find_top_rules(minsup, minconf):
    frequent_itemsets = apriori(transaction_df, min_support=minsup, use_colnames=True)
    if frequent_itemsets.empty:
        return pd.DataFrame()

    num_itemsets = len(frequent_itemsets)
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=minconf, num_itemsets=num_itemsets)

    if not rules.empty:
        rules = rules.sort_values(by='lift', ascending=False).head(5)
        rules.reset_index(drop=True, inplace=True)
        rules.index += 1
        rules.index.name = 'Rule Number'
        rules['antecedents'] = rules['antecedents'].apply(lambda x: ', '.join(list(x)))
        rules['consequents'] = rules['consequents'].apply(lambda x: ', '.join(list(x)))
        return rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
    else:
        return pd.DataFrame()

for minsup in minsup_values:
    for minconf in minconf_values:
        print(f"\nTop 5 Rules: minsup={minsup} and minconf={minconf}:")
        top_rules = find_top_rules(minsup, minconf)
        if not top_rules.empty:
            table = tabulate(top_rules, headers='keys', tablefmt='fancy_grid')
            print(table)
        else:
            print("No rules found for these parameters.")

  and should_run_async(code)



Top 5 Rules: minsup=1e-05 and minconf=0.05:
╒═══════════════╤════════════════════════════════╤═════════════════════════╤═════════════╤══════════════╤═════════╕
│   Rule Number │ antecedents                    │ consequents             │     support │   confidence │    lift │
╞═══════════════╪════════════════════════════════╪═════════════════════════╪═════════════╪══════════════╪═════════╡
│             1 │ market_place                   │ perfume, bed_bath_table │ 1.02821e-05 │   0.00357143 │ 347.343 │
├───────────────┼────────────────────────────────┼─────────────────────────┼─────────────┼──────────────┼─────────┤
│             2 │ perfume, bed_bath_table        │ market_place            │ 1.02821e-05 │   1          │ 347.343 │
├───────────────┼────────────────────────────────┼─────────────────────────┼─────────────┼──────────────┼─────────┤
│             3 │ cool_stuff, telephony          │ cine_photo              │ 1.02821e-05 │   0.166667   │ 249.374 │
├───────────────┼──────────