In [1]:
# ==========================================
# Filter Online Retail II (InvoiceNo Genap) & Analisis Apriori + Visualisasi
# ==========================================

# 1. Install library yang diperlukan
!pip install mlxtend openpyxl matplotlib networkx

# 2. Import library
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
from google.colab import files

# 3. Upload file
print("Silakan upload file 'online_retail_II.xlsx'")
uploaded = files.upload()

# 4. Baca file Excel
file_path = list(uploaded.keys())[0]
print("Membaca file...")
df = pd.read_excel(file_path)

# 5. Tampilkan nama kolom asli
print("\nNama kolom asli:")
print(df.columns.tolist())

# 6. Pastikan kolom InvoiceNo bertipe string
df['InvoiceNo'] = df['InvoiceNo'].astype(str)

# 7. Filter InvoiceNo genap
df_genap = df[df['InvoiceNo'].str[-1].astype(int) % 2 == 0]

# 8. Simpan dataset hasil filter ke CSV
output_path = "online_retail_II_genap.csv"
df_genap.to_csv(output_path, index=False)
files.download(output_path)
print(f"\nDataset hasil filter disimpan ke: {output_path}")
print(f"Jumlah baris: {len(df_genap)}")

# 9. Informasi algoritma & tujuan
print("\n=== Algoritma & Tujuan Analisis ===")
print("Algoritma: Market Basket Analysis (Apriori Algorithm)")
print("Tujuan: Menemukan aturan asosiasi (association rules) antara produk yang sering dibeli bersamaan.")
print("Contoh output: Jika membeli A, kemungkinan besar akan membeli B.")

# 10. Analisis Apriori
from mlxtend.frequent_patterns import apriori, association_rules

# Membuat tabel basket (InvoiceNo x Product)
basket = (df_genap
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum().unstack().fillna(0))
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

# Jalankan apriori
frequent_itemsets = apriori(basket, min_support=0.02, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# 11. Visualisasi Top 10 aturan asosiasi berdasarkan lift (Bar Chart)
top_rules = rules.sort_values(by='lift', ascending=False).head(10)

plt.figure(figsize=(10, 6))
plt.barh(
    range(len(top_rules)),
    top_rules['lift'],
    color='skyblue'
)
plt.yticks(
    range(len(top_rules)),
    [f"{list(a)[0]} → {list(c)[0]}" for a, c in zip(top_rules['antecedents'], top_rules['consequents'])]
)
plt.xlabel('Lift')
plt.title('Top 10 Aturan Asosiasi Berdasarkan Lift')
plt.gca().invert_yaxis()
plt.show()

# 12. Visualisasi Network Graph
G = nx.DiGraph()

for idx, row in top_rules.iterrows():
    G.add_node(list(row['antecedents'])[0], color='lightblue')
    G.add_node(list(row['consequents'])[0], color='lightgreen')
    G.add_edge(list(row['antecedents'])[0], list(row['consequents'])[0], weight=row['lift'])

plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G, k=0.5, iterations=50)
colors = ['lightblue' if node in [list(r['antecedents'])[0] for _, r in top_rules.iterrows()] else 'lightgreen' for node in G.nodes()]

nx.draw(
    G, pos,
    with_labels=True,
    node_size=2500,
    node_color=colors,
    font_size=10,
    font_weight='bold',
    edge_color='gray',
    arrowsize=20
)

# Tambahkan label untuk edge (lift)
edge_labels = {(list(r['antecedents'])[0], list(r['consequents'])[0]): f"{r['lift']:.2f}" for _, r in top_rules.iterrows()}
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_color='red')

plt.title("Network Graph Aturan Asosiasi (Top 10 Lift)")
plt.show()


Silakan upload file 'online_retail_II.xlsx'


Saving online_retail_II_light.xlsx to online_retail_II_light.xlsx
Membaca file...

Nama kolom asli:
['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'Price', 'Customer ID', 'Country']


KeyError: 'InvoiceNo'

In [None]:
from google.colab import drive
drive.mount('/content/drive')