<a href="https://colab.research.google.com/github/mihir0804/TATA_Vertual_Internship/blob/main/TATA_Vertual_Internship.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

# Load the raw transactions and clean them in a single chained pass:
#   1. discard rows that have no CustomerID,
#   2. add a Revenue column (Quantity * UnitPrice),
#   3. make sure InvoiceDate is a datetime column.
df = (
    pd.read_excel('/content/Online Retail.xlsx')
    .dropna(subset=['CustomerID'])
    .assign(
        Revenue=lambda frame: frame['Quantity'] * frame['UnitPrice'],
        InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']),
    )
)

In [None]:
# Build an invoice x product matrix for one country (United Kingdom) to
# reduce noise: each cell holds the summed Quantity of that product on that
# invoice, and 0 where the product does not appear on the invoice.
basket = df[df['Country'] == "United Kingdom"].pivot_table(
    index='InvoiceNo',
    columns='Description',
    values='Quantity',
    aggfunc='sum'
).fillna(0)

# Flag every cell as 1 (purchased) or 0 (not purchased).
# A vectorised comparison replaces the element-wise `applymap` lambda:
# `DataFrame.applymap` is deprecated since pandas 2.1 and makes one Python
# call per cell, while this form yields the same 0/1 integers in one step.
basket_binary = (basket > 0).astype(int)

In [None]:
from mlxtend.frequent_patterns import apriori, association_rules

# Keep only UK purchase lines.
# Rows with a non-positive Quantity are not purchases (presumably returns or
# cancellations -- verify against the dataset description). If they are left
# in, an invoice consisting solely of such rows turns into an all-False basket
# that still counts as a transaction, which lowers every support value.
uk_purchases = df[(df['Country'] == "United Kingdom") & (df['Quantity'] > 0)]

# Invoice x product matrix of summed quantities (0 where a product is absent).
basket = uk_purchases.pivot_table(
    index='InvoiceNo',
    columns='Description',
    values='Quantity',
    aggfunc='sum'
).fillna(0)

# Convert to boolean: True where the invoice contains the product.
basket_binary = basket > 0

# Run Apriori: itemsets present in at least 1% of baskets, then the rules
# derived from them whose lift is at least 1.0.
frequent_items = apriori(basket_binary, min_support=0.01, use_colnames=True)
rules = association_rules(frequent_items, metric="lift", min_threshold=1.0)

# Show the ten rules with the highest lift.
rules.sort_values('lift', ascending=False).head(10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Take top 10 rules sorted by lift
top_rules = rules.sort_values('lift', ascending=False).head(10)

# One label per rule, written as "antecedents -> consequents".
# Labelling by antecedent alone gives several rules the same category, and
# seaborn then merges them into one aggregated bar with an error bar instead
# of drawing one bar per rule. Items are sorted so the label text does not
# depend on frozenset iteration order, which can change between kernel runs.
rule_labels = [
    f"{', '.join(sorted(lhs))} -> {', '.join(sorted(rhs))}"
    for lhs, rhs in zip(top_rules['antecedents'], top_rules['consequents'])
]

# Plot. Lift is passed as a plain array so it pairs with the label list by
# position rather than by the DataFrame's (non-contiguous) index.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=top_rules['lift'].to_numpy(), y=rule_labels, ax=ax)
ax.set_title('Top 10 Association Rules by Lift')
ax.set_xlabel('Lift')
ax.set_ylabel('Rule (antecedents -> consequents)')
ax.grid(axis='x')
fig.tight_layout()
plt.show()

In [None]:
# Bubble chart of every rule: position shows support vs. confidence, while
# both the bubble size and its colour encode lift.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=rules,
    x='support',
    y='confidence',
    hue='lift',
    size='lift',
    sizes=(20, 200),
    palette='cool',
    ax=ax,
)
ax.set_title('Association Rules: Support vs Confidence (Bubble = Lift)')
ax.set_xlabel('Support')
ax.set_ylabel('Confidence')
# Anchor the legend outside the axes so it does not cover the points.
ax.legend(title='Lift', bbox_to_anchor=(1.05, 1), loc='upper left')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Convert frozensets to strings for saving.
# Items are sorted before joining: the iteration order of a frozenset of
# strings can change between kernel restarts, so an unsorted join would write
# a different CSV on each run.
rules_export = rules.copy()
for itemset_col in ('antecedents', 'consequents'):
    rules_export[itemset_col] = rules_export[itemset_col].apply(
        lambda items: ', '.join(sorted(items))
    )

# Export
rules_export.to_csv('association_rules.csv', index=False)

# Download from Colab. The google.colab package is only importable inside a
# Colab runtime; elsewhere the CSV is simply left in the working directory
# instead of the cell failing after the file has already been written.
try:
    from google.colab import files
except ImportError:
    print("google.colab is unavailable; 'association_rules.csv' was saved locally.")
else:
    files.download('association_rules.csv')