<a href="https://colab.research.google.com/github/tejasprasadd/iGnosis-Task-Tejas/blob/main/iGnosis_Task_Tejas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
pip install tabulate pandas



In [24]:
import pandas as pd
from tabulate import tabulate

# Load the transaction and customer datasets from the Colab working directory.
transaction_df = pd.read_csv("/content/transaction_data.csv")
purchase_df = pd.read_csv("/content/purchase_behaviour.csv")

# Parse DATE; values that cannot be parsed become NaT instead of raising.
# NOTE(review): if DATE holds Excel serial day numbers (plain integers),
# pd.to_datetime reads them as nanoseconds since 1970-01-01 rather than as
# days, which would collapse every row into the same month -- confirm the
# column format in the CSV.
transaction_df["DATE"] = pd.to_datetime(transaction_df["DATE"], errors="coerce")

# Remove invalid dates. The explicit .copy() makes the filtered frame
# independent of the original, so adding a column to it below cannot trigger
# SettingWithCopyWarning when some rows were dropped.
transaction_df = transaction_df.dropna(subset=["DATE"]).copy()

# Calendar month (monthly Period) of each transaction.
transaction_df["YEAR_MONTH"] = transaction_df["DATE"].dt.to_period("M")

# Identify top 3 bestselling products, ranked by total sales value.
# Each (PROD_NBR, PROD_NAME) pair gets its summed sales value and summed
# quantity.
product_sales = transaction_df.groupby(["PROD_NBR", "PROD_NAME"]).agg(
    total_sales=("TOT_SALES", "sum"),
    units_sold=("PROD_QTY", "sum")
).reset_index()

top_products = product_sales.sort_values(by="total_sales", ascending=False).head(3)
print("\n" + "="*40)
print(" TOP 3 BESTSELLING PRODUCTS ")
print("="*40)
# Round only the copy that is displayed: summing floats leaves representation
# noise (e.g. 34804.200000000004) and the "pretty" table prints values as-is.
# top_products itself keeps the unrounded totals.
print(tabulate(top_products.round({"total_sales": 2}), headers="keys", tablefmt="pretty"))

# Monthly sales trends for top products.
# Keep only the rows belonging to the three bestsellers, sum sales per
# product and month, then pivot the product names into columns so each row
# is one month.
top_product_ids = top_products["PROD_NBR"].tolist()
is_top_product = transaction_df["PROD_NBR"].isin(top_product_ids)
monthly_trends = (
    transaction_df.loc[is_top_product]
    .groupby(["PROD_NAME", "YEAR_MONTH"])["TOT_SALES"]
    .sum()
    .unstack(0)
)

# Only the 10 most recent months are shown; months in which a product has no
# sales are displayed as 0.
print("\n" + "=" * 40, " MONTHLY SALES TRENDS ", "=" * 40, sep="\n")
recent_months = monthly_trends.tail(10).fillna(0)
print(tabulate(recent_months, headers="keys", tablefmt="pretty"))

# Customer purchase frequency and spend, one row per loyalty card.
# TXN_ID is aggregated with "nunique" rather than "count": rows carry a
# PROD_NBR, so a transaction containing several products presumably spans
# several rows, and "count" would then count it once per row and inflate the
# purchase frequency. When every TXN_ID appears once, both give the same result.
customer_spending = transaction_df.groupby("LYLTY_CARD_NBR").agg(
    transactions=("TXN_ID", "nunique"),
    total_spent=("TOT_SALES", "sum")
).reset_index()

# High-value customers: those whose total spend is strictly above the 80th
# percentile of all customers' total spend (the top 20% of spenders).
spend_cutoff = customer_spending["total_spent"].quantile(0.8)
is_high_value = customer_spending["total_spent"] > spend_cutoff

# Attach customer demographics. The left join keeps every high-value customer
# even when no matching row exists in purchase_df.
# NOTE(review): assumes LYLTY_CARD_NBR is unique in purchase_df; duplicate
# keys there would duplicate customer rows -- confirm.
high_value_customers = pd.merge(
    customer_spending.loc[is_high_value],
    purchase_df,
    on="LYLTY_CARD_NBR",
    how="left",
)

# Segment analysis: for each (LIFESTAGE, PREMIUM_CUSTOMER) combination among
# the high-value customers, count the distinct customers and sum their spend,
# ordered from the highest-spending segment down.
segment_analysis = (
    high_value_customers
    .groupby(["LIFESTAGE", "PREMIUM_CUSTOMER"])
    .agg(
        customer_count=("LYLTY_CARD_NBR", "nunique"),
        total_spend=("total_spent", "sum"),
    )
    .reset_index()
    .sort_values(by="total_spend", ascending=False)
)

# Show the five segments with the largest total spend.
print("\n" + "=" * 40, " SEGMENT ANALYSIS ", "=" * 40, sep="\n")
top_segments = segment_analysis.head(5)
print(tabulate(top_segments, headers="keys", tablefmt="pretty"))

# Product preference by segment, restricted to high-value customers.
# The inner join keeps only transactions whose loyalty card belongs to a
# high-value customer and brings that customer's segment columns onto each row.
segment_transactions = pd.merge(
    transaction_df,
    high_value_customers,
    on="LYLTY_CARD_NBR",
    how="inner",
)

# Total sales per (segment, product), largest first.
product_preference = (
    segment_transactions
    .groupby(["LIFESTAGE", "PREMIUM_CUSTOMER", "PROD_NAME"])
    .agg(total_sales=("TOT_SALES", "sum"))
    .reset_index()
    .sort_values(by="total_sales", ascending=False)
)

# Show the five highest-selling (segment, product) combinations.
print("\n" + "=" * 40, " PRODUCT PREFERENCE BY TOP SEGMENTS ", "=" * 40, sep="\n")
leading_preferences = product_preference.head(5)
print(tabulate(leading_preferences, headers="keys", tablefmt="pretty"))

# Number of different types of customers: distinct loyalty cards in each
# (LIFESTAGE, PREMIUM_CUSTOMER) combination across the whole customer file,
# most populous segment first.
customer_distribution = (
    purchase_df
    .groupby(["LIFESTAGE", "PREMIUM_CUSTOMER"])
    .agg(num_customers=("LYLTY_CARD_NBR", "nunique"))
    .reset_index()
    .sort_values(by="num_customers", ascending=False)
)

print("\n" + "=" * 40, " CUSTOMER TYPE DISTRIBUTION ", "=" * 40, sep="\n")
print(tabulate(customer_distribution, headers="keys", tablefmt="pretty"))



 TOP 3 BESTSELLING PRODUCTS 
+----+----------+------------------------------------------+--------------------+------------+
|    | PROD_NBR |                PROD_NAME                 |    total_sales     | units_sold |
+----+----------+------------------------------------------+--------------------+------------+
| 3  |    4     |     Dorito Corn Chp     Supreme 380g     |      40352.0       |    6509    |
| 13 |    14    |  Smiths Crnkle Chip  Orgnl Big Bag 380g  |      36367.6       |    6164    |
| 15 |    16    | Smiths Crinkle Chips Salt & Vinegar 330g | 34804.200000000004 |    6106    |
+----+----------+------------------------------------------+--------------------+------------+

 MONTHLY SALES TRENDS 
+------------+----------------------------------+------------------------------------------+----------------------------------------+
| YEAR_MONTH | Dorito Corn Chp     Supreme 380g | Smiths Crinkle Chips Salt & Vinegar 330g | Smiths Crnkle Chip  Orgnl Big Bag 380g |
+------------

# **My Hypothesis**

 **1.Family-Oriented Shoppers (Older & Younger Families)**:

These customers likely prefer products that offer value for money, such as larger pack sizes, bulk discounts, and family-friendly options.
Their purchasing patterns indicate frequent repeat purchases of household essentials, snacks for children, and convenient meal solutions.

**2.Premium Customers**:

They are drawn to high-quality or specialty products, such as organic, gourmet, or health-conscious options.
These shoppers are less price-sensitive but value exclusivity, brand reputation, and convenience.

**3.Budget-Conscious Shoppers**:

These customers tend to buy products on promotion, lower-cost alternatives, or store-brand items.
Bulk buying is prevalent among this group to maximize savings over time.

**4.Frequent Shoppers**:

Customers with high transaction frequency are drawn to daily-use consumables like beverages, snacks, and personal care items.
They also take advantage of loyalty discounts and promotional offers, making them more engaged in the store’s marketing strategies.