In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [7]:
# Load datasets
C = pd.read_csv("Customers.csv")
P = pd.read_csv("Products.csv")
T = pd.read_csv("Transactions.csv")

In [9]:
# Merge datasets
df = T.merge(C, on="CustomerID", how="left").merge(P, on="ProductID", how="left")

In [11]:
# Total spending per customer
customer_value = df.groupby(["CustomerID", "Region"])["TotalValue"].sum().reset_index()
customer_value = customer_value.sort_values(by="TotalValue", ascending=False)

In [13]:
# Revenue by region
revenue_by_region = df.groupby("Region")["TotalValue"].sum().sort_values(ascending=False)

In [15]:
# Revenue by product category
revenue_by_category = df.groupby("Category")["TotalValue"].sum().sort_values(ascending=False)

In [17]:
# Sales by day of week
df["TransactionDate"] = pd.to_datetime(df["TransactionDate"])
df["DayOfWeek"] = df["TransactionDate"].dt.day_name()
daywise_sales = df.groupby("DayOfWeek")["TotalValue"].sum().reindex(
    ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
).reset_index()

In [19]:
# Plot: Sales trend by day of week
plt.figure(figsize=(10, 5))
x = daywise_sales["DayOfWeek"]
y = daywise_sales["TotalValue"]
colors = np.where(y.diff() > 0, "green", "red")
for i in range(1, len(x)):
    plt.fill_between([x.iloc[i-1], x.iloc[i]], [y.iloc[i-1], y.iloc[i]], color=colors[i], alpha=0.2)
sns.lineplot(data=daywise_sales, x="DayOfWeek", y="TotalValue", marker="o", color="black")
plt.xlabel("Day of Week")
plt.ylabel("Total Sales Value")
plt.title("Sales Trend by Day of Week")
plt.savefig("sales_by_day.png")
plt.close()

In [21]:
# Plot: Revenue by region
plt.figure(figsize=(8, 6))
sns.barplot(x=revenue_by_region.values, y=revenue_by_region.index, palette="Blues_d")
plt.xlabel("Total Revenue")
plt.ylabel("Region")
plt.title("Revenue by Region")
plt.savefig("revenue_by_region.png")
plt.close()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=revenue_by_region.values, y=revenue_by_region.index, palette="Blues_d")


In [23]:
# Plot: Revenue by category
plt.figure(figsize=(8, 6))
sns.barplot(x=revenue_by_category.values, y=revenue_by_category.index, palette="Greens_d")
plt.xlabel("Total Revenue")
plt.ylabel("Category")
plt.title("Revenue by Product Category")
plt.savefig("revenue_by_category.png")
plt.close()


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=revenue_by_category.values, y=revenue_by_category.index, palette="Greens_d")


In [25]:
# Spending by customer tenure
df["SignupDate"] = pd.to_datetime(df["SignupDate"])
df["TenureDays"] = (df["TransactionDate"] - df["SignupDate"]).dt.days
tenure_spending = df.groupby("TenureDays")["TotalValue"].mean().reset_index()

In [27]:
# Plot: Average spending by tenure
plt.figure(figsize=(10, 5))
sns.scatterplot(data=tenure_spending, x="TenureDays", y="TotalValue", color="purple")
plt.xlabel("Tenure (Days)")
plt.ylabel("Average Spending")
plt.title("Average Spending by Customer Tenure")
plt.savefig("spending_by_tenure.png")
plt.close()