In [None]:
# Phase 4: Wholesaler vs. Retail Customers

# In this section, we test the "Wholesaler Hypothesis".  
# The business suspects that some customers are "wholesalers" who purchase in bulk, while others are "regular retail buyers".

# We will:
# 1. Calculate each customer's total spending.
# 2. Plot spending distribution using a histogram and boxplot.
# 3. Identify and separate "low spenders (retail customers)" and "high spenders (wholesalers)".
# 4. Suggest different business strategies for these groups.


In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import csv

# Load the cleaned transaction data.
# The path is relative to the notebook's working directory; edit DATA_PATH if the
# file lives somewhere else.
DATA_PATH = 'online_retail_cleaned.csv'
try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
    # Same exception type as before, but with a message that says what to do about it.
    raise FileNotFoundError(
        f"Cleaned dataset '{DATA_PATH}' was not found. Generate it first "
        "(presumably in the earlier data-cleaning phase) or point DATA_PATH at its location."
    ) from exc

# Fail early, with a readable message, if the columns this analysis needs are absent.
missing_columns = {'Customer ID', 'TotalPrice'} - set(df.columns)
if missing_columns:
    raise KeyError(f"'{DATA_PATH}' is missing required column(s): {sorted(missing_columns)}")

# Total spending per customer: one row per 'Customer ID', with the summed
# 'TotalPrice' exposed under the clearer name 'TotalSpending'.
# Built as a single chain (no inplace rename) so re-running the cell is idempotent.
customer_spending = (
    df.groupby('Customer ID')['TotalPrice']
    .sum()
    .rename('TotalSpending')
    .reset_index()
)

# Quick check. A bare `.head()` in the middle of a cell is evaluated and thrown away,
# so the preview has to be printed explicitly to actually appear.
print(customer_spending.head())

# Shared look for both figures.
sns.set_style("whitegrid")

# Figure 1: histogram (with KDE overlay) of total spending per customer.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=customer_spending,
    x='TotalSpending',
    bins=100,
    kde=True,
    color="#4C72B0",
    ax=hist_ax,
)
hist_ax.set_title("Customer Spending Distribution", fontsize=14, weight='bold')
hist_ax.set_xlabel("Total Spending (£)")
hist_ax.set_ylabel("Number of Customers")
plt.show()

# Figure 2: horizontal boxplot of the same values, to make the outliers visible.
box_fig, box_ax = plt.subplots(figsize=(10, 3))
sns.boxplot(data=customer_spending, x='TotalSpending', color="#DD8452", ax=box_ax)
box_ax.set_title("Customer Spending Boxplot", fontsize=14, weight='bold')
box_ax.set_xlabel("Total Spending (£)")
plt.show()

# Customers at or above this quantile of total spending are labelled wholesalers.
# 0.90 (i.e. the top 10% of spenders) is a working assumption, not a value derived
# from the data; change it here to test a different cutoff.
WHOLESALER_QUANTILE = 0.90
threshold = customer_spending['TotalSpending'].quantile(WHOLESALER_QUANTILE)

# Label every customer with one vectorised comparison instead of a per-row lambda.
# The comparison is inclusive (>=), so customers tied exactly at the threshold are
# counted as wholesalers and the top group can be slightly larger than 10%.
is_wholesaler = customer_spending['TotalSpending'] >= threshold
customer_spending['CustomerType'] = is_wholesaler.map({True: 'Wholesaler', False: 'Retail'})

# Group sizes; .get(..., 0) covers the case where one of the labels never occurs.
counts = customer_spending['CustomerType'].value_counts()
total_customers = len(customer_spending)
retail_count = counts.get('Retail', 0)
wholesale_count = counts.get('Wholesaler', 0)

# ----- Print Summary -----
# Report measured values rather than hard-coded ones: the real customer split
# (ties at the threshold can move it away from exactly 90/10), the skewness of the
# spending distribution, and the share of revenue that comes from wholesalers.
spending = customer_spending['TotalSpending']
total_revenue = spending.sum()
wholesaler_revenue = spending[customer_spending['CustomerType'] == 'Wholesaler'].sum()

# Guard the ratios so an empty frame or a non-positive revenue total cannot raise.
retail_pct = 100 * retail_count / total_customers if total_customers else 0.0
wholesale_pct = 100 * wholesale_count / total_customers if total_customers else 0.0
wholesaler_revenue_pct = (
    100 * wholesaler_revenue / total_revenue if total_revenue > 0 else float('nan')
)
spending_skew = spending.skew()  # NaN when there are fewer than three customers

print("Findings from Wholesaler Hypothesis\n")
print(f"Total Customers Analyzed: {total_customers}")
print(f"Retail Customers ({retail_pct:.1f}%): {retail_count}")
print(f"Wholesalers ({wholesale_pct:.1f}%): {wholesale_count}\n")

print("Observations")
print(f"- Spending skewness is {spending_skew:.2f} (positive values indicate a right-skewed distribution).")
print("- Majority of customers are low-spending Retail buyers.")
print("- A small but significant group of high-spending customers exists (Wholesalers).")
print(f"- Wholesalers ({wholesale_pct:.1f}% of customers) contribute {wholesaler_revenue_pct:.1f}% of total revenue.\n")

print("Business Strategy Recommendations")
print("1. Wholesalers (High-Value Segment)")
print("   - Provide bulk purchase discounts and tiered loyalty rewards.")
print("   - Offer priority services such as faster delivery and dedicated account managers.")
print("   - Focus on long-term contracts and exclusive product offerings to strengthen relationships.\n")

print("2. Retail Customers (Mass Market Segment)")
print("   - Implement seasonal promotions and time-limited discounts to encourage frequent purchases.")
print("   - Use personalized marketing campaigns (email/SMS) based on purchase history.")
print("   - Promote bundled deals (e.g., Buy More, Save More) to increase basket size.")


FileNotFoundError: [Errno 2] No such file or directory: 'online_retail_cleaned.csv'