In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from fpdf import FPDF

# Load datasets
# Read the three source tables in a single pass; the order of the paths below
# determines which file is bound to which name.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/content/Customers.csv",
        "/content/Products.csv",
        "/content/Transactions.csv",
    )
)

# Check for missing values and data types
# Each report is prefixed with the dataset's name so the three otherwise
# identical sections can be told apart in the output.
for dataset_name, frame in (
    ("Customers", customers),
    ("Products", products),
    ("Transactions", transactions),
):
    print(f"\n=== {dataset_name} ===")
    print("Missing values:\n", frame.isnull().sum())
    print("\nData types:\n", frame.dtypes)

# Merge datasets
# Attach customer attributes to each transaction first, then product
# attributes. Both joins use pandas' default (inner) join on the shared ID.
transactions_with_customers = pd.merge(transactions, customers, on='CustomerID')
merged = pd.merge(transactions_with_customers, products, on='ProductID')

# Customer distribution by region
# Regions are listed from most to fewest customers.
region_order = customers['Region'].value_counts().index
plt.figure(figsize=(8, 5))
# `palette` is paired with an explicit `hue` because seaborn 0.13 deprecated
# passing `palette` without `hue`. `hue_order` mirrors `order` so each bar
# keeps the palette colour for its position, and `dodge=False` keeps one
# full-width bar per region.
region_ax = sns.countplot(
    data=customers,
    y='Region',
    hue='Region',
    order=region_order,
    hue_order=region_order,
    palette='viridis',
    dodge=False,
)
# The hue only repeats the axis labels, so remove the legend if one was drawn.
region_legend = region_ax.get_legend()
if region_legend is not None:
    region_legend.remove()
plt.title("Customer Distribution by Region")
plt.xlabel("Count")
plt.ylabel("Region")
plt.savefig("/content/customer_region.png")
plt.show()

# Top-selling products
# NOTE(review): this ranks products by the number of merged transaction rows,
# not by units sold — confirm that is the intended meaning of "top-selling".
top_products = merged['ProductName'].value_counts().head(10)
top_products.plot(kind='bar', figsize=(10, 5), color='skyblue')
plt.title("Top 10 Selling Products")
plt.xlabel("Product Name")
plt.ylabel("Count")
# Anchor the rotated labels at their right end so each one lines up with its bar.
plt.xticks(rotation=45, ha='right')
# A tight bounding box keeps long rotated labels from being cut off in the PNG.
plt.savefig("/content/top_products.png", bbox_inches='tight')
plt.show()

# Transactions over time
# Parse the date column in place so the .dt accessor can be used on it.
merged['TransactionDate'] = pd.to_datetime(merged['TransactionDate'])
# Bucket every transaction into its calendar month and count rows per bucket.
transactions_over_time = merged.groupby(merged['TransactionDate'].dt.to_period("M")).size()
transactions_over_time.plot(kind='line', figsize=(12, 5), marker='o', color='red')
plt.title("Transactions Trend Over Time")
plt.xlabel("Month")
plt.ylabel("Number of Transactions")
# Request the grid explicitly: a bare plt.grid() toggles visibility, which
# would hide the grid if the active style already enables it.
plt.grid(True)
plt.savefig("/content/transactions_trend.png")
plt.show()

# Customer spending pattern
# Sum TotalValue per customer, rank from highest to lowest, keep the top ten.
spend_by_customer = merged.groupby('CustomerID')['TotalValue'].sum()
customer_spending = spend_by_customer.sort_values(ascending=False).head(10)
spending_ax = customer_spending.plot(kind='bar', figsize=(10, 5), color='green')
spending_ax.set_title("Top 10 Customers by Total Spending")
spending_ax.set_xlabel("Customer ID")
spending_ax.set_ylabel("Total Spending")
plt.xticks(rotation=45)
plt.savefig("/content/top_customers.png")
plt.show()

# Generate PDF Report
# Builds a two-part PDF: a page of written insights followed by the chart
# images saved under /content.
pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()
pdf.set_font("Arial", size=12)

# Adding insights
# NOTE: these statements are fixed text; nothing in this section derives them
# from the data frames.
insights = [
    "1. The majority of customers come from a few key regions, indicating potential targeted marketing opportunities.",
    "2. A small set of products account for the majority of sales, suggesting a focus on these high-performing items.",
    "3. Transaction volume has seasonal trends, which can inform inventory planning and promotional activities.",
    "4. A few customers contribute disproportionately to revenue, highlighting the need for VIP customer engagement strategies.",
    "5. Customers who buy from specific product categories tend to make repeat purchases, which can guide personalized marketing.",
]

# A cell width of 0 stretches the cell to the right margin, so align='C'
# centres the heading within the printable area instead of overrunning it.
pdf.cell(0, 10, "Business Insights Report", ln=True, align='C')
pdf.ln(10)
for insight in insights:
    # multi_cell wraps each insight across as many lines as it needs.
    pdf.multi_cell(0, 10, insight)
    pdf.ln(5)

# Add Images
pdf.add_page()
pdf.cell(0, 10, "EDA Visualizations", ln=True, align='C')
pdf.ln(10)

# No y position is given, so each image is placed at the current cursor
# position, one after another.
for img in ["customer_region.png", "top_products.png", "transactions_trend.png", "top_customers.png"]:
    pdf.image(f"/content/{img}", x=10, w=180)
    pdf.ln(10)

pdf.output("/content/EDA_Report.pdf")

print("EDA completed. Report saved as EDA_Report.pdf")
