In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# --- Load the raw sales records ---
data = pd.read_csv('sales_data.csv')

# --- Revenue per category, largest first ---
revenue_per_category = data.groupby('Category')['Sales Amount'].sum()
category_sales = revenue_per_category.sort_values(ascending=False)
print("Total Sales by Category:\n", category_sales)

# --- Five highest-grossing products ---
revenue_per_product = data.groupby('Product Name')['Sales Amount'].sum()
top_products = revenue_per_product.nlargest(5)
print("Top 5 Products by Revenue:\n", top_products)

# --- Daily revenue trend ---
# Convert 'Date' to datetime before grouping so the plot gets a real time axis.
data['Date'] = pd.to_datetime(data['Date'])
daily_sales = data.groupby('Date')['Sales Amount'].sum()

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(daily_sales, marker='o')
ax.set_title('Daily Sales Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Sales Amount')
ax.grid()
plt.show()


In [None]:
Sales_Data_Analysis/
├── data/
│   └── sales_data.csv
├── notebooks/
│   └── analysis.ipynb
├── reports/
│   └── Sales_Analysis_Report.pdf
├── visuals/
│   ├── sales_trend.png
│   └── category_sales.png
├── README.md
└── requirements.txt


In [None]:
import pandas as pd
import numpy as np
from faker import Faker

# Seed BOTH random sources so the synthetic dataset is reproducible.
# np.random.seed only covers NumPy draws; Faker owns a separate generator, so
# without Faker.seed the product names, dates and cities change on every run.
SEED = 42
fake = Faker()
Faker.seed(SEED)
np.random.seed(SEED)

# Generate sample data for a commercial store.
# NOTE: the date window is relative to the day the cell is run ("-1y" .. "today"),
# so the window itself still shifts over calendar time even with a fixed seed.
# NOTE(review): 'Category' is drawn independently of 'Product Name', so a given
# product can appear under several categories — confirm this is intended.
num_entries = 500
data = {
    "Date": [fake.date_between(start_date="-1y", end_date="today") for _ in range(num_entries)],
    "Product Name": [fake.word(ext_word_list=["Laptop", "Smartphone", "Tablet", "Headphones", "Camera", "Printer"]) for _ in range(num_entries)],
    "Category": [np.random.choice(["Electronics", "Accessories", "Photography"]) for _ in range(num_entries)],
    # randint's upper bound is exclusive: amounts are 50..1499, quantities 1..9.
    "Sales Amount": np.random.randint(50, 1500, size=num_entries),
    "Quantity Sold": np.random.randint(1, 10, size=num_entries),
    "Discount": np.random.choice([5, 10, 15, 20], size=num_entries),
    "Profit": np.random.randint(10, 500, size=num_entries),
    "Store Location": [fake.city() for _ in range(num_entries)]
}

# Create DataFrame (one row per simulated sale)
sales_data = pd.DataFrame(data)

# Save the dataset to CSV for the project; index=False keeps the row index out of the file.
file_path = '/mnt/data/sales_data.csv'
sales_data.to_csv(file_path, index=False)
file_path


In [None]:
import matplotlib.pyplot as plt

# Read the generated dataset back in and turn 'Date' into a datetime column.
data = pd.read_csv('/mnt/data/sales_data.csv')
data['Date'] = pd.to_datetime(data['Date'])


def _revenue_by(column):
    """Return the summed 'Sales Amount' for each distinct value of `column`."""
    return data.groupby(column)['Sales Amount'].sum()


# The three aggregates used by the figures and the report.
category_sales = _revenue_by('Category')
top_products = _revenue_by('Product Name').nlargest(5)
daily_sales = _revenue_by('Date')

# Figure 1: daily sales trend, written to disk and then closed.
trend_fig, trend_ax = plt.subplots(figsize=(10, 6))
trend_ax.plot(daily_sales, marker='o', color='blue')
trend_ax.set_title('Daily Sales Trend', fontsize=16)
trend_ax.set_xlabel('Date', fontsize=12)
trend_ax.set_ylabel('Sales Amount', fontsize=12)
trend_ax.grid()
trend_fig.tight_layout()
sales_trend_path = '/mnt/data/sales_trend.png'
trend_fig.savefig(sales_trend_path)
plt.close(trend_fig)

# Figure 2: total sales per category as a bar chart, written to disk and then closed.
bar_fig, bar_ax = plt.subplots(figsize=(8, 5))
category_sales.plot(kind='bar', color='skyblue', edgecolor='black', ax=bar_ax)
bar_ax.set_title('Total Sales by Category', fontsize=16)
bar_ax.set_xlabel('Category', fontsize=12)
bar_ax.set_ylabel('Sales Amount', fontsize=12)
for tick_label in bar_ax.get_xticklabels():
    tick_label.set_rotation(45)
bar_fig.tight_layout()
category_sales_path = '/mnt/data/category_sales.png'
bar_fig.savefig(category_sales_path)
plt.close(bar_fig)

sales_trend_path, category_sales_path


In [None]:
from fpdf import FPDF

# Create a PDF report
class PDFReport(FPDF):
    """FPDF document with a fixed report title in the header and a page number in the footer."""

    def header(self):
        """Write the report title in bold 12pt Arial, centred, followed by a line break."""
        self.set_font('Arial', 'B', 12)
        self.cell(
            0, 10, 'Sales Data Analysis Report',
            border=False, ln=True, align='C',
        )

    def footer(self):
        """Write 'Page N' in italic 8pt Arial, centred, 15 units up from the page bottom."""
        # A negative argument to set_y positions relative to the bottom of the page.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, align='C')

# Assemble the text fragments up front so the drawing calls below stay short.
intro_text = (
    "This report summarizes the sales performance of a commercial store based on simulated data. "
    "Key metrics such as total sales by category, top-performing products, and sales trends over time "
    "have been analyzed to provide actionable insights."
)
insights_text = (
    f"- Total Sales by Category:\n{category_sales.to_string()}\n\n"
    f"- Top 5 Products by Revenue:\n{top_products.to_string()}"
)

# Page 1: introduction followed by the key-insight tables.
pdf = PDFReport()
pdf.add_page()
pdf.set_font('Arial', '', 12)
pdf.multi_cell(0, 10, intro_text)

pdf.set_font('Arial', 'B', 12)
pdf.cell(0, 10, "Key Insights:", ln=True)
pdf.set_font('Arial', '', 12)
pdf.multi_cell(0, 10, insights_text)

# Page 2: both figures, each placed at an explicit x/y position.
pdf.add_page()
pdf.cell(0, 10, "Visualizations:", ln=True)
pdf.image('/mnt/data/sales_trend.png', x=10, y=30, w=180)
pdf.ln(100)
pdf.image('/mnt/data/category_sales.png', x=10, y=140, w=180)

# Write the finished document to disk and echo its location.
report_path = '/mnt/data/Sales_Analysis_Report.pdf'
pdf.output(report_path)
report_path


In [None]:
import shutil

# `os` is imported here because the cell previously reached it through
# `shutil.os`, an internal attribute of shutil rather than a public API.
import os

# Define project folder structure
project_dir = '/mnt/data/Sales_Data_Analysis_Project'
folders = ['data', 'notebooks', 'reports', 'visuals']

# Create project folders; exist_ok=True keeps the cell safe to re-run.
for folder in folders:
    os.makedirs(os.path.join(project_dir, folder), exist_ok=True)

# Copy (the originals stay in place) each artefact into its folder.
shutil.copy('/mnt/data/sales_data.csv', f"{project_dir}/data/")
shutil.copy(sales_trend_path, f"{project_dir}/visuals/")
shutil.copy(category_sales_path, f"{project_dir}/visuals/")
shutil.copy(report_path, f"{project_dir}/reports/")

# Create a README file
readme_content = """
# Sales Data Analysis Project

## Overview
This project analyzes sales data from a commercial store to uncover key insights, trends, and recommendations.

## Files
- `data/sales_data.csv`: The dataset used for the analysis.
- `visuals/sales_trend.png`: Daily sales trend plot.
- `visuals/category_sales.png`: Total sales by category plot.
- `reports/Sales_Analysis_Report.pdf`: Summary report of the analysis.

## How to Use
1. Load the dataset (`data/sales_data.csv`) into a Python environment.
2. Review the analysis scripts in `notebooks/` for detailed data exploration and visualization.
3. Refer to the report in `reports/` for insights and recommendations.

## Tools Used
- Python (Pandas, Matplotlib, Seaborn)
- FPDF for report generation

---
"""

# Explicit encoding so the README bytes do not depend on the platform default.
with open(f"{project_dir}/README.md", "w", encoding="utf-8") as readme_file:
    readme_file.write(readme_content)

# Zip the project folder. make_archive appends '.zip' to the base name and
# returns the path it actually wrote, so take zip_path from the return value
# instead of duplicating the name by hand.
zip_path = shutil.make_archive(project_dir, 'zip', project_dir)
zip_path
