# E-commerce Sales Analysis Pipeline
**Objective:** Clean and analyze a large-scale retail dataset to extract business KPIs.<br>
**Author:** [Rafael Campos Andrés]<br>
**Date:** January 2026

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from pathlib import Path

In [None]:
# Loading raw data
# The CSV is looked up one directory above the current working directory,
# under data/raw_sales_data.csv.
base_dir = Path.cwd().parent
data_path = base_dir / "data" / "raw_sales_data.csv"
if data_path.exists():
    try:
        df = pd.read_csv(data_path)
        # Convert 'date' to datetime at load time so date-based operations work on it.
        df['date'] = pd.to_datetime(df['date'])
    except pd.errors.EmptyDataError:
        # Must come before ValueError below: EmptyDataError is a ValueError subclass.
        print(f"File: {data_path} is empty")
    except (OSError, KeyError, TypeError, ValueError) as exc:
        # The file exists at this point, so report the actual cause (unreadable
        # file, malformed CSV, missing 'date' column, unparseable dates) instead
        # of claiming it was not found. Interrupts are no longer swallowed.
        print(f"File: {data_path} could not be loaded: {exc!r}")
    else:
        print(f"File: {data_path} uploaded succesfully")
else:
    print(f"File: {data_path} could not be found")

In [None]:
# Cleaning category names
# Step 1: drop underscores, trim surrounding whitespace, and capitalize
# (first character upper-case, the rest lower-case).
normalized = df['category'].str.replace('_', '')
normalized = normalized.str.strip()
normalized = normalized.str.capitalize()

# Step 2: any label containing 'Elec' is collapsed into 'Electronics';
# missing labels never match (na=False) and are left as they are.
condition = normalized.str.contains('Elec', na=False)
df['category'] = normalized.mask(condition, 'Electronics')

In [None]:
# Fill na
# Missing customer e-mails are replaced with a fixed placeholder address.
df['customer_email'] = df['customer_email'].fillna('user@gmail.com')

# Impute ONLY the missing unit prices, using the mean price of the same product.
# Assigning the transform result directly would overwrite every observed price
# with the product mean, which is not a fill and discards real price data.
product_mean_price = df.groupby('product')['unit_price'].transform('mean')
df['unit_price'] = df['unit_price'].fillna(product_mean_price)

In [None]:
# Revenue analysis: mean row revenue per weekday and per calendar month,
# each plotted against the mean of its own group averages.
df['revenue'] = df['quantity'] * df['unit_price']

# Integer -> short label lookups (weekday 0 = Monday, month 1 = January).
days_map = dict(enumerate(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']))
months_map = dict(enumerate(
    ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'],
    start=1,
))


def _plot_average_revenue(series, mean_value, title, figsize):
    """Line chart of `series` with a dashed red reference line at `mean_value`."""
    plt.figure(figsize=figsize)
    sns.lineplot(x=series.index, y=series.values, marker='o')
    plt.axhline(mean_value, color='red', linestyle='--', label=f'Mean: {mean_value:.2f}')
    plt.title(title)
    plt.legend()
    plt.show()


# Work on a copy so the helper columns never leak into df.
temp = df[['date', 'revenue']].copy()
temp = temp.assign(
    day=temp['date'].dt.day_of_week,
    month=temp['date'].dt.month,
)

# Weekday aggregate: sort on the numeric key first, then swap in the labels.
day_of_week_analysis = temp.groupby('day')['revenue'].mean().sort_index()
day_of_week_analysis.index = day_of_week_analysis.index.map(days_map)
mean_week = day_of_week_analysis.mean()

# Month aggregate, same procedure.
month_analysis = temp.groupby('month')['revenue'].mean().sort_index()
month_analysis.index = month_analysis.index.map(months_map)
mean_year = month_analysis.mean()

_plot_average_revenue(day_of_week_analysis, mean_week, 'Average Revenue by Day', (4, 2))
_plot_average_revenue(month_analysis, mean_year, 'Average Revenue by Month', (8, 2))