In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2 defaults: one replacement value per column that may contain gaps.
# SalesAmount and Discount fall back to 0; CustomerID falls back to 'Unknown'.
missing_value_defaults = {
    'SalesAmount': 0,
    'Discount': 0,
    'CustomerID': 'Unknown',
}

# Step 1 + Step 2: Load the sales data and fill the missing values in one pass.
sales_data = (
    pd.read_csv("data/sales_data_Q1_2024.csv")
    .fillna(value=missing_value_defaults)
)

# Step 3: Detect Outliers using the IQR method
# A row is an outlier when its SalesAmount lies more than 1.5 * IQR below the
# first quartile or more than 1.5 * IQR above the third quartile.
Q1 = sales_data['SalesAmount'].quantile(0.25)
Q3 = sales_data['SalesAmount'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR
outlier_condition = (sales_data['SalesAmount'] < lower_bound) | (sales_data['SalesAmount'] > upper_bound)

# Keep only the non-outlier rows. The explicit .copy() makes the result an
# independent frame: columns are assigned to `sales_data` later in the
# notebook, and writing to a boolean-mask selection raises pandas'
# SettingWithCopyWarning (pandas < 3.0).
sales_data = sales_data[~outlier_condition].copy()



In [None]:

# Step 4: Feature Engineering
# Profit of each row = its SalesAmount minus its Cost.
sales_data['Profit'] = sales_data['SalesAmount'].sub(sales_data['Cost'])

# Parse the 'Date' column into pandas datetime values.
parsed_dates = pd.to_datetime(sales_data['Date'])
sales_data['Date'] = parsed_dates

# Step 5: Analyze Daily Sales Trends Over Time
# Sum of SalesAmount for every distinct value of 'Date'.
daily_sales = sales_data.groupby('Date')['SalesAmount'].sum()

# Publication-ready static line chart of the daily totals, built through the
# explicit Figure/Axes interface.
trend_fig, trend_ax = plt.subplots(figsize=(12, 6))
trend_ax.plot(daily_sales.index, daily_sales.values, color='blue', marker='o', linestyle='-')
trend_ax.set_title('Daily Sales Trends - Q1 2024')
trend_ax.set_xlabel('Date')
trend_ax.set_ylabel('Sales Amount')
trend_ax.grid()
trend_fig.tight_layout()
trend_fig.savefig('daily_sales_trends.png')  # Write the chart to disk
plt.show()



In [None]:

# Step 6: Distribution of Sales Amount
# 30-bin histogram of SalesAmount with a kernel density estimate overlaid.
dist_fig, dist_ax = plt.subplots(figsize=(12, 6))
sns.histplot(sales_data['SalesAmount'], bins=30, kde=True, ax=dist_ax)
dist_ax.set(
    title='Distribution of Sales Amount - Q1 2024',
    xlabel='Sales Amount',
    ylabel='Frequency',
)
dist_fig.tight_layout()
dist_fig.savefig('sales_distribution.png')  # Write the chart to disk
plt.show()

# Step 7: Correlation Heatmap
# Correlate only the numeric/boolean columns. Since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns silently and raises a
# ValueError on text columns (e.g. 'CustomerID' once its gaps are filled with
# 'Unknown'). Selecting the columns explicitly works on every pandas version.
numeric_columns = sales_data.select_dtypes(include=['number', 'bool'])
correlation_matrix = numeric_columns.corr()

# Annotated heatmap of the pairwise correlations; each cell shows the
# coefficient with two decimals.
plt.figure(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm')
plt.title('Sales Data Correlation Heatmap')
plt.tight_layout()
plt.savefig('correlation_heatmap.png')  # Save the figure
plt.show()



In [None]:

# Step 8: Sales by Product Category
# Each bar is the summed SalesAmount of one ProductCategory.
category_fig, category_ax = plt.subplots(figsize=(12, 6))
sns.barplot(data=sales_data, x='ProductCategory', y='SalesAmount', estimator=sum, ax=category_ax)
category_ax.set_title('Total Sales by Product Category - Q1 2024')
plt.setp(category_ax.get_xticklabels(), rotation=45)  # Slant the category labels
category_fig.tight_layout()
category_fig.savefig('sales_by_product_category.png')  # Write the chart to disk
plt.show()

# Step 9: Integration of Matplotlib and Seaborn in a Cohesive Dashboard
# One figure with three vertically stacked panels; this step draws the top
# panel, the other two axes are populated further down in the notebook.
fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(12, 18))

# Panel 1: Daily Sales Trends
trend_panel = axs[0]
trend_panel.plot(daily_sales.index, daily_sales.values, color='blue', marker='o', linestyle='-')
trend_panel.set(
    title='Daily Sales Trends - Q1 2024',
    xlabel='Date',
    ylabel='Sales Amount',
)
trend_panel.grid()



In [None]:

# Dashboard panels 2 and 3, drawn onto the axes of the existing `fig`.
#
# Layout and saving go through `fig` itself rather than pyplot's "current
# figure": with the default inline backend a figure is closed as soon as the
# cell that created it finishes, so plt.tight_layout() / plt.savefig() in this
# cell would act on a brand-new empty figure and write a blank PNG.

# Sales Distribution
sns.histplot(sales_data['SalesAmount'], bins=30, kde=True, ax=axs[1])
axs[1].set_title('Distribution of Sales Amount - Q1 2024')
axs[1].set_xlabel('Sales Amount')
axs[1].set_ylabel('Frequency')

# Sales by Product Category
sns.barplot(data=sales_data, x='ProductCategory', y='SalesAmount', estimator=sum, ax=axs[2])
axs[2].set_title('Total Sales by Product Category - Q1 2024')
axs[2].set_xticklabels(axs[2].get_xticklabels(), rotation=45)

fig.tight_layout()
fig.savefig('comprehensive_dashboard.png')  # Save the cohesive dashboard
plt.show()  # Displays the figure when pyplot still manages it (e.g. run as a script)
fig  # Last expression of the cell: renders the dashboard even if pyplot already closed it
