# In [None]:
# Retail Sales Analysis Project
# This script analyzes retail sales data using Python's Pandas and Matplotlib libraries.
# It includes various visualizations to understand sales trends, customer distribution, and product performance.
# The dataset is assumed to be in CSV format and contains columns such as order_date, quantity, item_price, province, city, brand, and customer_id.
# The analysis includes monthly sales trends, sales by province, order volume, average item prices, and more.
# The script is structured to provide insights into the retail business, helping to identify growth opportunities and areas for improvement.

import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset
# pd.read_csv() loads a CSV file into a DataFrame.
# 'C:\\FAWAZUL\\Bootcamp\\DQLab\\matplotlib_exercise\\dataset_matplotlib1.csv' is the full path to the dataset.
# Make sure the file path is typed correctly and that the file is accessible.
# Absolute location of the source CSV on the author's machine.
DATASET_PATH = "C:\\FAWAZUL\\Bootcamp\\DQLab\\matplotlib_exercise\\dataset_matplotlib1.csv"
retail = pd.read_csv(DATASET_PATH)

# Parse order_date into real datetime values so that date components
# (month, year, ...) can be derived from it for time-based analysis.
parsed_order_dates = pd.to_datetime(retail['order_date'])
retail['order_date'] = parsed_order_dates

# ===============================
# CASE 1: Monthly Sales Trend Analysis
# ===============================
# Objective: Show how total sales vary from month to month.
# The chart plots total sales for each month, making it easier to identify sales trends over time.

# Sales value of each row: units ordered multiplied by the unit price.
retail['sales'] = retail['quantity'].mul(retail['item_price'])

# Reduce every order date to its monthly period so rows can be grouped by month.
retail['order_month'] = retail['order_date'].dt.to_period('M')

# Total sales per month, indexed by the monthly period.
monthly_sales = retail['sales'].groupby(retail['order_month']).sum()

# Period values are converted to strings so they can be used as x-axis labels.
month_labels = monthly_sales.index.astype(str)

# Line chart of the monthly totals, one marker per month.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(month_labels, monthly_sales.values, marker='o')
ax.set_title('Monthly Sales Trend')
ax.set_xlabel('Order Month')
ax.set_ylabel('Total Sales')
# Tilt the month labels so neighbouring labels do not overlap.
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()
