# Final Report

This notebook retrieves analysis results from SQL Server and generates simple reports with visualizations.

## What This Notebook Does:
1. Connects to SQL Server database
2. Retrieves analysis results using SQL queries
3. Saves final reports as CSV files
4. Generates simple plots using **Pandas with Matplotlib** and saves the plots as **PNG files**

# Import Libraries

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Connect to SQL Server

In [None]:
from sqlalchemy import create_engine
from urllib.parse import quote_plus

# Connection settings: target server, database and the installed ODBC driver name.
SERVER = r"GEMY"
DATABASE = "RetailDB"
ODBC_DRIVER = "ODBC Driver 17 for SQL Server"

# Assemble the ODBC connection string as consecutive "KEY=value;" pairs.
# The driver name is wrapped in literal braces; no username/password is
# embedded because Trusted_Connection=yes is passed to the driver.
conn_str = "".join(
    f"{key}={value};"
    for key, value in (
        ("DRIVER", "{" + ODBC_DRIVER + "}"),
        ("SERVER", SERVER),
        ("DATABASE", DATABASE),
        ("Trusted_Connection", "yes"),
    )
)

# URL-encode the ODBC string and hand it to SQLAlchemy through the
# mssql+pyodbc dialect's odbc_connect query parameter.
engine = create_engine(
    f"mssql+pyodbc:///?odbc_connect={quote_plus(conn_str)}",
    fast_executemany=True,
)


# Retrieve Analysis DataFrames

## a) Sales Analysis

### Query 1 - Top 10 Products

In [None]:
# Top 10 products ranked by total units sold across all order items.
# product_id is added as a secondary sort key: total_quantity_sold is an
# integer sum, so ties are plausible, and without a tie-breaker the rows that
# make the TOP 10 cut (and their order) are not guaranteed to be the same on
# every run.
query1 = """
select top 10
    p.product_id,
    p.product_name,
    sum(oi.quantity) as total_quantity_sold
from orderitems as oi
join products as p on oi.product_id = p.product_id
group by p.product_id, p.product_name
order by total_quantity_sold desc, p.product_id
"""
df_top_products = pd.read_sql(query1, engine)

In [None]:
# Preview the first five rows of the result.
df_top_products.head(5)

### Query 2 - Top 5 Customers

In [None]:
# Five customers with the highest total spending, where spending is the sum
# of quantity * list_price over every item on the customer's orders.
query2 = """
select top 5
    c.customer_id,
    c.first_name + ' ' + c.last_name as customer_name,
    sum(oi.quantity * oi.list_price) as total_spending
from orders as o
join customers as c on o.customer_id = c.customer_id
join orderitems as oi on o.order_id = oi.order_id
group by c.customer_id, c.first_name, c.last_name
order by total_spending desc
"""
df_top_customers = pd.read_sql(sql=query2, con=engine)

In [None]:
# Preview the first five rows of the result.
df_top_customers.head(5)

### Query 3 - Revenue per Store

In [None]:
# Total revenue (quantity * list_price) per store, highest first.
query3 = """
select
    s.store_id,
    s.store_name,
    sum(oi.quantity * oi.list_price) as total_revenue
from orders as o
join stores as s on o.store_id = s.store_id
join orderitems as oi on o.order_id = oi.order_id
group by s.store_id, s.store_name
order by total_revenue desc
"""
df_stores_revenue = pd.read_sql(sql=query3, con=engine)

In [None]:
# Preview the first five rows of the result.
df_stores_revenue.head(5)

### Query 4 - Revenue per Category

In [None]:
# Total revenue (quantity * list_price) per product category, highest first.
# Order items reach their category through the products table.
query4 = """
select
    c.category_id,
    c.category_name,
    sum(oi.quantity * oi.list_price) as total_revenue
from orderitems as oi
join products as p on oi.product_id = p.product_id
join categories as c on p.category_id = c.category_id
group by c.category_id, c.category_name
order by total_revenue desc
"""
df_categories_revenue = pd.read_sql(sql=query4, con=engine)

In [None]:
# Preview the first five rows of the result.
df_categories_revenue.head(5)

### Query 5 - Monthly Sales

In [None]:
# Revenue (quantity * list_price) per calendar month of the order date,
# returned in chronological order.
query5 = """
select
    year(o.order_date) as year,
    month(o.order_date) as month,
    sum(oi.quantity * oi.list_price) as monthly_revenue
from orders as o
join orderitems as oi on o.order_id = oi.order_id
group by year(o.order_date), month(o.order_date)
order by year, month
"""
df_monthly_sales = pd.read_sql(sql=query5, con=engine)

In [None]:
# Preview the first five rows of the result.
df_monthly_sales.head(5)

## b) Inventory Analysis

### Query 1 - Products with Low Stock (less than 10 units)

In [None]:
# One row per (product, store) stock record holding fewer than 10 units,
# lowest quantity first.
query_low_stock = """
select 
    p.product_id,
    p.product_name,
    st.store_name,
    s.quantity as stock_quantity
from stocks as s
join products as p on s.product_id = p.product_id
join stores as st on s.store_id = st.store_id
where s.quantity < 10
order by s.quantity asc
"""
df_low_stock = pd.read_sql(sql=query_low_stock, con=engine)

### Query 2 - Stores with Highest Inventory Levels

In [None]:
# Total units in stock per store, largest inventory first.
query_store_inventory = """
select 
    st.store_id,
    st.store_name,
    sum(s.quantity) as total_stock
from stocks as s
join stores as st on s.store_id = st.store_id
group by st.store_id, st.store_name
order by total_stock desc
"""
df_store_inventory = pd.read_sql(sql=query_store_inventory, con=engine)

## c) Staff Performance

### Query 1 - Number of Orders per Staff

In [None]:
# Number of orders handled by each staff member, busiest first.
# The LEFT JOIN keeps staff without any orders; count(o.order_id) skips the
# NULLs produced for them, so they appear with orders_handled = 0.
query_staff_orders = """
select 
    st.staff_id,
    st.first_name + ' ' + st.last_name as staff_name,
    count(o.order_id) as orders_handled
from staffs as st
left join orders as o on st.staff_id = o.staff_id
group by st.staff_id, st.first_name, st.last_name
order by orders_handled desc
"""
df_staff_orders = pd.read_sql(sql=query_staff_orders, con=engine)

### Query 2 - Best Performing Staff by Total Sales

In [None]:
# The single staff member with the highest total sales
# (sum of quantity * list_price over the items on their orders).
query_best_staff = """
select top 1
    st.staff_id,
    st.first_name + ' ' + st.last_name as staff_name,
    sum(oi.quantity * oi.list_price) as total_sales
from staffs as st
join orders as o on st.staff_id = o.staff_id
join orderitems as oi on o.order_id = oi.order_id
group by st.staff_id, st.first_name, st.last_name
order by total_sales desc
"""
df_best_staff = pd.read_sql(sql=query_best_staff, con=engine)

## d) Customer Insights

### Query 1 - Customers with No Orders

In [None]:
# Customers that never placed an order: LEFT JOIN to orders and keep only
# the rows where no matching order was found.
query_no_orders = """
select 
    c.customer_id,
    c.first_name + ' ' + c.last_name as customer_name
from customers as c
left join orders as o on c.customer_id = o.customer_id
where o.order_id is null
"""
df_no_orders = pd.read_sql(sql=query_no_orders, con=engine)

### Query 2 - Average Spending per Customer

In [None]:
# Average spending per customer, highest first.
# NOTE(review): avg() is taken over individual order lines
# (quantity * list_price per item), grouped by customer - i.e. this is the
# mean line amount, not the mean per order. Confirm that is the intended
# definition of "average spending".
query_avg_spending = """
select 
    o.customer_id,
    c.first_name + ' ' + c.last_name as customer_name,
    avg(oi.quantity * oi.list_price) as avg_spending
from orders as o
join orderitems as oi on o.order_id = oi.order_id
join customers as c on o.customer_id = c.customer_id
group by o.customer_id, c.first_name, c.last_name
order by avg_spending desc
"""
df_avg_spending = pd.read_sql(sql=query_avg_spending, con=engine)

### Save DataFrames as CSV

In [None]:
# Export every report DataFrame to the reports folder as CSV (index column omitted).
REPORTS_DIR = Path(r'D:\Downloads\Project\reports')

# to_csv does not create missing folders and fails if the target directory
# is absent, so make sure it exists before writing anything.
REPORTS_DIR.mkdir(parents=True, exist_ok=True)

# Output file name -> DataFrame to write. Keeping the mapping in one place
# replaces eleven near-identical to_csv calls.
report_files = {
    'top_products.csv': df_top_products,
    'top_customers.csv': df_top_customers,
    'store_revenue.csv': df_stores_revenue,
    'category_revenue.csv': df_categories_revenue,
    'monthly_sales.csv': df_monthly_sales,
    'low_stock_products.csv': df_low_stock,
    'store_inventory_levels.csv': df_store_inventory,
    'staff_orders_handled.csv': df_staff_orders,
    'best_staff_sales.csv': df_best_staff,
    'customers_no_orders.csv': df_no_orders,
    'avg_customer_spending.csv': df_avg_spending,
}

for file_name, report_df in report_files.items():
    report_df.to_csv(REPORTS_DIR / file_name, index=False)

# Set Plot Style

In [None]:
# Apply matplotlib's built-in 'classic' style sheet to every figure created below.
plt.style.use(['classic'])

### Plot 1 - Top Products Bar Chart

In [None]:
# Horizontal bar chart: one bar per product, length = total units sold.
ax = df_top_products.plot.barh(
    x='product_name',
    y='total_quantity_sold',
    color='skyblue',
    figsize=(10, 6),
    title='Top 10 Products by Quantity Sold',
)
ax.set_xlabel('Total Quantity Sold')
ax.set_ylabel('Product Name')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_top_products.png')
plt.show()

### Plot 2 - Top Customers Horizontal Bar Chart

In [None]:
# Horizontal bar chart: one bar per customer, length = total spending.
ax = df_top_customers.plot.barh(
    x='customer_name',
    y='total_spending',
    color='skyblue',
    figsize=(10, 6),
    title='Top 5 Customers by Spending',
)
ax.set_xlabel('Total Spending')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_top_customers.png')
plt.show()

### Plot 3 - Store Revenue Bar Chart

In [None]:
# Vertical bar chart: one bar per store, height = total revenue.
ax = df_stores_revenue.plot.bar(
    x='store_name',
    y='total_revenue',
    color='skyblue',
    figsize=(10, 6),
    title='Revenue per Store',
)
ax.set_ylabel('Revenue')

# Tilt the store names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_store_revenue.png')
plt.show()

### Plot 4 - Category Revenue Pie Chart

In [None]:
# Pie chart of revenue share per category; slices are labelled with the
# category name and their percentage, so the legend is switched off.
ax = df_categories_revenue.plot.pie(
    y='total_revenue',
    labels=df_categories_revenue['category_name'],
    autopct='%1.1f%%',
    legend=False,
    figsize=(8, 8),
    title='Revenue Distribution by Category',
)

# Save the PNG, then display the figure.
ax.get_figure().savefig(r'D:\Downloads\Project\plots\plot_category_revenue.png')
plt.show()

### Plot 5 - Monthly Sales Line Chart

In [None]:
# Build a "YYYY-MM" label per row for the x-axis; the month is left-padded
# with a zero to two characters.
df_monthly_sales['year_month'] = df_monthly_sales['year'].astype(str).str.cat(
    df_monthly_sales['month'].astype(str).str.zfill(2),
    sep='-',
)

In [None]:
# Line chart of revenue per month, with a marker on every data point.
ax = df_monthly_sales.plot.line(
    x='year_month',
    y='monthly_revenue',
    marker='o',
    figsize=(12, 6),
    title='Monthly Sales Trend',
)
ax.set_xlabel('Month')
ax.set_ylabel('Revenue')

# Tilt the month labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_monthly_trend.png')
plt.show()

## b) Inventory Analysis

### Plot 1 - Products with Low Stock Bar Chart

In [None]:
# Ten lowest-stock (product, store) rows, each with a "<product> - <store>" label.
# The label column is created here, before sorting and slicing, because
# sort_values()/head() return a new frame: a column added to df_low_stock
# afterwards never reaches df_low_stock_sorted, and the bar chart that uses
# x='product_store' would fail with a KeyError.
df_low_stock_sorted = (
    df_low_stock
    .assign(
        product_store=df_low_stock['product_name'] + ' - ' + df_low_stock['store_name']
    )
    .sort_values(by='stock_quantity', ascending=True)
    .head(10)
)

In [None]:
# Label every low-stock row as "<product name> - <store name>".
df_low_stock['product_store'] = (
    df_low_stock['product_name'].add(' - ').add(df_low_stock['store_name'])
)

In [None]:
# Horizontal bar chart of the lowest-stock rows; expects df_low_stock_sorted
# to carry the 'product_store' label column used for the bar names.
ax = df_low_stock_sorted.plot.barh(
    x='product_store',
    y='stock_quantity',
    color='skyblue',
    figsize=(10, 6),
    title='Top 10 Lowest Stock Products',
)
ax.set_xlabel('Stock Quantity')
ax.set_ylabel('Product - Store')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_low_stock_products.png')
plt.show()

### Plot 2 - Stores with Highest Inventory Levels Bar Chart

In [None]:
# Vertical bar chart: one bar per store, height = total units in stock.
ax = df_store_inventory.plot.bar(
    x='store_name',
    y='total_stock',
    color='skyblue',
    figsize=(10, 6),
    title='Total Inventory per Store',
)
ax.set_ylabel('Total Stock')

# Tilt the store names so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_store_inventory.png')
plt.show()

## c) Staff Performance

### Plot 1 - Number of Orders per Staff Bar Chart

In [None]:
# Horizontal bar chart: one bar per staff member, length = orders handled.
ax = df_staff_orders.plot.barh(
    x='staff_name',
    y='orders_handled',
    color='skyblue',
    figsize=(10, 6),
    title='Orders Handled by Each Staff Member',
)
ax.set_xlabel('Number of Orders')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_staff_orders.png')
plt.show()

### Plot 2 - Best Performing Staff by Total Sales Bar Chart

In [None]:
# Bar chart of the top-selling staff member's total sales
# (df_best_staff comes from a TOP 1 query).
ax = df_best_staff.plot.bar(
    x='staff_name',
    y='total_sales',
    color='skyblue',
    figsize=(6, 5),
    title='Best Performing Staff by Total Sales',
)
ax.set_ylabel('Total Sales')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_best_staff.png')
plt.show()

## d) Customer Insights

### Plot 1 - Customers with No Orders Pie Chart

In [None]:
# Count every customer so the no-order group can be shown as a share of the whole base.
query_total_customers = """
SELECT COUNT(*) AS total_customers
FROM customers
"""
df_total_customers = pd.read_sql(sql=query_total_customers, con=engine)

# The query returns a single row; pull the scalar count out of it.
total_customers = df_total_customers.at[0, 'total_customers']

In [None]:
# Number of customers without any order = row count of df_no_orders.
no_orders_count = len(df_no_orders)


In [None]:
# Two-slice split of the customer base: customers with at least one order
# (total minus the no-order count) versus customers with none.
labels = ['Customers with Orders', 'Customers with No Orders']
values = [total_customers - no_orders_count, no_orders_count]

fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(values, labels=labels, autopct='%1.1f%%', startangle=90)
ax.set_title('Customer Order Distribution')

# Tighten the layout, save the PNG, then display the figure.
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_customer_orders_distribution.png')
plt.show()


### Plot 2 - Average Spending per Customer Bar Chart

In [None]:
# Horizontal bar chart of the first ten rows of df_avg_spending
# (the query orders it by avg_spending, highest first).
ax = df_avg_spending.head(10).plot.barh(
    x='customer_name',
    y='avg_spending',
    color='skyblue',
    figsize=(10, 6),
    title='Top Customers by Average Spending',
)
ax.set_xlabel('Average Spending')

# Tighten the layout so labels fit, save the PNG, then display the figure.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig(r'D:\Downloads\Project\plots\plot_avg_spending.png')
plt.show()