Explored relational databases using SQL to perform comprehensive Exploratory Data Analysis (EDA), covering database exploration, segmentation, trend analysis, and performance ranking. Developed reusable SQL scripts to analyze dimensions, measures, and time-based metrics, helping uncover key business insights.
- Conducted EDA to understand data structure, identify outliers, and uncover trends.
- Designed SQL queries to calculate key metrics (total sales, customer count, average order value).
- Performed segmentation analysis (sales by product category, revenue by country).
- Developed ranking queries to identify top-performing products and underperforming categories.
- Applied best practices in SQL query writing for efficient data retrieval and analysis.
-- This query retrieves all tables present in the currently selected database.
-- INFORMATION_SCHEMA.TABLES contains metadata about all tables across all databases.
-- We filter by TABLE_SCHEMA = DATABASE() to only see tables in the current database.
SELECT
TABLE_SCHEMA, -- Name of the database (schema)
TABLE_NAME, -- Name of the table
TABLE_TYPE -- Type of object (BASE TABLE, VIEW, etc.)
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_SCHEMA = DATABASE(); -- Filter to show only tables in the active database-- This query retrieves metadata about each column in the dim_customers table.
SELECT
COLUMN_NAME, -- Name of the column
DATA_TYPE, -- Data type
IS_NULLABLE, -- NULL values (YES/NO)
CHARACTER_MAXIMUM_LENGTH -- Max length for character-based columns (null for non-char types)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = DATABASE() -- Only look at tables in the active database
AND TABLE_NAME = 'dim_customers'; -- Specifically focus on the 'dim_customers' table-- Retrieve a list of unique countries from which customers originate
select distinct country
from dim_customers
order by country;-- Determine the first and last order date and the total duration in months
select min(order_date) as First_Order, max(order_date) as Last_Order,
timestampdiff(month,min(order_date),max(order_date)) as Duration_months
from fact_sales;
-- Find the youngest and oldest customer based on birthdate
select min(birthdate) as Oldest,
timestampdiff(year,min(birthdate),current_date()) as Oldest_age,
max(birthdate) as Youngest,
timestampdiff(year,max(birthdate),current_date()) as Young_age
from dim_customers;-- Generate a Report that shows all key metrics of the business
-- Find the Total Sales
SELECT 'Total Sales' AS measure_name, SUM(sales_amount) AS measure_value FROM fact_sales
UNION
-- Find how many items are sold
SELECT 'Total Quantity', SUM(quantity) FROM fact_sales
UNION
-- Find the average selling price
SELECT 'Average Price', AVG(price) FROM fact_sales
UNION
-- Find the Total number of Orders
SELECT 'Total Orders', COUNT(DISTINCT order_number) FROM fact_sales
UNION
-- Find the total number of products
SELECT 'Total Products', COUNT(DISTINCT product_name) FROM dim_products
UNION
-- Find the total number of customers that has placed an order
SELECT 'Total Customers', COUNT(customer_key) FROM dim_customers;-- Find total customers by countries
select country,count(customer_id) as Total_customer
from dim_customers
group by country ;
-- Find total customers by gender
select gender,count(customer_id) as Total_customer
from dim_customers
group by gender ;
-- Find total products by category
select category,count(product_id) as Total_customer
from dim_products
group by category ;-- What is the average costs in each category?
select category,avg(cost) as Avg_cost
from dim_products
group by category;
-- What is the total revenue generated for each category?
select category,sum(sales_amount)
from fact_sales s
left join dim_products p
on p.product_key=s.product_key
group by category;-- What is the total revenue generated by each customer?
select c.customer_key,first_name,sum(sales_amount) as Total_revenue
from dim_customers c
left join fact_sales s
on c.customer_key=s.customer_key
group by s.customer_key
order by Total_revenue desc;-- What is the distribution of sold items across countries?
select c.country,sum(s.quantity) as Total_items_sold
from fact_sales s
join dim_customers c
on c.customer_key=s.customer_key
group by c.country
order by total_items_sold desc;-- Which 5 products subcategory Generating the Highest Revenue?
-- Simple Ranking
select p.subcategory,sum(s.sales_amount) as revenue
from fact_sales s
left join dim_products p
on s.product_key=p.product_key
group by p.subcategory
order by revenue desc
limit 5;
-- Complex but Flexibly Ranking Using Window Functions
with ranked_products as (
select p.product_name,
sum(s.sales_amount) as revenue,
rank() over (order by sum(s.sales_amount) desc) as rank_products
from fact_sales s
left join dim_products p
on s.product_key=p.product_key
group by p.product_name
)
select *
from ranked_products
where rank_products<=5;-- What are the 5 worst-performing products in terms of sales?
select p.product_name,sum(s.sales_amount) as revenue
from fact_sales s
left join dim_products p
on s.product_key=p.product_key
group by p.product_key
order by revenue
limit 5;-- Find the top 10 customers who have generated the highest revenue
select c.first_name,sum(s.sales_amount) as revenue
from fact_sales s
left join dim_customers c
on c.customer_key=s.customer_key
group by c.customer_key
order by revenue desc
limit 10;
-- The 3 customers with the fewest orders placed
select c.first_name,count(s.customer_key) as no_of_orders
from fact_sales s
left join dim_customers c
on s.customer_key=c.customer_key
group by c.customer_key
order by count(s.customer_key)
limit 3;-- Analyse sales performance over time
-- Quick Date Functions
-- 1. Quick Date Functions - Grouping by Year and Month
SELECT
YEAR(order_date) AS order_year,
MONTH(order_date) AS order_month,
SUM(sales_amount) AS total_sales,
COUNT(DISTINCT customer_key) AS total_customers,
SUM(quantity) AS total_quantity
FROM fact_sales
WHERE order_date IS NOT NULL
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY YEAR(order_date), MONTH(order_date);-- 2. DATE_FORMAT() to group by month (first day of each month)
SELECT
DATE_FORMAT(order_date, '%Y-%m-01') AS order_month,
SUM(sales_amount) AS total_sales,
COUNT(DISTINCT customer_key) AS total_customers,
SUM(quantity) AS total_quantity
FROM fact_sales
WHERE order_date IS NOT NULL
GROUP BY order_month
ORDER BY order_month;
-- 3. Formatting date into 'YYYY-MMM' format (like '2024-Mar')
SELECT
DATE_FORMAT(order_date, '%Y-%b') AS order_month,
SUM(sales_amount) AS total_sales,
COUNT(DISTINCT customer_key) AS total_customers,
SUM(quantity) AS total_quantity
FROM fact_sales
WHERE order_date IS NOT NULL
GROUP BY order_month
ORDER BY order_month;



















