### First Query

In [82]:
import psycopg2
import pandas as pd
from sqlalchemy import create_engine, inspect, text
from database_utils import DatabaseConnector

connector = DatabaseConnector('db_creds.yaml')
pd.set_option('display.max_colwidth', None)


In [83]:


alter_command = """
SELECT country_code, COUNT(country_code) as total_no_stores 
FROM dim_store_details 
GROUP BY country_code 
ORDER BY total_no_stores DESC 
LIMIT 3;

"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['country_code', 'total_no_stores'])
df

Column data types changed successfully.


Unnamed: 0,country_code,total_no_stores
0,GB,266
1,DE,141
2,US,34


### Second Query

In [84]:


alter_command = """
SELECT locality, COUNT(locality) as total_no_stores 
FROM dim_store_details 
GROUP BY locality 
ORDER BY total_no_stores DESC 
LIMIT 7;

"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['country_code', 'total_no_stores'])
df

Column data types changed successfully.


Unnamed: 0,country_code,total_no_stores
0,Chapletown,14
1,Belper,13
2,Bushey,12
3,Exeter,11
4,Arbroath,10
5,High Wycombe,10
6,Rutherglen,10


### Third Query

In [85]:


alter_command = """
SELECT SUM(o.product_quantity * p.product_price) AS total_sales,
    t.month AS month
FROM orders_table o
INNER JOIN dim_products p ON o.product_code = p.product_code
INNER JOIN dim_date_times t ON o.date_uuid = t.date_uuid
GROUP BY month
ORDER BY total_sales DESC
LIMIT 6;


"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['total_sales', 'month'])
df

Column data types changed successfully.


Unnamed: 0,total_sales,month
0,673295.68,8
1,668041.45,1
2,657335.84,10
3,650321.43,5
4,645741.7,7
5,645463.0,3


### Fourth Query

In [86]:


alter_command = """
SELECT 
  COUNT(*) AS number_of_sales,
  SUM(o.product_quantity) AS product_quantity_count,
  s.store_category
FROM 
  (SELECT 
     store_code,
     CASE 
       WHEN store_type != 'Web Portal' THEN 'Offline' 
       ELSE 'Web' 
     END AS store_category
   FROM dim_store_details) AS s
JOIN orders_table AS o ON s.store_code = o.store_code
GROUP BY s.store_category;


"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['number_of_sales', 'product_quantity_count', 'location'])
df

Column data types changed successfully.


Unnamed: 0,number_of_sales,product_quantity_count,location
0,93166,374047,Offline
1,26957,107739,Web


### Fifth Query

In [87]:


alter_command = """
SELECT 
  d.store_type AS store_type,
  ROUND(SUM(o.product_quantity * p.product_price)::numeric, 2) AS total_sales,
  ROUND((100.0 * SUM(o.product_quantity * p.product_price) / SUM(SUM(o.product_quantity * p.product_price)) OVER())::numeric, 2) AS percentage
FROM 
  orders_table AS o
JOIN dim_store_details AS d ON d.store_code = o.store_code
JOIN dim_products AS p ON o.product_code = p.product_code 
GROUP BY d.store_type;


"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['store_type', 'total_sales', 'percentage'])
df

Column data types changed successfully.


Unnamed: 0,store_type,total_sales,percentage
0,Local,3440896.52,44.56
1,Mall Kiosk,698791.61,9.05
2,Outlet,631804.81,8.18
3,Super Store,1224293.65,15.85
4,Web Portal,1726547.05,22.36


### Sixth Query

In [88]:


alter_command = """
WITH MonthlySales AS (
  SELECT 
    dt.year,
    dt.month,
    ROUND(SUM(o.product_quantity * p.product_price)::numeric, 2) AS total_sales,
    RANK() OVER (PARTITION BY dt.year ORDER BY SUM(o.product_quantity * p.product_price) DESC) AS sales_rank
  FROM orders_table AS o
  JOIN dim_date_times AS dt ON o.date_uuid = dt.date_uuid 
  JOIN dim_products AS p ON o.product_code = p.product_code
  GROUP BY dt.year, dt.month
)
SELECT 
  
  total_sales,
  year,
  month
FROM MonthlySales
WHERE sales_rank = 1
ORDER BY total_sales DESC
LIMIT 10;


"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['total_sales', 'year', 'month'])
df

Column data types changed successfully.


Unnamed: 0,total_sales,year,month
0,27936.77,1994,3
1,27356.14,2019,1
2,27091.67,2009,8
3,26679.98,1997,11
4,26310.97,2018,12
5,26236.67,2017,9
6,25798.12,2010,5
7,25648.29,1996,8
8,25614.54,2000,1
9,25290.66,2008,6


### Seventh Query

In [89]:


alter_command = """
SELECT SUM(staff_numbers) AS total_staff_numbers, country_code 
FROM dim_store_details 
WHERE staff_numbers IS NOT NULL 
GROUP BY country_code
ORDER BY total_staff_numbers DESC;


"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=['total_staff_numbers', 'country_code'])
df

Column data types changed successfully.


Unnamed: 0,total_staff_numbers,country_code
0,13132,GB
1,6054,DE
2,1304,US


### Eighth Query

In [90]:


alter_command = """
SELECT 
  
  ROUND(SUM(o.product_quantity * p.product_price)::numeric, 2) AS total_sales,
  d.store_type AS store_type,
  d.country_code AS country_code
FROM 
  orders_table AS o
JOIN 
  dim_store_details AS d ON d.store_code = o.store_code
JOIN 
  dim_products AS p ON o.product_code = p.product_code 
WHERE 
  d.country_code = 'DE'
GROUP BY 
  d.store_type, d.country_code
ORDER BY
  total_sales ASC;



"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=[ 'total_sales','store_type', 'country_code'])
df

Column data types changed successfully.


Unnamed: 0,total_sales,store_type,country_code
0,198373.57,Outlet,DE
1,247634.2,Mall Kiosk,DE
2,384625.03,Super Store,DE
3,1109909.59,Local,DE


### Nineth Query

In [91]:
alter_command = """
WITH SaleTimestamps AS (
  SELECT
    dt.year,
    -- Combine date parts into a full timestamp
    CAST(dt.year || '-' || dt.month || '-' || dt.day || ' ' || dt.timestamp AS TIMESTAMP) AS full_timestamp
  FROM dim_date_times AS dt
),
TimeDiffs AS (
  SELECT
    year,
    -- Calculate the difference in seconds between consecutive sales timestamps
    EXTRACT(EPOCH FROM (LEAD(full_timestamp) OVER(PARTITION BY year ORDER BY full_timestamp) - full_timestamp)) AS diff_seconds
  FROM SaleTimestamps
),
AveragedDiffs AS (
  SELECT
    year,
    AVG(diff_seconds) AS avg_diff_seconds
  FROM TimeDiffs
  WHERE diff_seconds IS NOT NULL -- Exclude the last record of each year which doesn't have a next sale
  GROUP BY year
)
SELECT
  year,
  -- Convert average seconds back into a human-readable format (HH:MM:SS.mmm)
  CONCAT(
    'hours: ', FLOOR(avg_diff_seconds / 3600), 
    ', minutes: ', FLOOR((avg_diff_seconds % 3600) / 60), 
    ', seconds: ', FLOOR(avg_diff_seconds % 60), 
    ', milliseconds: ', ROUND((avg_diff_seconds - FLOOR(avg_diff_seconds)) * 1000)
  ) AS average_time
FROM AveragedDiffs
ORDER BY avg_diff_seconds DESC -- Order by the average time taken in descending order
LIMIT 5;
"""

result = connector.local_connection.execute(text(alter_command))
print("Column data types changed successfully.")

df = pd.DataFrame(result, columns=[ 'year','average_time'])
df

Column data types changed successfully.


Unnamed: 0,year,average_time
0,2013,"hours: 2, minutes: 17, seconds: 13, milliseconds: 713"
1,1993,"hours: 2, minutes: 15, seconds: 35, milliseconds: 482"
2,2002,"hours: 2, minutes: 13, seconds: 39, milliseconds: 916"
3,2008,"hours: 2, minutes: 13, seconds: 3, milliseconds: 770"
4,2022,"hours: 2, minutes: 13, seconds: 2, milliseconds: 4"
