In [None]:
! pip install duckdb
! pip install pandas
! pip install seaborn
! pip install matplotlib
! pip install jupysql pandas matplotlib duckdb-engine

In [None]:
import duckdb 
# Enable the %sql / %%sql magics (the `sql` extension comes from jupysql).
%load_ext sql
# connect() is called without a path, so this is DuckDB's default in-memory database.
conn = duckdb.connect()
# Register the same connection with the SQL magics under the alias "duckdb",
# so %sql cells and conn.sql(...) calls operate on one database.
%sql conn --alias duckdb
# The spatial extension provides st_read(), which later cells use to read the Excel workbook.
%sql INSTALL spatial; LOAD spatial;

In [None]:
%%sql
SELECT DISTINCT Theatre
-- Lists the raw theatre labels found in the "April" sheet of the workbook.
-- HEADERS=FORCE asks the reader to treat the first row as column names.
FROM st_read(
    'Delays Salford.xlsx',
    layer = 'April',  -- single quotes: a string literal, not a quoted identifier
    open_options = ['HEADERS=FORCE']
)
ORDER BY Theatre

In [24]:
import calendar

# Workbook with one sheet per month, and the table the sheets are merged into.
WORKBOOK_PATH = 'Delays Salford.xlsx'
TARGET_TABLE = 'Salford_data'

# calendar.month_name[0] is an empty string, so [1:12] yields January..November.
# NOTE(review): December is not loaded — confirm the workbook has no "December"
# sheet; if it does, this slice should be [1:13].
sheet_names = calendar.month_name[1:12]

# One SELECT per monthly sheet; HEADERS=FORCE treats the first row as the header.
format_sheets = [
    f"""
SELECT *
FROM st_read('{WORKBOOK_PATH}', layer='{sheet}', open_options = ['HEADERS=FORCE'])
"""
    for sheet in sheet_names
]

# UNION BY NAME aligns the sheets by column name rather than by position.
# NOTE(review): without ALL it also removes duplicate rows — confirm that
# collapsing identical rows is intended (otherwise use UNION ALL BY NAME).
query = " UNION BY NAME ".join(format_sheets)

# CREATE OR REPLACE swaps the table in a single statement, so a failed read
# does not leave the notebook without the previously loaded table.
conn.sql(f"CREATE OR REPLACE TABLE {TARGET_TABLE} AS ({query})")


In [None]:
# Lift jupysql's cap on displayed rows so the full result of each query is shown.
%config SqlMagic.displaylimit = None
# Preview the first 50 rows of the combined table.
# NOTE(review): Date looks like it is still text here (it is parsed with strptime in a
# later cell), so ORDER BY Date would sort lexicographically — confirm.
%sql SELECT * FROM Salford_data ORDER BY Date, Theatre LIMIT 50

In [None]:
# Lift jupysql's cap on displayed rows (same setting as the previous cell).
%config SqlMagic.displaylimit = None
# NOTE(review): Sheet_Desc is not created by any cell visible in this notebook; on a
# fresh kernel this query presumably fails unless the table is defined elsewhere — confirm.
%sql SELECT DISTINCT * FROM Sheet_Desc

In [None]:
%%sql
WITH Salford_data_columns AS (
    -- Keep only the columns used by the rest of this query.
    SELECT
        Date,
        Theatre,
        Speciality,
        Surgeon,
        "List Start Time",
        "Sent for",
        "Actual Start Time",
        "Responsible Person"
    FROM Salford_data
),
Salford_data_filtered AS (
    SELECT *
    FROM Salford_data_columns
    -- NOTE(review): Date is compared as text here (it is parsed further down), so only
    -- the ISO spelling of this date is excluded; the reason for excluding it is not recorded.
    WHERE Date != '2024-03-11'
    AND Theatre != 'None'
    -- Specialities that are not relevant.
    -- Spellings are kept verbatim (including 'General Emeregency'); presumably they
    -- mirror the values in the workbook — verify before "correcting" them.
    -- Every literal must be followed by a comma: adjacent literals separated only by
    -- a newline are concatenated into one string and then match nothing.
    AND Speciality NOT IN (
        'General Emeregency',
        'General Emergencies',
        'Neuro Emergencies',
        'Emergencies',
        'Cardioversions'
        -- 'Endoscopy',
        -- 'Dental',
        -- 'Pain',
        -- 'Renal'
    )
    AND Surgeon NOT IN (
        'Emergencies'
    )
    -- AND "Actual Start Time" IS NOT NULL
    -- AND "List Start Time" IS NOT NULL
),
Salford_data_theatre_conversion_float AS (
    SELECT
        * EXCLUDE (Theatre),
        -- Making the theatre consistent: keep only the text before the first '.'
        -- (e.g. '12.0' -> '12'; presumably numeric labels that were read as floats).
        IF(contains(Theatre, '.'), split_part(Theatre, '.', 1), Theatre) AS Theatre
    FROM Salford_data_filtered
),
Salford_data_theatre_conversion_upper AS (
    SELECT
        * EXCLUDE (Theatre),
        -- Making the theatre consistent: labels differing only in case become one value.
        UPPER(Theatre) AS Theatre
    FROM Salford_data_theatre_conversion_float
),
Salford_data_date_conversion AS (
    SELECT
        * EXCLUDE (Date),
        -- Try ISO dates first; otherwise parse 'day month year' where the abbreviated
        -- month is followed by a literal 't' (presumably values such as '3 Sept 2024').
        -- The fallback strptime raises if a value matches neither format.
        COALESCE(try_strptime(Date, '%Y-%m-%d'), strptime(Date, '%d %bt %Y')) AS Date
    FROM Salford_data_theatre_conversion_upper
)
-- Number of remaining rows per theatre and day.
SELECT Theatre, Date, COUNT(*)
FROM Salford_data_date_conversion
GROUP BY Theatre, Date


In [None]:
%%sql
WITH january_sheet AS (
    -- Raw "January" sheet; HEADERS=FORCE treats the first row as column names.
    SELECT *
    FROM st_read(
        'Delays Salford.xlsx',
        layer = 'January',
        open_options = ['HEADERS=FORCE']
    )
)
SELECT
    -- Key columns first for readability; the trailing * then repeats them
    -- along with every other column of the sheet.
    Date,
    Theatre,
    Speciality,
    Surgeon,
    "List Start Time",
    "Team Brief",
    "Sent for",
    "Actual Start Time",
    "Golden Patient",
    *
FROM january_sheet
LIMIT 10
