In [3]:
import altair as alt
import pandas as pd
import duckdb

# Load the SQL magic extension that provides the %sql / %%sql magics used in this notebook.
%load_ext sql
# Open a DuckDB connection; connect() is called without a database path.
conn = duckdb.connect()
# Hand that connection to the SQL magic under the alias "duckdb".
%sql conn --alias duckdb
# Install and load DuckDB's spatial extension (st_read is called in a later cell to read an .xlsx sheet).
%sql INSTALL spatial; LOAD spatial;
# NOTE(review): displaylimit = None is presumably meant to lift the cap on displayed result rows - confirm
# against the SQL magic's documentation for the installed version.
%config SqlMagic.displaylimit = None


The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [40]:
!pip install vl-convert-python

Collecting vl-convert-python
  Downloading vl_convert_python-1.7.0-cp37-abi3-macosx_11_0_arm64.whl.metadata (5.2 kB)
Downloading vl_convert_python-1.7.0-cp37-abi3-macosx_11_0_arm64.whl (26.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m26.9/26.9 MB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hInstalling collected packages: vl-convert-python
Successfully installed vl-convert-python-1.7.0

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [4]:
%%sql
CREATE TABLE IF NOT EXISTS Oxford_data AS
-- Turnover-time records read from the CSV export. IF NOT EXISTS means a re-run keeps the existing table.
SELECT *
FROM read_csv('../Oxford_data_turnovertime_v1.csv');

-- Protocol materials read from the Oxford Churchill sheet of the workbook via st_read.
-- The layer name is a string value, so it is written as a single-quoted literal
-- (double quotes denote identifiers in SQL).
CREATE TABLE IF NOT EXISTS Oxford_protocol AS
SELECT * FROM st_read('../Materials in Protocols.xlsx', layer='Oxford Churchill');

Count
19307


In [5]:
%%sql
WITH monthly AS (
    -- One row per (year, month, specialism) with the mean of actual minus expected turnover minutes.
    -- Only rows with an expected turnover time and matching actual/expected sequence numbers are used.
    SELECT
        year(date)  AS year,
        month(date) AS month,
        specialism_name,
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_diff_actual_turnovertime,
        COUNT(*) AS number_of_records
    FROM Oxford_data
    WHERE expected_turnovertime IS NOT NULL
      AND actual_day_sequence_number = expected_day_sequence_number
    GROUP BY year(date), month(date), specialism_name
    -- Every group contains a single (year, month) pair, so in effect this drops groups with only one record.
    HAVING COUNT(DISTINCT (year(date), month(date))) <> COUNT(*)
),
baseline_before AS (
    -- Per-specialism average over all rows dated before 2024-03-05.
    -- NOTE(review) no sequence-number filter is applied here, unlike the monthly aggregate - confirm.
    SELECT
        specialism_name,
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_turnovertime
    FROM Oxford_data
    WHERE date < '2024-03-05'::DATE
    GROUP BY specialism_name
),
baseline_after AS (
    -- Per-specialism average over all rows dated on or after 2024-03-05.
    SELECT
        specialism_name,
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_turnovertime
    FROM Oxford_data
    WHERE date >= '2024-03-05'::DATE
    GROUP BY specialism_name
)
-- Attach the matching baseline to every monthly row. The first day of each month is compared with the
-- cutoff, so March 2024 receives only the before value and the after value starts with April 2024.
-- NOTE(review) the plotting query later in this notebook uses 2024-03-01 for the after join - confirm
-- which cutoff is intended.
SELECT
    m.*,
    pre.avg_turnovertime  AS before_avg_turnovertime,
    post.avg_turnovertime AS after_avg_turnovertime
FROM monthly AS m
LEFT OUTER JOIN baseline_before AS pre
    ON m.specialism_name = pre.specialism_name
   AND (CONCAT(m.year, '-', m.month, '-', '01')::DATE) < '2024-03-05'::DATE
LEFT OUTER JOIN baseline_after AS post
    ON m.specialism_name = post.specialism_name
   AND (CONCAT(m.year, '-', m.month, '-', '01')::DATE) >= '2024-03-05'::DATE


year,month,specialism_name,avg_diff_actual_turnovertime,number_of_records,before_avg_turnovertime,after_avg_turnovertime
2023,11,Dermatology,-7.545454545454546,11,-4.595744680851064,
2024,3,Dermatology,0.8,5,-4.595744680851064,
2024,2,Dermatology,-28.5,4,-4.595744680851064,
2023,12,Dermatology,-17.5,4,-4.595744680851064,
2024,1,Dermatology,-10.75,8,-4.595744680851064,
2023,10,Dermatology,-10.692307692307692,13,-4.595744680851064,
2023,11,Paediatrics,13.454545454545457,11,16.09375,
2024,2,Paediatrics,-14.333333333333334,3,16.09375,
2024,1,Paediatrics,61.0,3,16.09375,
2023,12,Paediatrics,-9.6,5,16.09375,


## Visualization of turnover time

In [6]:
# Overall mean of (actual - expected) turnover minutes over every row dated before 2024-03-05.
# The query returns a single row; its first column is the value displayed by this cell.
(
    conn.sql(
        "SELECT AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) FROM Oxford_data WHERE date < '2024-03-05'::DATE "
    )
    .fetchone()[0]
)

14.694591029023746

In [42]:
# Monthly mean deviation (actual minus expected turnover minutes) per specialism, joined with that
# specialism's overall average before and after the 2024-03-05 cutoff.
grouped_df = conn.sql(
    """
WITH oxford_data_month_year_aggregated AS (
    SELECT 
        year(date) AS year, 
        month(date) AS month, 
        specialism_name, 
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_diff_actual_turnovertime,
        COUNT(*) AS number_of_records
    FROM Oxford_data
    WHERE expected_turnovertime IS NOT NULL AND actual_day_sequence_number = expected_day_sequence_number
    GROUP BY year(date), month(date), specialism_name
    -- Every group contains a single (year, month) pair, so in effect this drops groups with only one record.
    HAVING COUNT(DISTINCT (year(date), month(date))) <>COUNT(*)
),
-- NOTE(review) the two summary CTEs below do not apply the sequence-number filter used above - confirm.
oxford_data_aggregated_before_impl AS
(
    SELECT 
        specialism_name,
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_turnovertime 
    FROM Oxford_data WHERE date < '2024-03-05'::DATE
    GROUP BY specialism_name
),
oxford_data_aggregated_after_impl AS
(
    SELECT 
        specialism_name,
        AVG(actual_turnovertime_minutes - expected_turnovertime_minutes) AS avg_turnovertime 
    FROM Oxford_data WHERE date >= '2024-03-05'::DATE
    GROUP BY specialism_name
),
oxford_data_aggregated_joined AS (
    SELECT odmya.*, 
        oabi.avg_turnovertime AS before_avg_turnovertime, 
        oaai.avg_turnovertime AS after_avg_turnovertime,
        round(oaai.avg_turnovertime - oabi.avg_turnovertime, 2) AS difference,
        -- Halfway between the two averages, i.e. the smaller one plus HALF of the gap.
        -- (Adding the whole gap, as before, yields the larger average rather than the midpoint.)
        least(oabi.avg_turnovertime, oaai.avg_turnovertime)
            + abs(oabi.avg_turnovertime - oaai.avg_turnovertime) / 2 AS mid_point
    FROM oxford_data_month_year_aggregated odmya
    -- Month-start dates are compared with the cutoffs. The before join uses 2024-03-05 and the after
    -- join uses 2024-03-01, so the March 2024 row matches both and is the only month that gets
    -- a non-null difference and mid_point.
    LEFT OUTER JOIN oxford_data_aggregated_before_impl oabi ON odmya.specialism_name = oabi.specialism_name 
        AND (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) < '2024-03-05'::DATE
    LEFT OUTER JOIN oxford_data_aggregated_after_impl oaai ON odmya.specialism_name = oaai.specialism_name 
        AND (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) >= '2024-03-01'::DATE
)
SELECT * 
FROM oxford_data_aggregated_joined
    """
).df()

# First day of each (year, month) as a datetime column, used as the x field of every layer.
grouped_df['yearmonth'] = pd.to_datetime(dict(year=grouped_df['year'], month=grouped_df['month'], day=1))

# Dashed grey line at the before-cutoff average (drawn over the months that carry that value).
before_rule = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="before_avg_turnovertime:Q",
        x="yearmonth(yearmonth):T",
        color=alt.value("grey")
    )
)
# Dashed line at the after-cutoff average (default mark colour).
after_rule = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="after_avg_turnovertime:Q",
        x="yearmonth(yearmonth):T"
    )
)
# Thin black vertical rule positioned at March 2024.
xrule = (
    alt.Chart()
    .mark_rule(strokeWidth=1)
    .encode(
        x=alt.datum(alt.DateTime(year=2024, month="March")),
        color=alt.value("black")
    )
)
# Monthly values up to and including March 2024, in blue.
# Vega's datetime() takes a zero-based month, so datetime(2024, 2, 4) is 4 March 2024.
line_before = alt.Chart().mark_line().encode(
    y=alt.Y('avg_diff_actual_turnovertime:Q').title("Minutes"), 
    color=alt.value("blue")
).transform_filter(
    'datum.yearmonth < datetime(2024, 2, 4)' 
)
# Monthly values from March 2024 onward; datetime(2024, 2, 1) is 1 March 2024, so the March point
# belongs to both line segments.
line_after = alt.Chart().mark_line().encode(
    y='avg_diff_actual_turnovertime:Q', 
).transform_filter(
    'datum.yearmonth >= datetime(2024, 2, 1)' 
)

# Text label showing the after-minus-before difference, only on rows where that difference exists.
# The calculate transform runs after the null filter and turns the number into "<value> min".
text = line_after.mark_text(xOffset=-5, yOffset=-50, align="right").encode(
    text="difference:O"
).transform_filter(
    'datum.difference !== null' 
).transform_calculate(difference='datum.difference + " min"')

# Layer everything on a shared monthly time axis and draw one column per specialism.
(
    alt.layer(
        before_rule,
        after_rule, 
        line_before, 
        line_after, 
        text, 
        xrule,  
        data=grouped_df
    ).encode(
        x=alt.X('yearmonth(yearmonth):T', scale=alt.Scale(nice={'interval':'month', 'step':1}))
                .title("Time")
                .axis(labels=True, labelAngle=90),
    )
).facet(column="specialism_name:N")
