In [2]:
import altair as alt
import pandas as pd
import duckdb 

%load_ext sql
conn = duckdb.connect()
%sql conn --alias duckdb
%sql INSTALL spatial; LOAD spatial;
%config SqlMagic.displaylimit = None

In [5]:
%%sql
CREATE TABLE IF NOT EXISTS Oxford_data_efficiency AS
SELECT *
FROM read_csv('../Oxford_parsed_efficiency_data_v1.csv')

Count
10985


In [6]:
%%sql 
DESCRIBE Oxford_data_efficiency

column_name,column_type,null,key,default,extra
theatre,VARCHAR,YES,,,
date,DATE,YES,,,
actual_day_start,TIMESTAMP,YES,,,
expected_day_start,TIMESTAMP,YES,,,
actual_day_end,TIMESTAMP,YES,,,
expected_day_end,TIMESTAMP,YES,,,
day_start_timeline_type,VARCHAR,YES,,,
day_end_timeline_type,VARCHAR,YES,,,
difference_day_start,TIME,YES,,,
difference_day_end,TIME,YES,,,


In [15]:
%%sql
WITH oxford_data_month_year_aggregated AS (
    SELECT 
        year(date) AS year, 
        month(date) AS month, 
        AVG(difference_minutes_day_start) AS avg_difference_minutes_day_start,
        AVG(difference_minutes_day_end) AS avg_difference_minutes_day_end,
        AVG(difference_minutes_day_end - difference_minutes_day_start) AS avg_difference_minutes_day_delay
    FROM Oxford_data_efficiency
    GROUP BY year(date), month(date)
),
oxford_data_aggregated_before_impl AS
(
    SELECT 
        AVG(difference_minutes_day_start) AS before_avg_difference_minutes_day_start,
        AVG(difference_minutes_day_end) AS before_avg_difference_minutes_day_end,
        AVG(difference_minutes_day_end - difference_minutes_day_start) AS before_avg_difference_minutes_day_delay
    FROM Oxford_data_efficiency WHERE date < '2024-03-05'::DATE
),
oxford_data_aggregated_after_impl AS
(
    SELECT 
        AVG(difference_minutes_day_start) AS after_avg_difference_minutes_day_start,
        AVG(difference_minutes_day_end) AS after_avg_difference_minutes_day_end,
        AVG(difference_minutes_day_end - difference_minutes_day_start) AS after_avg_difference_minutes_day_delay
    FROM Oxford_data_efficiency WHERE date >= '2024-03-05'::DATE
),
oxford_data_aggregated_joined AS (
    SELECT odmya.*, 
        oabi.*,
        oaai.*
    FROM oxford_data_month_year_aggregated odmya
    LEFT OUTER JOIN oxford_data_aggregated_before_impl oabi ON (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) < '2024-03-05'::DATE
    LEFT OUTER JOIN oxford_data_aggregated_after_impl oaai ON (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) >= '2024-03-01'::DATE
)
SELECT * 
FROM oxford_data_aggregated_joined


year,month,avg_difference_minutes_day_start,avg_difference_minutes_day_end,avg_difference_minutes_day_delay,before_avg_difference_minutes_day_start,before_avg_difference_minutes_day_end,before_avg_difference_minutes_day_delay,after_avg_difference_minutes_day_start,after_avg_difference_minutes_day_end,after_avg_difference_minutes_day_delay
2024,3,38.75696767001115,91.314381270903,52.55741360089186,38.50478142076503,93.14936247723134,54.6445810564663,37.57394205976035,93.29728499924164,55.72334293948127
2024,4,33.917410714285715,92.13839285714286,58.220982142857146,,,,37.57394205976035,93.29728499924164,55.72334293948127
2024,5,35.47065101387407,91.35005336179296,55.87940234791889,,,,37.57394205976035,93.29728499924164,55.72334293948127
2024,6,36.83828775267538,89.48394768133174,52.645659928656364,,,,37.57394205976035,93.29728499924164,55.72334293948127
2024,7,33.79704433497537,93.32019704433498,59.52315270935961,,,,37.57394205976035,93.29728499924164,55.72334293948127
2024,8,38.82896015549077,98.3712342079689,59.542274052478135,,,,37.57394205976035,93.29728499924164,55.72334293948127
2024,9,43.60687732342007,95.78624535315986,52.17936802973978,,,,37.57394205976035,93.29728499924164,55.72334293948127
2023,10,39.17695961995249,97.47268408551068,58.2957244655582,38.50478142076503,93.14936247723134,54.6445810564663,,,
2023,11,36.54761904761905,92.8840579710145,56.33643892339545,38.50478142076503,93.14936247723134,54.6445810564663,,,
2023,12,47.36363636363637,93.88531468531468,46.52167832167832,38.50478142076503,93.14936247723134,54.6445810564663,,,


# Visualization Delays

In [80]:
grouped_df = conn.sql(
    """
    WITH oxford_data_month_year_aggregated AS (
        SELECT 
            year(date) AS year, 
            month(date) AS month, 
            AVG(difference_minutes_day_start) AS avg_difference_minutes_day_start,
            AVG(difference_minutes_day_end) AS avg_difference_minutes_day_end,
            AVG(difference_minutes_day_start - difference_minutes_day_end) AS avg_difference_minutes_day_delay
        FROM Oxford_data_efficiency
        GROUP BY year(date), month(date)
    ),
    oxford_data_aggregated_before_impl AS
    (
        SELECT 
            AVG(difference_minutes_day_start) AS before_avg_difference_minutes_day_start,
            AVG(difference_minutes_day_end) AS before_avg_difference_minutes_day_end,
            AVG(difference_minutes_day_start - difference_minutes_day_end) AS before_avg_difference_minutes_day_delay
        FROM Oxford_data_efficiency WHERE date < '2024-03-05'::DATE
    ),
    oxford_data_aggregated_after_impl AS
    (
        SELECT 
            AVG(difference_minutes_day_start) AS after_avg_difference_minutes_day_start,
            AVG(difference_minutes_day_end) AS after_avg_difference_minutes_day_end,
            AVG(difference_minutes_day_start - difference_minutes_day_end) AS after_avg_difference_minutes_day_delay
        FROM Oxford_data_efficiency WHERE date >= '2024-03-05'::DATE
    ),
    oxford_data_aggregated_joined AS (
        SELECT odmya.*, 
            oabi.*,
            oaai.*
        FROM oxford_data_month_year_aggregated odmya
        LEFT OUTER JOIN oxford_data_aggregated_before_impl oabi ON (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) < '2024-03-05'::DATE
        LEFT OUTER JOIN oxford_data_aggregated_after_impl oaai ON (CONCAT(odmya.year, '-', odmya.month, '-', '01')::DATE) >= '2024-03-01'::DATE
    )
    SELECT * 
    FROM oxford_data_aggregated_joined
    """
).df()
grouped_df


Unnamed: 0,year,month,avg_difference_minutes_day_start,avg_difference_minutes_day_end,avg_difference_minutes_day_delay,before_avg_difference_minutes_day_start,before_avg_difference_minutes_day_end,before_avg_difference_minutes_day_delay,after_avg_difference_minutes_day_start,after_avg_difference_minutes_day_end,after_avg_difference_minutes_day_delay
0,2024,3,38.756968,91.314381,-52.557414,38.504781,93.149362,-54.644581,37.573942,93.297285,-55.723343
1,2024,4,33.917411,92.138393,-58.220982,,,,37.573942,93.297285,-55.723343
2,2024,5,35.470651,91.350053,-55.879402,,,,37.573942,93.297285,-55.723343
3,2024,6,36.838288,89.483948,-52.64566,,,,37.573942,93.297285,-55.723343
4,2024,7,33.797044,93.320197,-59.523153,,,,37.573942,93.297285,-55.723343
5,2024,8,38.82896,98.371234,-59.542274,,,,37.573942,93.297285,-55.723343
6,2024,9,43.606877,95.786245,-52.179368,,,,37.573942,93.297285,-55.723343
7,2023,10,39.17696,97.472684,-58.295724,38.504781,93.149362,-54.644581,,,
8,2023,11,36.547619,92.884058,-56.336439,38.504781,93.149362,-54.644581,,,
9,2023,12,47.363636,93.885315,-46.521678,38.504781,93.149362,-54.644581,,,


In [103]:
grouped_df['yearmonth'] =  pd.to_datetime(dict(year=grouped_df['year'], month=grouped_df['month'], day=1))

xrule = (
    alt.Chart()
    .mark_rule(strokeWidth=1)
    .encode(x=alt.datum(alt.DateTime(year=2024, month="March")))
)
before_rule = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y=alt.Y("before_avg_difference_minutes_day_start:Q", title="Minutes"),
        x=alt.X("yearmonth(yearmonth)", title="Time").axis(labels=True, labelAngle=90),
        color=alt.value("black")
    )
)
after_rule = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="after_avg_difference_minutes_day_start:Q",
        x="yearmonth(yearmonth)",
        color=alt.value("black")
        
    )
)
line_before_day_start = alt.Chart().mark_line().encode(
    x='yearmonth(yearmonth):T',
    y='avg_difference_minutes_day_start:Q', 
    color=alt.value("blue")
).transform_filter(
    'datum.yearmonth < datetime(2024, 2, 4)' 
)
line_after_day_start = alt.Chart().mark_line().encode(
    x='yearmonth(yearmonth):T',
    y='avg_difference_minutes_day_start:Q', 
).transform_filter(
    'datum.yearmonth >= datetime(2024, 2, 1)' 
)

# Day end visualization
line_before_day_end = alt.Chart().mark_line().encode(
    x=alt.X("yearmonth(yearmonth)", title="Time").axis(labels=True, labelAngle=90),
    y=alt.Y('avg_difference_minutes_day_end:Q', title=" "), 
    color=alt.value("blue")
).transform_filter(
    'datum.yearmonth < datetime(2024, 2, 4)' 
)
line_after_day_end = alt.Chart().mark_line().encode(
    x='yearmonth(yearmonth):T',
    y='avg_difference_minutes_day_end:Q', 
).transform_filter(
    'datum.yearmonth >= datetime(2024, 2, 1)' 
)
before_rule_day_end = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="before_avg_difference_minutes_day_end:Q",
        x="yearmonth(yearmonth)",
        color=alt.value("black")

    )
)
after_rule_day_end = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="after_avg_difference_minutes_day_end:Q",
        x="yearmonth(yearmonth)",
        color=alt.value("black")

    )
)

# Day difference visualization
line_before_day_difference = alt.Chart().mark_line().encode(
    x=alt.X("yearmonth(yearmonth)", title="Time"),
    y=alt.Y('avg_difference_minutes_day_delay:Q', title=""), 
    color=alt.value("blue")
).transform_filter(
    'datum.yearmonth < datetime(2024, 2, 4)' 
)
line_after_day_difference = alt.Chart().mark_line().encode(
    x=alt.X("yearmonth(yearmonth)", title="Time").axis(labels=True, labelAngle=90),
    y='avg_difference_minutes_day_delay:Q', 
).transform_filter(
    'datum.yearmonth >= datetime(2024, 2, 1)' 
)
before_rule_day_difference = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y=alt.Y("before_avg_difference_minutes_day_delay:Q"), 
        x="yearmonth(yearmonth)",
        color=alt.value("black")

    )
)
after_rule_day_difference = (
    alt.Chart()
    .mark_line(strokeWidth=2, strokeDash=[8,8])
    .encode(
        y="after_avg_difference_minutes_day_delay:Q",
        x="yearmonth(yearmonth)",
        color=alt.value("black")
        
    )
)
# text = line_after.mark_text(xOffset=-5, yOffset=-50, align="right").encode(
#     text="difference:Q",
# ).transform_filter(
#     'datum.difference !== null' 
# )
chart = alt.hconcat()
chart |= alt.layer(
        line_before_day_start, 
        line_after_day_start, 
        before_rule,
        after_rule, 
        # text, 
        xrule,  
        data=grouped_df,

    ).properties(title="Start Delay")
chart |= alt.layer(
        xrule,
        before_rule_day_end,
        after_rule_day_end,
        line_before_day_end,
        line_after_day_end,
        data=grouped_df
    ).properties(title="End Delay")
chart |= alt.layer(
        xrule,
        line_before_day_difference,
        line_after_day_difference,
        before_rule_day_difference,
        after_rule_day_difference,
        data=grouped_df
    ).properties(title="Work Delay")
chart.resolve_scale(y='shared', x='shared').configure_axis(
    grid=True
)