In [None]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# Widen the pandas display limits so that query results are easier to read
# when a dataframe is shown inline.
_display_options = {
    "display.max_columns": 100,
    "display.width": 900,
    "display.max_colwidth": 200,
}
for _option_name, _option_value in _display_options.items():
    pd.set_option(_option_name, _option_value)

In [None]:
#Variables to be used in this notebook

#Name of the Athena database the output tables are stored in.
#NOTE(review): the queries in the cells visible here hard-code "fcsq." instead of
#reading this variable - confirm whether anything else relies on it.
fcsq_database = "fcsq"

#S3 bucket the data is saved to. `bucket` is used further down to delete the
#objects stored for a table before that table is re-created.
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

#Last full year to be published - including this publication
annual_year = 2023

#Current publication variables
current_year = 2024
current_quarter = 2

#Guard against a typo in the quarter: the roll-over logic below only makes sense for 1-4
if current_quarter not in (1, 2, 3, 4):
    raise ValueError(f"current_quarter must be 1, 2, 3 or 4, got {current_quarter!r}")

#Next publication variables
#These are derived from the current publication rather than typed in by hand, so
#they cannot drift out of step when current_year / current_quarter are updated.
#The queries below use them to exclude the quarter that follows the one being published.
if current_quarter == 4:
    #Q4 rolls over into Q1 of the following year
    next_quarter_year, next_quarter = current_year + 1, 1
else:
    next_quarter_year, next_quarter = current_year, current_quarter + 1

In [None]:
# Stage the rows of fcsq.divorce_fct_nisi that the later cells work from as the
# temporary table __temp__.nisi_rep_timeliness: case identifiers, Year/Quarter,
# region, the four representation flags (pet_rep, resp_rep, both_rep,
# neither_rep) and app_to_nisi_weeks.
# Only rows with Proceeding_type_code 'D' or 'N' and Law = 'Old' are kept.
# The query has no placeholders, so it is a plain string rather than an f-string.
create_nisi_rep_timeliness_table ="""
SELECT
  FM_CASE_CID,
  legal_case_id,
  Year,
  Quarter,
  region,
  pet_rep,
  resp_rep,   
  both_rep,
  neither_rep,
  app_to_nisi_weeks

FROM fcsq.divorce_fct_nisi

WHERE Proceeding_type_code in ('D','N') AND Law = 'Old';
"""
pydb.create_temp_table(create_nisi_rep_timeliness_table,'nisi_rep_timeliness')

In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness")

In [None]:
# Build __temp__.nisi_repgrp_timeliness from __temp__.nisi_rep_timeliness,
# collapsing the four representation flags into one labelled Representation column.
# The CASE branches are tested in order, so a row with both_rep = 1 is labelled
# '1 Both' whatever the other flags hold; a row where none of the flags equals 1
# gets a NULL Representation because there is no ELSE branch.
# Rows are limited to Year > 2010, and the single quarter identified by
# next_quarter_year / next_quarter is excluded.
create_nisi_repgrp_timeliness_table =f"""
SELECT
  FM_CASE_CID,
  legal_case_id,
  Year,
  Quarter,
  Region,
  CASE WHEN both_rep = 1 THEN '1 Both'
       WHEN pet_rep = 1 THEN '2 Petitioner Only'
       WHEN resp_rep = 1 THEN '3 Respondent Only'
       WHEN neither_rep = 1 THEN '4 Neither'
   END AS Representation,
   app_to_nisi_weeks  
    
FROM __temp__.nisi_rep_timeliness

WHERE Year > 2010 AND 
      NOT (Year = {next_quarter_year} AND Quarter = {next_quarter});
"""
pydb.create_temp_table(create_nisi_repgrp_timeliness_table,'nisi_repgrp_timeliness')

In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.nisi_repgrp_timeliness LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_repgrp_timeliness")

In [None]:
# Summarise __temp__.nisi_repgrp_timeliness by quarter, region and representation
# into __temp__.nisi_rep_timeliness_region: number of disposals plus the mean and
# median of app_to_nisi_weeks (both rounded to 1 decimal place).
#
# Median: the inner query orders each group by app_to_nisi_weeks and splits it into
# two halves with NTILE(2). For an even-sized group the median is the average of the
# largest value in half 1 and the smallest value in half 2. For an odd-sized group
# NTILE gives the extra row to half 1, so the largest value in half 1 is the middle value.
#
# NOTE(review): the mean only uses durations >= 0, whereas number_of_disposals and
# the median are computed over every row of the group - confirm this is intended.
#
# The query has no placeholders, so it is a plain string rather than an f-string.
create_nisi_rep_timeliness_region_table ="""
SELECT
  Case_Type,
  Quarter,
  region, 
  representation,
  COUNT (*) AS number_of_disposals,    
  ROUND(AVG(CASE WHEN app_to_nisi_weeks >= 0 THEN app_to_nisi_weeks END),1) AS mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
              THEN (MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END) + MIN(CASE WHEN data_half = 2 THEN app_to_nisi_weeks END)) /2.0 
              ELSE MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END)
         END), 1) AS median_duration    
    
FROM (

SELECT
  'Divorce (inc. Financial Remedy)' as Case_type,
   CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)) AS Quarter,    
   Region,
   Representation,
   app_to_nisi_weeks,
   NTILE(2) OVER (PARTITION BY CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)), region, representation 
                   ORDER BY app_to_nisi_weeks) AS data_half

FROM __temp__.nisi_repgrp_timeliness)
    
GROUP BY Case_Type, Quarter, Region, Representation;
"""
pydb.create_temp_table(create_nisi_rep_timeliness_region_table,'nisi_rep_timeliness_region')

In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness_region LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness_region")

In [None]:
# Same summary as __temp__.nisi_rep_timeliness_region, but with every row assigned to
# the single region 'England & Wales': the inner query replaces Region with that
# literal and NTILE(2) is partitioned by quarter and representation only.
# Output (__temp__.nisi_rep_timeliness_region_ew): number of disposals plus the mean
# and median of app_to_nisi_weeks (both rounded to 1 decimal place).
#
# Median: each group is ordered by app_to_nisi_weeks and split into two halves with
# NTILE(2). For an even-sized group the median is the average of the largest value in
# half 1 and the smallest value in half 2. For an odd-sized group NTILE gives the
# extra row to half 1, so the largest value in half 1 is the middle value.
#
# NOTE(review): the mean only uses durations >= 0, whereas number_of_disposals and
# the median are computed over every row of the group - confirm this is intended.
#
# The query has no placeholders, so it is a plain string rather than an f-string.
create_nisi_rep_timeliness_region_ew_table ="""
SELECT
  Case_Type,
  Quarter,
  region, 
  representation,
  COUNT (*) AS number_of_disposals,    
  ROUND(AVG(CASE WHEN app_to_nisi_weeks >= 0 THEN app_to_nisi_weeks END),1) AS mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
              THEN (MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END) + MIN(CASE WHEN data_half = 2 THEN app_to_nisi_weeks END)) /2.0 
              ELSE MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END)
         END), 1) AS median_duration    
    
FROM (

SELECT
  'Divorce (inc. Financial Remedy)' as Case_type,
   CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)) AS Quarter,    
   'England & Wales' AS Region,
   Representation,
   app_to_nisi_weeks,
   NTILE(2) OVER (PARTITION BY CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)), representation 
                   ORDER BY app_to_nisi_weeks) AS data_half

FROM __temp__.nisi_repgrp_timeliness)
    
GROUP BY Case_Type, Quarter, Region, Representation;
"""
pydb.create_temp_table(create_nisi_rep_timeliness_region_ew_table,'nisi_rep_timeliness_region_ew')

In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness_region_ew LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness_region_ew")

In [None]:
# Build the annual rows of the DIV_TABLE_10 lookup as __temp__.DIV_TABLE_10_LOOKUP_ANNUAL:
# one row per Year from 2011 to annual_year (inclusive), keyed by
# 'Old Divorce (incl. annulment and FR)|<Year>|'.
# For each representation flag the *_n column sums the flag and the *_mean column
# averages app_to_nisi_weeks over the rows where that flag equals 1 (rounded to
# 3 decimal places); all_n / all_mean cover every row of the year.
# unknown_n and unknown_mean are always the literal '.'.
# NOTE(review): unlike the region summaries, these means do not exclude negative
# durations - confirm this is intended.
# NOTE(review): the source table is written here as Nisi_rep_timeliness but is
# created as nisi_rep_timeliness.
create_DIV_TABLE_10_LOOKUP_ANNUAL_table =f"""
SELECT
  CONCAT('Old Divorce (incl. annulment and FR)|', CAST(Year AS VARCHAR),'|') as lookup,
  SUM(both_rep) AS both_n,
  ROUND(AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks end),3) AS both_mean,
  SUM(pet_rep) AS applicant_n,
  ROUND(AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks end),3) AS applicant_mean,
  SUM(resp_rep) AS respondent_n,
  ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks end),3) AS respondent_mean,
  SUM (neither_rep) AS neither_n,
  ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks end),3) AS neither_mean,
  '.' AS unknown_n,
  '.' AS unknown_mean,
  COUNT(*) AS all_n,
  ROUND(AVG(app_to_nisi_weeks),3) AS all_mean
    
FROM __temp__.Nisi_rep_timeliness

WHERE Year BETWEEN 2011 and {annual_year}

GROUP BY Year;
"""
pydb.create_temp_table(create_DIV_TABLE_10_LOOKUP_ANNUAL_table,'DIV_TABLE_10_LOOKUP_ANNUAL')

In [None]:
# Sanity check: show the first rows of the annual lookup temp table.
preview_annual_sql = "SELECT * from __temp__.DIV_TABLE_10_LOOKUP_ANNUAL LIMIT 10"
test = pydb.read_sql_query(preview_annual_sql)
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="DIV_TABLE_10_LOOKUP_ANNUAL")

In [None]:
# Build the quarterly rows of the DIV_TABLE_10 lookup as __temp__.DIV_TABLE_10_LOOKUP_QTR:
# one row per Year/Quarter, keyed by 'Old Divorce (incl. annulment and FR)|<Year>|Q<Quarter>'.
# The columns match the annual lookup: for each representation flag the *_n column
# sums the flag and the *_mean column averages app_to_nisi_weeks over the rows where
# that flag equals 1 (rounded to 3 decimal places); all_n / all_mean cover every row.
# unknown_n and unknown_mean are always the literal '.'.
# Rows are limited to Year 2011..current_year, minus the single quarter identified
# by next_quarter_year / next_quarter.
# NOTE(review): current_quarter is not used in this filter, so any other quarter of
# current_year present in the data is included - confirm this is intended.
create_DIV_TABLE_10_LOOKUP_QTR_table =f"""
SELECT
  CONCAT('Old Divorce (incl. annulment and FR)|',CAST(Year AS VARCHAR),'|Q', CAST(Quarter AS VARCHAR)) as lookup,
  SUM(both_rep) AS both_n,
  ROUND(AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks end),3) AS both_mean,
  SUM(pet_rep) AS applicant_n,
  ROUND(AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks end),3) AS applicant_mean,
  SUM(resp_rep) AS respondent_n,
  ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks end),3) AS respondent_mean,
  SUM (neither_rep) AS neither_n,
  ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks end),3) AS neither_mean,
  '.' AS unknown_n,
  '.' AS unknown_mean,
  COUNT(*) AS all_n,
  ROUND(AVG(app_to_nisi_weeks),3) AS all_mean
    
FROM __temp__.nisi_rep_timeliness

WHERE Year BETWEEN 2011 AND {current_year} AND
      NOT(Year = {next_quarter_year} AND Quarter = {next_quarter})

GROUP BY Year, Quarter;
"""
pydb.create_temp_table(create_DIV_TABLE_10_LOOKUP_QTR_table,'DIV_TABLE_10_LOOKUP_QTR')

In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.DIV_TABLE_10_LOOKUP_QTR LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="DIV_TABLE_10_LOOKUP_QTR")

In [None]:
# Clear out any previous version of fcsq.div_table_10_lookup so it can be re-created:
# drop the Athena table, then delete the objects stored under its S3 location.
# The prefix ends with "/" so that only objects inside the table's own folder are
# deleted. S3 prefixes are plain string matches, so without the slash the delete
# would also hit any sibling key that merely starts with the same text
# (for example a hypothetical "div_table_10_lookup_old/...").
drop_div_table_10_lookup = "DROP TABLE IF EXISTS fcsq.div_table_10_lookup"
pydb.start_query_execution_and_wait(drop_div_table_10_lookup)
bucket.objects.filter(Prefix="fcsq_processing/Divorce/div_table_10_lookup/").delete();

In [None]:
# Create fcsq.div_table_10_lookup as a Parquet table at the given S3 location by
# stacking the annual and quarterly lookup temp tables.
# Both halves are selected with SELECT *, so the UNION ALL relies on the two temp
# tables having the same columns in the same order.
# NOTE: the ORDER BY applies to the CTAS query; anything reading the table back
# should add its own ORDER BY if it needs sorted rows.
# The query has no placeholders, so it is a plain string rather than an f-string.
create_DIV_TABLE_10_LOOKUP_table ="""
CREATE TABLE IF NOT EXISTS fcsq.div_table_10_lookup
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/div_table_10_lookup') AS
SELECT *
FROM
(SELECT * FROM __temp__.DIV_TABLE_10_LOOKUP_ANNUAL
UNION ALL
SELECT * FROM __temp__.DIV_TABLE_10_LOOKUP_QTR)
ORDER BY lookup;
"""
pydb.start_query_execution_and_wait(create_DIV_TABLE_10_LOOKUP_table)

In [None]:
# Export fcsq.div_table_10_lookup to CSV in the sdp_process folder.
# The rows are read back with an explicit ORDER BY: a SELECT without one has no
# guaranteed row order, so the sort used when the table was created cannot be
# relied on to carry through to the CSV.
df = pydb.read_sql_query("select * from fcsq.div_table_10_lookup order by lookup;")
df.to_csv(path_or_buf = 's3://alpha-family-data/fcsq_processing/Divorce/sdp_process/div_table_10_lookup.csv',index=False)


In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.DIV_TABLE_10_LOOKUP LIMIT 10")
#test

In [None]:
# Sanity check: count the rows in the published lookup table.
row_count_sql = "SELECT count(*) as row_count from fcsq.DIV_TABLE_10_LOOKUP"
test = pydb.read_sql_query(row_count_sql)
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="DIV_TABLE_10_LOOKUP")

In [None]:
# Clear out any previous version of fcsq.nisi_rep_timeliness so it can be re-created:
# drop the Athena table, then delete the objects stored under its S3 location.
# The prefix ends with "/" so that only objects inside the table's own folder are
# deleted. S3 prefixes are plain string matches, so without the slash the delete
# would also hit any sibling key that merely starts with the same text
# (for example a hypothetical "nisi_rep_timeliness_region/...").
drop_nisi_rep_timeliness = "DROP TABLE IF EXISTS fcsq.nisi_rep_timeliness"
pydb.start_query_execution_and_wait(drop_nisi_rep_timeliness)
bucket.objects.filter(Prefix="fcsq_processing/Divorce/nisi_rep_timeliness/").delete();


In [None]:
# Create fcsq.nisi_rep_timeliness as a Parquet table at the given S3 location by
# stacking the per-region summary and the England & Wales summary.
# Both halves are selected with SELECT *, so the UNION ALL relies on the two temp
# tables having the same columns in the same order.
# NOTE(review): this variable name is also used by the cell that stages
# __temp__.nisi_rep_timeliness, so running this cell rebinds it to the CTAS query.
# The query has no placeholders, so it is a plain string rather than an f-string.
create_nisi_rep_timeliness_table ="""
CREATE TABLE IF NOT EXISTS fcsq.nisi_rep_timeliness
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/nisi_rep_timeliness') AS
SELECT * FROM __temp__.nisi_rep_timeliness_region
UNION ALL
SELECT * FROM __temp__.nisi_rep_timeliness_region_ew;
"""
pydb.start_query_execution_and_wait(create_nisi_rep_timeliness_table)

In [None]:
# Export fcsq.nisi_rep_timeliness to CSV in the sdp_process folder.
nisi_rep_timeliness_csv_path = 's3://alpha-family-data/fcsq_processing/Divorce/sdp_process/nisi_rep_timeliness.csv'
nisi_rep_timeliness_select = "select * from fcsq.nisi_rep_timeliness;"
df = pydb.read_sql_query(nisi_rep_timeliness_select)
df.to_csv(path_or_buf=nisi_rep_timeliness_csv_path, index=False)


In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.nisi_rep_timeliness LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness")

In [None]:
#pydb.delete_database_and_data("__temp__")