## Children Act timeliess

#### This code has been put together trying to replicate the existing SAS output

In [30]:
# libraries
import pandas as pd
import pydbtools as pydb
import os  # for file paths
import awswrangler as wr

In [31]:
# Structured  dataframes
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

##### Assigning key variables

In [59]:
# define some variables to be used throughout the notebook
db1 = "familyman_dev_v3" #database where Familyman data is stored
db2 = "fcsq" #database where tables created as part of FCSQ processing are stored where required
#snapshot date and pub periods are set in the main run file. However, if running this notebook independently you will need to set them here
snapshot_date = "2023-05-10"
pub_year = 2023 #set the publication year
pub_qtr = 1 #set the publication quarter
fcsq_db_path = f"s3://alpha-family-data/fcsq_processing/CA_disps/" # create path for within athena FCSQ database in the S3 folder, alongside other S3 items

#### Extract date of issue

In [60]:
print("extracting date of issue....")
pydb.create_temp_table(
f""" 
SELECT 
  case_number,
  CAST(value AS Date) AS case_DOI
FROM 
  {db1}.case_fields AS f
 
WHERE
  field_model = 'FM2C_DI'
  AND mojap_snapshot_date = DATE'{snapshot_date}'

""",

"DOI")

extracting date of issue....


#### Create start date using date of issue

In [61]:
#Adding the start date to the main child apps data created in the main child apps process (so data is at child and order level)
#Only including specific order types
print("creating start date....")
pydb.create_temp_table(
f""" 
SELECT 
  t1.case_number,
  t1.receipt_date,
  t1.event,
  t1.field_model,
  t1.order_type,
  t1.order_code,
  t1.order_desc,
  t1.child_role_id,
  t1.order_case_type,
  t2.case_DOI,
  CASE WHEN  t2.case_DOI is null
        THEN t1.receipt_date
       WHEN t1.field_model = 'U22_AT' AND (t2.Case_DOI<t1.Receipt_date)
        THEN t2.Case_DOI
        ELSE t1.Receipt_date END
      AS Start_date
FROM 
  {db2}.ca_apps_child AS t1
      LEFT JOIN __temp__.DOI AS t2 
       ON t1.case_number = t2.case_number
WHERE
  t1.order_code IN (1,4,14,25,27,29,30,31,32)

""",

"ca_apps_issue_date")

creating start date....


#### Matching applications to disposals

In [62]:
#Matching apps data to the main child disposal data set created in the child disposals processing - matches at child and order level
#Filetering out certain order types alongside interim orders
#Ranks the earlies diposal as the first disposal
print("matching applications to disposals....")
pydb.create_temp_table(
f""" 
SELECT 
  a.*,
  YEAR(d.disp_date) AS Year,
  QUARTER(d.disp_date) AS Quarter,
  MONTH(d.disp_date) AS Month,
  d.disp_date,
  date_diff('day',a.start_date, d.disp_date) AS days,
  date_diff('week',a.start_date, d.disp_date) AS weeks,
  ROW_NUMBER() OVER(PARTITION BY a.case_number, a.child_role_id, a.order_code, a.start_date
                       ORDER BY d.disp_date) 
      AS disp_rank,
  d.disposal_court,
  d.disposal_dfj,
  d.disposal_region 
FROM
  __temp__.ca_apps_issue_date a
  LEFT JOIN {db2}.ca_disps_all_children d
    ON a.case_number = d.case_number
    AND a.order_case_type = d.order_case_type
    AND a.child_role_id = d.child_role_id
WHERE 
  date_diff('day',a.start_date, d.disp_date) >= 0
  AND d.event_model NOT IN ('CPA','C21','C27','C30','C31','C33','C35B','C44A','C44B','C46A','C46B','C47A','C47C','C48A', 
                          'C48B','C48C','C49','D51','D84C','MAGEPO','MAGS37')
  AND d.disp_type_code <> 2

""",

"app_disp_match")

matching applications to disposals....


#### Selecting the earliest disposal

In [63]:
#Selects the first diposal for each order type/child matched
#Multiple children can get matched to the same disposal (if the disposal has multiple child id's)
#More than one application/order type can be matched to one disposal - here we are replicating the SAS process 
print("selecting the earliest disposal....")
pydb.create_temp_table(
f""" 
SELECT 
  *
FROM 
  __temp__.app_disp_match
WHERE
  disp_rank = 1

""",

"first_disp")

selecting the earliest disposal....


#### Creation of main timeliess dataset, deletion of dup apps

In [64]:
#Here we get rid of any instances of an application of the same order type for the same child, and only the earliest one is selected
print("creating main timeliness dataset....")
pydb.create_temp_table(
f""" 
WITH dup_app_type AS
 (SELECT 
   *,
   ROW_NUMBER() OVER(PARTITION BY case_number, child_role_id, order_code
                       ORDER BY start_date, case_number, child_role_id, order_code) 
     AS dup_app_rank
 FROM 
   __temp__.first_disp
 )

SELECT
  *
FROM
  dup_app_type
WHERE
  dup_app_rank = 1

""",

"ca_timeliness_all")

creating main timeliness dataset....


#### Legal rep

##### Applicants

In [65]:
#Table of case numbers in which at least one applicant has legal rep
#Applicants table was created during main application processing
print("getting applicants with legal representation....")
pydb.create_temp_table(
f""" 
SELECT 
  DISTINCT
    case_number,
    representation
FROM 
  {db2}.ca_applicants
WHERE
  representation = 'Y'

""",

"app_legal_rep")

getting applicants with legal representation....


##### Respondents

In [66]:
#Table of case numbers in which at least one respondent has legal rep
#Respondents table was created during main application processing
print("getting respondents with legal representation....")
pydb.create_temp_table(
f""" 
SELECT 
  DISTINCT
    case_number,
    representation
FROM 
  {db2}.ca_respondents
WHERE
  representation = 'Y'

""",

"resp_legal_rep")

getting respondents with legal representation....


#### Timeliness with legal rep

In [67]:
print("creating timeliness with party representation....")
pydb.create_temp_table(
f""" 
SELECT 
  t.*,
  CASE WHEN a.representation = 'Y'
       AND  r.representation = 'Y'
         THEN 'Both'
       WHEN a.representation = 'Y'
       AND  r.representation IS NULL
         THEN 'Applicant Only'
       WHEN a.representation IS NULL
       AND  r.representation = 'Y'
         THEN 'Respondent Only' 
      WHEN a.representation IS NULL
      AND  r.representation IS NULL
         THEN 'Neither' END
    AS representation
FROM 
  __temp__.ca_timeliness_all t
  LEFT JOIN __temp__.app_legal_rep a
    ON t.case_number = a.case_number
  LEFT JOIN __temp__.resp_legal_rep r
    ON t.case_number = r.case_number
WHERE 
  t.year > 2010

""",

"timeliness_legal_rep")

creating timeliness with party representation....


#### Creating median groups

In [68]:
#Here the ntile function splits the data in to 2 groups to help calculate the median. Different groupings (eg regional or national) require separate splits
print("creating median tiles....")
pydb.create_temp_table(
f"""
SELECT
  *,
  NTILE(2) OVER (PARTITION BY year, quarter, order_case_type, disposal_region, representation ORDER BY days) 
    AS median_tile_regional,
  NTILE(2) OVER (PARTITION BY year, quarter, order_case_type, representation ORDER BY days) 
    AS median_tile_national
FROM
  __temp__.timeliness_legal_rep

    
""",

"ca_time_median_groups")

creating median tiles....


#### CSV outputs

In [69]:
#Regional, quarterly for csv
#Median calculation takes the max value from the lower half of the median group if an even number of rows, else takes the aveage of the max value from group one and min value from group 2
print("creating regional csv....")
pydb.create_temp_table(
f""" 
 SELECT
  'Children Act (Private)' as Case_type,
  disposal_region AS region,
  Representation,
  CAST(Year AS VARCHAR) ||'-Q'||CAST(Quarter AS VARCHAR) AS Quarter,
  COUNT (*) AS Number_of_cases,
  ROUND(AVG ((days)/7),1) as mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = ROUND(COUNT(*),0)
        THEN (MAX(CASE WHEN median_tile_regional = 1 THEN days END) +
              MIN(CASE WHEN median_tile_regional = 2 THEN days END)) / 2.0
        ELSE MAX(CASE WHEN median_tile_regional = 1 THEN days END)
         END)/7,1) 
     AS median_duration
FROM
  __temp__.ca_time_median_groups
WHERE
  order_case_type = 'P'  
GROUP BY
  year,
  quarter,
  disposal_region,
  representation
""",

"ca_timeliness_region_csv")

creating regional csv....


In [70]:
#E&W, quarterly for csv
#Median calculation takes the max value from the lower half of the median group if an even number of rows, else takes the aveage of the max value from group one and min value from group 2
print("creating E&W csv....")
pydb.create_temp_table(
f""" 
 SELECT
  'Children Act (Private)' as Case_type,
  'England & Wales' AS region,
  Representation,
  CAST(Year AS VARCHAR) ||'-Q'||CAST(Quarter AS VARCHAR) AS Quarter,
  COUNT (*) AS Number_of_disposals,
  ROUND(AVG ((days)/7),1) as mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = ROUND(COUNT(*),0)
        THEN (MAX(CASE WHEN median_tile_national = 1 THEN days END) +
              MIN(CASE WHEN median_tile_national = 2 THEN days END)) / 2.0
        ELSE MAX(CASE WHEN median_tile_national = 1 THEN days END)
         END)/7,1) 
     AS median_duration
FROM
  __temp__.ca_time_median_groups
WHERE
  order_case_type = 'P'
GROUP BY
  year,
  quarter,
  representation
""",

"ca_timeliness_national_csv")

creating E&W csv....


In [71]:
#drop table in Athena (if it already exists)
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE {db2}.ca_csv_timeliness""")
print("dropping previous child csv timeliness dataset in athena....")

dropping previous child csv timeliness dataset in athena....


In [72]:
#Append regional and national csv data
print("combining regional and national csv's....")

#set S3 file path
ca_csv_timeliness_s3_path = os.path.join(fcsq_db_path, "ca_csv_timeliness/")
# Delete all the underlying data stored within the S3 location
if wr.s3.list_objects(ca_csv_timeliness_s3_path):
    print("deleting child csv timeliness dataset in s3....")
    wr.s3.delete_objects(ca_csv_timeliness_s3_path)
    
#Create table in Athena
print("creating child csv timeliness dataset....")

t_child =  f"""
CREATE TABLE {db2}.ca_csv_timeliness WITH
(
    external_location='{ca_csv_timeliness_s3_path}'
) AS
SELECT
   *
FROM
  __temp__.ca_timeliness_region_csv
UNION ALL
SELECT
   *
FROM
  __temp__.ca_timeliness_national_csv;

"""

_ = pydb.start_query_execution_and_wait(t_child)

combining regional and national csv's....
creating child csv timeliness dataset....


##### Export csv to S3

In [73]:
ca_time_csv_data = pydb.read_sql_query ("select * from fcsq.ca_csv_timeliness")
ca_time_csv_data

Unnamed: 0,case_type,region,representation,quarter,number_of_cases,mean_duration,median_duration
0,Children Act (Private),SOUTH EAST,Neither,2016-Q4,1660,12.8,8.0
1,Children Act (Private),MIDLANDS,Neither,2022-Q3,1670,36.0,30.0
2,Children Act (Private),NORTH WEST,Applicant Only,2016-Q2,832,11.5,8.1
3,Children Act (Private),SOUTH EAST,Both,2013-Q3,1171,15.6,10.0
4,Children Act (Private),NORTH EAST,Both,2013-Q3,1505,23.0,19.7
...,...,...,...,...,...,...,...
1563,Children Act (Private),SOUTH EAST,Both,2022-Q4,915,30.0,22.7
1564,Children Act (Private),WALES,Neither,2020-Q2,262,15.6,13.6
1565,Children Act (Private),WALES,Respondent Only,2019-Q1,226,24.7,18.0
1566,Children Act (Private),LONDON,Neither,2022-Q1,995,40.6,35.6


#### Lookup

In [74]:
#Annual lookup
print("creating annual lookup....")
pydb.create_temp_table(
f""" 
 SELECT
  'Private Law|'||CAST(Year AS VARCHAR)||'|' as lookup,
  COUNT(CASE WHEN representation = 'Both' THEN case_number END)
    AS Both_n,
  ROUND ((AVG(CASE WHEN representation = 'Both' THEN (days/7) END)),3)
    AS Both_mean,
  COUNT(CASE WHEN representation = 'Applicant Only' THEN case_number END)
    AS Applicant_n,
  ROUND ((AVG(CASE WHEN representation = 'Applicant Only' THEN (days/7) END)),3)
    AS Applicant_mean,
  COUNT(CASE WHEN representation = 'Respondent Only' THEN case_number END)
    AS Respondent_n,
  ROUND ((AVG(CASE WHEN representation = 'Respondent Only' THEN (days/7) END)),3)
    AS respondent_mean,
  COUNT(CASE WHEN representation = 'Neither' THEN case_number END)
    AS neither_n,
  ROUND ((AVG(CASE WHEN representation = 'Neither' THEN (days/7) END)),3)
    AS neither_mean,  
  COUNT(*) AS All_n,
  ROUND((AVG(days/7)),3)
    AS All_mean
FROM
  __temp__.timeliness_legal_rep
WHERE  
  CASE WHEN {pub_qtr} = 4
        THEN Year BETWEEN 2011 AND {pub_year}
        ELSE Year BETWEEN 2011 AND {pub_year} -1 END
  AND order_case_type = 'P'
GROUP BY
  Year;
""",

"Annual_lookup")

creating annual lookup....


In [75]:
#Quarterly lookup
print("creating quarterly lookup....")
pydb.create_temp_table(
f""" 
 SELECT
  'Private Law|'||CAST(Year AS VARCHAR)||'|Q'||CAST(Quarter AS VARCHAR) AS lookup,
  COUNT(CASE WHEN representation = 'Both' THEN case_number END)
    AS Both_n,
  ROUND ((AVG(CASE WHEN representation = 'Both' THEN (days/7) END)),3)
    AS Both_mean,
  COUNT(CASE WHEN representation = 'Applicant Only' THEN case_number END)
    AS Applicant_n,
  ROUND ((AVG(CASE WHEN representation = 'Applicant Only' THEN (days/7) END)),3)
    AS Applicant_mean,
  COUNT(CASE WHEN representation = 'Respondent Only' THEN case_number END)
    AS Respondent_n,
  ROUND ((AVG(CASE WHEN representation = 'Respondent Only' THEN (days/7) END)),3)
    AS respondent_mean,
  COUNT(CASE WHEN representation = 'Neither' THEN case_number END)
    AS neither_n,
  ROUND ((AVG(CASE WHEN representation = 'Neither' THEN (days/7) END)),3)
    AS neither_mean,  
  COUNT(*) AS All_n,
  ROUND((AVG(days/7)),3)
    AS All_mean
FROM
  __temp__.timeliness_legal_rep
WHERE  
  order_case_type = 'P'
GROUP BY
  Year,
  Quarter;
""",

"Quarter_lookup")

creating quarterly lookup....


In [76]:
#drop table in Athena (if it already exists)
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE {db2}.ca_t10_timeliness""")
print("dropping previous child t10 timeliness dataset in athena....")

dropping previous child t10 timeliness dataset in athena....


In [77]:
print("combining annual and quarterly lookups....")

#set S3 file path
ca_t10_timeliness_s3_path = os.path.join(fcsq_db_path, "ca_t10_timeliness/")
# Delete all the underlying data stored within the S3 location
if wr.s3.list_objects(ca_t10_timeliness_s3_path):
    print("deleting child t10 timeliness dataset in s3....")
    wr.s3.delete_objects(ca_t10_timeliness_s3_path)
    
#Create table in Athena
print("creating child t10 timeliness dataset....")
t_child =  f"""
CREATE TABLE {db2}.ca_t10_timeliness WITH
(
    external_location='{ca_t10_timeliness_s3_path}'
) AS
SELECT
  *
FROM
  __temp__.annual_lookup
UNION ALL
SELECT
  *
FROM
  __temp__.quarter_lookup;

"""

_ = pydb.start_query_execution_and_wait(t_child)

combining annual and quarterly lookups....
creating child t10 timeliness dataset....


In [78]:
ca_time_csv_data = pydb.read_sql_query ("select * from fcsq.ca_t10_timeliness")
ca_time_csv_data

Unnamed: 0,lookup,both_n,both_mean,applicant_n,applicant_mean,respondent_n,respondent_mean,neither_n,neither_mean,all_n,all_mean
0,Private Law|2014|Q4,3899,18.973,6440,11.669,2131,18.167,5382,14.514,17852,14.898
1,Private Law|2019|,18807,17.368,29053,12.648,12802,20.725,38596,16.333,99258,16.017
2,Private Law|2021|,20483,26.771,26987,21.833,15206,32.166,37340,26.396,100016,26.119
3,Private Law|2021|Q4,5101,28.401,6517,22.682,3588,34.836,9206,28.204,24412,27.746
4,Private Law|2016|Q1,4066,15.982,6290,11.444,2309,18.388,6197,14.447,18862,14.259
...,...,...,...,...,...,...,...,...,...,...,...
56,Private Law|2018|Q1,3900,15.586,6203,12.314,2524,19.010,7362,15.366,19989,14.922
57,Private Law|2020|Q4,5280,23.338,6732,18.103,3512,28.148,9423,22.202,24947,22.174
58,Private Law|2013|Q2,7893,21.660,8554,11.037,2258,18.589,3890,11.817,22595,15.637
59,Private Law|2017|Q2,4025,14.533,7027,11.217,2453,17.492,7617,13.800,21122,13.509
