## Children Act timeliness

#### This code has been put together trying to replicate the existing SAS output

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import os  # for file paths
import awswrangler as wr

In [None]:
# Widen the pandas display limits so wide query results are shown in full
# rather than being truncated when a dataframe is displayed.
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

##### Assigning key variables

In [None]:
# Notebook-wide settings referenced by the cells below.
db1 = "familyman_live_v4"  # database where the Familyman data is stored
db2 = "fcsq"  # database holding tables created by the FCSQ processing

# The snapshot date and publication period are normally set in the main run
# file. When this notebook is run on its own they must be set here: either
# rely on the lookup below (the most recent mojap_snapshot_date found in the
# events table) or pin a date by hand, e.g.
# snapshot_date = "2022-08-04"
get_snapshot_date = (
    f"SELECT mojap_snapshot_date from {db1}.events "
    "order by mojap_snapshot_date desc limit 1"
)
snapshot_date = str(
    pydb.read_sql_query(get_snapshot_date)["mojap_snapshot_date"].values[0]
)

pub_year = 2023  # publication year
pub_qtr = 3  # publication quarter

# S3 location for the FCSQ Athena database, alongside the other S3 items.
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_disps/"

#### Extract date of issue

In [None]:
# Build the temp table "DOI": one row per 'FM2C_DI' case field in the chosen
# snapshot, with the field value cast to a date and exposed as case_DOI.
print("extracting date of issue....")
doi_sql = f""" 
SELECT 
  case_number,
  CAST(value AS Date) AS case_DOI
FROM 
  {db1}.case_fields AS f
 
WHERE
  field_model = 'FM2C_DI'
  AND mojap_snapshot_date = DATE'{snapshot_date}'

"""
pydb.create_temp_table(doi_sql, "DOI")

#### Create start date using date of issue

In [None]:
# Attach a start date to the child-level application data (one row per child
# and order) produced by the main child applications process.
#
# Start_date rules, in order:
#   1. no date of issue for the case      -> receipt date
#   2. field_model is 'U22_AT' and the
#      date of issue precedes the receipt -> date of issue
#   3. anything else                      -> receipt date
#
# Only the order codes listed in the WHERE clause are carried forward.
print("creating start date....")
start_date_sql = f""" 
SELECT 
  t1.case_number,
  t1.receipt_date,
  t1.event,
  t1.field_model,
  t1.order_type,
  t1.order_code,
  t1.order_desc,
  t1.child_role_id,
  t1.order_case_type,
  t2.case_DOI,
  CASE WHEN  t2.case_DOI is null
        THEN t1.receipt_date
       WHEN t1.field_model = 'U22_AT' AND (t2.Case_DOI<t1.Receipt_date)
        THEN t2.Case_DOI
        ELSE t1.Receipt_date END
      AS Start_date
FROM 
  {db2}.ca_apps_child AS t1
      LEFT JOIN __temp__.DOI AS t2 
       ON t1.case_number = t2.case_number
WHERE
  t1.order_code IN (1,4,14,25,27,29,30,31,32)

"""
pydb.create_temp_table(start_date_sql, "ca_apps_issue_date")

#### Matching applications to disposals

In [None]:
# Pair each application row with the child-level disposals created by the
# child disposals processing. Rows are matched on case number, order case
# type and child role id.
#
# Filters applied to the matched disposals:
#   * the disposal must fall on or after the application start date;
#   * the listed event models are excluded;
#   * disp_type_code 2 is excluded (per the original note, interim orders).
# Because these filters test columns of the disposal table, applications with
# no qualifying disposal drop out, so the LEFT JOIN behaves as an inner join.
#
# disp_rank numbers the disposals for each case / child / order code / start
# date by disposal date, so rank 1 is the earliest disposal.
print("matching applications to disposals....")
match_sql = f""" 
SELECT 
  a.*,
  YEAR(d.disp_date) AS Year,
  QUARTER(d.disp_date) AS Quarter,
  MONTH(d.disp_date) AS Month,
  d.disp_date,
  date_diff('day',a.start_date, d.disp_date) AS days,
  date_diff('week',a.start_date, d.disp_date) AS weeks,
  ROW_NUMBER() OVER(PARTITION BY a.case_number, a.child_role_id, a.order_code, a.start_date
                       ORDER BY d.disp_date) 
      AS disp_rank,
  d.disposal_court,
  d.disposal_dfj,
  d.disposal_region 
FROM
  __temp__.ca_apps_issue_date a
  LEFT JOIN {db2}.ca_disps_all_children d
    ON a.case_number = d.case_number
    AND a.order_case_type = d.order_case_type
    AND a.child_role_id = d.child_role_id
WHERE 
  date_diff('day',a.start_date, d.disp_date) >= 0
  AND d.event_model NOT IN ('CPA','C21','C27','C30','C31','C33','C35B','C44A','C44B','C46A','C46B','C47A','C47C','C48A', 
                          'C48B','C48C','C49','D51','D84C','MAGEPO','MAGS37')
  AND d.disp_type_code <> 2

"""
pydb.create_temp_table(match_sql, "app_disp_match")

#### Selecting the earliest disposal

In [None]:
# Keep only the first (rank 1) disposal for each matched child / order type.
# Notes carried over from the SAS process being replicated:
#   * several children can be matched to the same disposal when that disposal
#     has multiple child ids;
#   * more than one application / order type can be matched to one disposal.
print("selecting the earliest disposal....")
first_disp_sql = """ 
SELECT 
  *
FROM 
  __temp__.app_disp_match
WHERE
  disp_rank = 1

"""
pydb.create_temp_table(first_disp_sql, "first_disp")

#### Creation of main timeliness dataset, deletion of duplicate applications

In [None]:
# Remove repeat applications: where the same child has more than one
# application of the same order type on a case, rank them by start date and
# keep only the earliest (dup_app_rank = 1).
print("creating main timeliness dataset....")
timeliness_sql = """ 
WITH dup_app_type AS
 (SELECT 
   *,
   ROW_NUMBER() OVER(PARTITION BY case_number, child_role_id, order_code
                       ORDER BY start_date, case_number, child_role_id, order_code) 
     AS dup_app_rank
 FROM 
   __temp__.first_disp
 )

SELECT
  *
FROM
  dup_app_type
WHERE
  dup_app_rank = 1

"""
pydb.create_temp_table(timeliness_sql, "ca_timeliness_all")

#### Legal rep

##### Applicants

In [None]:
# Distinct case numbers for which at least one applicant is recorded with
# representation = 'Y'. The applicants table comes from the main application
# processing.
print("getting applicants with legal representation....")
applicant_rep_sql = f""" 
SELECT 
  DISTINCT
    case_number,
    representation
FROM 
  {db2}.ca_applicants
WHERE
  representation = 'Y'

"""
pydb.create_temp_table(applicant_rep_sql, "app_legal_rep")

##### Respondents

In [None]:
# Distinct case numbers for which at least one respondent is recorded with
# representation = 'Y'. The respondents table comes from the main application
# processing.
print("getting respondents with legal representation....")
respondent_rep_sql = f""" 
SELECT 
  DISTINCT
    case_number,
    representation
FROM 
  {db2}.ca_respondents
WHERE
  representation = 'Y'

"""
pydb.create_temp_table(respondent_rep_sql, "resp_legal_rep")

#### Timeliness with legal rep
##### - For the FOI request, only 2023 (quarters 1 to 3) is included

In [None]:
# Label every timeliness row with the representation status of its case by
# joining on the two case-level legal representation tables:
#   applicant 'Y' and respondent 'Y'    -> 'Both'
#   applicant 'Y', no respondent match  -> 'Applicant Only'
#   no applicant match, respondent 'Y'  -> 'Respondent Only'
#   no match on either side             -> 'Neither'
# The output is limited to rows whose year is 2023 and quarter is 1, 2 or 3.
print("creating timeliness with party representation....")
legal_rep_sql = """ 
SELECT 
  t.*,
  CASE WHEN a.representation = 'Y'
       AND  r.representation = 'Y'
         THEN 'Both'
       WHEN a.representation = 'Y'
       AND  r.representation IS NULL
         THEN 'Applicant Only'
       WHEN a.representation IS NULL
       AND  r.representation = 'Y'
         THEN 'Respondent Only' 
      WHEN a.representation IS NULL
      AND  r.representation IS NULL
         THEN 'Neither' END
    AS representation
FROM 
  __temp__.ca_timeliness_all t
  LEFT JOIN __temp__.app_legal_rep a
    ON t.case_number = a.case_number
  LEFT JOIN __temp__.resp_legal_rep r
    ON t.case_number = r.case_number
WHERE 
  t.year = 2023
  AND quarter IN (1,2,3)

"""
pydb.create_temp_table(legal_rep_sql, "timeliness_legal_rep")

#### CSV outputs
##### For the FOI request the output is broken down by DFJ, restricted to the DFJs we are interested in. Year and quarter have been removed because the time period has already been filtered in an earlier step

In [None]:
# Regional rows for the csv: private law ('P') rows for the selected DFJs,
# grouped by DFJ, region and representation status.
# Output columns: Case_type, dfj, region, Representation, Number_of_cases
# (a count of rows in each group) and mean_duration (mean duration in weeks,
# rounded to one decimal place).
print("creating regional csv....")
region_csv_sql = """
SELECT
  'Children Act (Private)' AS Case_type,
  disposal_dfj AS dfj,
  disposal_region AS region,
  Representation,
  COUNT(*) AS Number_of_cases,
  -- days is cast to DOUBLE before averaging: days / 7 on an integer column is
  -- integer division, which would truncate every duration to whole weeks
  -- and bias the mean downwards.
  ROUND(AVG(CAST(days AS DOUBLE)) / 7, 1) AS mean_duration
FROM
  __temp__.timeliness_legal_rep
WHERE
  order_case_type = 'P'
  AND disposal_dfj IN ('Central Family Court', 'Chelmsford DFJ', 'East London Family Court', 'Medway DFJ', 'West London Family Court')
GROUP BY
  disposal_dfj,
  disposal_region,
  representation
"""
pydb.create_temp_table(region_csv_sql, "ca_timeliness_region_csv")

In [None]:
# England & Wales rows for the csv: all private law ('P') rows, grouped by
# representation status only. dfj and region are filled with the constant
# 'England & Wales' so the columns line up with the regional table.
# Output columns: Case_type, dfj, region, Representation, Number_of_disposals
# (a count of rows in each group) and mean_duration (mean duration in weeks,
# rounded to one decimal place).
print("creating E&W csv....")
national_csv_sql = """
SELECT
  'Children Act (Private)' AS Case_type,
  'England & Wales' AS dfj,
  'England & Wales' AS region,
  Representation,
  COUNT(*) AS Number_of_disposals,
  -- days is cast to DOUBLE before averaging: days / 7 on an integer column is
  -- integer division, which would truncate every duration to whole weeks
  -- and bias the mean downwards.
  ROUND(AVG(CAST(days AS DOUBLE)) / 7, 1) AS mean_duration
FROM
  __temp__.timeliness_legal_rep
WHERE
  order_case_type = 'P'
GROUP BY
  representation
"""
pydb.create_temp_table(national_csv_sql, "ca_timeliness_national_csv")

In [None]:
# Stack the regional rows on top of the national rows. UNION ALL matches the
# columns by position and the result takes its column names from the first
# (regional) query.
print("combining regional and national csv's....")
combined_csv_sql = """ 
SELECT
   *
FROM
  __temp__.ca_timeliness_region_csv
UNION ALL
SELECT
   *
FROM
  __temp__.ca_timeliness_national_csv

"""
pydb.create_temp_table(combined_csv_sql, "ca_timeliness_csv")

##### Export csv to S3

In [None]:
# Read the combined regional + national table back from Athena.
ca_time_csv_data = pydb.read_sql_query(
    "select * from __temp__.ca_timeliness_csv"
)

In [None]:
# Wrap the query result in a pandas DataFrame ready for export.
ca_time_csv_df = pd.DataFrame(data=ca_time_csv_data)

In [None]:
# Write the combined table to S3 as a csv with a header row and no index
# column.
print("exporting csv to S3....")
ca_time_csv_df.to_csv(
    r's3://alpha-family-data/Adhoc/timeliness_dfj.csv',
    header=True,
    index=False,
)

#### Separate code for total timeliness - not grouped by representation - this has not been added to the csv output; the results will just be copied and pasted for now

In [None]:
# Regional totals: private law ('P') rows for the selected DFJs, grouped by
# DFJ and region only (no representation breakdown).
# Output columns: Case_type, dfj, region, Number_of_cases (a count of rows in
# each group) and mean_duration (mean duration in weeks, rounded to one
# decimal place).
print("creating regional total timeliness....")
region_total_sql = """
SELECT
  'Children Act (Private)' AS Case_type,
  disposal_dfj AS dfj,
  disposal_region AS region,
  COUNT(*) AS Number_of_cases,
  -- days is cast to DOUBLE before averaging: days / 7 on an integer column is
  -- integer division, which would truncate every duration to whole weeks
  -- and bias the mean downwards.
  ROUND(AVG(CAST(days AS DOUBLE)) / 7, 1) AS mean_duration
FROM
  __temp__.timeliness_legal_rep
WHERE
  order_case_type = 'P'
  AND disposal_dfj IN ('Central Family Court', 'Chelmsford DFJ', 'East London Family Court', 'Medway DFJ', 'West London Family Court')
GROUP BY
  disposal_dfj,
  disposal_region
"""
pydb.create_temp_table(region_total_sql, "ca_total_timeliness_region")

In [None]:
# Display the regional totals (to be copied out by hand).
pydb.read_sql_query("select * from __temp__.ca_total_timeliness_region")

In [None]:
# England & Wales total: a single row covering all private law ('P') rows,
# with no representation breakdown. dfj and region are filled with the
# constant 'England & Wales'.
# Output columns: Case_type, dfj, region, Number_of_disposals (a count of
# rows) and mean_duration (mean duration in weeks, rounded to one decimal
# place).
print("creating national total timeliness....")
national_total_sql = """
SELECT
  'Children Act (Private)' AS Case_type,
  'England & Wales' AS dfj,
  'England & Wales' AS region,
  COUNT(*) AS Number_of_disposals,
  -- days is cast to DOUBLE before averaging: days / 7 on an integer column is
  -- integer division, which would truncate every duration to whole weeks
  -- and bias the mean downwards.
  ROUND(AVG(CAST(days AS DOUBLE)) / 7, 1) AS mean_duration
FROM
  __temp__.timeliness_legal_rep
WHERE
  order_case_type = 'P'
"""
pydb.create_temp_table(national_total_sql, "ca_total_timeliness_national")

In [None]:
# Display the England & Wales total (to be copied out by hand).
pydb.read_sql_query("select * from __temp__.ca_total_timeliness_national")