## Private Law timeliness (table 9)

#### Import packages

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import os  # for file paths
import awswrangler as wr

In [None]:
# Widen the pandas display limits so wide query results print in full
for _option, _setting in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(_option, _setting)

#### Assigning key variables

In [None]:
# Database where the Familyman data is stored on the platform
db1 = "familyman_dev_v3"
# Database used for FCSQ processing
fcsq_db = "fcsq"
# S3 path for the FCSQ Athena database, alongside other S3 items
# (plain string: there are no placeholders, so no f-string is needed)
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_disps/"
# Snapshot date and publication period are set in the main run file.
# If running this notebook independently, uncomment and set them here:
#snapshot_date = "2022-08-04"
#pub_year = 2022
#pub_qtr = 3

### Two steps in this process (extraction of the date of issue (DOI) and calculation of the first case close date) are identical to processing steps used in other areas of the Children Act process. They have been deliberately duplicated here (rather than reusing the existing tables) to help show that T9 is more or less a standalone process. Other than the final disposal date for each case, the calculation for T9 does not use any of the tables created in any of the other Children Act FCSQ processing stages.

In [None]:
# Date of issue per case: read the FM2C_DI case field from Familyman for the
# chosen snapshot and cast its value to a date.
print("extracting date of issue....")
doi_query = f""" 
SELECT 
  case_number,
  CAST(value AS Date) AS case_DOI
FROM 
  {db1}.case_fields AS f
WHERE
  field_model = 'FM2C_DI'
  AND mojap_snapshot_date = DATE'{snapshot_date}'

"""
pydb.create_temp_table(doi_query, "DOI_P")

#### Extraction of all private law initial applications

In [None]:
# All initial applications (U22 events, field model U22_AT) on private law
# cases, i.e. case numbers with 'P' as the fifth character. No filtering on
# order types etc. is applied, to replicate the SAS process.
# Extra fields (court code, application types) will eventually be dropped but
# are kept in this first extraction in case they are useful to refer back to.
print("extracting private law intial applications....")
private_apps_query = f"""
SELECT 
    e.case_number,
    e.receipt_date,
    f.event,
    f.field_model,
    f.value as all_event_app_types,
    CAST(SUBSTR(CAST(f.event AS varchar),1,3) AS integer) AS court_code
  FROM 
    {db1}.event_fields f
    INNER JOIN {db1}.events e
      ON f.event = e.event
  WHERE 
    field_model = 'U22_AT'
    AND SUBSTR(e.case_number,5,1) = 'P'
    AND e.error = 'N'
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.mojap_snapshot_date = DATE'{snapshot_date}'
"""
pydb.create_temp_table(private_apps_query, "ca_apps_p")

#### Add date of issue to private law applications

##### Open question: does selecting the MIN DOI exclude cases where there is no DOI recorded? (From the SAS process - taken from OPT according to notes.) Only a couple of thousand or so cases do not have a DOI recorded.

In [None]:
# One row per private law case with its earliest recorded date of issue.
# The LEFT JOIN keeps cases that have no DOI row; their issue_date is NULL.
print("joining issue date to applications....")
apps_with_doi_query = """
SELECT
  a.case_number,
  MIN(d.Case_DOI) AS issue_date
FROM
   __temp__.ca_apps_p a
   LEFT JOIN __temp__.DOI_P d
     ON a.case_number = d.case_number
GROUP BY
  a.case_number  

"""
pydb.create_temp_table(apps_with_doi_query, "ca_apps_doi_p")

#### Extracting case close event dates and ranking to keep the earliest date

In [None]:
# First time each private law case was closed: take the case closed events
# (U24), rank them per case by entry date and keep the top-ranked row. Cases
# can be re-opened, so only the earliest close is wanted.
# The entry date is used because that is the date used in the SAS processing.
print("extracting case close dates and ranking to keep the earliest....")
first_close_query = f"""
WITH case_closed_events_p AS (
SELECT
    case_number,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC)
        AS min_date_rank
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND SUBSTR(case_number,5,1) = 'P'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
)

SELECT
  case_number,
  entry_date AS first_close_date
FROM
  case_closed_events_p
WHERE 
  min_date_rank = 1
"""
pydb.create_temp_table(first_close_query, "ca_first_close_p")

#### Get last disposal date in the case

In [None]:
# Get the last disposal date in each case, ignoring disposals dated after the
# first close date and disposals whose event model is C21.
# The disposal table is created in the main disposal processing. It includes
# disposal types that are not included in the final child disposal dataset
# (which filters out codes > 53), so this covers all disposal types and not
# just final orders made. As with previous steps, the logic matches the SAS output.
# The join is written as an INNER JOIN: the WHERE filters on the disposal
# columns already discarded closed cases with no matching disposal, so the
# previous LEFT JOIN behaved as an inner join and the result is unchanged.
print("getting the last disposal in the case....")
pydb.create_temp_table(
f"""
SELECT
  f.case_number,
  f.first_close_date,
  MAX(c.disp_date) AS last_disposal_date
FROM
  __temp__.ca_first_close_p f
  INNER JOIN {fcsq_db}.ca_non_filtered_disposals c
    ON f.case_number = c.case_number
WHERE
  c.disp_date <= f.first_close_date
  AND c.event_model <> 'C21'
GROUP BY
  f.case_number,
  f.first_close_date
""",

"ca_last_disp_date_p")

#### Matching starts to final disposal

In [None]:
# Calculate the time in days from the date of issue to the final disposal in
# the case (not counting anything after the case was first closed).
# Time periods (year and quarter) are based on the date of the final disposal.
# Rows are kept only when:
#   - the disposal is on or after the issue date (this also drops cases with
#     no issue date, because date_diff on NULL is NULL),
#   - the disposal year is between 2011 and the publication year, and
#   - the disposal is not in a quarter after the publication quarter.
# The last filter uses "> pub_qtr" so that every later quarter of the
# publication year is excluded; testing only "= pub_qtr + 1" let disposals
# dated two or more quarters ahead slip into the output.
print("matching starts to final disposal....")
pydb.create_temp_table(
f"""
SELECT
  s.case_number,
  s.issue_date,
  c.last_disposal_date,
  date_diff('day',s.issue_date, c.last_disposal_date) AS issue_to_last_disp_days,
  EXTRACT(YEAR FROM c.last_disposal_date) AS disp_year,
  EXTRACT(QUARTER FROM c.last_disposal_date) AS disp_quarter
FROM
  __temp__.ca_apps_doi_p s
  INNER JOIN __temp__.ca_last_disp_date_p c
    ON s.case_number = c.case_number
WHERE
  date_diff('day',s.issue_date, c.last_disposal_date) >= 0
  AND (EXTRACT(YEAR FROM c.last_disposal_date) BETWEEN 2011 AND {pub_year})
  AND NOT (EXTRACT(YEAR FROM c.last_disposal_date) = {pub_year}
            AND EXTRACT(QUARTER FROM c.last_disposal_date) > {pub_qtr})
""",

"ca_issue_to_last_disp")

#### Creating median groups

In [None]:
# NTILE(2) splits the rows, ordered by duration, into two groups so the median
# can be worked out later. Each grouping level needs its own split: one per
# disposal year and one per disposal year and quarter.
print("creating median tiles....")
median_tiles_query = """
SELECT
  *,
  NTILE(2) OVER (PARTITION BY disp_year ORDER BY issue_to_last_disp_days) 
    AS median_tile_annual,
  NTILE(2) OVER (PARTITION BY disp_year, disp_quarter ORDER BY issue_to_last_disp_days) 
    AS median_tile_quarter
FROM
  __temp__.ca_issue_to_last_disp

    
"""
pydb.create_temp_table(median_tiles_query, "pri_time_median_groups")

### CSV

In [None]:
# Annual rows for the csv.
# Durations are held in days and divided by 7 to report weeks.
# Median calculation: median_tile_annual splits each year's rows into a lower
# half (tile 1) and an upper half (tile 2), ordered by duration.
#   - Even number of rows: the median is the average of the largest value in
#     tile 1 and the smallest value in tile 2.
#   - Odd number of rows: NTILE gives the extra row to tile 1, so the median
#     is the largest value in tile 1.
# The parity test is COUNT(*) % 2 = 0. The earlier test compared COUNT(*) % 2
# with COUNT(*) itself, which is only true for a single-row group, so
# even-sized groups returned the lower-middle value and a single-row group
# returned NULL.
# Only complete years are reported: the publication year is included only
# when the publication quarter is 4.
print("creating annual csv....")
pydb.create_temp_table(
f"""
SELECT
  'Children Act (Private)' as case_type,
  disp_year AS year,
  CAST (NULL AS VARCHAR) AS quarter,
  COUNT (*) AS Number_of_disposals,
  ROUND(AVG(issue_to_last_disp_days)/7,1) as Mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
        THEN (MAX(CASE WHEN median_tile_annual = 1 THEN issue_to_last_disp_days END) +
                 MIN(CASE WHEN median_tile_annual = 2 THEN issue_to_last_disp_days END)
                  ) / 2.0
        ELSE MAX(CASE WHEN median_tile_annual = 1 THEN issue_to_last_disp_days END)
    END)/7,1) as median_duration
FROM
  __temp__.pri_time_median_groups
WHERE
  CASE WHEN {pub_qtr} = 4
        THEN disp_year BETWEEN 2011 AND {pub_year}
        ELSE disp_year BETWEEN 2011 AND {pub_year} -1 END
GROUP BY
  disp_year
""",

"private_annual")

In [None]:
# Quarterly rows for the csv.
# Durations are held in days and divided by 7 to report weeks.
# Median calculation: median_tile_quarter splits each quarter's rows into a
# lower half (tile 1) and an upper half (tile 2), ordered by duration.
#   - Even number of rows: the median is the average of the largest value in
#     tile 1 and the smallest value in tile 2.
#   - Odd number of rows: NTILE gives the extra row to tile 1, so the median
#     is the largest value in tile 1.
# The parity test is COUNT(*) % 2 = 0. The earlier test compared COUNT(*) % 2
# with COUNT(*) itself, which is only true for a single-row group, so
# even-sized groups returned the lower-middle value and a single-row group
# returned NULL.
print("creating quarterly csv....")
pydb.create_temp_table(
"""
SELECT
  'Children Act (Private)' as case_type,
  CAST (NULL AS INTEGER) AS year,
  CAST(disp_year AS VARCHAR) ||'-Q'||CAST(disp_quarter AS VARCHAR) AS quarter,
  COUNT (*) AS Number_of_disposals,
  ROUND(AVG (issue_to_last_disp_days)/7,1) as Mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
        THEN (MAX(CASE WHEN median_tile_quarter = 1 THEN issue_to_last_disp_days END) +
                 MIN(CASE WHEN median_tile_quarter = 2 THEN issue_to_last_disp_days END)
                  ) / 2.0
        ELSE MAX(CASE WHEN median_tile_quarter = 1 THEN issue_to_last_disp_days END)
    END)/7,1) as median_duration
FROM
  __temp__.pri_time_median_groups
GROUP BY
  disp_year,
  disp_quarter
""",

"private_quarterly")

In [None]:
# Stack the annual rows on top of the quarterly rows to form the final csv
# table. Both inputs are selected with * so they must share a column order.
print("combining annual and quarterly csv....")
combined_csv_query = """ 
SELECT
  *
FROM
  __temp__.private_annual
UNION ALL
SELECT
  *
FROM
  __temp__.private_quarterly
"""
pydb.create_temp_table(combined_csv_query, "private_time_csv")

#### Exporting to S3

In [None]:
# Read the combined annual and quarterly table back from Athena
final_csv_query = "select * from __temp__.private_time_csv"
pri_time_data = pydb.read_sql_query(final_csv_query)

In [None]:
# Hold the query result as a pandas DataFrame ready for export
pri_time_df = pd.DataFrame(data=pri_time_data)

In [None]:
# Write the Table 9 data to S3 with a header row. No index argument is passed,
# so pandas' default index handling applies.
print("exporting csv to S3....")
t9_csv_path = r's3://alpha-family-data/fcsq_processing/children_act_temporary_outputs/t9_data.csv'
pri_time_df.to_csv(t9_csv_path, header=True)