# Children Act Disposals

### [Disposals](all_disposals)
##### [Disposal events - orders made, orders refused and orders of no order]()
##### [Withdrawn events]()
##### [Combining all disposals]()
##### [Adding order type details]()
##### [Removing duplicate disposal types]()
### [Children]()
##### [Children recorded on the disposal event]()
##### [Children recorded on the case]()
##### [No children recorded]()
##### [Child disposals dataset]()
### [Cases]()
##### [Final/non final order date]()
##### [Case closed event date]()
##### [Case re-opened event date]()
##### [Dates added to cases]()
##### [Final date in case]()
##### [Case close date calculation]()
##### [Case closed dataset]()

#### Import packages

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [None]:
# Widen the pandas display limits so query results print as readable, structured dataframes
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

#### Assigning key variables

In [None]:
# Athena database where the FamilyMan data is stored on the platform
db1 = "familyman_dev_v3"
# Athena database used for FCSQ processing
fcsq_db = "fcsq"
# S3 folder that backs the tables this notebook creates in the FCSQ database (alongside other S3 items)
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_disps/"
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")
# Snapshot date to read. Each FamilyMan record carries a snapshot date (a column in the tables)
# that identifies the state of the database at that point in time
snapshot_date = "2022-08-04"

## Extracting disposals 

#### All disposals - excluding withdrawn events

In [None]:
# Build the main set of disposal events (withdrawn events are extracted separately).
# Query 1: orders where the type of order is defined by the event model itself, so order_type is 'N/A'.
# Query 2: disposals where the type of order is held in an event field value. Included in this are
#   orders of no order (ORDNOM) and orders refused (ORDREF). C43 contains child arrangement orders
#   (contact/residence - the most common order types in Children Act cases).
#   C43, ORDNOM and ORDREF are given the generic lookup code 'GEN'.
# CROSS JOIN UNNEST splits a field value holding several comma-separated order types into one row per type.
# disp_date is the receipt date, falling back to the entry date when the receipt date is null.
# The error filter in query 2 is qualified with the events alias (as the other events/event_fields
#   joins in this notebook do) so it cannot become ambiguous in the join.
pydb.create_temp_table(
f"""
SELECT
  event,
  case_number,
  event_model,
  event_model as lookup_event_code,
  receipt_date,
  entry_date,
  COALESCE(receipt_date, entry_date) AS disp_date,
  creating_court,
  CAST(NULL AS VARCHAR) AS field_model,
  CAST(NULL AS VARCHAR) AS all_event_ord_types,
  'N/A' AS order_type
FROM
  {db1}.events
WHERE
  (event_model IN ('C23','C25','C26','C27','C28','C29',
                    'C30','C31','C32A','C32B','C33','C34A','C34B','C35A','C35B','C37',
                    'C38A','C38B','C39','C40','C42','C43A','C44A','C44B','C45A','C45B',
                    'C53','C80','C81','C82')
            AND error = 'N')
  AND mojap_snapshot_date = date '{snapshot_date}'

UNION ALL

SELECT
  e.event,
  e.case_number,
  e.event_model,
  CASE WHEN e.event_model IN ('C43', 'ORDNOM', 'ORDREF')
      THEN 'GEN'
      ELSE e.event_model END
    AS lookup_event_code,
  e.receipt_date,
  e.entry_date,
  COALESCE(e.receipt_date, e.entry_date) AS disp_date,
  e.creating_court,
  f.field_model,
  f.value as all_event_ord_types,
  TRIM(ord_type) as order_type
FROM
  {db1}.events e
  INNER JOIN {db1}.event_fields f
    ON e.event = f.event
  CROSS JOIN UNNEST(SPLIT(f.value,',')) AS t(ord_type)
WHERE
  ((f.field_model IN ('C21_3', 'C24_6', 'C36_5', 'C43_5', 'ORDNOM_5', 'ORDREF_5'))
    AND e.error = 'N')
  AND e.mojap_snapshot_date = date '{snapshot_date}'
  AND f.mojap_snapshot_date = date '{snapshot_date}'

""",

"ca_disps1")

#### Withdrawn events
###### Withdrawn events are extracted separately from other disposal events as they need to be linked back to the original application to get the order type being withdrawn

In [None]:
# Extract withdrawn events (G63).
# To get the type of order being withdrawn we link back to the original application (U22/G50) event:
#   the G63_1 field value is matched against the application's event id, and the application-type
#   field (G50_AT/U22_AT) of that event supplies the order type(s).
# CROSS JOIN UNNEST splits the application value into a single row per order type.
# INNER JOINs are used because the WHERE filters on the application field and the event row
#   already discard any withdrawal without a match, so outer joins would only be misleading.

pydb.create_temp_table(
f"""
SELECT
  w.event,
  e.case_number,
  e.event_model,
  'GEN' AS lookup_event_code,
  e.receipt_date,
  e.entry_date,
  COALESCE(e.receipt_date, e.entry_date) AS disp_date,
  e.creating_court,
  w.field_model,
  a.value as all_event_ord_types,
  TRIM(ord_type) as order_type
FROM
  {db1}.event_fields w
  INNER JOIN {db1}.event_fields a
    ON w.value = CAST (a.event AS VARCHAR)
  INNER JOIN {db1}.events e
    ON w.event = e.event
  CROSS JOIN UNNEST(SPLIT(a.value,',')) AS t(ord_type)
WHERE
  w.field_model = 'G63_1'
  AND a.field_model IN ('G50_AT','U22_AT')
  AND e.error = 'N'
  AND w.mojap_snapshot_date = date '{snapshot_date}'
  AND e.mojap_snapshot_date = date '{snapshot_date}'
  AND a.mojap_snapshot_date = date '{snapshot_date}'

""",

"ca_withdrawn")

#### Combining withdrawn events with other disposal events

In [None]:
# Drop the table in Athena if it already exists.
# IF EXISTS stops this cell failing on a first run, when there is no table to drop yet.
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE IF EXISTS {fcsq_db}.ca_non_filtered_disposals""")

In [None]:
# Append the withdrawn events to the main set of disposal events and store the result as an Athena table.
# The table is also used within the private law timeliness process.
# Year, Quarter and Month columns are derived from disp_date for both inputs.
# SELECT * is used on both sides of the UNION ALL, so the two temp tables must keep the same column order.

# Set the S3 file path for the table's data
non_filtered_disps_s3_path = os.path.join(fcsq_db_path, "non_filtered_disps/")
# Delete all the underlying data stored within the S3 location, so the table can be re-created there
if wr.s3.list_objects(non_filtered_disps_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(non_filtered_disps_s3_path)
    
# Create the table in Athena

non_filtered_disps =  f"""
CREATE TABLE {fcsq_db}.ca_non_filtered_disposals WITH
(
    external_location='{non_filtered_disps_s3_path}'
) AS
SELECT 
  *,
  EXTRACT(YEAR FROM disp_date) AS Year,
  EXTRACT(QUARTER FROM disp_date) AS Quarter,
  EXTRACT(MONTH FROM disp_date) AS Month
FROM
  __temp__.ca_disps1
UNION ALL 
SELECT
  *,
  EXTRACT(YEAR FROM disp_date) AS Year,
  EXTRACT(QUARTER FROM disp_date) AS Quarter,
  EXTRACT(MONTH FROM disp_date) AS Month
FROM
  __temp__.ca_withdrawn

"""
    
_ = pydb.start_query_execution_and_wait(non_filtered_disps)

#### Adding the order and disposal type descriptions and court/region information, along with calculation of the case type based on the type of order. Filtering out order types we do not include

In [None]:
# Add order/disposal type descriptions and court/region details to every disposal.
# The order type lookup file has been created separately.
# Order codes should run up to 45, but codes up to 54 are kept as this is what some of the published data includes.
# The WHERE filter on the lookup's order_code also removes any disposal that has no match in the order type lookup.
# order_case_type is based on the type of order being disposed: 'C' when the order code is below 19 or the
#   5th character of the case number is 'C', otherwise 'P'. A case may have both public and private law disposal types.
# disp_type: G63 = Withdrawn, ORDREF = Refused, ORDNOM = No order, order codes 40-45 = Interim Order, otherwise Order.
# Disposals dated before 2014 take the pre-2014 court and region names from the court lookup.
pydb.create_temp_table(
f"""
SELECT
  t1.*,
  CASE WHEN t2.order_code < 19 OR (SUBSTR(case_number,5,1) = 'C') THEN 'C'
        ELSE 'P' END 
      AS order_case_type,  
  CASE WHEN t1.event_model = 'G63' THEN 3
       WHEN t1.event_model = 'ORDREF' THEN 4
       WHEN t1.event_model = 'ORDNOM' THEN 5
       WHEN t2.order_code BETWEEN 40 AND 45 THEN 2
       ELSE 1 END
    AS disp_type_code,
  CASE WHEN t1.event_model = 'G63' THEN 'Withdrawn'
       WHEN t1.event_model = 'ORDREF' THEN 'Refused'
       WHEN t1.event_model = 'ORDNOM' THEN 'No order'
       WHEN t2.order_code BETWEEN 40 AND 45 THEN 'Interim Order'
       ELSE 'Order' END
    AS disp_type,
  order_code,
  order_desc,
  CASE WHEN t1.year < 2014 THEN t3.court_pre_2014
        ELSE t3.name END 
    AS disposal_court,
  t3.dfj_new AS disposal_dfj,
  CASE WHEN t1.year < 2014 THEN t3.region_pre2014
        ELSE t3.region END 
    AS disposal_region
FROM
  {fcsq_db}.ca_non_filtered_disposals t1
  LEFT JOIN {fcsq_db}.ca_order_type_lookup t2
    ON t1.lookup_event_code = t2.event_model
    AND t1.order_type = t2.value
  LEFT JOIN {fcsq_db}.court_mv_feb21_dfj t3
    ON t1.creating_court = t3.court
WHERE
  t2.order_code BETWEEN 0 AND 54

""",
    
"ca_disps_order_name")

#### Excluding duplicate order types within the same event. Creation of case level case type 

In [None]:
# Drop the table in Athena if it already exists.
# IF EXISTS stops this cell failing on a first run, when there is no table to drop yet.
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE IF EXISTS {fcsq_db}.ca_all_disposals""")

In [None]:
# SELECT DISTINCT removes duplicate order types within an event. The order type field (which contains the
#   letter code) is left out of the select list because different codes can map to the same order type
#   (eg 'CAST' & 'CN' are both contact).
# The duplicates account for around 0.25% of all disposals, so it is not a significant number.
# main_case_type ensures that in the small number of cases with both private and public order types,
#   only one case type is selected for the case (based on order type activity within the case):
#   a 'C' disposal in a case that also has a 'P' disposal is counted as 'P'.
# For main_case_type, where the case number is 'P' and the case has both public and private law disposals,
#   it will be counted within private law cases disposed.
# FM_case_type is based on the letter within the case number (5th character), so there is one per case.
# Case numbers whose 5th character is neither 'P' nor 'C' (domestic violence and adoption case types) are set
#   to 'NA' in main_case_type/FM_case_type so they can be excluded from final CA case counts.


# Set the S3 file path for the table's data
all_disps_s3_path = os.path.join(fcsq_db_path, "all_disps/")
# Delete all the underlying data stored within the S3 location, so the table can be re-created there
if wr.s3.list_objects(all_disps_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(all_disps_s3_path)
    
# Create the table in Athena

all_disps =  f"""
CREATE TABLE {fcsq_db}.ca_all_disposals WITH
(
    external_location='{all_disps_s3_path}'
) AS
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    EXTRACT(YEAR FROM disp_date) AS Year,
    EXTRACT(QUARTER FROM disp_date) AS Quarter,
    order_case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
         WHEN order_case_type = 'C' 
              AND case_number in (SELECT case_number
                                  FROM __temp__.ca_disps_order_name
                                  WHERE order_case_type = 'P')
           THEN 'P'
        ELSE order_case_type END
      AS main_case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
           ELSE  SUBSTR(case_number,5,1) END
      AS FM_case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    /*order_type, excluded as the same order type can have multiple order type codes and will therefore be double counted eg CAST & CN*/
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court,
    disposal_court,
    disposal_dfj,
    disposal_region
  FROM 
    __temp__.ca_disps_order_name

"""

_ = pydb.start_query_execution_and_wait(all_disps)

## Adding children to the disposal events
##### This is done in 3 stages:    
######   - First, some children are recorded directly on the event
######   - Second, where the child data field within the event is blank, we pull out all the children recorded on the case from the roles table
######   - Finally, there are a small number of cases in which children are recorded on neither the event nor the case. Here we default to one child per disposal event, with unknown characteristics

#### Children - recorded on the disposal event

In [None]:
# First we extract the child role ID where the child is listed on the event.
# CROSS JOIN UNNEST splits an event field that lists multiple children into one row per child.
# The WHERE filter on the event field model means only disposals with a matching children field are kept,
#   even though the join is written as a LEFT JOIN.
# TRY_CAST gives a NULL child_role_id for any entry that cannot be read as a number.
# NOTE(review): an entry made up only of spaces passes the <> '' check (it is applied before TRIM) and ends
#   up as a row with a NULL child_role_id - confirm this is the intended handling.
pydb.create_temp_table(
f"""
  SELECT 
    a.*,
    f.value children,
    TRY_CAST(TRIM(child_role_id) as bigint) child_role_id
  FROM 
    {fcsq_db}.ca_all_disposals a
    LEFT JOIN {db1}.event_fields f
      ON f.event = a.event
   CROSS JOIN UNNEST(SPLIT(f.value,',')) AS t(child_role_id)
  WHERE 
     f.field_model IN('C21_CH','C23_CH','C24_CH','C25_CH','C26_CH','C27_CH','C28_CH','C29_CH',
                      'C30_CH','C31_CH','C32A_CH','C32B_CH','C33_CH','C34A_CH',
                      'C34B_CH','C35A_CH','C35B_CH','C36_CH','C37_CH','C38A_CH',
                      'C38B_CH','C39_CH','C40_CH','C42_CH','C43_CH',
                      'C43A_CH','C44A_CH','C44B_CH','C45A_CH','C45B_CH','C53_CH',
                      'C80_CH','C81_CH','C82_CH','CPA_CH','ORDNOM_CH','ORDREF_CH', 'G63_CH')
    AND child_role_id <> ''
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
""",

"ca_disps_child_event")

##### Now link to the parties table to get child characteristics

In [None]:
# Link the children listed on the event to the roles and parties tables to pick up each child's dob and gender.
# Only roles whose delete_flag is 'N' are kept. Where the only child on an event is flagged as deleted, the
#   disposal event is excluded here, but it gets picked up again at the stage where we include children not
#   recorded on the case or the event, and will default to a child count of 1.
# Both joins are inner joins, so rows whose child_role_id has no matching role/party are dropped as well.
pydb.create_temp_table(
f"""
  SELECT 
    a.*,
    p.dob,
    p.gender
  FROM 
    __temp__.ca_disps_child_event a
    JOIN {db1}.roles r on a.child_role_id = r.role
    JOIN {db1}.parties p on r.party = p.party
  WHERE 
    r.delete_flag = 'N'
    AND r.mojap_snapshot_date = DATE'{snapshot_date}'
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
""",

"ca_disps_child_event2")

#### Children - recorded on the case (not recorded on the disposal event)

In [None]:
# Secondly, we take all the events in which children were not directly recorded and link to the case
#   details to get the child information recorded on the case.
# Where a child id is not recorded on the event, this means the event relates to all children listed on the case.
# Events are excluded when they appear in ca_disps_child_event (the event-level child list built earlier).
# Children are the case roles with role_model CHLDC or CHLDZ whose delete_flag is 'N'.
# We get the dob and gender of the child from the parties table.
pydb.create_temp_table(
f"""
  SELECT
    a.*,
    r.role child_role_id,
    p.dob,
    p.gender
  FROM
    {fcsq_db}.ca_all_disposals a
    JOIN {db1}.roles r 
      on a.case_number = r.case_number
    JOIN {db1}.parties p 
      on r.party = p.party 
  WHERE
    event not in (SELECT event FROM __temp__.ca_disps_child_event)
    AND role_model in ('CHLDC', 'CHLDZ')
    AND r.delete_flag = 'N'
    AND r.mojap_snapshot_date = DATE'{snapshot_date}'
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
""",

"ca_disps_child_case")

#### Children - not recorded on event or case

In [None]:
# There are a small number of disposals where the children are not recorded on the event and there are also
#   no children recorded on the case. This tends to be for DV cases, where CA order types may have been made
#   but children are not necessarily recorded on the case.
# Here we default to just one child per order/disposal, with unknown characteristics: every disposal that is
#   in neither ca_disps_child_event2 nor ca_disps_child_case is kept once, with NULL child id, dob and gender.
pydb.create_temp_table(
f"""
  SELECT
    *,
    CAST(NULL AS INTEGER) AS child_role_id,
    CAST(NULL AS DATE) dob,
    CAST(NULL AS INTEGER) AS gender
  FROM
    {fcsq_db}.ca_all_disposals
  WHERE
    event NOT IN (SELECT event FROM __temp__.ca_disps_child_event2)
    AND event NOT IN (SELECT event FROM __temp__.ca_disps_child_case)
""",

"ca_disps_no_child")

### Child disposals data set
##### Combining children data sets together

In [None]:
# Drop the table in Athena if it already exists.
# IF EXISTS stops this cell failing on a first run, when there is no table to drop yet.
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE IF EXISTS {fcsq_db}.ca_disps_all_children""")

In [None]:
# Append the three child datasets created previously (children recorded on the event, children recorded on
#   the case, and disposals with no child recorded), each of which also carries all the disposal data.
# The child event table has already been linked to the parties table to get the dob/gender info.
# The three SELECT lists are identical apart from the source table, so the UNION ALL columns line up.

# Set the S3 file path for the table's data
all_child_disps_s3_path = os.path.join(fcsq_db_path, "all_child_disps/")
# Delete all the underlying data stored within the S3 location, so the table can be re-created there
if wr.s3.list_objects(all_child_disps_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(all_child_disps_s3_path)
    
# Create the table in Athena

all_child_disps =  f"""
CREATE TABLE {fcsq_db}.ca_disps_all_children WITH
(
    external_location='{all_child_disps_s3_path}'
) AS
  SELECT 
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.field_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court,
    disposal_court,
    disposal_dfj,
    disposal_region
  FROM
    __temp__.ca_disps_child_event2 a
  UNION ALL
  SELECT
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.field_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court,
    disposal_court,
    disposal_dfj,
    disposal_region
  FROM
    __temp__.ca_disps_child_case a
  UNION ALL
  SELECT
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.field_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court,
    disposal_court,
    disposal_dfj,
    disposal_region
  FROM
    __temp__.ca_disps_no_child a
"""

_ = pydb.start_query_execution_and_wait(all_child_disps)

## Closed cases
##### 3 different event types are used to determine the case closed date (not all cases will have all events populated):
#####   - Date of final order/non-final order
#####   - Date of case closed event
#####   - Date of case re-opened event

##### For each date event, a flag is created to identify the last date within each case, which is then added to each case and the case close date can be determined

#### Final order/non-final order events

In [None]:
# Extract final order information for disposals. The value field (FO_value) indicates whether the event
#   was a final order within the case.
# max_date_rank numbers the events within each case and FO value from the latest disposal date backwards,
#   so rank 1 flags the latest final order (and the latest non-final order) in each case.
# The event id is used as a tie-breaker: without it, events sharing the same date within a case would be
#   ranked arbitrarily, and the court taken from the rank 1 row could change between runs.
pydb.create_temp_table(
f"""
  SELECT
    e.event,
    e.case_number,
    COALESCE(e.receipt_date, e.entry_date) AS disp_date,
    f.field_model,
    f.value as FO_value,
    ROW_NUMBER() OVER(PARTITION BY e.case_number, f.value
                       ORDER BY COALESCE(e.receipt_date, e.entry_date) DESC, e.event DESC)
      AS max_date_rank,
    e.creating_court
  FROM
    {db1}.event_fields f
    JOIN {db1}.events e
      ON f.event = e.event
  WHERE
      f.field_model IN ('C21_FO','C23_FO','C24_FO','C25_FO','C26_FO','C27_FO','C28_FO','C29_FO','C30_FO',
                        'C31_FO','C32A_FO','C32B_FO','C33_FO','C34A_FO','C34B_FO','C35A_FO','C35B_FO','C36_FO',
                        'C37_FO','C38A_FO','C38B_FO','C39_FO','C40_FO','C42_FO','C43_FO','C43A_FO','C44A_FO',
                        'C44B_FO','C45A_FO','C45B_FO','C53_FO','C80_FO','C81_FO','C82_FO','CPA_FO',
                        'ORDNOM_FO','ORDREF_FO','G63_FO')
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.error = 'N'
""",

"ca_final_orders")

#### Case closed events

In [None]:
# Extract the case closed events (U24) and, as with final orders, flag the latest one recorded for each
#   case: max_date_rank = 1 is the case closed event with the latest entry date.
# The event id is used as a tie-breaker so that two close events with the same entry date are always
#   ranked the same way, keeping the court taken from the rank 1 row reproducible.
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY entry_date DESC, event DESC)
      AS max_date_rank,
    creating_court
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_closed_events")
 

#### Case re-opened events

In [None]:
# Extract the case re-opened events (G62) and flag the latest one for each case:
#   max_date_rank = 1 is the re-open event with the latest entry date.
# The event id is used as a tie-breaker so that events sharing the same entry date are always ranked
#   the same way and the rank 1 row is reproducible between runs.
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY entry_date DESC, event DESC)
      AS max_date_rank
FROM
    {db1}.events
WHERE
  event_model = 'G62'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_reopened_events")
 

#### Final order/case closed/case re-opened dates added to case

In [None]:
# Here we get a distinct list of case numbers (with their main case type) and add to them the dates
#   calculated earlier, so that for each case we have the last final order, last non-final order,
#   last case closed and last case re-opened event.
# Each value is looked up with a scalar subquery on the rank 1 row of the relevant temp table; the court is
#   also picked up for the last final order and the last case closed event.
# Interim order disposals (disp_type_code 2) are excluded when building the list of cases.
pydb.create_temp_table(
f"""
SELECT
    DISTINCT
    t1.case_number,
    t1.main_case_type AS case_type,
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS last_final_order,
    (SELECT f.creating_court FROM __temp__.ca_final_orders f
                              WHERE f.max_date_rank = 1 
                              AND f.fo_value = 'Y'
                              AND t1.case_number = f.case_number)
        AS last_final_order_court,
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS last_no_final_order,    
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.max_date_rank = 1 
                         AND t1.case_number = c.case_number) 
        AS last_close_date,  
    (SELECT c.creating_court FROM __temp__.ca_case_closed_events c
                              WHERE c.max_date_rank = 1 
                              AND t1.case_number = c.case_number) 
        AS last_close_date_court,     
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.max_date_rank = 1 
                         AND t1.case_number = r.case_number)
        AS last_reopen
FROM
  {fcsq_db}.ca_all_disposals t1
WHERE 
  disp_type_code <> 2
  
""",
    
"ca_closed_date_events")

#### Calculate final event type within each case

In [None]:
# Here the code compares each of the different dates and adds a column (last_date_type) to show what type
#   of event the last date recorded on the case is.
# The latest of the four dates is worked out once in the inner query (missing dates count as 1900-01-01)
#   and then compared against each date in turn, rather than repeating the same expression per branch.
# Where dates tie, the first matching branch wins: case closed, then final order, then non final order,
#   then case reopened. last_date_type is NULL when a case has none of the four dates.
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_final_order_court,
  last_no_final_order,
  last_close_date,
  last_close_date_court,
  last_reopen,
  CASE WHEN last_close_date = latest_event_date
             THEN 'case closed'
       WHEN last_final_order = latest_event_date
             THEN 'final order'
       WHEN last_no_final_order = latest_event_date
             THEN 'non final order'
       WHEN last_reopen = latest_event_date
             THEN 'case reopened'
     END AS last_date_type
FROM
  (
    SELECT
      *,
      GREATEST(COALESCE(last_final_order, CAST('1900-01-01' AS DATE)),
               COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE)),
               COALESCE(last_close_date, CAST('1900-01-01' AS DATE)),
               COALESCE(last_reopen, CAST('1900-01-01' AS DATE)))
        AS latest_event_date
    FROM
      __temp__.ca_closed_date_events
  ) AS dated
""",

"ca_last_case_date")

#### Calculate case close date

In [None]:
# TODO: check we are happy with how we deal with the case date where case close date = case reopen date.
# For the vast majority of cases the close date will be the date of the last final order. There are some
#   exceptions (where the case is flagged as closed and the re-open/non final orders are recorded as later
#   than the last final order).
# The decision on which event supplies the close date is made once in the inner query (close_basis) and is
#   then used for both the closed date and the closed court, so the two columns cannot drift apart:
#     'close event'                - use the case closed event
#     'final order or close event' - use the last final order, falling back to the case closed event
#                                    (date and court fall back independently of each other)
#     'final order'                - use the last final order
#   Cases matching none of the rules get a NULL close date and court.
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  last_date_type,
  CASE close_basis
       WHEN 'close event'
         THEN last_close_date
       WHEN 'final order or close event'
         THEN COALESCE (last_final_order, last_close_date)
       WHEN 'final order'
         THEN last_final_order
      END
    AS case_closed_date,
  CASE close_basis
       WHEN 'close event'
         THEN last_close_date_court
       WHEN 'final order or close event'
         THEN COALESCE (last_final_order_court, last_close_date_court)
       WHEN 'final order'
         THEN last_final_order_court
      END
    AS case_closed_court
FROM
  (
    SELECT
      *,
      CASE WHEN last_date_type = 'case closed'
            AND (last_close_date = last_reopen
                    OR last_reopen > last_final_order)
             THEN 'close event'
           WHEN last_date_type = 'case closed'
             THEN 'final order or close event'
           WHEN last_date_type = 'final order'
             THEN 'final order'
           WHEN last_date_type = 'non final order'
            AND last_close_date IS NOT NULL
            AND (last_reopen IS NULL
                    OR last_close_date > last_reopen)
             THEN 'close event'
          END
        AS close_basis
    FROM
      __temp__.ca_last_case_date
  ) AS basis

""",

"ca_closed_date")
                  

### Cases closed

In [None]:
# Drop the table in Athena if it already exists.
# IF EXISTS stops this cell failing on a first run, when there is no table to drop yet.
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE IF EXISTS {fcsq_db}.ca_closed_cases""")

In [None]:
# Build the closed cases table: one row per case that has a case closed date.
# Year and quarter (taken from the case closed date) are added, alongside the court/area lookup.
# Cases closed before 2014 take the pre-2014 region from the court lookup.

# Set the S3 file path for the table's data
closed_cases_s3_path = os.path.join(fcsq_db_path, "closed_cases/")
# Delete all the underlying data stored within the S3 location, so the table can be re-created there
if wr.s3.list_objects(closed_cases_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(closed_cases_s3_path)
    
# Create the table in Athena

closed_cases =  f"""
CREATE TABLE {fcsq_db}.ca_closed_cases WITH
(
    external_location='{closed_cases_s3_path}'
) AS
SELECT
  c.case_number,
  EXTRACT(YEAR FROM c.case_closed_date) AS Year,
  EXTRACT(QUARTER FROM c.case_closed_date) AS Quarter,
  c.case_type,
  c.case_closed_date,
  c.case_closed_court,
  ct.dfj_new AS case_closed_dfj,
  CASE WHEN EXTRACT(YEAR FROM c.case_closed_date) < 2014 THEN ct.region_pre2014
        ELSE ct.region END 
    AS case_closed_region
FROM
  __temp__.ca_closed_date c
  LEFT JOIN {fcsq_db}.court_mv_feb21_dfj ct
    ON c.case_closed_court = ct.court
WHERE
  case_closed_date IS NOT NULL

"""
    
_ = pydb.start_query_execution_and_wait(closed_cases)

In [None]:
# Pull the 2015 Q1 closed cases for checking.
# ca_closed_cases is created in the FCSQ database (not as a pydbtools temp table), so it is read from there.
data = pydb.read_sql_query(f"select * from {fcsq_db}.ca_closed_cases where year = 2015 and quarter = 1")

In [None]:
# Hold the check query result as a pandas dataframe ready for export
df = pd.DataFrame(data=data)

In [None]:
# Save the check data locally. The Exports folder is created first so the write does not fail
# when the folder is missing (for example on a fresh checkout).
os.makedirs("Exports", exist_ok=True)
df.to_csv('Exports/closed_data_check.csv', header = True)

In [None]:
# Save a copy of the check data to the case_check folder in S3
df.to_csv(
    "s3://alpha-family-data/fcsq_processing/CA_disps/case_check/ca_disp_cases.csv",
    header=True,
)

### Removal of duplicate children

#### This code is not currently used as the old SAS process never removed duplicate orders made - but placing here as it's likely we will come to this in future

In [None]:
# Flag potential duplicate disposals: rows sharing the same case, child, order code, receipt date and
#   disposal type are numbered in event order, so dup_rank > 1 marks the repeats.
# ca_disps_all_children is created in the FCSQ database (not as a pydbtools temp table), so it is read from there.
pydb.create_temp_table(
f"""
  SELECT
    *,
    ROW_NUMBER() OVER(PARTITION BY case_number, child_role_id, order_code, receipt_date, disp_type
                       ORDER BY case_number, child_role_id, order_code, receipt_date, event)
      AS dup_rank
  FROM
    {fcsq_db}.ca_disps_all_children

""",

"ca_disp_dup_flag")

In [None]:
# Spot check: view the flagged rows for a single example case
pydb.read_sql_query(
    "select * from __temp__.ca_disp_dup_flag where case_number in ('AF13P00441') limit 10"
)

In [None]:
# Spot check: view a sample of rows flagged as the second occurrence of a disposal
pydb.read_sql_query(
    "select * from __temp__.ca_disp_dup_flag where dup_rank = 2 limit 10"
)

In [None]:
# Get an idea of how many orders (disp_type_code 1) after 2010 might be duplicate order types
pydb.read_sql_query(
    "select count(*) count from __temp__.ca_disp_dup_flag where year > 2010 and disp_type_code = 1 and dup_rank > 1"
)

In [None]:
# Drop the table in Athena if it already exists.
# IF EXISTS stops this cell failing on a first run, when there is no table to drop yet.
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE IF EXISTS {fcsq_db}.ca_disps""")

In [None]:
# Store the disposals dataset as the ca_disps table in the FCSQ database.
# NOTE(review): __temp__.ca_disps_children_FO is not created anywhere in the visible part of this notebook -
#   confirm which temp table should feed this step before running it.

# Set the S3 file path for the table's data
disp_s3_path = os.path.join(fcsq_db_path, "disps/")
# Delete all the underlying data stored within the S3 location, so the table can be re-created there
if wr.s3.list_objects(disp_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(disp_s3_path)
    
# Create the table in Athena
t_disps = f"""
CREATE TABLE {fcsq_db}.ca_disps WITH
(
    external_location='{disp_s3_path}'
) AS   
SELECT
*
FROM
__temp__.ca_disps_children_FO
"""
    
_ = pydb.start_query_execution_and_wait(t_disps)    