# Children Act Disposals

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [None]:
# Dataframe display settings: show up to 100 columns, use a display width of
# 900 characters and allow up to 200 characters per column value
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

In [None]:
# Snapshot date used to filter the source tables in the queries below
snapshot_date = "2022-02-21"

# Athena databases: db1 is read by the source queries, fcsq_db receives the
# final output table
db1 = "familyman_dev_v2"
fcsq_db = "fcsq"

# S3 folder that sits behind the Athena FCSQ database for these disposals
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_disps/"

# boto3 handles for the S3 bucket
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

### Orders made

In [None]:
# Remove the child-level detail from the disposals so that each order made is
# listed once per case (SELECT DISTINCT over the case/order columns only)
order_type_sql = """
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    /*order_type, excluded as the same order can have multiple order types eg CAST & CN*/
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court
  FROM 
    __temp__.ca_disps_all_children

"""

pydb.create_temp_table(order_type_sql, "ca_disp_order_type")

### Disposal Events (note - this replicates the current SAS code - however we may change this as it is questionable how useful FM events are).

In [None]:
# Disposal events only: where several orders are made within the same event of
# a case, they collapse into a single row and are counted once.
#
# master_case_type gives every case a single case type, so that the small
# number of cases holding both private and public law orders are counted once
# in the final case count rather than in both:
#   - rows with case_type 'C' whose case also has 'P' rows are set to 'P', so
#     such cases are counted within private law cases disposed
#   - case numbers whose 5th character is neither 'P' nor 'C' are set to 'NA';
#     this takes domestic violence and adoption case types out so they can be
#     excluded from the final CA case counts
disp_events_sql = """
  SELECT 
    DISTINCT
    case_number,
    CAST(disp_date AS DATE) AS disp_date,
    Year,
    Quarter,
    case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
         WHEN case_type = 'C' 
              AND case_number in (SELECT case_number
                                  FROM __temp__.ca_disp_order_type
                                  WHERE case_type = 'P')
           THEN 'P'
        ELSE case_type END
      AS master_case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    disp_type_code,
    disp_type
  FROM 
    __temp__.ca_disp_order_type
  WHERE 
    disp_type_code <> 2
"""

pydb.create_temp_table(disp_events_sql, "ca_disp_events")

### Create flag for earliest/latest final order, non final order date

In [None]:
# Final order information for disposals, taken from the *_FO event fields
# joined to their (non-error) events at the chosen snapshot.
#   - FO_value is the field value indicating whether the event was a final
#     order within the case
#   - disp_date is the event receipt date, falling back to the entry date
#   - min_date_rank / max_date_rank number the events within each case and
#     FO value by ascending / descending date, so a rank of 1 flags the
#     earliest / latest order of that kind in the case
final_orders_sql = f"""
  SELECT 
    e.event,
    e.case_number,
    COALESCE(e.receipt_date, e.entry_date) AS disp_date,
    f.field_model,
    f.value as FO_value,
    ROW_NUMBER() OVER(PARTITION BY e.case_number, f.value
                       ORDER BY e.case_number, COALESCE(e.receipt_date, e.entry_date) ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY e.case_number, f.value
                       ORDER BY e.case_number, COALESCE(e.receipt_date, e.entry_date) DESC) 
      AS max_date_rank,
    e.creating_court
  FROM 
    {db1}.event_fields f
    JOIN {db1}.events e
      ON f.event = e.event
  WHERE 
      field_model IN ('C21_FO','C23_FO','C24_FO','C25_FO','C26_FO','C27_FO','C28_FO','C29_FO','C30_FO',
                      'C31_FO','C32A_FO','C32B_FO','C33_FO','C34A_FO','C34B_FO','C35A_FO','C35B_FO','C36_FO',
                      'C37_FO','C38A_FO','C38B_FO','C39_FO','C40_FO','C42_FO','C43_FO','C43A_FO','C44A_FO',
                      'C44B_FO','C45A_FO','C45B_FO','C53_FO','C80_FO','C81_FO','C82_FO','CPA_FO',
                      'ORDNOM_FO','ORDREF_FO','G63_FO')
              
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.error = 'N'
"""

pydb.create_temp_table(final_orders_sql, "ca_final_orders")

### Extract case closed event dates and create flag for earliest/latest event

In [None]:
# Case closed events (event_model 'U24', non-error, at the chosen snapshot).
# As with the final orders, min_date_rank / max_date_rank number the events of
# each case by ascending / descending entry date, so rank 1 flags the
# earliest / latest close date recorded for the case.
case_closed_sql = f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date_rank,
    creating_court
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
"""

pydb.create_temp_table(case_closed_sql, "ca_case_closed_events")
 

### Extract case reopened event dates and create flag for earliest/latest event

In [None]:
# Case re-opened events (event_model 'G62', non-error, at the chosen snapshot).
# min_date_rank / max_date_rank number the events of each case by ascending /
# descending entry date, so rank 1 identifies the earliest / latest re-open
# date for the case.
case_reopened_sql = f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date_rank  
FROM
    {db1}.events
WHERE
  event_model = 'G62'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
"""

pydb.create_temp_table(case_reopened_sql, "ca_case_reopened_events")
 

### Add first/last date types for each case

In [None]:
# One row per case / master case type from the disposal events, with the key
# dates looked up from the ranked temp tables (rank 1 = earliest or latest):
#   - first / last final order (FO value 'Y') and the court of the last one
#   - first / last non-final order (FO value 'N')
#   - first / last case closed date and the court of the last one
#   - first / last case re-opened date
# Each date is fetched with a scalar subquery matched on case_number, so a case
# with no matching event gets NULL for that date.
date_flags_sql = """
SELECT
    DISTINCT
    t1.case_number,
    t1.master_case_type,
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.min_date_rank = 1
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS first_final_order,
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS last_final_order,
    (SELECT f.creating_court FROM __temp__.ca_final_orders f
                              WHERE f.max_date_rank = 1 
                              AND f.fo_value = 'Y'
                              AND t1.case_number = f.case_number)
        AS last_final_order_court,  
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.min_date_rank = 1
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS first_no_final_order,
    (SELECT f.disp_date FROM __temp__.ca_final_orders f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS last_no_final_order,    
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.min_date_rank = 1 
                         AND t1.case_number = c.case_number)
        AS first_close_date,
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.max_date_rank = 1 
                         AND t1.case_number = c.case_number) 
        AS last_close_date,  
    (SELECT c.creating_court FROM __temp__.ca_case_closed_events c
                              WHERE c.max_date_rank = 1 
                              AND t1.case_number = c.case_number) 
        AS last_close_date_court,      
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.min_date_rank = 1 
                         AND t1.case_number = r.case_number)
        AS first_reopen,
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.max_date_rank = 1 
                         AND t1.case_number = r.case_number)
        AS last_reopen
FROM
  __temp__.ca_disp_events t1
  
"""

pydb.create_temp_table(date_flags_sql, "ca_date_flags")

### Add flag for which date to use, dependent on the last date within the case

In [None]:
# Flag which of a case's four "last" dates (final order, non-final order, case
# closed, case re-opened) is the most recent one.
#
# The "most recent date" expression is identical in every branch of the CASE,
# so it is built once here and interpolated, rather than being repeated four
# times in the SQL where the copies could drift apart.
# Missing dates are replaced by 1900-01-01 before comparison - presumably so
# that a NULL date cannot make GREATEST return NULL (confirm against the
# Athena engine documentation).
_missing_date = "CAST('1900-01-01' AS DATE)"
_latest_date = "GREATEST({})".format(
    ", ".join(
        f"COALESCE({column}, {_missing_date})"
        for column in (
            "last_final_order",
            "last_no_final_order",
            "last_close_date",
            "last_reopen",
        )
    )
)

# The order of the WHEN branches decides ties between equal dates:
# case closed, then final order, then non final order, then case reopened.
# A case with none of the four dates matches no branch and gets a NULL
# last_date_type.
pydb.create_temp_table(
    f"""
SELECT
  case_number,
  master_case_type,
  last_final_order,
  last_final_order_court,
  last_no_final_order,
  last_close_date,
  last_close_date_court,
  last_reopen,
  CASE WHEN last_close_date = {_latest_date}
             THEN 'case closed'
       WHEN last_final_order = {_latest_date}
             THEN 'final order'
       WHEN last_no_final_order = {_latest_date}
             THEN 'non final order'
       WHEN last_reopen = {_latest_date}
             THEN 'case reopened'
     END AS last_date_type

FROM
  __temp__.ca_date_flags
""",
    "ca_last_case_date",
)

### Calculate case close date

In [None]:
# TODO: confirm we are happy with how the case date is handled where the case
# close date equals the case reopen date.
#
# Derive the date (and court) on which each case is treated as closed.
# For the vast majority of cases this is the date of the last final order.
# The exceptions are cases flagged as closed where the re-open / non-final
# orders are recorded later than the last final order; these use the case
# closed event instead.
# The two CASE expressions apply the same conditions in the same order: the
# first returns the date, the second the matching court. Cases matching no
# branch get NULL.
closed_date_sql = """
SELECT
  case_number,
  master_case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  last_date_type,
  CASE WHEN last_close_date = last_reopen 
        AND last_date_type = 'case closed'
         THEN last_close_date
       WHEN last_reopen > last_final_order
        AND last_date_type = 'case closed'
         THEN last_close_date  
       WHEN last_date_type = 'case closed'
         THEN COALESCE (last_final_order, last_close_date)
       WHEN last_date_type = 'final order'
         THEN last_final_order
       WHEN last_date_type = 'non final order'
        AND last_close_date IS NOT NULL
        AND (last_reopen IS NULL 
                OR (last_reopen IS NOT NULL AND last_close_date > last_reopen))
          THEN last_close_date
       ELSE NULL   
      END 
    AS case_closed_date,
  CASE WHEN last_close_date = last_reopen 
        AND last_date_type = 'case closed'
         THEN last_close_date_court
       WHEN last_reopen > last_final_order
        AND last_date_type = 'case closed'
         THEN last_close_date_court  
       WHEN last_date_type = 'case closed'
         THEN COALESCE (last_final_order_court, last_close_date_court)
       WHEN last_date_type = 'final order'
         THEN last_final_order_court
       WHEN last_date_type = 'non final order'
        AND last_close_date IS NOT NULL
        AND (last_reopen IS NULL 
                OR (last_reopen IS NOT NULL AND last_close_date > last_reopen))
          THEN last_close_date_court
       ELSE NULL   
      END 
    AS case_closed_court
FROM
  __temp__.ca_last_case_date

"""

pydb.create_temp_table(closed_date_sql, "ca_closed_date")
                  

### Add year/quarter

In [None]:
# Keep only the cases that have a closed date and add the year and quarter of
# that date
closed_cases_sql = """
SELECT
  case_number,
  EXTRACT(YEAR FROM case_closed_date) AS Year,
  EXTRACT(QUARTER FROM case_closed_date) AS Quarter,
  master_case_type,
  case_closed_date,
  case_closed_court
FROM
  __temp__.ca_closed_date
WHERE
  case_closed_date IS NOT NULL

"""

pydb.create_temp_table(closed_cases_sql, "ca_closed_cases")
  

In [None]:
# Extract the private law ('P') cases closed in Q1 2011 for checking.
# ca_closed_cases only carries master_case_type (it has no case_type column),
# so the filter has to use master_case_type or the query fails on an
# unresolved column.
data = pydb.read_sql_query(
    "select * from __temp__.ca_closed_cases "
    "where year = 2011 and quarter = 1 and master_case_type = 'P'"
)

In [None]:
# Hold the query result as a pandas DataFrame
df = pd.DataFrame(data=data)

In [None]:
# Write the extract to the case_check folder on S3 as a CSV with a header row
df.to_csv(
    "s3://alpha-family-data/fcsq_processing/CA_disps/case_check/ca_disp_cases.csv",
    header=True,
)

In [None]:
# Drop the existing output table in Athena. IF EXISTS makes this a no-op when
# the table has not been created yet (e.g. on a first run) instead of failing
# the query.
_ = pydb.start_query_execution_and_wait(
    f"""DROP TABLE IF EXISTS {fcsq_db}.ca_cases"""
)

In [None]:
# S3 location that holds the data behind the ca_cases table
cases_s3_path = os.path.join(fcsq_db_path, "cases/")

# Remove any objects already stored at that location before the table is
# recreated on top of it
existing_objects = wr.s3.list_objects(cases_s3_path)
if existing_objects:
    print("deleting objs")
    wr.s3.delete_objects(cases_s3_path)

# Recreate the table from the closed cases temp table, with its data written
# to the S3 location above
t_cases = f"""
CREATE TABLE {fcsq_db}.ca_cases WITH
(
    external_location='{cases_s3_path}'
) AS   
SELECT
*
FROM
__temp__.ca_closed_cases
"""

_ = pydb.start_query_execution_and_wait(t_cases)