# Childrens Act Disposals

In [1]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [2]:
# Structured  dataframes
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

In [3]:
# Assigning databases and S3 bucket
db1 = "familyman_dev_v2"
fcsq_db = "fcsq"
# create path for within athena FCSQ database in the S3 folder
fcsq_db_path = f"s3://alpha-family-data/fcsq_processing/CA_disps/"
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")
snapshot_date = "2021-11-11"

### Orders made

In [4]:
#Here we strip out the children from each case to just count the orders made
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    disp_type_code = 1
""",

"ca_disp_order_type")

In [6]:
pydb.read_sql_query ("select count (*) count from __temp__.ca_disp_order_type")

Unnamed: 0,count
0,2449606


In [8]:
pydb.read_sql_query ("select count (*) count from __temp__.ca_disps_order_name where disp_type_code = 1")

Unnamed: 0,count
0,2453468


In [9]:
pydb.read_sql_query ("select * from __temp__.ca_disps_order_name where case_number NOT IN (select case_number from __temp__.ca_disp_order_type) and disp_type_code = 1 limit 20")

Unnamed: 0,event,case_number,event_model,lookup_event_code,receipt_date,entry_date,disp_date,creating_court,field_model,all_event_ord_types,order_type,case_type,disp_type_code,disp_type,order_code,order_desc
0,26202784286,MH11C00127,C43A,C43A,2012-04-23,2012-04-23 11:14:14,2012-04-23,MA,,,,C,1,Order,27,Special Guardianship Order
1,31600137305,SF07F00345,C43,GEN,2007-07-10,2007-07-14 09:25:19,2007-07-10,SF,C43_5,RS,RS,P,1,Order,30,Section 8 Residence Order
2,35800677840,WV16F00566,C45A,C45A,2016-05-23,2016-05-23 12:54:39,2016-05-23,WJ,,,,P,1,Order,25,Parental Responsibility Order
3,20300341563,GL04F02122,C43,GEN,2004-12-23,2004-12-31 13:46:21,2004-12-23,GL,C43_5,CN,CN,P,1,Order,29,Section 8 Contact Order
4,24401001153,LE09F00381,C43,GEN,2009-12-07,2009-12-07 15:01:48,2009-12-07,LE,C43_5,RS,RS,P,1,Order,30,Section 8 Residence Order
5,46900069076,IL11F00001,C43,GEN,2011-08-04,2011-08-05 10:25:49,2011-08-04,IL,C43_5,CN,CN,P,1,Order,29,Section 8 Contact Order
6,17600279165,CO07F00394,C43,GEN,2007-04-04,2007-04-04 16:57:01,2007-04-04,CO,C43_5,"CN, RS",CN,P,1,Order,29,Section 8 Contact Order
7,17600279165,CO07F00394,C43,GEN,2007-04-04,2007-04-04 16:57:01,2007-04-04,CO,C43_5,"CN, RS",RS,P,1,Order,30,Section 8 Residence Order
8,14000342484,BO05F01422,C43,GEN,2005-12-09,2005-12-09 14:41:11,2005-12-09,BO,C43_5,PS,PS,P,1,Order,31,Section 8 Prohibited Steps Order
9,38400317186,WX09F00455,C43,GEN,2010-10-18,2010-11-03 16:05:01,2010-10-18,WX,C43_5,CN,CN,P,1,Order,29,Section 8 Contact Order


In [33]:
pydb.read_sql_query ("select * from __temp__.ca_disps_all_children where case_number IN ('SE19C00138')")

Unnamed: 0,case_number,disp_date,year,quarter,case_type,receipt_date,entry_date,event,event_model,field_model,order_type,disp_type_code,disp_type,order_code,order_desc,child_role_id,dob,gender,creating_court,delete_flag
0,SE19C00138,2019-01-29,2019,1,C,2019-01-29,2019-02-07 16:47:51,32002693232,C33,,,2,Interim Order,40,Interim Care Order,9734268,2012-01-27,2,SE,Y
1,SE19C00138,2020-02-20,2020,1,C,2020-02-20,2020-02-25 15:15:04,32002879968,C32A,,,1,Order,1,Care Order/Substitute Supervision Order,9734268,2012-01-27,2,SE,Y
2,SE19C00138,2020-02-20,2020,1,C,2020-02-20,2020-02-25 15:15:04,32002879968,C32A,,,1,Order,1,Care Order/Substitute Supervision Order,9734238,2004-11-11,1,SE,Y
3,SE19C00138,2019-01-29,2019,1,C,2019-01-29,2019-02-07 16:47:51,32002693232,C33,,,2,Interim Order,40,Interim Care Order,9734269,2007-05-14,2,SE,Y
4,SE19C00138,2020-02-20,2020,1,C,2020-02-20,2020-02-25 15:15:04,32002879968,C32A,,,1,Order,1,Care Order/Substitute Supervision Order,9734269,2007-05-14,2,SE,Y
5,SE19C00138,2019-02-08,2019,1,C,2019-02-08,2019-02-18 15:08:18,32002698699,C33,,,2,Interim Order,40,Interim Care Order,9734238,2004-11-11,1,SE,Y
6,SE19C00138,2019-03-25,2019,1,C,2019-03-25,2019-04-06 08:41:22,32002720855,C33,,,2,Interim Order,40,Interim Care Order,9734268,2012-01-27,2,SE,Y
7,SE19C00138,2019-01-29,2019,1,C,2019-01-29,2019-01-29 12:33:23,32002688617,C33,,,2,Interim Order,40,Interim Care Order,9734238,2004-11-11,1,SE,Y
8,SE19C00138,2019-02-08,2019,1,C,2019-02-08,2019-02-18 15:08:18,32002698699,C33,,,2,Interim Order,40,Interim Care Order,9734269,2007-05-14,2,SE,Y
9,SE19C00138,2019-02-08,2019,1,C,2019-02-08,2019-02-18 15:08:18,32002698699,C33,,,2,Interim Order,40,Interim Care Order,9734267,2017-05-03,2,SE,N


### Disposal Events

In [5]:
#Counting just disposal events - where multiple orders within a case are made within the same event they are just counted once
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    CAST(disp_date AS DATE) AS disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court,
    FO_value
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    order_code < 40
""",

"ca_disp_events")

### Create flag for earliest/latest final order, non final order date - this can be moved to earier (or final order extraction can be moved to here)

In [6]:
pydb.create_temp_table(
f"""
SELECT
    *,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date DESC) 
      AS max_date  
FROM
    __temp__.ca_disp_events
""",

"ca_FO_dates")
 
    

#### Final order - alternate extraction at case level rather than event level

In [31]:
#Extract final order information for disposals. The value field indicates whether the event was a final order within the case
#Columns added to create a flag for the earliest and latest final order within each case
pydb.create_temp_table( 
f"""
  SELECT 
    e.event,
    e.case_number,
    COALESCE(e.receipt_date, e.entry_date) AS disp_date,
    f.field_model,
    f.value as FO_value,
    ROW_NUMBER() OVER(PARTITION BY e.case_number, f.value
                       ORDER BY e.case_number, COALESCE(e.receipt_date, e.entry_date) ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY e.case_number, f.value
                       ORDER BY e.case_number, COALESCE(e.receipt_date, e.entry_date) DESC) 
      AS max_date_rank  
  FROM 
    {db1}.event_fields f
    JOIN {db1}.events e
      ON f.event = e.event
  WHERE 
      field_model IN ('C21_FO','C23_FO','C24_FO','C25_FO','C26_FO','C27_FO','C28_FO','C29_FO','C30_FO',
                      'C31_FO','C32A_FO','C32B_FO','C33_FO','C34A_FO','C34B_FO','C35A_FO','C35B_FO','C36_FO',
                      'C37_FO','C38A_FO','C38B_FO','C39_FO','C40_FO','C42_FO','C43_FO','C43A_FO','C44A_FO',
                      'C44B_FO','C45A_FO','C45B_FO','C53_FO','C80_FO','C81_FO','C82_FO','CPA_FO',
                      'ORDNOM_FO','ORDREF_FO','G63_FO')
              
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.mojap_snapshot_date = DATE'{snapshot_date}'
    AND e.error = 'N'
""",

"ca_final_orders_2")

In [32]:
pydb.read_sql_query ("select * from __temp__.ca_final_orders_2 limit 20")

Unnamed: 0,event,case_number,disp_date,field_model,fo_value,min_date,max_date
0,14400733465,BF11P01480,2012-08-10,C21_FO,N,3,7
1,14400707311,BF11P01480,2012-02-21,C21_FO,N,2,8
2,14400696674,BF11P01480,2011-12-09,C21_FO,N,1,9
3,14400747519,BF12P01048,2012-11-12,C43_FO,Y,1,1
4,14400902480,BF12P01165,2016-08-01,C21_FO,Y,3,1
5,14400854206,BF12P01165,2015-01-29,C43_FO,Y,2,2
6,14400821157,BF12P01165,2014-04-04,C43_FO,Y,1,3
7,14400778236,BF13P00099,2013-03-14,C43_FO,Y,1,1
8,54600072820,BF13P00626,2013-10-07,C53_FO,Y,1,1
9,54600080702,BF13P00626,2013-10-07,C53_FO,Y,2,2


### Extract case closed events and create flag for earliest/latest event

In [7]:
#Extracting the case closed event, and as with final orders create a flag for the earliest/latest date recorded for each case
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date_rank  
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_closed_events")
 

### Extract case reopened events and create flag for earliest/latest event

In [8]:
#Extract information on whether a case has been re-opened, and if so columns added so we can easily identify the earliest/latest re-open date for each case
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date_rank,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date_rank  
FROM
    {db1}.events
WHERE
  event_model = 'G62'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_reopened_events")
 

### Add first/last date types to for each case

In [22]:
pydb.create_temp_table(
f"""
SELECT
    DISTINCT
    t1.case_number,
    t1.case_type,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date_rank = 1
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS first_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS last_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date_rank = 1
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS first_no_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date_rank = 1 
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS last_no_final_order,    
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.min_date_rank = 1 
                         AND t1.case_number = c.case_number)
        AS first_close_date,
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.max_date_rank = 1 
                         AND t1.case_number = c.case_number) 
        AS last_close_date,                 
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.min_date_rank = 1 
                         AND t1.case_number = r.case_number)
        AS first_reopen,
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.max_date_rank = 1 
                         AND t1.case_number = r.case_number)
        AS last_reopen
FROM
  __temp__.ca_disp_events t1
""",
    
"ca_date_flags")

### add flag for which date to use dependent on the last date within the case

In [23]:
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
CASE WHEN last_close_date = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case closed'
     WHEN last_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'final order'     
     WHEN last_no_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'non final order' 
     WHEN last_reopen = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case reopened'
     END AS last_date_type
        
FROM
  __temp__.ca_date_flags
""",

"ca_last_case_date")

### Calculate case close date

In [44]:
#to check we are happy with how we deal with case date where case close date = case reopen date
#for the vast majority of cases the close date will be the date of the last final order. There are some exceptions (where the case is flgged as closed and the re-open/non final orders are recorded as
#   later than the last final order)
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  last_date_type,
  CASE WHEN last_close_date = last_reopen 
        AND last_date_type = 'case closed'
         THEN last_close_date
       WHEN last_reopen > last_final_order
        AND last_date_type = 'case closed'
         THEN last_close_date  
       WHEN last_date_type = 'case closed'
         THEN COALESCE (last_final_order, last_close_date)
       WHEN last_date_type = 'final order'
         THEN last_final_order
       WHEN last_date_type = 'non final order'
        AND last_close_date IS NOT NULL
        AND (last_reopen IS NULL 
                OR (last_reopen IS NOT NULL AND last_close_date > last_reopen))
          THEN last_close_date
       ELSE NULL   
      END 
    AS case_closed_date
FROM
  __temp__.ca_last_case_date

""",
    
"ca_closed_date")
                  

### Add year/quarter

In [45]:
pydb.create_temp_table(
f"""
SELECT
  case_number,
  EXTRACT(YEAR FROM case_closed_date) AS Year,
  EXTRACT(QUARTER FROM case_closed_date) AS Quarter,
  case_type,
  case_closed_date
FROM
  __temp__.ca_closed_date
WHERE
  case_closed_date IS NOT NULL

""",
    
"ca_closed_cases")
  

In [39]:
#drop table in Athena (if it already exists)
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE {fcsq_db}.ca_cases""")

QueryFailed: FAILED: SemanticException [Error 10001]: Table not found fcsq.ca_cases

In [40]:
#set S3 file path
cases_s3_path = os.path.join(fcsq_db_path, "cases/")
# Delete all the underlying data stored within the S3 location
if wr.s3.list_objects(cases_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(cases_s3_path)
    
t_cases = f"""
CREATE TABLE {fcsq_db}.ca_cases WITH
(
    external_location='{cases_s3_path}'
) AS   
SELECT
*
FROM
__temp__.ca_closed_cases
"""
    
_ = pydb.start_query_execution_and_wait(t_cases) 

deleting objs
