# Childrens Act Disposals

In [118]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [119]:
# Structured  dataframes
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

In [120]:
# Assigning databases and S3 bucket
db1 = "familyman_dev_v2"
fcsq_db = "fcsq"
# create path for within athena FCSQ database in the S3 folder
fcsq_db_path = f"s3://alpha-family-data/fcsq_processing/CA_disps/"
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")
snapshot_date = "2021-11-11"

### Orders made

In [121]:
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    disp_type_code = 1
""",

"ca_disp_order_type")

In [13]:
pydb.read_sql_query ("select count (*) count from __temp__.ca_order_type where year = 2020 and case_type = 'P' and order_code = 29 ")

Unnamed: 0,count
0,46368


### Disposal Events

In [160]:
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    CAST(disp_date AS DATE) AS disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court,
    FO_value
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    order_code < 40
""",

"ca_disp_events")

In [161]:
pydb.read_sql_query ("select * from __temp__.ca_disp_events limit 10")

Unnamed: 0,case_number,disp_date,year,quarter,case_type,receipt_date,entry_date,event,event_model,field_model,order_type,disp_type_code,disp_type,order_code,order_desc,creating_court,fo_value
0,SE04C00299,2004-11-24,2004,4,C,2004-11-24,2004-11-25 15:47:20,32000496830,C45A,,,1,Order,25,Parental Responsibility Order,SE,N
1,LX11C00218,2011-05-16,2011,2,C,2011-05-16,2011-06-08 10:35:36,47700034538,C37,,,1,Order,10,Education Supervision Order,LV,Y
2,MA15C00415,2015-08-18,2015,3,C,2015-08-18,2015-10-05 12:04:41,26203867196,C44A,,,1,Order,39,Leave to change surname,MA,N
3,BM19P09452,2020-12-02,2020,4,P,2020-12-02,2020-12-14 14:55:30,12706909547,C30,,,1,Order,20,Missing Child Info,BM,N
4,SD18C00684,2020-01-29,2020,1,C,2020-01-29,2020-02-03 13:48:43,55400851524,C32A,,,1,Order,1,Care Order/Substitute Supervision Order,HS,Y
5,LA12C00020,2012-12-05,2012,4,C,2012-12-05,2012-12-06 16:41:49,24200436957,C43,C43_5,RS,1,Order,30,Section 8 Residence Order,LA,N
6,LV18C03260,2019-07-26,2019,3,C,2019-07-26,2019-08-10 10:28:13,25104546791,C43A,,,1,Order,27,Special Guardianship Order,LV,N
7,SQ08P10288,2009-07-15,2009,3,P,2009-07-15,2009-07-28 09:41:33,33800682308,C43,C43_5,CN,1,Order,29,Section 8 Contact Order,SQ,Y
8,BM07C07151,2009-01-27,2009,1,C,2009-01-27,2009-01-27 14:50:33,12702929699,C45A,,,1,Order,25,Parental Responsibility Order,BM,N
9,LV00P05591,2003-06-18,2003,2,P,2003-06-18,2003-06-24 11:04:54,25100352843,C43,C43_5,CN,1,Order,29,Section 8 Contact Order,LV,Y


### Create flag for earliest/latest final order, non final order date - this can be moved to earier (or final order extraction can be moved to here)

In [162]:
pydb.create_temp_table(
f"""
Select
    *,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date DESC) 
      AS max_date  
FROM
    __temp__.ca_disp_events
""",

"ca_FO_dates")
 
    

### Extract case closed events and create flag for earliest/latest event

In [163]:
pydb.create_temp_table(
f"""
Select
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date  
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_closed_events")
 

### Extract case reopened events and create flag for earliest/latest event

In [164]:
pydb.create_temp_table(
f"""
Select
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date  
FROM
    {db1}.events
WHERE
  event_model = 'G62'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_reopened_events")
 

### Add first/last date types to for each case

In [170]:
pydb.create_temp_table(
f"""
Select
    t1.case_number,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date = 1
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS first_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date = 1 
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS last_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date = 1
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS first_no_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date = 1 
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS last_no_final_order,    
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.min_date = 1 
                         AND t1.case_number = c.case_number)
        AS first_close_date,
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.max_date = 1 
                         AND t1.case_number = c.case_number) 
        AS last_close_date,                 
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.min_date = 1 
                         AND t1.case_number = r.case_number)
        AS first_reopen,
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.max_date = 1 
                         AND t1.case_number = r.case_number)
        AS last_reopen
FROM
  __temp__.ca_disp_events t1
""",
    
"ca_date_flags")

### add flag for which date to use dependent on the last date within the case

In [171]:
pydb.create_temp_table( 
f"""
select 
  case_number,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  CASE WHEN last_final_order = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
    THEN 1 ELSE 0 END 
   AS last_date_is_final_order,
  CASE WHEN last_close_date = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
    THEN 1 ELSE 0 END 
   AS last_date_is_close_date, 
  CASE WHEN last_no_final_order = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
       OR last_reopen = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
    THEN 1 ELSE 0 END 
   AS case_not_closed
FROM
  __temp__.ca_date_flags
  
""",
    
"ca_last_date_flags")    


In [172]:
pydb.read_sql_query ("select * from __temp__.ca_date_flags limit 20")

Unnamed: 0,case_number,first_final_order,last_final_order,first_no_final_order,last_no_final_order,first_close_date,last_close_date,first_reopen,last_reopen
0,HD13P00598,2013-11-06,2014-01-13,2013-07-31,2013-07-31,2013-11-18,2014-03-15,,
1,LS03C06076,2004-05-10,2004-05-10,,,2004-05-18,2004-05-18,,
2,CB10C01500,2011-10-19,2011-10-19,,,2011-10-24,2011-10-24,,
3,BM99P08869,,,1999-12-15,1999-12-22,2004-06-14,2004-06-14,,
4,ME04C00099,2005-05-13,2005-05-18,2004-12-01,2004-12-01,2005-06-13,2006-03-06,2005-05-11,2006-03-06
5,TQ10P10021,2010-04-19,2010-10-18,,,2010-04-23,2010-10-21,2010-07-21,2010-07-21
6,SO15P00753,2017-02-14,2017-02-14,2016-11-09,2016-11-09,2017-03-09,2017-03-09,,
7,OX07P00379,,,2008-02-20,2009-01-12,2010-09-28,2010-09-28,,
8,LB05P00053,2006-03-10,2006-03-10,,,2006-03-29,2006-03-29,,
9,GU16P00088,2016-03-23,2016-03-23,,,2016-03-31,2016-03-31,,


In [156]:
pydb.read_sql_query(f"""
select 
   GREATEST (COALESCE(last_final_order,last_close_date,last_no_final_order,last_reopen)) as max_date
FROM
  __temp__.ca_date_flags
WHERE  
  case_number = 'DA10P00602'
""")


QueryFailed: SYNTAX_ERROR: line 6:13: Function try_greatest not registered. You may need to manually clean the data at location 's3://mojap-athena-query-dump/AROAIYHQRYQMYSKCRPFA6:8ced989c-alpha_user_afonso82/tables/0cdbdcca-3405-4a30-8915-679990286ff7' before retrying. Athena will not delete data in your account.

pydb.create_temp_table
( 
f"""
select 
  case_number,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  select 
   AS last_date_is_final_order,
  CASE WHEN last_close_date = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
    THEN 1 ELSE 0 END 
   AS last_date_is_close_date, 
  CASE WHEN last_no_final_order = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
       OR last_reopen = GREATEST (COALESCE(last_final_order,last_no_final_order,last_close_date,last_reopen))
    THEN 1 ELSE 0 END 
   AS case_not_closed
FROM
  __temp__.ca_date_flags
  
""",
    
"ca_last_date_flags")   

In [198]:
pydb.create_temp_table(
f"""
select 
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
CASE WHEN last_close_date = GREATEST (last_final_order,last_no_final_order,last_close_date,last_reopen)
     THEN 1 ELSE 0 END
    AS last_date_is_close_date,
CASE WHEN last_close_date = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
     THEN 1 ELSE 0 END 
    AS last_date_is_test_date,
CASE WHEN last_close_date = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case closed'
     WHEN last_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'final order'     
     WHEN last_no_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'non final order' 
     WHEN last_reopen = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case reopened'
     END AS last_date_type
        
FROM
  __temp__.ca_date_flags
""",

"date_test")

In [199]:
pydb.read_sql_query ("select * from __temp__.date_test limit 10")

Unnamed: 0,last_final_order,last_no_final_order,last_close_date,last_reopen,last_date_is_close_date,last_date_is_test_date,last_date_type
0,2007-08-28,,2007-08-31,,0,1,case closed
1,2013-09-30,2013-09-30,2013-11-01,2012-08-14,1,1,case closed
2,2013-09-30,2013-09-30,2013-11-01,2012-08-14,1,1,case closed
3,2019-05-31,2019-04-25,2019-07-26,,0,1,case closed
4,,2021-10-14,,,0,0,non final order
5,2008-11-21,2008-01-08,2008-11-28,,0,1,case closed
6,2011-12-15,2011-09-19,2012-02-14,,0,1,case closed
7,,2007-11-08,2007-11-14,,0,1,case closed
8,,2006-09-22,2006-11-24,,0,1,case closed
9,2018-03-15,2018-03-15,2018-03-15,2018-03-15,1,1,case closed
