# Childrens Act Disposals

In [1]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [2]:
# Structured  dataframes
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

In [3]:
# Assigning databases and S3 bucket
db1 = "familyman_dev_v2"
fcsq_db = "fcsq"
# create path for within athena FCSQ database in the S3 folder
fcsq_db_path = f"s3://alpha-family-data/fcsq_processing/CA_disps/"
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")
snapshot_date = "2021-11-11"

### Orders made

In [4]:
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    disp_type_code = 1
""",

"ca_disp_order_type")

### Disposal Events

In [5]:
pydb.create_temp_table( 
f"""
  SELECT 
    DISTINCT
    case_number,
    CAST(disp_date AS DATE) AS disp_date,
    Year,
    Quarter,
    case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    field_model,
    order_type,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court,
    FO_value
  FROM 
    {fcsq_db}.ca_disps a 
  WHERE 
    order_code < 40
""",

"ca_disp_events")

### Create flag for earliest/latest final order, non final order date - this can be moved to earier (or final order extraction can be moved to here)

In [6]:
pydb.create_temp_table(
f"""
SELECT
    *,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number, fo_value
                       ORDER BY case_number, disp_date DESC) 
      AS max_date  
FROM
    __temp__.ca_disp_events
""",

"ca_FO_dates")
 
    

### Extract case closed events and create flag for earliest/latest event

In [7]:
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date  
FROM
    {db1}.events
WHERE
  event_model = 'U24'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_closed_events")
 

### Extract case reopened events and create flag for earliest/latest event

In [8]:
pydb.create_temp_table(
f"""
SELECT
    case_number,
    receipt_date,
    CAST (entry_date AS DATE) AS entry_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date ASC) 
      AS min_date,
    ROW_NUMBER() OVER(PARTITION BY case_number
                       ORDER BY case_number, entry_date DESC) 
      AS max_date  
FROM
    {db1}.events
WHERE
  event_model = 'G62'
  AND error = 'N'
  AND mojap_snapshot_date = date '{snapshot_date}'
""",

"ca_case_reopened_events")
 

### Add first/last date types to for each case

In [22]:
pydb.create_temp_table(
f"""
SELECT
    DISTINCT
    t1.case_number,
    t1.case_type,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date = 1
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS first_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date = 1 
                         AND f.fo_value = 'Y'
                         AND t1.case_number = f.case_number)
        AS last_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.min_date = 1
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS first_no_final_order,
    (SELECT f.disp_date FROM __temp__.ca_FO_dates f
                         WHERE f.max_date = 1 
                         AND f.fo_value = 'N'
                         AND t1.case_number = f.case_number)
        AS last_no_final_order,    
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.min_date = 1 
                         AND t1.case_number = c.case_number)
        AS first_close_date,
    (SELECT c.entry_date FROM __temp__.ca_case_closed_events c
                         WHERE c.max_date = 1 
                         AND t1.case_number = c.case_number) 
        AS last_close_date,                 
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.min_date = 1 
                         AND t1.case_number = r.case_number)
        AS first_reopen,
    (SELECT r.entry_date FROM __temp__.ca_case_reopened_events r
                         WHERE r.max_date = 1 
                         AND t1.case_number = r.case_number)
        AS last_reopen
FROM
  __temp__.ca_disp_events t1
""",
    
"ca_date_flags")

### add flag for which date to use dependent on the last date within the case

In [23]:
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
CASE WHEN last_close_date = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case closed'
     WHEN last_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'final order'     
     WHEN last_no_final_order = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'non final order' 
     WHEN last_reopen = GREATEST ((COALESCE(last_final_order, CAST('1900-01-01' AS DATE))), (COALESCE(last_no_final_order, CAST('1900-01-01' AS DATE))), 
                                      (COALESCE(last_close_date, CAST('1900-01-01' AS DATE))), (COALESCE(last_reopen, CAST('1900-01-01' AS DATE))))
             THEN 'case reopened'
     END AS last_date_type
        
FROM
  __temp__.ca_date_flags
""",

"ca_last_case_date")

### Calculate case close date

In [44]:
#to check we are happy with how we deal with case date where case close date = case reopen date
pydb.create_temp_table(
f"""
SELECT
  case_number,
  case_type,
  last_final_order,
  last_no_final_order,
  last_close_date,
  last_reopen,
  last_date_type,
  CASE WHEN last_close_date = last_reopen 
        AND last_date_type = 'case closed'
         THEN last_close_date
       WHEN last_reopen > last_final_order
        AND last_date_type = 'case closed'
         THEN last_close_date  
       WHEN last_date_type = 'case closed'
         THEN COALESCE (last_final_order, last_close_date)
       WHEN last_date_type = 'final order'
         THEN last_final_order
       WHEN last_date_type = 'non final order'
        AND last_close_date IS NOT NULL
        AND (last_reopen IS NULL 
                OR (last_reopen IS NOT NULL AND last_close_date > last_reopen))
          THEN last_close_date
       ELSE NULL   
      END 
    AS case_closed_date
FROM
  __temp__.ca_last_case_date

""",
    
"ca_closed_date")
                  

### Add year/quarter

In [45]:
pydb.create_temp_table(
f"""
SELECT
  case_number,
  EXTRACT(YEAR FROM case_closed_date) AS Year,
  EXTRACT(QUARTER FROM case_closed_date) AS Quarter,
  case_type,
  case_closed_date
FROM
  __temp__.ca_closed_date
WHERE
  case_closed_date IS NOT NULL

""",
    
"ca_closed_cases")
  

In [39]:
#drop table in Athena (if it already exists)
_ = pydb.start_query_execution_and_wait(f"""DROP TABLE {fcsq_db}.ca_cases""")

QueryFailed: FAILED: SemanticException [Error 10001]: Table not found fcsq.ca_cases

In [40]:
#set S3 file path
cases_s3_path = os.path.join(fcsq_db_path, "cases/")
# Delete all the underlying data stored within the S3 location
if wr.s3.list_objects(cases_s3_path):
    print("deleting objs")
    wr.s3.delete_objects(cases_s3_path)
    
t_cases = f"""
CREATE TABLE {fcsq_db}.ca_cases WITH
(
    external_location='{cases_s3_path}'
) AS   
SELECT
*
FROM
__temp__.ca_closed_cases
"""
    
_ = pydb.start_query_execution_and_wait(t_cases) 

deleting objs


In [31]:
pydb.read_sql_query ("select year, case_type, count(*) as count from __temp__.ca_disp_events where year > 2010 group by year, case_type order by year, case_type")

Unnamed: 0,year,case_type,count
0,2011,C,20593
1,2011,P,122138
2,2012,C,25203
3,2012,P,124660
4,2013,C,27590
5,2013,P,125793
6,2014,C,23013
7,2014,P,120329
8,2015,C,25606
9,2015,P,107625


In [46]:
pydb.read_sql_query ("select year, case_type, count(*) as count from __temp__.ca_closed_cases where year > 2010 group by year, case_type order by year, case_type")

Unnamed: 0,year,case_type,count
0,2011,C,11700
1,2011,P,40744
2,2012,C,14413
3,2012,P,37986
4,2013,C,16275
5,2013,P,44014
6,2014,C,14626
7,2014,P,59282
8,2015,C,14482
9,2015,P,44275
