#### Import packages

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [None]:
# DataFrame display settings: show up to 100 columns, a 900-character wide
# display and up to 200 characters per column value.
# set_option accepts several option/value pairs in a single call.
pd.set_option(
    "display.max_columns", 100,
    "display.width", 900,
    "display.max_colwidth", 200,
)

#### Assigning key variables

In [None]:
# Key variables used throughout the notebook

# Databases
# db1: database where the (derived) Familyman data is stored
db1 = "familyman_derived_dev_v2"
# db2: second Familyman database, queried later for the event_fields table
db2 = "familyman_dev_v2"
# db3: database where tables created as part of FCSQ processing are stored where required
db3 = "fcsq"

# Snapshot date used in the mojap_snapshot_date filters - update where necessary
snapshot_date = "2022-05-23"

### Extracting disposals

In [None]:
# Select all disposal events into the temp table ca_extract_disps_d.
# Familyman stores values differently depending on the disposal type. Here we are dealing with 3 types:
#   Firstly, where the event model is one of ('C21', 'C24', 'C36', 'C43', 'ORDNOM', 'ORDREF'), the type of event is determined by field values,
#     and so the value is stored within the order_type_value_if_in_fields_array column of the events derived table.
#     The 'C43' (CAO orders), 'ORDNOM', 'ORDREF' and 'G63' event models use the same field values to record the order type,
#     and so their lookup value (lookup_event_code) is set to a general value ('GEN').
#   Secondly, there are withdrawn events (G63). The value stored in the withdrawn_application_type_value column corresponds to the
#     original application event (usually G50 or U22), and so we link back to those records (self-join aliased "app")
#     to get the order type applied for.
#   Thirdly, for the remaining event models, the order type is determined by the event model itself. These do not have an
#     order type array value, so a single-element array ['N/A'] is used instead: this keeps all_event_ord_types in a
#     consistent (array) format, and 'N/A' is the value used for the order type lookup later.
# disp_date is the receipt date, falling back to the entry date when the receipt date is NULL.

pydb.create_temp_table(
f""" 
SELECT 
  e.event,
  e.case_number,
  e.event_model,
  CASE WHEN e.event_model IN ('C43', 'ORDNOM', 'ORDREF','G63')
      THEN 'GEN'
      ELSE e.event_model END 
    AS lookup_event_code,
  e.receipt_date,
  e.entry_date,
  COALESCE(e.receipt_date, e.entry_date) AS disp_date,
  e.creating_court,
  CASE WHEN e.event_model = 'G63'
        THEN app.application_type_value_array
       WHEN e.order_type_value_if_in_fields_array IS NULL
        THEN ARRAY['N/A']
         ELSE e.order_type_value_if_in_fields_array END  
    AS all_event_ord_types
FROM 
  {db1}.events_derived AS e
  LEFT JOIN {db1}.events_derived AS app
    ON e.withdrawn_application_type_value = app.event
      AND e.mojap_snapshot_date = app.mojap_snapshot_date    
WHERE
  e.event_model IN ('C23','C25','C26','C27','C28','C29',
                    'C30','C31','C32A','C32B','C33','C34A','C34B','C35A','C35B','C37',
                    'C38A','C38B','C39','C40','C42','C43A','C44A','C44B','C45A','C45B',
                    'C53','C80','C81','C82',
                    
                    'C21', 'C24', 'C36','C43', 'ORDNOM', 'ORDREF',
                    
                    'G63')
  AND e.mojap_snapshot_date = DATE'{snapshot_date}'



""",

"ca_extract_disps_d")

### Separating disposal types and adding order type descriptions

In [None]:
# Split disposal events to one row per order type (CROSS JOIN UNNEST of all_event_ord_types) and add the order code/description
# The order type lookup table (ca_order_type_lookup in db3) has been created separately
# Order codes should run up to 45, but codes up to 54 are kept as this is what some of the published data includes
# The order_case_type is based on the type of order being disposed: 'C' when the order code is below 19 or the 5th character
#   of the case number is 'C', otherwise 'P'. A case may have both public and private law disposal types
# disp_type_code / disp_type: G63 = 3 'Withdrawn', ORDREF = 4 'Refused', ORDNOM = 5 'No order', order codes 40-45 = 2 'Interim Order', anything else = 1 'Order'
# NOTE(review): the WHERE filter on t2.order_code discards rows without a lookup match, so the LEFT JOIN effectively
#   behaves as an inner join - confirm this is intended
pydb.create_temp_table(
f""" 
SELECT 
  t1.*,
  TRIM(ord_type) as order_type,
  CASE WHEN t2.order_code < 19 OR (SUBSTR(case_number,5,1) = 'C') THEN 'C'
        ELSE 'P' END 
      AS order_case_type,  
  CASE WHEN t1.event_model = 'G63' THEN 3
       WHEN t1.event_model = 'ORDREF' THEN 4
       WHEN t1.event_model = 'ORDNOM' THEN 5
       WHEN t2.order_code BETWEEN 40 AND 45 THEN 2
       ELSE 1 END
    AS disp_type_code,
  CASE WHEN t1.event_model = 'G63' THEN 'Withdrawn'
       WHEN t1.event_model = 'ORDREF' THEN 'Refused'
       WHEN t1.event_model = 'ORDNOM' THEN 'No order'
       WHEN t2.order_code BETWEEN 40 AND 45 THEN 'Interim Order'
       ELSE 'Order' END
    AS disp_type,
  order_code,
  order_desc
FROM 
  __temp__.ca_extract_disps_d AS t1
  CROSS JOIN UNNEST(all_event_ord_types) 
        AS t(ord_type)    
  LEFT JOIN {db3}.ca_order_type_lookup t2
    ON t1.lookup_event_code = t2.event_model
    AND TRIM(ord_type) = t2.value
WHERE
  t2.order_code BETWEEN 0 AND 54

""",

"ca_disps_split_d")

### Excluding duplicate order types within the same event

In [None]:
# Use DISTINCT to remove duplicate order types within an event. The order type field (which contains the letter code) is not
#   included in the select fields, as different codes can map to the same order type (eg 'CAST' & 'CN' are both contact)
# The duplicates account for around 0.25% of all disposals, so it is not a significant number
# main_case_type ensures that, in the small number of cases with both private and public order types, only one case type is
#   selected for the case: a public ('C') order is re-labelled 'P' when the same case also has a private ('P') order in the
#   split disposals table built in the previous step (__temp__.ca_disps_split_d)
# So where the case number is 'P' and the case has both public and private law disposals, it is counted within private law cases disposed
# FM_case_type is based on the letter within the case number (5th character). Naturally there will be 1 per case.
# Case numbers whose 5th character is neither 'P' nor 'C' (domestic violence and adoption case types) are set to 'NA' in the
#   main/FM_case_type fields so they can be excluded from final CA case counts
pydb.create_temp_table(
f"""
  SELECT 
    DISTINCT
    case_number,
    disp_date,
    EXTRACT(YEAR FROM disp_date) AS Year,
    EXTRACT(QUARTER FROM disp_date) AS Quarter,
    order_case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
         WHEN order_case_type = 'C' 
              AND case_number in (SELECT case_number
                                  FROM __temp__.ca_disps_split_d
                                  WHERE order_case_type = 'P')
           THEN 'P'
        ELSE order_case_type END
      AS main_case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
           ELSE  SUBSTR(case_number,5,1) END
      AS FM_case_type,
    receipt_date,
    entry_date,
    event,
    event_model,
    disp_type_code,
    disp_type,
    order_code,
    order_desc,
    creating_court
  FROM 
    __temp__.ca_disps_split_d

""",

"ca_all_disposals_d")

### Adding children to the disposal events

#### Children recorded on the disposal event

In [None]:
# Events with a child role id recorded against the disposal event are extracted. Children not recorded in the event value field are dealt with in the following step
# We are using the main familyman database (db2, event_fields table) to get the child information, as the role id is not contained within the event of the derived tables
# As with the disposal code above, the CROSS JOIN UNNEST flattens the child data to 1 row per child recorded against the disposal (event):
#   the comma-separated f.value is split and each element is trimmed and cast to bigint (TRY_CAST yields NULL for non-numeric values)
# Date of birth and gender are taken from people_derived by matching the child role id to p.role
# NOTE(review): the WHERE filter on p.mojap_snapshot_date removes rows with no people_derived match, so the LEFT JOIN
#   effectively behaves as an inner join - confirm whether unmatched children should be kept here
pydb.create_temp_table( 
f"""
  SELECT 
    a.*,
    f.value children,
    TRY_CAST(TRIM(child_role_id) as bigint) child_role_id,
    p.dob,
    p.gender
  FROM 
    __temp__.ca_all_disposals_d a
    INNER JOIN {db2}.event_fields f
      ON a.event = f.event
    CROSS JOIN UNNEST(SPLIT(f.value,',')) AS t(child_role_id)  
    LEFT JOIN {db1}.people_derived p
      ON TRY_CAST(TRIM(child_role_id) as bigint) = p.role
  WHERE f.field_model IN('C21_CH','C23_CH','C24_CH','C25_CH','C26_CH','C27_CH','C28_CH','C29_CH',
                      'C30_CH','C31_CH','C32A_CH','C32B_CH','C33_CH','C34A_CH',
                      'C34B_CH','C35A_CH','C35B_CH','C36_CH','C37_CH','C38A_CH',
                      'C38B_CH','C39_CH','C40_CH','C42_CH','C43_CH',
                      'C43A_CH','C44A_CH','C44B_CH','C45A_CH','C45B_CH','C53_CH',
                      'C80_CH','C81_CH','C82_CH','CPA_CH','ORDNOM_CH','ORDREF_CH', 'G63_CH')
    AND child_role_id <> ''
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
""",

"ca_disps_child_event_d")

#### Children - recorded on the case (not recorded on the disposal event)

In [None]:
# Take all disposal events that do not appear in the child event table created above (ca_disps_child_event_d) and get the children's details from the people table instead.
# Where there are no children recorded on the event then all children recorded in the case are considered involved in the application - this is not an assumption but based on how Familyman data entry works
# Children are matched on case_number and restricted to the role models 'CHLDC' and 'CHLDZ'
# This table does not include those where children are recorded on the event and are flagged to delete in the roles table (main FM tables). These children are included in the next stage
# Date of birth and gender are taken from the people_derived table
pydb.create_temp_table( 
f"""
  SELECT
    a.*,
    p.role child_role_id,
    p.dob,
    p.gender
  FROM
    __temp__.ca_all_disposals_d a
    JOIN {db1}.people_derived p 
      on a.case_number = p.case_number
  WHERE
    event not in (SELECT event FROM __temp__.ca_disps_child_event_d)
    AND p.role_model in ('CHLDC', 'CHLDZ')
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
""",

"ca_disps_child_case_d")

#### Children - not recorded on event or case

In [None]:
# There are a small number of applications where the children are not recorded on the event and there are also no children recorded on the case
#   (or all children recorded have a delete flag of 'Y'). This tends to be for DV cases, where CA order types may have been made
#   but children are not necessarily recorded on the case.
# Here we default to just one child per order/disposal, with unknown characteristics (child_role_id, dob and gender are all NULL).
#   This is an assumption and will not always be correct.
# The rows selected are the disposal events found in neither ca_disps_child_event_d nor ca_disps_child_case_d.
pydb.create_temp_table(
f"""
  SELECT
    *,
    CAST(NULL AS INTEGER) AS child_role_id,
    CAST(NULL AS DATE) dob,
    CAST(NULL AS INTEGER) AS gender
  FROM
    __temp__.ca_all_disposals_d
  WHERE
    event NOT IN (SELECT event FROM __temp__.ca_disps_child_event_d)
    AND event NOT IN (SELECT event FROM __temp__.ca_disps_child_case_d)
""",

"ca_disps_no_child_d")

In [None]:
# Row count of the combined child-level disposals table (ca_disps_all_children_d)
# NOTE(review): ca_disps_all_children_d is only created by a later cell ("Combining children data sets together"),
#   so that cell must have been run before this check - confirm the intended cell order
pydb.read_sql_query ("select count (*) as count from __temp__.ca_disps_all_children_d")

In [None]:
# Row count of __temp__.ca_disps_all_children (no '_d' suffix)
# NOTE(review): no cell visible in this notebook creates this table - presumably it comes from a sibling notebook
#   and is counted here for comparison; confirm it exists before running
pydb.read_sql_query ("select count (*) as count from __temp__.ca_disps_all_children")

### Child disposals data set
##### Combining children data sets together

In [None]:
# Append (UNION ALL) the three child datasets created previously, each of which also carries all the disposal data:
#   ca_disps_child_event_d - children recorded on the disposal event
#   ca_disps_child_case_d  - children taken from the case
#   ca_disps_no_child_d    - no children recorded (child fields are NULL)
# The child event table also links to the people table to get the dob/gender info
# The same columns are selected in the same order from each table so that the three SELECTs line up

pydb.create_temp_table( 
f"""
  SELECT 
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court
  FROM
    __temp__.ca_disps_child_event_d a
  UNION ALL
  SELECT
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court
  FROM
    __temp__.ca_disps_child_case_d a
  UNION ALL
  SELECT
    a.case_number,
    a.disp_date,
    Year,
    Quarter,
    a.order_case_type,
    a.main_case_type,
    a.FM_case_type,
    a.receipt_date,
    a.entry_date,
    a.event,
    a.event_model,
    a.disp_type_code,
    a.disp_type,
    a.order_code,
    a.order_desc,
    a.child_role_id,
    a.dob,
    a.gender,
    a.creating_court
  FROM
    __temp__.ca_disps_no_child_d a
""",

"ca_disps_all_children_d")