#### Import packages and set options

In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Widen pandas' display limits so wide query results are readable in the notebook
for _option, _setting in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(_option, _setting)

#### Defining variables

In [None]:
# Shared settings interpolated into the SQL of every cell below.

# Databases
db1 = "familyman_derived_dev_v2"  # Familyman derived tables (events_derived, people_derived)
db2 = "familyman_dev_v2"  # main Familyman tables (event_fields)
db3 = "fcsq"  # tables created as part of FCSQ processing, e.g. the order type lookup

# Snapshot of the Familyman data to read - update where necessary
snapshot_date = "2021-11-11"

## Applications

#### Extracting applications, filtered by specific order types

In [None]:
# Build the base Children Act (CA) applications table: one row per application event and order type.
# - CROSS JOIN UNNEST flattens application_type_value_array so each application type gets its own row.
# - Only U22 events (initial applications) and G50 events (subsequent applications within a case) are kept.
# - Order type codes/descriptions come from the lookup table, which is created independently of this process.
# - The WHERE filter on o.child_act = 'Y' discards rows without a lookup match, so the LEFT JOIN
#   effectively behaves as an inner join.
# - court_code is the first three characters of the event, cast to an integer.
ca_apps_query = f"""
SELECT 
    e.case_number,
    e.receipt_date,
    EXTRACT(year FROM e.receipt_date) AS year,
    EXTRACT(quarter FROM e.receipt_date) AS quarter,
    e.event,
    e.event_model,
    application_type_value_array as all_event_app_types,
    TRIM(ord_type) as order_type,
    o.order_type_code as order_code,
    o.order_desc,
    CAST(SUBSTR(CAST(e.event AS varchar),1,3) AS integer) AS court_code
  FROM 
    {db1}.events_derived AS e
    CROSS JOIN UNNEST(application_type_value_array) AS t(ord_type)
    LEFT JOIN {db3}.apps_order_type_lookup o
      ON ord_type = o.order_type
  WHERE 
    e.event_model IN('U22','G50')
    AND  o.child_act = 'Y'
    AND e.mojap_snapshot_date = DATE'{snapshot_date}'
    
"""

pydb.create_temp_table(ca_apps_query, "ca_apps_d")

## Children

#### Children (event) - children recorded on the application event

In [None]:
# Children recorded directly against the application event.
# - The child role ids live in the event value field of the main Familyman database (U22_CH / G50_CH
#   field models); the role id is not contained within the event of the derived tables.
# - SPLIT + CROSS JOIN UNNEST turns the comma-separated value into one row per child role id;
#   empty entries are excluded.
# - dob and gender are looked up in people_derived by role. Because the WHERE clause filters on
#   p.mojap_snapshot_date, role ids without a match in people_derived are dropped.
# Events with no children recorded in the event value field are handled in the following step.
child_event_query = f"""
  SELECT 
    a.*,
    f.value children,
    TRY_CAST(TRIM(child_role_id) as bigint) child_role_id,
    p.dob,
    p.gender
  FROM 
    __temp__.ca_apps_d a
    INNER JOIN {db2}.event_fields f
      ON a.event = f.event
    CROSS JOIN UNNEST(SPLIT(f.value,',')) AS t(child_role_id)  
    LEFT JOIN {db1}.people_derived p
      ON TRY_CAST(TRIM(child_role_id) as bigint) = p.role
  WHERE f.field_model IN('U22_CH','G50_CH')
    AND child_role_id <> ''
    AND f.mojap_snapshot_date = DATE'{snapshot_date}'
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
"""

pydb.create_temp_table(child_event_query, "ca_apps_child_event_d")

#### Children (case) - joining the children onto the application data using the people table where children are not recorded on the event

In [None]:
# Children taken from the case, for events that have no children in the children (event) table.
# - Where no children are recorded on the event, all children recorded in the case are treated as
#   involved in the application. This is not an assumption: it follows how Familyman data entry works.
# - Children are the people_derived rows for the case with role model CHLDC or CHLDZ; dob and gender
#   come from the same rows.
# - Events whose recorded children are flagged to delete in the roles table (main FM tables) are not
#   covered here; they are included in the next stage.
child_case_query = f"""
  SELECT
    a.*,
    p.role child_role_id,
    p.dob,
    p.gender
  FROM
    __temp__.ca_apps_d a
    JOIN {db1}.people_derived p 
      on a.case_number = p.case_number
  WHERE
    event not in (SELECT event FROM __temp__.ca_apps_child_event_d)
    AND p.role_model in ('CHLDC', 'CHLDZ')
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
"""

pydb.create_temp_table(child_case_query, "ca_apps_child_case_d")

#### Children (not recorded) - children that are not recorded on the event or the case. 

In [None]:
# Applications with no child found on either the event or the case.
# There are a small number of applications where the children are not recorded on the event and there
# are also no children recorded on the case (or all children recorded have a delete flag of 'Y').
# This tends to be for DV cases, where CA order types may have been made but children are not
# necessarily recorded on the case.
# Here we default to just one child per order/disposal, with unknown characteristics (NULL role id,
# dob and gender). This is an assumption and will not always be correct.
no_child_query = f"""
  SELECT
    *,
    CAST(NULL AS INTEGER) AS child_role_id,
    CAST(NULL AS DATE) dob,
    CAST(NULL AS INTEGER) AS gender
  FROM
    __temp__.ca_apps_d
  WHERE
    event NOT IN (SELECT event FROM __temp__.ca_apps_child_event_d)
    AND event NOT IN (SELECT event FROM __temp__.ca_apps_child_case_d)
"""

pydb.create_temp_table(no_child_query, "ca_apps_no_child_d")

#### Appending children from events, children from case and no child recorded data sets

In [None]:
# Stack the three child data sets (children from the event, children from the case, no child recorded)
# into a single table. Each branch selects the same columns in the same order, and UNION ALL keeps
# every row - duplicates are dealt with in the next step.
all_children_query = f"""
  SELECT 
    case_number,
    receipt_date,
    year,
    quarter,
    event_model,
    event,
    court_code,
    order_type,
    order_code,
    order_desc,
    child_role_id,
    dob,
    gender
  FROM
    __temp__.ca_apps_child_event_d
  UNION ALL
  SELECT
    case_number,
    receipt_date,
    year,
    quarter,
    event_model,
    event,
    court_code,
    order_type,
    order_code,
    order_desc,
    child_role_id,
    dob,
    gender
  FROM
    __temp__.ca_apps_child_case_d
  UNION ALL
  SELECT
    case_number,
    receipt_date,
    year,
    quarter,
    event_model,
    event,
    court_code,
    order_type,
    order_code,
    order_desc,
    child_role_id,
    dob,
    gender
  FROM
    __temp__.ca_apps_no_child_d
"""

pydb.create_temp_table(all_children_query, "ca_apps_all_children_d")

#### Calculating child age and flagging duplicate child/order type data within a case

In [None]:
# The apps all children table contains some records with the same order type for the same child in the
# same case in the same year. Here we rank the records within each (year, case, child, order description)
# group so that:
#   - initial applications (U22) rank ahead of subsequent applications (G50) - event_model DESC;
#   - for the same event model, the earliest receipt date ranks highest;
#   - event and order_type break any remaining ties, so the record that receives dup_rank = 1 is the
#     same on every run (ROW_NUMBER otherwise orders tied rows arbitrarily).
# The partition columns are not repeated in the ORDER BY: they are constant within a partition.
# child_age is the number of days between dob and receipt date divided by 365.25, rounded down
# (an approximation of age in whole years). case_type is the 5th character of the case number.
pydb.create_temp_table(
    f"""
SELECT
    *,
    Floor((date_diff('day',cast(dob as date),cast(receipt_date as date)))/365.25) AS child_age,
    substr(case_number, 5,1) as case_type,
    ROW_NUMBER() OVER(PARTITION BY year, case_number, child_role_id, order_desc
                       ORDER BY event_model DESC, receipt_date ASC, event ASC, order_type ASC)
      AS dup_rank
FROM
   __temp__.ca_apps_all_children_d
""",
    "ca_apps_dup_rank_d",
)

## Children involved in applications dataset

In [None]:
# Children involved in applications: keep only the top-ranked record (dup_rank = 1) in each year for
# each order type per child, then add descriptive fields:
# - Gender: 1 -> 'Male', 2 -> 'Female', anything else -> 'Unknown'.
# - Age_band: 'Unknown' when the age is missing, 'Other' when it is negative or 18 and over,
#   otherwise <1, 1-4, 5-9, 10-14 or 15-17 years.
# - order_case_type: 'C' (public law) when the order type is in the listed set or the case type is 'C',
#   otherwise 'P' (private law). Potentially revisit this, as private law may contain adoption cases.
apps_child_query = f"""
SELECT
    year,
    quarter,
    case_number,
    receipt_date,
    event,
    event_model,
    order_type,
    order_code,
    order_desc,
    child_role_id,
    CASE WHEN gender = 1 THEN 'Male'
         WHEN gender = 2 THEN 'Female'
        ELSE 'Unknown'
      end as Gender,
    dob,
    child_age,
    CASE WHEN Child_age is Null
          THEN 'Unknown'
         WHEN Child_age < 0
          THEN 'Other'
         WHEN Child_age < 1
          THEN '<1 year'
         WHEN Child_age<5
           THEN '1-4 years'
         WHEN Child_age<10
          THEN '5-9 years'
         WHEN Child_age<15
          THEN '10-14 years'
         WHEN Child_age<18
          THEN '15-17 years'
           ELSE 'Other'
      END AS Age_band,
    court_code,
    CASE WHEN order_type in ('CRO','SSC','DCO','OSA','SO','DSO','OC','OCST','ARC','ARST','ESO','XESO','CAO','EPO','XEPO','DEPO','WEP')
      OR case_type = 'C' THEN 'C' Else 'P' END 
     AS order_case_type
FROM 
   __temp__.ca_apps_dup_rank_d
WHERE
  dup_rank = 1
"""

pydb.create_temp_table(apps_child_query, "ca_apps_child_d")

## Orders applied for

In [None]:
# Orders applied for: drop the child-level columns and de-duplicate, so that an order type within an
# event is counted once rather than once per child.
order_types_query = f"""
SELECT 
  DISTINCT 
    year,
    quarter,
    case_number,
    order_case_type,
    receipt_date,
    event,
    court_code,
    order_type,
    order_code,
    order_desc
FROM 
  __temp__.ca_apps_child_d
"""

pydb.create_temp_table(order_types_query, "ca_apps_order_types_d")

## Application count data

In [None]:
# Application counts (individual events): the order type columns are dropped and rows de-duplicated, so
# multiple orders applied for under one event are only counted once.
# main_case_type is derived as follows:
# - 'NA' when the 5th character of the case number is neither 'P' nor 'C';
# - 'P' when the row is public law ('C') but the same case also has a private law ('P') order type, so
#   that a case with both private law and public law applications is counted within private law
#   case starts;
# - otherwise the row's own order_case_type.
event_count_query = f"""
SELECT 
  DISTINCT 
    year,
    quarter,
    case_number,
    order_case_type,
    CASE WHEN SUBSTR(case_number,5,1) NOT IN ('P','C')
           THEN 'NA'
         WHEN order_case_type = 'C' 
              AND case_number in (SELECT case_number
                                  FROM __temp__.ca_apps_order_types_d
                                  WHERE order_case_type = 'P')
           THEN 'P'
        ELSE order_case_type END
      AS main_case_type,
    receipt_date,
    event,
    court_code
FROM 
  __temp__.ca_apps_child_d
"""

pydb.create_temp_table(event_count_query, "ca_apps_event_count_d")

## Case count data

In [None]:
# Case counts: collapse the application events to one row per main case type and case number, keeping
# the earliest receipt date and the year and quarter that date falls in.
case_count_query = f"""
SELECT    
  main_case_type,    
  case_number,
  (MIN(receipt_date)) AS MIN_of_RECEIPT_DATE,
  EXTRACT (YEAR FROM (MIN(receipt_date))) AS Year,
  EXTRACT (QUARTER FROM (MIN(receipt_date))) AS Quarter
FROM 
  __temp__.ca_apps_event_count_d
GROUP BY 
  main_case_type, 
  case_number
"""

pydb.create_temp_table(case_count_query, "ca_apps_case_count_d")

## Individual children

#### Individual children by year

In [None]:
# Individual children by year: number the records for each child within a year and case type, then keep
# the earliest one (child_count_yr = 1).
# - Records where no child was recorded have a NULL child_role_id. PARTITION BY groups all NULLs
#   together, which would collapse the unknown children of every case into a single row per year and
#   case type. The extra CASE expression partitions those rows by case number instead, so one unknown
#   child is kept per case; it is NULL for every recorded child, leaving their partitions unchanged.
# - case_number and event are added as tie-breakers after receipt_date so the record kept is the same
#   on every run (ROW_NUMBER otherwise orders tied rows arbitrarily). The partition columns are not
#   repeated in the ORDER BY because they are constant within a partition.
pydb.create_temp_table(
    f"""
WITH ca_apps_count_child_yr AS (

SELECT
  *,
  ROW_NUMBER() OVER(PARTITION BY year, child_role_id, order_case_type,
                                 CASE WHEN child_role_id IS NULL THEN case_number END
                       ORDER BY receipt_date ASC, case_number ASC, event ASC)
      AS child_count_yr
FROM
  __temp__.ca_apps_child_d

)

SELECT
  year,
  quarter,
  case_number,
  child_role_id,
  gender,
  dob,
  age_band,
  order_case_type
FROM
  ca_apps_count_child_yr
WHERE
  child_count_yr = 1
""",
    "ca_apps_ind_child_yr_d",
)

#### Individual children by quarter

In [None]:
# Individual children by quarter: number the records for each child within a year, quarter and case
# type, then keep the earliest one (child_count_qtr = 1).
# - Records where no child was recorded have a NULL child_role_id. PARTITION BY groups all NULLs
#   together, which would collapse the unknown children of every case into a single row per quarter
#   and case type. The extra CASE expression partitions those rows by case number instead, so one
#   unknown child is kept per case; it is NULL for every recorded child, leaving their partitions
#   unchanged.
# - case_number and event are added as tie-breakers after receipt_date so the record kept is the same
#   on every run (ROW_NUMBER otherwise orders tied rows arbitrarily). The partition columns are not
#   repeated in the ORDER BY because they are constant within a partition.
pydb.create_temp_table(
    f"""
WITH ca_apps_count_child_qtr AS (

SELECT
  *,
  ROW_NUMBER() OVER(PARTITION BY year, quarter, child_role_id, order_case_type,
                                 CASE WHEN child_role_id IS NULL THEN case_number END
                       ORDER BY receipt_date ASC, case_number ASC, event ASC)
      AS child_count_qtr
FROM
  __temp__.ca_apps_child_d

)

SELECT
  year,
  quarter,
  case_number,
  child_role_id,
  gender,
  dob,
  age_band,
  order_case_type
FROM
  ca_apps_count_child_qtr
WHERE
  child_count_qtr = 1

""",
    "ca_apps_ind_child_qtr_d",
)