In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Relax pandas' display limits so wide query results stay readable in cell output
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(option_name, option_value)

In [None]:
# Shared settings referenced by the queries throughout the notebook.

# Database where Familyman data is stored
db1 = "familyman_dev_v3"

# Database where tables created as part of FCSQ processing are stored where required
db2 = "fcsq"

# Snapshot dates and publication period are normally set in the main run file.
# When running this notebook independently, set the snapshot date here.
# NOTE: this assignment is unconditional, so it replaces any value set earlier.
snapshot_date = "2023-05-10"

In [None]:
# Extract res orders made.
# Reads {db2}.ca_disps_all_children, keeping rows with disp_type_code = 1,
# Year > 2010, order_case_type = 'P' and order_code = 30, and derives a
# country column ('Wales' when disposal_region is 'WALES', otherwise 'England').
priv_res_ords_sql = f"""
SELECT    
  year,
  case_number,
  Order_desc,
  CASE WHEN disposal_region = 'WALES'
        THEN 'Wales'
        ELSE 'England' END 
    AS country
FROM 
  {db2}.ca_disps_all_children
WHERE
  disp_type_code = 1
  AND Year > 2010
  AND order_case_type = 'P'
  AND order_code = 30

"""

# Result is stored as the temp table __temp__.priv_res_ords
pydb.create_temp_table(priv_res_ords_sql, "priv_res_ords")

In [None]:
# Extract applicant relationship-to-child (rtc) information.
# Reads {db1}.role_fields for the configured snapshot date, keeping only the
# 'APLC_RC' field model; the field's value is exposed as the rtc column.
app_rtc_sql = f"""
SELECT    
  value AS rtc,
  field_model,
  role
FROM 
  {db1}.role_fields
WHERE
  field_model IN ('APLC_RC')
  AND mojap_snapshot_date = DATE'{snapshot_date}'
"""

# Result is stored as the temp table __temp__.app_rtc
pydb.create_temp_table(app_rtc_sql, "app_rtc")

In [None]:
# Get rtc for applicants.
# Every row of {db2}.ca_applicants is kept (LEFT JOIN on role_id = role).
# An rtc of '--' or 'Child', or no matching rtc at all (NULL), is reported
# as 'Not recorded'.
apps_rtc_sql = f"""
SELECT    
  a.case_number,
  a.role_id,
  CASE WHEN (r.rtc IN ('--','Child')
              OR r.rtc IS NULL)
        THEN 'Not recorded'
        ELSE r.rtc
    END AS rtc   
FROM 
  {db2}.ca_applicants a
  LEFT JOIN __temp__.app_rtc r
    ON a.role_id = r.role

"""

# Result is stored as the temp table __temp__.apps_rtc
pydb.create_temp_table(apps_rtc_sql, "apps_rtc")

In [None]:
# Add two 0/1 flags to each order row from __temp__.priv_res_ords:
#   parent_app       - 1 when the case has at least one applicant whose rtc is
#                      'Father' or 'Mother'
#   rtc_not_recorded - 1 when the case has no applicant rows at all, or has at
#                      least one applicant whose rtc is 'Not recorded'
#
# The NOT IN subquery excludes NULL case numbers on purpose: in SQL,
# `x NOT IN (..., NULL)` evaluates to NULL rather than TRUE, so a single NULL
# case_number in apps_rtc would stop the "no applicant rows" test from ever
# firing for any order.
pydb.create_temp_table(
f"""
SELECT
  o.*,
  CASE WHEN o.case_number IN (SELECT case_number
                              FROM __temp__.apps_rtc
                              WHERE rtc IN ('Father','Mother'))
        THEN 1
        ELSE 0 END
     AS parent_app,
  CASE WHEN o.case_number NOT IN (SELECT case_number
                                  FROM __temp__.apps_rtc
                                  WHERE case_number IS NOT NULL)
        OR o.case_number IN (SELECT case_number
                             FROM __temp__.apps_rtc
                             WHERE rtc = 'Not recorded')
        THEN 1
        ELSE 0 END
     AS rtc_not_recorded
FROM
  __temp__.priv_res_ords o

""",

"priv_res_orders_rtc")

In [None]:
# Add a further flag column, rtc_nr_parent.
# A case can have more than one applicant. If one applicant's rtc is not
# recorded but another is recorded as a parent, the order should not be counted
# as "rtc not recorded". rtc_nr_parent is 'Y' for exactly those rows
# (parent_app = 1 and rtc_not_recorded = 1) and 'N' otherwise.
priv_res_rtc_sql = f"""
SELECT
  o.*,
  CASE WHEN parent_app = 1
       AND rtc_not_recorded = 1
          THEN 'Y'
          ELSE 'N' END 
    AS rtc_nr_parent
FROM
  __temp__.priv_res_orders_rtc o
  
"""

# Result is stored as the temp table __temp__.priv_res_rtc
pydb.create_temp_table(priv_res_rtc_sql, "priv_res_rtc")

In [None]:
# Aggregate up to one row per year and country:
#   parent_app       - sum of the parent_app flag
#   rtc_not_recorded - sum of the rtc_not_recorded flag, restricted to rows
#                      where rtc_nr_parent = 'N' (i.e. excluding rows that also
#                      have a parent applicant)
#   children         - number of rows in the group (presumably one row per
#                      child, given the source table name - TODO confirm)
#
# The ELSE 0 matters: without it, a group in which every row has
# rtc_nr_parent = 'Y' would sum only NULLs and report NULL instead of 0.
pydb.create_temp_table(
f"""
SELECT 
  year,
  country,
  SUM(parent_app) AS parent_app,
  SUM(CASE WHEN rtc_nr_parent = 'N' THEN rtc_not_recorded ELSE 0 END)
      AS rtc_not_recorded,
  count(*) AS children
FROM
  __temp__.priv_res_rtc
GROUP BY 
  year,
  country

""",

"priv_res_agg")

In [None]:
# Pull the aggregated temp table back into Python for export
priv_res_agg_query = "SELECT * FROM __temp__.priv_res_agg"
priv_data = pydb.read_sql_query(priv_res_agg_query)

In [None]:
# Wrap the query result in a DataFrame.
# NOTE(review): read_sql_query presumably already returns a DataFrame, in which
# case this wrap is redundant - confirm before removing.
priv_df = pd.DataFrame(data=priv_data)

In [None]:
# Write the aggregated figures to S3 as CSV, with a header row and no index column
output_path = r's3://alpha-family-data/Adhoc/priv_res_child.csv'
priv_df.to_csv(output_path, index=False, header=True)