In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Widen the pandas display limits so wide query results are readable in cell output:
# up to 100 columns, 900 characters of total width, 200 characters per column value.
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

In [None]:
# Notebook-wide configuration.
# db2 is the database (schema) name that the SQL cells below interpolate in front of
# the source table, i.e. {db2}.ca_disps_all_children.
db2 = "fcsq"

In [None]:
# Orders made - row-level extract with a country variable -> temp table "priv_sgo_cao_age".
# Keeps rows of ca_disps_all_children (in the db2 database) where year is between 2011
# and 2023 inclusive, disp_type_code = 1, order_case_type = 'P' and order_code = 27.
# Derived columns:
#   child_age - days between dob and receipt_date divided by 365.25, floored
#               (age calculation copied from apps code).
#   country   - 'Wales' when disposal_region = 'WALES', otherwise 'England'.
# NOTE(review): dividing by 365.25 only approximates age in years, so a row dated exactly
# on a birthday can come out one year low - presumably kept to match the apps code; confirm.
# NOTE(review): any disposal_region other than exactly 'WALES' (including NULL) is
# labelled 'England' - confirm that is intended.
# NOTE(review): no aggregation happens in this cell, despite the progress message below.
print("aggregating order level dataset....")
pydb.create_temp_table( 
f"""
SELECT
  Year,
   Order_desc,
   order_code,  
   Floor((date_diff('day',cast(dob as date),cast(receipt_date as date)))/365.25) AS child_age,
   CASE WHEN disposal_region = 'WALES' 
         THEN 'Wales'
         ELSE 'England' END 
      AS country
FROM
  {db2}.ca_disps_all_children
WHERE
  year BETWEEN 2011 AND 2023  
  AND disp_type_code = 1
  AND order_case_type = 'P'
  AND order_code = 27
  
""",

"priv_sgo_cao_age")

In [None]:
# Orders made - England-only counts by age -> temp table "priv_sgo_cao_age_agg".
# Reads __temp__.priv_sgo_cao_age, keeps rows where country = 'England', and counts rows
# per Year / Order_desc / order_code / child_age.
# child_age is output as text: values below 0, above 17 or NULL become 'Unknown';
# every other value is cast to VARCHAR.
# The same CASE expression is repeated in GROUP BY so the grouping matches the
# selected child_age column.
print("aggregating order level dataset....")
pydb.create_temp_table( 
f"""
SELECT
  Year,
   Order_desc,
   order_code,  
   CASE WHEN child_age < 0 
         OR child_age > 17 
         OR child_age IS NULL
        THEN 'Unknown'
    ELSE CAST(child_age AS VARCHAR) END
      AS child_age,
   COUNT(*) AS Count
FROM
  __temp__.priv_sgo_cao_age
WHERE
  country = 'England'
GROUP BY
  Year,
  Order_desc,
  order_code,
  CASE WHEN child_age < 0 
         OR child_age > 17 
         OR child_age IS NULL
        THEN 'Unknown'
    ELSE CAST(child_age AS VARCHAR) END
  
""",

"priv_sgo_cao_age_agg")

In [None]:
# Orders made - QA variant with a country variable -> temp table "priv_sgo_cao_age_alt".
# Alternative setup without filtering country: counts rows of ca_disps_all_children
# (in the db2 database) per Year / Order_desc / order_code / child_age / country, for
# year 2011-2023 inclusive, disp_type_code = 1, order_case_type = 'P', order_code = 27.
# child_age is days between dob and receipt_date divided by 365.25, floored (age
# calculation copied from apps code); it is left as the raw value here, i.e. it is
# not recoded to 'Unknown' for out-of-range or NULL ages.
# country is 'Wales' when disposal_region = 'WALES', otherwise 'England'.
# NOTE(review): no later cell visible in this part of the notebook reads this table -
# presumably it is queried by hand during QA; confirm.
print("aggregating order level dataset....")
pydb.create_temp_table( 
f"""
SELECT
  Year,
   Order_desc,
   order_code,  
   Floor((date_diff('day',cast(dob as date),cast(receipt_date as date)))/365.25) AS child_age,
   CASE WHEN disposal_region = 'WALES' 
         THEN 'Wales'
         ELSE 'England' END 
      AS country,
   COUNT(*) AS Count
FROM
  {db2}.ca_disps_all_children
WHERE
  year BETWEEN 2011 AND 2023  
  AND disp_type_code = 1
  AND order_case_type = 'P'
  AND order_code = 27
GROUP BY
  Year,
  Order_desc,
  order_code,
  Floor((date_diff('day',cast(dob as date),cast(receipt_date as date)))/365.25),
  CASE WHEN disposal_region = 'WALES' 
         THEN 'Wales'
         ELSE 'England' END
  
""",

"priv_sgo_cao_age_alt")

In [None]:
# Read every row of the aggregated England age-count temp table into the notebook.
pub_data = pydb.read_sql_query("SELECT * FROM __temp__.priv_sgo_cao_age_agg")

In [None]:
# Wrap the query result in a pandas DataFrame ready for export.
# NOTE(review): if read_sql_query already returns a DataFrame this wrap is redundant - confirm.
pub_df = pd.DataFrame(pub_data)

In [None]:
# Export the aggregated counts to S3 as CSV, with a header row and without the index.
pub_df.to_csv(
    r's3://alpha-family-data/Adhoc/priv_child_sgo_cao_age_county.csv',
    header=True,
    index=False,
)