In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Relax pandas' display limits so wide query results stay readable in the notebook
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

In [None]:
# Settings shared by the queries in the rest of the notebook.

# Snapshot of the Familyman data to query. The snapshot dates and publication
# period are normally set in the main run file; set the value here when running
# this notebook on its own.
# NOTE(review): this assignment is unconditional - confirm it does not clobber a
# value supplied by the main run file.
snapshot_date = "2023-05-10"

# Database where the Familyman data is stored
db1 = "familyman_dev_v3"

# Database holding tables created as part of FCSQ processing, where required
db2 = "fcsq"

In [None]:
# Attach a postcode to each applicant whose main_case_type is 'P' by following
# applicant -> party -> address, restricted to the chosen Familyman snapshot.
#
# The snapshot-date predicates live in the ON clauses rather than in WHERE:
# filtering the right-hand tables in WHERE discards the NULL-extended rows a
# LEFT JOIN produces, which silently turns it into an INNER JOIN and drops
# applicants that have no party/address row. Keeping those applicants (with a
# NULL postcode) is what allows the later postcode_count / app_count comparison.
#
# NOTE(review): main_case_type is unqualified; presumably a ca_applicants
# column. If it actually comes from parties/addresses it needs the same
# treatment as the snapshot filters - confirm.
pydb.create_temp_table(
    f"""
SELECT
  ap.year,
  ap.party_id,
  ad.postcode,
  ap.case_number
FROM
  {db2}.ca_applicants ap
  LEFT JOIN {db1}.parties p
    ON ap.party_id = p.party
    AND p.mojap_snapshot_date = DATE'{snapshot_date}'
  LEFT JOIN {db1}.addresses ad
    ON p.address = ad.address
    AND ad.mojap_snapshot_date = DATE'{snapshot_date}'
WHERE
  main_case_type = 'P'
""",
    "priv_app_postcode",
)

In [None]:
# Bring applicant postcodes onto the kinship cases, matching on case number.
# party_id and postcode come from the right-hand side of the LEFT JOIN, so they
# are NULL for kinship cases with no row in priv_app_postcode.
# NOTE(review): because of DISTINCT, every unmatched case in a year collapses to
# a single (year, NULL, NULL) row - confirm that is intended for the counts.
# NOTE(review): __temp__.priv_res_kinship is not created in the cells shown
# here; presumably it is built earlier - confirm it exists before running.
kinship_postcode_sql = """
SELECT    
  DISTINCT
  ap.year,
  ad.party_id,
  ad.postcode
FROM
  __temp__.priv_res_kinship ap
  LEFT JOIN __temp__.priv_app_postcode ad
    ON ap.case_number = ad.case_number

  
"""
pydb.create_temp_table(kinship_postcode_sql, "priv_app_postcode_2")

In [None]:
# Per year: the number of rows in priv_app_postcode_2 (app_count) and how many
# of those rows have a non-NULL postcode (postcode_count).
postcode_count_sql = """
SELECT 
  year,
  COUNT (*) AS app_count,
  SUM (CASE WHEN postcode IS NOT NULL THEN 1 ELSE 0 END) AS postcode_count
FROM 
  __temp__.priv_app_postcode_2
GROUP BY
  Year
  
"""
pydb.create_temp_table(postcode_count_sql, "priv_app_postcode_count")

In [None]:
# Display the per-year application and postcode counts, ordered by year
postcode_count_query = "select * from __temp__.priv_app_postcode_count order by year"
pydb.read_sql_query(postcode_count_query)