In [None]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# Widen the pandas display limits so wide query results are easier to read in the notebook
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

In [None]:
# Variables to be used in this notebook

# Athena database the output tables are stored in
# NOTE(review): not referenced by the cells visible here (the queries hard-code "fcsq.") - confirm it is still needed
fcsq_database = "fcsq"

# S3 bucket the data is saved to
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

# Last full year to be published - including this publication
# (used as the upper bound of the annual lookup queries)
annual_year = 2023

# Current publication variables
# NOTE(review): not referenced by the cells visible here - confirm they are still needed
current_year = 2024
current_quarter = 2

# Next publication variables
# (rows for this year/quarter are excluded by the quarterly queries)
next_quarter_year = 2024
next_quarter = 3

In [None]:
# Builds the temp table __temp__.nisi_rep_timeliness from fcsq.divorce_fct_nisi.
# Keeps the case ids, Year/Quarter, region, the four representation flags and app_to_nisi_weeks,
# and adds a Representation label: the CASE returns the label of the first flag equal to 1
# (checked in the order both, petitioner, respondent, neither); rows with no flag set get NULL.
# Filters: Proceeding_type_code 'D' or 'N', Law = 'Old', Year > 2010, and the single
# year/quarter given by next_quarter_year / next_quarter is excluded.
create_nisi_rep_timeliness_table =f"""
SELECT
  FM_CASE_CID,
  legal_case_id,
  Year,
  Quarter,
  region,
  pet_rep,
  resp_rep,
  both_rep,
  neither_rep,
  CASE WHEN both_rep = 1 THEN '1 Both'
       WHEN pet_rep = 1 THEN '2 Petitioner Only'
       WHEN resp_rep = 1 THEN '3 Respondent Only'
       WHEN neither_rep = 1 THEN '4 Neither'
  END AS Representation,
  app_to_nisi_weeks
  
FROM fcsq.divorce_fct_nisi

WHERE Proceeding_type_code in ('D','N') AND 
      Law = 'Old' AND
      Year > 2010 AND 
      NOT (Year = {next_quarter_year} AND Quarter = {next_quarter});
"""
pydb.create_temp_table(create_nisi_rep_timeliness_table,'nisi_rep_timeliness')

In [None]:
# Preview up to 10 rows of __temp__.nisi_rep_timeliness; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness")

In [None]:
# Builds __temp__.nisi_rep_timeliness_region: for each quarter label ('<Year>-Q<Quarter>'),
# region and representation group it reports the row count, the mean and the median of app_to_nisi_weeks.
# Median technique: the inner query uses NTILE(2) to split each group, ordered by duration, into two
# halves. For an even row count the median is the average of the top of half 1 and the bottom of half 2;
# otherwise it is the top of half 1.
# NOTE(review): the mean only uses durations >= 0, while number_of_disposals and the median use every
# row (including any negative or NULL durations) - confirm this difference is intended.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_nisi_rep_timeliness_region_table = """
SELECT
  Case_Type,
  Quarter,
  region, 
  representation,
  COUNT (*) AS number_of_disposals,    
  ROUND(AVG(CASE WHEN app_to_nisi_weeks >= 0 THEN app_to_nisi_weeks END),1) AS mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
              THEN (MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END) + MIN(CASE WHEN data_half = 2 THEN app_to_nisi_weeks END)) /2.0 
              ELSE MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END)
         END), 1) AS median_duration    
    
FROM (

SELECT
  'Divorce (inc. Financial Remedy)' as Case_type,
   CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)) AS Quarter,    
   Region,
   Representation,
   app_to_nisi_weeks,
   NTILE(2) OVER (PARTITION BY CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)), region, representation 
                   ORDER BY app_to_nisi_weeks) AS data_half

FROM __temp__.nisi_rep_timeliness)
    
GROUP BY Case_Type, Quarter, Region, Representation;
"""
pydb.create_temp_table(
    create_nisi_rep_timeliness_region_table,
    "nisi_rep_timeliness_region",
)

In [None]:
# Preview up to 10 rows of __temp__.nisi_rep_timeliness_region; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness_region LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness_region")

In [None]:
# Builds __temp__.nisi_rep_timeliness_region_ew: the same count / mean / median summary as the
# regional table, but with Region fixed to 'England & Wales', so every row of a quarter and
# representation group is pooled (the NTILE partition does not include region).
# Median technique: NTILE(2) splits each ordered group into two halves. For an even row count the median
# is the average of the top of half 1 and the bottom of half 2; otherwise it is the top of half 1.
# NOTE(review): the mean only uses durations >= 0, while number_of_disposals and the median use every
# row (including any negative or NULL durations) - confirm this difference is intended.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_nisi_rep_timeliness_region_ew_table = """
SELECT
  Case_Type,
  Quarter,
  region, 
  representation,
  COUNT (*) AS number_of_disposals,    
  ROUND(AVG(CASE WHEN app_to_nisi_weeks >= 0 THEN app_to_nisi_weeks END),1) AS mean_duration,
  ROUND((CASE WHEN COUNT(*) % 2 = 0
              THEN (MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END) + MIN(CASE WHEN data_half = 2 THEN app_to_nisi_weeks END)) /2.0 
              ELSE MAX(CASE WHEN data_half = 1 THEN app_to_nisi_weeks END)
         END), 1) AS median_duration    
    
FROM (

SELECT
  'Divorce (inc. Financial Remedy)' as Case_type,
   CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)) AS Quarter,    
   'England & Wales' AS Region,
   Representation,
   app_to_nisi_weeks,
   NTILE(2) OVER (PARTITION BY CONCAT(CAST(Year AS VARCHAR), '-Q', CAST(Quarter AS VARCHAR)), representation 
                   ORDER BY app_to_nisi_weeks) AS data_half

FROM __temp__.nisi_rep_timeliness)
    
GROUP BY Case_Type, Quarter, Region, Representation;
"""
pydb.create_temp_table(
    create_nisi_rep_timeliness_region_ew_table,
    "nisi_rep_timeliness_region_ew",
)

In [None]:
# Preview up to 10 rows of __temp__.nisi_rep_timeliness_region_ew; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.nisi_rep_timeliness_region_ew LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness_region_ew")

In [None]:
# Builds __temp__.div_table_10_lookup_annual: one row per Year from 2011 up to annual_year,
# keyed by lookup = 'Old Divorce (incl. annulment and FR)|<Year>|'.
# *_n columns sum the representation flags; *_mean columns are the average app_to_nisi_weeks of the
# rows with that flag set, rounded to 3 decimal places. all_n / all_mean cover every row of the year.
# unknown_n / unknown_mean are emitted as the literal string '.'.
create_div_table_10_lookup_annual_table =f"""
SELECT
  CONCAT('Old Divorce (incl. annulment and FR)|', CAST(Year AS VARCHAR),'|') as lookup,
  SUM(both_rep) AS both_n,
  ROUND(AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS both_mean,
  SUM(pet_rep) AS applicant_n,
  ROUND(AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS applicant_mean,
  SUM(resp_rep) AS respondent_n,
  ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS respondent_mean,
  SUM (neither_rep) AS neither_n,
  ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS neither_mean,
  '.' AS unknown_n,
  '.' AS unknown_mean,
  COUNT(*) AS all_n,
  ROUND(AVG(app_to_nisi_weeks),3) AS all_mean
    
FROM __temp__.nisi_rep_timeliness

WHERE Year BETWEEN 2011 and {annual_year}

GROUP BY Year;
"""
pydb.create_temp_table(create_div_table_10_lookup_annual_table,'div_table_10_lookup_annual')

In [None]:
# Preview up to 10 rows of __temp__.div_table_10_lookup_annual; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_10_lookup_annual LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_10_lookup_annual")

In [None]:
# Builds __temp__.div_table_10_lookup_qtr: one row per Year and Quarter,
# keyed by lookup = 'Old Divorce (incl. annulment and FR)|<Year>|Q<Quarter>'.
# Columns follow the same pattern as the annual lookup: *_n sums the representation flags, *_mean is the
# average app_to_nisi_weeks of the rows with that flag set (3 decimal places), and unknown_* is the literal '.'.
# The WHERE clause excludes the year/quarter given by next_quarter_year / next_quarter; the query that
# builds __temp__.nisi_rep_timeliness applies the same exclusion.
create_div_table_10_lookup_qtr_table =f"""
SELECT
  CONCAT('Old Divorce (incl. annulment and FR)|',CAST(Year AS VARCHAR),'|Q', CAST(Quarter AS VARCHAR)) as lookup,
  SUM(both_rep) AS both_n,
  ROUND(AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS both_mean,
  SUM(pet_rep) AS applicant_n,
  ROUND(AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS applicant_mean,
  SUM(resp_rep) AS respondent_n,
  ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS respondent_mean,
  SUM (neither_rep) AS neither_n,
  ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS neither_mean,
  '.' AS unknown_n,
  '.' AS unknown_mean,
  COUNT(*) AS all_n,
  ROUND(AVG(app_to_nisi_weeks),3) AS all_mean
    
FROM __temp__.nisi_rep_timeliness

WHERE NOT (Year = {next_quarter_year} AND Quarter = {next_quarter})

GROUP BY Year, Quarter

ORDER BY Year, Quarter;
"""
pydb.create_temp_table(create_div_table_10_lookup_qtr_table,'div_table_10_lookup_qtr')

In [None]:
# Preview up to 10 rows of __temp__.div_table_10_lookup_qtr; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_10_lookup_qtr LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_10_lookup_qtr")

In [None]:
# Remove the existing fcsq.div_table_10_lookup table definition, then delete the data files
# stored under its S3 location, so the table can be recreated from scratch.
drop_div_table_10_lookup = "DROP TABLE IF EXISTS fcsq.div_table_10_lookup"
pydb.start_query_execution_and_wait(drop_div_table_10_lookup)
# S3 prefixes are plain string matches: the trailing "/" restricts the delete to objects inside this
# table's folder, so sibling keys that merely start with the same name are left alone.
# The trailing ";" suppresses the cell output.
bucket.objects.filter(Prefix="fcsq_processing/Divorce/div_table_10_lookup/").delete();

In [None]:
# Creates fcsq.div_table_10_lookup as a Parquet table at the given S3 location by stacking the
# annual and quarterly lookup temp tables (UNION ALL) and ordering the rows by lookup.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_div_table_10_lookup_table = """
CREATE TABLE IF NOT EXISTS fcsq.div_table_10_lookup
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/div_table_10_lookup') AS
SELECT *
FROM
(SELECT * FROM __temp__.div_table_10_lookup_annual
UNION ALL
SELECT * FROM __temp__.div_table_10_lookup_qtr)
ORDER BY lookup;
"""
pydb.start_query_execution_and_wait(
    create_div_table_10_lookup_table
)

In [None]:
# Read the whole fcsq.div_table_10_lookup table and write it, without the index, as a CSV
# under the sdp_process folder on S3
df = pydb.read_sql_query("select * from fcsq.div_table_10_lookup;")
df.to_csv(
    path_or_buf="s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/div_table_10_lookup.csv",
    index=False,
)


In [None]:
# Preview up to 10 rows of fcsq.div_table_10_lookup; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from fcsq.div_table_10_lookup LIMIT 10")
test

In [None]:
#test = pydb.read_sql_query("SELECT count(*) as row_count from fcsq.div_table_10_lookup")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_10_lookup")

In [None]:
# Remove the existing fcsq.nisi_rep_timeliness table definition, then delete the data files
# stored under its S3 location, so the table can be recreated from scratch.
drop_nisi_rep_timeliness = "DROP TABLE IF EXISTS fcsq.nisi_rep_timeliness"
pydb.start_query_execution_and_wait(drop_nisi_rep_timeliness)
# S3 prefixes are plain string matches: the trailing "/" restricts the delete to objects inside this
# table's folder, so sibling keys that merely start with the same name (for example
# "nisi_rep_timeliness_...") are left alone. The trailing ";" suppresses the cell output.
bucket.objects.filter(Prefix="fcsq_processing/Divorce/nisi_rep_timeliness/").delete();


In [None]:
# Creates fcsq.nisi_rep_timeliness as a Parquet table at the given S3 location by stacking the
# regional summary and the 'England & Wales' summary temp tables (UNION ALL).
# Note: this reassigns create_nisi_rep_timeliness_table, which earlier in the notebook held the
# query that builds the __temp__.nisi_rep_timeliness temp table.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_nisi_rep_timeliness_table = """
CREATE TABLE IF NOT EXISTS fcsq.nisi_rep_timeliness
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/nisi_rep_timeliness') AS
SELECT * FROM __temp__.nisi_rep_timeliness_region
UNION ALL
SELECT * FROM __temp__.nisi_rep_timeliness_region_ew;
"""
pydb.start_query_execution_and_wait(
    create_nisi_rep_timeliness_table
)

In [None]:
# Read the whole fcsq.nisi_rep_timeliness table and write it, without the index, as a CSV
# under the sdp_process folder on S3
df = pydb.read_sql_query("select * from fcsq.nisi_rep_timeliness;")
df.to_csv(
    path_or_buf="s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/nisi_rep_timeliness.csv",
    index=False,
)


In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.nisi_rep_timeliness LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="nisi_rep_timeliness")

In [None]:
#pydb.delete_database_and_data("__temp__")

In [None]:
# Builds the temp table __temp__.represent_divorce_fct from fcsq.divorce_fct_nisi for
# Proceeding_type_code 'D'/'N' under law = 'New', using the pet_rep / resp_rep / both_rep /
# neither_rep flags already present on the source table to derive the representation label.
# NOTE(review): the next cell assigns this same variable and creates a temp table with the same name
# ('represent_divorce_fct'), deriving the flags from pettnr_reprsntd_cind / respndnt_reprsntd_cind
# instead. If pydbtools replaces an existing temp table of the same name (confirm), this cell's
# result is superseded and the cell may be redundant.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_represent_divorce_fct_table = """
SELECT fm_case_cid,
       legal_case_id,
       year,
       quarter,
       region,
       nfd_app_type,
       pet_rep,
       resp_rep,   
       both_rep,
       neither_rep,
       CASE WHEN pet_rep = 1 THEN 'First/Sole Applicant represented - Second Applicant/Respondent not represented'
            WHEN resp_rep = 1 THEN 'First/Sole Applicant not represented - Second Applicant/Respondent represented'
            WHEN both_rep = 1 THEN 'First/Sole Applicant represented - Second Applicant/Respondent represented'
            WHEN neither_rep = 1 THEN 'First/Sole Applicant not represented - Second Applicant/Respondent not represented'
      END AS representation,
      app_to_nisi_weeks
      
FROM fcsq.divorce_fct_nisi

WHERE Proceeding_type_code in ('D','N') AND
      law = 'New';
"""
pydb.create_temp_table(
    create_represent_divorce_fct_table,
    "represent_divorce_fct",
)

In [None]:
# Builds the temp table __temp__.represent_divorce_fct from fcsq.divorce_fct_nisi for
# Proceeding_type_code 'D'/'N' under law = 'New'.
# The four 0/1 flags (pet_rep, resp_rep, both_rep, neither_rep) and the Representation label are
# derived here from the 'Yes'/'No' values of pettnr_reprsntd_cind and respndnt_reprsntd_cind;
# any other combination of values gives 0 for every flag and a NULL label.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_represent_divorce_fct_table = """
SELECT fm_case_cid,
       legal_case_id,
       year,
       quarter,
       region,
       nfd_app_type,
       pettnr_reprsntd_cind,
       respndnt_reprsntd_cind,
       CASE WHEN pettnr_reprsntd_cind = 'Yes' AND respndnt_reprsntd_cind = 'No' THEN 1 
            ELSE 0
       END AS pet_rep,
       CASE WHEN pettnr_reprsntd_cind = 'No' AND respndnt_reprsntd_cind = 'Yes' THEN 1 
           ELSE 0
       END AS resp_rep,   
       CASE WHEN pettnr_reprsntd_cind = 'Yes' AND respndnt_reprsntd_cind = 'Yes' THEN 1 
            ELSE 0
       END AS both_rep,
       CASE WHEN pettnr_reprsntd_cind = 'No' AND respndnt_reprsntd_cind = 'No' THEN 1 
            ELSE 0
       END AS neither_rep,
       CASE WHEN pettnr_reprsntd_cind = 'Yes' AND respndnt_reprsntd_cind = 'No' THEN 'First/Sole Applicant represented - Second Applicant/Respondent not represented'
            WHEN pettnr_reprsntd_cind = 'No' AND respndnt_reprsntd_cind = 'Yes' THEN 'First/Sole Applicant not represented - Second Applicant/Respondent represented'
            WHEN pettnr_reprsntd_cind = 'Yes' AND respndnt_reprsntd_cind = 'Yes' THEN 'First/Sole Applicant represented - Second Applicant/Respondent represented'
            WHEN pettnr_reprsntd_cind = 'No' AND respndnt_reprsntd_cind = 'No' THEN 'First/Sole Applicant not represented - Second Applicant/Respondent not represented'
       END AS Representation,
       app_to_nisi_weeks    
FROM fcsq.divorce_fct_nisi

WHERE Proceeding_type_code in ('D','N') AND
      law = 'New';
"""
pydb.create_temp_table(
    create_represent_divorce_fct_table,
    "represent_divorce_fct",
)


In [None]:
# Preview up to 10 rows of __temp__.represent_divorce_fct; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.represent_divorce_fct LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="represent_divorce_fct")

In [None]:
# Builds __temp__.div_table_10b_lookup_annual from __temp__.represent_divorce_fct for Year 2011..annual_year.
# Two result sets are stacked with UNION ALL:
#   1. one row per Year and NFD_APP_TYPE, keyed '<NFD_APP_TYPE>|<Year>|'
#   2. one row per Year across all application types, keyed 'All|<Year>|'
# *_n columns sum the representation flags; *_mean columns are the average app_to_nisi_weeks of the
# rows with that flag set (3 decimal places); Unknown_* is the literal string '.'.
create_div_table_10b_lookup_annual_table =f"""

SELECT CONCAT(NFD_APP_TYPE,'|',CAST("Year" AS VARCHAR),'|') as lookup,
       SUM (both_rep) AS Both_n,
       ROUND (AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS Both_mean,
       SUM (pet_rep) AS Applicant_n,
       ROUND (AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS Applicant_mean,
       SUM (resp_rep) AS Respondent_n,
       ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS Respondent_mean,
       SUM (neither_rep) AS Neither_n,
       ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS Neither_mean,
       '.' AS Unknown_N,
       '.' AS Unknown_mean,
      COUNT(*) AS All_n,
      ROUND(AVG(app_to_nisi_weeks),3) AS All_mean

FROM __temp__.represent_divorce_fct

WHERE Year BETWEEN 2011 and {annual_year}

GROUP BY Year, NFD_APP_TYPE
  
UNION ALL

SELECT CONCAT('All|',CAST("Year" AS VARCHAR),'|') as lookup,
       SUM (both_rep) AS Both_n,
       ROUND (AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS Both_mean,
       SUM (pet_rep) AS Applicant_n,
       ROUND (AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS Applicant_mean,
       SUM (resp_rep) AS Respondent_n,
       ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS Respondent_mean,
       SUM (neither_rep) AS Neither_n,
       ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS Neither_mean,
       '.' AS Unknown_N,
       '.' AS Unknown_mean,
       COUNT(*) AS All_n,
       ROUND(AVG(app_to_nisi_weeks),3) AS All_mean

FROM __temp__.represent_divorce_fct

WHERE  Year BETWEEN 2011 and {annual_year}

GROUP BY Year;
"""
pydb.create_temp_table(create_div_table_10b_lookup_annual_table,'div_table_10b_lookup_annual')

In [None]:
# Preview up to 10 rows of __temp__.div_table_10b_lookup_annual; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_10b_lookup_annual LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_10b_lookup_annual")

In [None]:
# Builds __temp__.div_table_10b_lookup_qtr from __temp__.represent_divorce_fct for Year > 2010,
# excluding the year/quarter given by next_quarter_year / next_quarter.
# Two result sets are stacked with UNION ALL:
#   1. one row per Year, Quarter and NFD_APP_TYPE, keyed '<NFD_APP_TYPE>|<Year>|Q<Quarter>'
#   2. one row per Year and Quarter across all application types, keyed 'All|<Year>|Q<Quarter>'
# *_n columns sum the representation flags; *_mean columns are the average app_to_nisi_weeks of the
# rows with that flag set (3 decimal places); Unknown_* is the literal string '.'.
create_div_table_10b_lookup_qtr_table =f"""

SELECT
    CONCAT(NFD_APP_TYPE,'|', CAST("Year" AS VARCHAR),'|Q', CAST("Quarter" AS VARCHAR)) as lookup,
    SUM (both_rep) AS Both_n,
    ROUND (AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS Both_mean,
    SUM (pet_rep) AS Applicant_n,
    ROUND (AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS Applicant_mean,
    SUM (resp_rep) AS Respondent_n,
    ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS Respondent_mean,
    SUM (neither_rep) AS Neither_n,
    ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS Neither_mean,
    '.' AS Unknown_N,
    '.' AS Unknown_mean,
    COUNT(*) AS All_n,
   ROUND(AVG(app_to_nisi_weeks),3) AS All_mean

FROM __temp__.represent_divorce_fct

WHERE  "Year" > 2010 AND 
        NOT ("Year" = {next_quarter_year} AND Quarter = {next_quarter})

GROUP BY "Year", "Quarter", NFD_APP_TYPE
  
UNION ALL

SELECT CONCAT('All|', CAST("Year" AS VARCHAR),'|Q', CAST("Quarter" AS VARCHAR)) as lookup,
       SUM (both_rep) AS Both_n,
       ROUND (AVG(CASE WHEN both_rep = 1 THEN app_to_nisi_weeks END),3) AS Both_mean,
       SUM (pet_rep) AS Applicant_n,
       ROUND (AVG(CASE WHEN pet_rep = 1 THEN app_to_nisi_weeks END),3) AS Applicant_mean,
       SUM (resp_rep) AS Respondent_n,
       ROUND (AVG(CASE WHEN resp_rep = 1 THEN app_to_nisi_weeks END),3) AS Respondent_mean,
       SUM (neither_rep) AS Neither_n,
       ROUND (AVG(CASE WHEN neither_rep = 1 THEN app_to_nisi_weeks END),3) AS Neither_mean,
       '.' AS Unknown_N,
       '.' AS Unknown_mean,
       COUNT(*) AS All_n,
      ROUND(AVG(app_to_nisi_weeks),3) AS All_mean

FROM __temp__.represent_divorce_fct

WHERE "Year"  > 2010 AND
       NOT ("Year" = {next_quarter_year} AND Quarter = {next_quarter})

GROUP BY "Year", "Quarter"; 
  """
pydb.create_temp_table(create_div_table_10b_lookup_qtr_table,'div_table_10b_lookup_qtr')

In [None]:
# Preview up to 10 rows of __temp__.div_table_10b_lookup_qtr; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_10b_lookup_qtr LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_10b_lookup_qtr")

In [None]:
# Remove the existing fcsq.div_table_10b_lookup table definition, then delete the data files
# stored under its S3 location, so the table can be recreated from scratch.
drop_div_table_10b_lookup = "DROP TABLE IF EXISTS fcsq.div_table_10b_lookup"
pydb.start_query_execution_and_wait(drop_div_table_10b_lookup)
# S3 prefixes are plain string matches: the trailing "/" restricts the delete to objects inside this
# table's folder, so sibling keys that merely start with the same name are left alone.
# The trailing ";" suppresses the cell output.
bucket.objects.filter(Prefix="fcsq_processing/Divorce/div_table_10b_lookup/").delete();

In [None]:
# Creates fcsq.div_table_10b_lookup as a Parquet table at the given S3 location by stacking the
# annual and quarterly 10b lookup temp tables (UNION ALL) and ordering the rows by lookup.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_DIV_TABLE_10b_LOOKUP_table = """
CREATE TABLE IF NOT EXISTS fcsq.div_table_10b_lookup
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/div_table_10b_lookup') AS
SELECT *
FROM
(SELECT * FROM __temp__.div_table_10b_lookup_annual
UNION ALL
SELECT * FROM __temp__.div_table_10b_lookup_qtr)
ORDER BY lookup;
"""
pydb.start_query_execution_and_wait(
    create_DIV_TABLE_10b_LOOKUP_table
)

In [None]:
# Read the whole fcsq.div_table_10b_lookup table and write it, without the index, as a CSV
# under the sdp_process folder on S3
df = pydb.read_sql_query("select * from fcsq.div_table_10b_lookup;")
df.to_csv(
    path_or_buf="s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/div_table_10b_lookup.csv",
    index=False,
)

In [None]:
# Preview up to 10 rows of the 10b lookup table in the fcsq database; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from fcsq.DIV_TABLE_10b_LOOKUP LIMIT 10")
test

In [None]:
# Builds the temp table __temp__.div_leg_rep from fcsq.divorce_fct_petitions (Law = 'Old') as four
# row sets stacked with UNION ALL, all labelled Case_type 'Divorce':
#   1. Category 'Cases'                - rows with Proceeding_type_code = 'D'
#   2. Category 'Cases with a hearing' - rows where First_divorce_hrng_ind = 1 or First_fr_hrng_ind = 1
#   3. Category 'Party', Party 'Applicant'  - same hearing filter, with the petitioner's gender and
#      Representation 'Y' when petnr_reprsntd_ind = 1, otherwise 'N'
#   4. Category 'Party', Party 'Respondent' - same hearing filter, with the respondent's gender and
#      Representation 'Y' when respndnt_reprsntd_ind = 1, otherwise 'N'
# Party, Gender and Representation are NULL for the two case-level row sets.
# NOTE(review): only the 'Cases' row set filters on Proceeding_type_code = 'D'; the three hearing-based
# row sets do not restrict the proceeding type - confirm this difference is intended.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_div_leg_rep_table = """

SELECT 'Divorce' AS Case_type,
        Year,
        Quarter,
        'Cases' AS Category,
         null AS Party,
         null AS Gender,
         null as Representation
         
FROM fcsq.divorce_fct_petitions 

WHERE Proceeding_type_code = 'D' AND 
      Law = 'Old'
      
UNION ALL

SELECT 'Divorce' AS Case_type,
        Year,
        Quarter,
        'Cases with a hearing' AS Category,
        null AS Party,
        null AS Gender,
        null as Representation
        
FROM fcsq.divorce_fct_petitions 

WHERE (First_divorce_hrng_ind = 1 OR First_fr_hrng_ind = 1) AND
      Law = 'Old'
      
UNION ALL

SELECT 'Divorce' AS Case_type,
        Year,
        Quarter,
        'Party' AS Category,
        'Applicant' AS Party,
         Petitioners_gender AS Gender,
         CASE WHEN petnr_reprsntd_ind = 1 THEN 'Y'
             ELSE 'N' 
         END AS Representation

FROM fcsq.divorce_fct_petitions 

WHERE (First_divorce_hrng_ind = 1 OR First_fr_hrng_ind = 1) AND
      Law = 'Old'
      
UNION ALL

SELECT 'Divorce' AS Case_type,
       Year,
       Quarter,
       'Party' AS Category,
       'Respondent' AS Party,
       Respondents_gender AS Gender,
       CASE WHEN respndnt_reprsntd_ind = 1 
            THEN 'Y'
       ELSE 'N' END as Representation

FROM fcsq.divorce_fct_petitions 

WHERE (First_divorce_hrng_ind = 1 OR First_fr_hrng_ind = 1) AND
      Law = 'Old';
"""
pydb.create_temp_table(
    create_div_leg_rep_table,
    "div_leg_rep",
)

In [None]:
# Preview up to 10 rows of __temp__.div_leg_rep; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_leg_rep LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_leg_rep")

In [None]:
# Builds the temp table __temp__.div_leg_rep_csv: row counts from __temp__.div_leg_rep for Year > 2010,
# grouped by Case_type, Year, Quarter, Category, Party, Gender and Representation.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_div_leg_rep_csv_table = """
SELECT Case_type,
       Year,
       Quarter,
       Category,
       Party,
       Gender,
       Representation,
       Count(*) AS Count
       
FROM __temp__.div_leg_rep

WHERE "Year" > 2010
      
GROUP BY Case_type, Year, Quarter, Category, Party, Gender, Representation

ORDER BY Category, Year, Quarter;    
"""
pydb.create_temp_table(
    create_div_leg_rep_csv_table,
    "div_leg_rep_csv",
)

In [None]:
# Preview up to 10 rows of __temp__.div_leg_rep_csv; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_leg_rep_csv LIMIT 10")
test

In [None]:
# Builds __temp__.div_table_11_lookup_annual: one row per Year up to annual_year,
# keyed by lookup = 'Divorce (incl. FR)|<Year>|'.
# Each column sums the count column of __temp__.div_leg_rep_csv over the matching rows:
# Cases / Cases_hearing by Category, app_* / res_* by Party and Representation ('Y' = rep, 'N' = unrep),
# and total_parties over every row that has a Party.
create_div_table_11_lookup_annual_table =f"""
SELECT CONCAT('Divorce (incl. FR)|',CAST ("Year" AS VARCHAR),'|') as lookup,
        SUM (CASE WHEN Category = 'Cases' THEN count END) AS Cases,
        SUM (CASE WHEN Category = 'Cases with a hearing' THEN count END) AS Cases_hearing,
        SUM (CASE WHEN Party = 'Applicant' AND Representation = 'Y' THEN count END) AS app_rep,
        SUM (CASE WHEN Party = 'Applicant' AND Representation = 'N' THEN count END) AS app_unrep,
        SUM (CASE WHEN Party = 'Respondent' AND Representation = 'Y' THEN count END) AS res_rep,
        SUM (CASE WHEN Party = 'Respondent' AND Representation = 'N' THEN count END) AS res_unrep,
        SUM (CASE WHEN Party is not null THEN count END) AS total_parties  

FROM __temp__.div_leg_rep_csv

WHERE Year <= {annual_year}

GROUP BY "Year"

ORDER BY "Year"
;
"""
pydb.create_temp_table(create_div_table_11_lookup_annual_table,'div_table_11_lookup_annual')

In [None]:
# Preview up to 10 rows of __temp__.div_table_11_lookup_annual; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_11_lookup_annual LIMIT 10 ")
test

In [None]:
# Builds __temp__.div_table_11_lookup_qtr: one row per Year and Quarter,
# keyed by lookup = 'Divorce (incl. FR)|<Year>|Q<Quarter>', excluding the year/quarter given by
# next_quarter_year / next_quarter.
# Each column sums the count column of __temp__.div_leg_rep_csv over the matching rows:
# Cases / Cases_hearing by Category, app_* / res_* by Party and Representation ('Y' = rep, 'N' = unrep),
# and total_parties over every row that has a Party.
# The source table is written as __temp__.div_leg_rep_csv (no space after the dot), matching every
# other temp-table reference in the notebook.
create_div_table_11_lookup_qtr_table = f"""
SELECT CONCAT('Divorce (incl. FR)|', CAST("Year" AS VARCHAR),'|Q', CAST("Quarter" AS VARCHAR)) AS lookup,
       SUM (CASE WHEN Category = 'Cases' THEN count END) AS Cases,
       SUM (CASE WHEN Category = 'Cases with a hearing' THEN count END) AS Cases_hearing,
       SUM (CASE WHEN Party = 'Applicant' AND Representation = 'Y' THEN count END) AS app_rep,
       SUM (CASE WHEN Party = 'Applicant' AND Representation = 'N' THEN count END) AS app_unrep,
       SUM (CASE WHEN Party = 'Respondent' AND Representation = 'Y' THEN count END) AS res_rep,
       SUM (CASE WHEN Party = 'Respondent' AND Representation = 'N' THEN count END) AS res_unrep,
       SUM (CASE WHEN Party is not null THEN count END) AS total_parties

FROM __temp__.div_leg_rep_csv

WHERE  NOT (Year = {next_quarter_year} AND Quarter = {next_quarter})

GROUP BY "Year", "Quarter"

ORDER BY "Year", "Quarter";
"""
pydb.create_temp_table(create_div_table_11_lookup_qtr_table, 'div_table_11_lookup_qtr')


In [None]:
# Preview up to 10 rows of __temp__.div_table_11_lookup_qtr; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from __temp__.div_table_11_lookup_qtr LIMIT 10")
test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_table_11_lookup_qtr")

In [None]:
# Remove the existing fcsq.div_leg_rep_csv table definition, then delete the data files
# stored under its S3 location, so the table can be recreated from scratch.
drop_div_leg_rep_csv = "DROP TABLE IF EXISTS fcsq.div_leg_rep_csv"
pydb.start_query_execution_and_wait(drop_div_leg_rep_csv)
# S3 prefixes are plain string matches: the trailing "/" restricts the delete to objects inside this
# table's folder, so sibling keys that merely start with the same name are left alone.
# The trailing ";" suppresses the cell output.
bucket.objects.filter(Prefix="fcsq_processing/Divorce/div_leg_rep_csv/").delete();

In [None]:
# Creates fcsq.div_leg_rep_csv as a Parquet table at the given S3 location from every row of
# the __temp__.div_leg_rep_csv temp table.
# Note: this reassigns create_div_leg_rep_csv_table, which earlier in the notebook held the
# query that builds that temp table.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_div_leg_rep_csv_table = """
CREATE TABLE IF NOT EXISTS fcsq.div_leg_rep_csv
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/div_leg_rep_csv') AS
SELECT *
FROM __temp__.div_leg_rep_csv;
"""
pydb.start_query_execution_and_wait(
    create_div_leg_rep_csv_table
)

In [None]:
# Read the whole fcsq.div_leg_rep_csv table and write it, without the index, as a CSV
# under the sdp_process folder on S3
df = pydb.read_sql_query("select * from fcsq.div_leg_rep_csv;")
df.to_csv(
    path_or_buf="s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/div_leg_rep_csv.csv",
    index=False,
)

In [None]:
# Preview up to 10 rows of fcsq.div_leg_rep_csv; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from fcsq.div_leg_rep_csv LIMIT 10")
test

In [None]:
# Remove the existing fcsq.div_table_11_lookup table definition, then delete the data files
# stored under its S3 location, so the table can be recreated from scratch.
drop_div_table_11_lookup = "DROP TABLE IF EXISTS fcsq.div_table_11_lookup"
pydb.start_query_execution_and_wait(drop_div_table_11_lookup)
# S3 prefixes are plain string matches: the trailing "/" restricts the delete to objects inside this
# table's folder, so sibling keys that merely start with the same name are left alone.
# The trailing ";" suppresses the cell output.
bucket.objects.filter(Prefix="fcsq_processing/Divorce/div_table_11_lookup/").delete();

In [None]:
# Creates fcsq.div_table_11_lookup as a Parquet table at the given S3 location by stacking the
# annual and quarterly Table 11 lookup temp tables (UNION ALL) and ordering the rows by Lookup.
# The query has no placeholders, so a plain string is used rather than an f-string.
create_div_table_11_lookup_table = """
CREATE TABLE IF NOT EXISTS fcsq.div_table_11_lookup
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/div_table_11_lookup') AS
SELECT * 

FROM (SELECT *
      FROM __temp__.div_table_11_lookup_annual
      UNION ALL
      SELECT *
      FROM __temp__.div_table_11_lookup_qtr)

ORDER BY Lookup;
"""
pydb.start_query_execution_and_wait(
    create_div_table_11_lookup_table
)

In [None]:
# Read the whole fcsq.div_table_11_lookup table and write it, without the index, as a CSV
# under the sdp_process folder on S3
df = pydb.read_sql_query("select * from fcsq.div_table_11_lookup;")
df.to_csv(
    path_or_buf="s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/div_table_11_lookup.csv",
    index=False,
)

In [None]:
# Preview up to 10 rows of fcsq.div_table_11_lookup; the bare `test` expression displays the result
test = pydb.read_sql_query("SELECT * from fcsq.div_table_11_lookup LIMIT 10")
test