In [None]:
# Install the pinned pydbtools release into the user site-packages (the leading "!" runs pip in a shell).
# NOTE(review): `%pip` would guarantee the install targets this kernel's environment - consider switching.
!pip install --user pydbtools==5.6.4

In [None]:
# Check which pydbtools version is installed by filtering the `pip freeze` output for the package name
!pip freeze | grep pydbtools

In [None]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# Widen pandas' display limits so wide tables are easier to inspect in the notebook
for option_name, option_value in {
    "display.max_columns": 100,
    "display.width": 900,
    "display.max_colwidth": 200,
}.items():
    pd.set_option(option_name, option_value)

In [None]:
# Variables to be used in this notebook

# Name of the Athena database the output tables are stored in
fcsq_database = "fcsq"

# Handle to the S3 bucket that data is saved to (also used to clear old table files)
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

# Last full year to be published - including this publication
annual_year = 2023

# Current publication variables (year and quarter)
current_year = 2024
current_quarter = 2

# Next publication variables (year and quarter)
next_quarter_year = 2024 
next_quarter = 3

In [None]:
# Load the raw SDP divorce extract from S3 and publish it as an Athena temporary table.
# NOTE(review): the "2024q3" suffix is hard-coded; it matches next_quarter_year / next_quarter -
# confirm whether the path should be derived from those variables.
sdp_divorce_data_table = pd.read_csv(
    "s3://alpha-family-data/fcsq_processing/Divorce/sdp_process/sdp_divorce_data_raw_2024q3.csv",
    low_memory=False,
)

# Parse the event-date columns; values that do not match the timestamp layout are coerced to NaT.
# NOTE(review): case_issd_date is selected downstream but is not parsed here - confirm that is intended.
for date_column in ("case_recpt_date", "dn_proncd_date", "da_grntd_date"):
    sdp_divorce_data_table[date_column] = pd.to_datetime(
        sdp_divorce_data_table[date_column],
        errors="coerce",
        format="%Y-%m-%d %H:%M:%S",
    )

pydb.dataframe_to_temp_table(sdp_divorce_data_table, "sdp_divorce_data");


In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.sdp_divorce_data limit 10")
#test

In [None]:
#record count check in extracted data
#sdp_divorce_data_count = pydb.read_sql_query("SELECT count(*) as count from __temp__.sdp_divorce_data")
#sdp_divorce_data_count

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="sdp_divorce_data")

In [None]:
# Load the court location lookup (court name, DFJ area, region) from S3 and expose it as an
# Athena temporary table.
# NOTE(review): later queries LEFT JOIN on location_key, so a duplicated key here would duplicate
# fact rows - confirm the keys are unique.
create_div_court_dfj_region_lookup_table = pd.read_csv(
    "s3://alpha-family-data/fcsq_processing/lookups/div_court_dfj_region_lookup.csv",
    low_memory=False,
)

pydb.dataframe_to_temp_table(
    create_div_court_dfj_region_lookup_table,
    "div_court_dfj_region_lookup",
);


In [None]:
#test = pydb.read_sql_query("SELECT * from __temp__.div_court_dfj_region_lookup LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="div_court_dfj_region_lookup")

In [None]:
#Creating a table holding petition / application information.

In [None]:
# Clear any previous build of the petitions table before it is recreated:
# drop the Athena table definition, then delete its data files from S3
# (an Athena CTAS needs its external location to be empty).
drop_divorce_fct_petitions = f"DROP TABLE IF EXISTS {fcsq_database}.divorce_fct_petitions"
pydb.start_query_execution_and_wait(drop_divorce_fct_petitions)
# The trailing "/" limits the delete to this table's own folder; a bare prefix would also
# match sibling keys such as "divorce_fct_petitions_<anything>".
bucket.objects.filter(Prefix="fcsq_processing/Divorce/divorce_fct_petitions/").delete();


In [None]:
# Build the petitions fact table with an Athena CTAS, stored as Parquet under the Divorce folder.
# Rows are taken from the SDP temp table where pet_ind = 1; court, DFJ area and region are attached
# by LEFT JOINing the location lookup on pet_location_key (unmatched keys give NULL court details).
# Gender codes are mapped to display labels; any other value (including NULL) is labelled 'check',
# presumably as a flag for manual review.
create_divorce_fct_petitions_table = f"""
CREATE TABLE IF NOT EXISTS {fcsq_database}.divorce_fct_petitions
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/divorce_fct_petitions') AS
SELECT  'Petition' As stage,
        CAST(a.pet_year AS INT) AS year,
        EXTRACT(QUARTER FROM a.case_recpt_date) AS quarter,
        EXTRACT(MONTH FROM a.case_recpt_date) AS month,
        a.fm_case_cid,
        a.legal_case_id,
        a.digital_paper,
        a.respndnt_reprsntd_ind,
        a.petnr_reprsntd_ind,
        a.case_recpt_date AS event_date,
        a.case_issd_date,
        a.case_type_descriptor AS reason,
        a.proceeding_type,
        a.proceeding_type_code,
        CASE    WHEN a.pettnr_gender_type_name = 'female' THEN 'Female'
                WHEN a.pettnr_gender_type_name = 'male' THEN 'Male'
                WHEN a.pettnr_gender_type_name = 'other' THEN 'Other/unknown'
                WHEN a.pettnr_gender_type_name = 'notGiven' THEN 'Other/unknown'
                ELSE 'check'
        END AS Petitioners_gender,
        CASE   WHEN a.respndnt_gender_type_name = 'female' THEN 'Female'
               WHEN a.respndnt_gender_type_name = 'male' THEN 'Male'
               WHEN a.respndnt_gender_type_name = 'other' THEN 'Other/unknown'
               WHEN a.respndnt_gender_type_name = 'notGiven' THEN 'Other/unknown'
               ELSE 'check'
        END AS respondents_gender,
        a.pet_location_key as location_key,
        a.first_fr_hrng_ind,
        a.first_divorce_hrng_ind,
        a.law,
        a.nfd_app_type,
        a.new_case_type,
        b.location_name as court,
        b.dfj_lookup as dfj_area,
        b.region_lookup as region

FROM __temp__.sdp_divorce_data AS a
  LEFT JOIN __temp__.div_court_dfj_region_lookup AS b
    ON a.pet_location_key = b.location_key

WHERE a.pet_ind = 1;
"""
pydb.start_query_execution_and_wait(create_divorce_fct_petitions_table)

In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.divorce_fct_petitions LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="divorce_fct_petitions")

In [None]:
#Creating a table holding nisi / conditional order information.

In [None]:
# Clear any previous build of the nisi table before it is recreated:
# drop the Athena table definition, then delete its data files from S3
# (an Athena CTAS needs its external location to be empty).
drop_divorce_fct_nisi = f"DROP TABLE IF EXISTS {fcsq_database}.divorce_fct_nisi"
pydb.start_query_execution_and_wait(drop_divorce_fct_nisi)
# The trailing "/" limits the delete to this table's own folder; a bare prefix would also
# match sibling keys such as "divorce_fct_nisi_<anything>".
bucket.objects.filter(Prefix="fcsq_processing/Divorce/divorce_fct_nisi/").delete();

In [None]:
# Build the nisi / conditional order fact table with an Athena CTAS, stored as Parquet.
# Rows are taken from the SDP temp table where nisi_ind = 1, with court, DFJ area and region
# attached by LEFT JOINing the location lookup on nisi_location_key.
# - stage: proceeding_type_code 'D'/'N' -> 'Decree Nisi', 'J' -> 'Judicial Separations Granted'.
#   NOTE(review): the CASE has no ELSE, so any other code yields a NULL stage - confirm intended.
# - pet_rep / resp_rep / both_rep / neither_rep: 0/1 flags derived from the two representation
#   indicators (1 = represented, 0 or -1 = not). A row whose indicators are NULL or hold any
#   other value gets 0 in all four flags.
# - app_to_nisi_weeks: pet_to_nisi divided by 7.0 (assumes pet_to_nisi is in days - TODO confirm).
create_divorce_fct_nisi_table = f"""
CREATE TABLE IF NOT EXISTS {fcsq_database}.divorce_fct_nisi
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/divorce_fct_nisi') AS
SELECT  CASE    WHEN a.proceeding_type_code IN ('D','N') THEN 'Decree Nisi'
                WHEN a.proceeding_type_code = 'J' THEN 'Judicial Separations Granted'
        END As stage,
        CAST(a.nisi_year AS INT) AS year,
        EXTRACT(QUARTER FROM a.dn_proncd_date) AS quarter,
        EXTRACT(MONTH FROM a.dn_proncd_date) AS month,
        a.fm_case_cid,
        a.legal_case_id,
        a.digital_paper,
        a.dn_proncd_date AS event_date,
        a.pet_to_nisi,
        a.proceeding_type,
        a.proceeding_type_code,
        a.contested,
        a.nisi_location_key as location_key,
        a.first_fr_hrng_ind,
        a.first_divorce_hrng_ind,
        a.law,
        a.nfd_app_type,
        a.new_case_type,
        b.location_name as court,
        b.dfj_lookup as dfj_area,
        b.region_lookup as region,
        CASE WHEN a.petnr_reprsntd_ind = 1 AND a.respndnt_reprsntd_ind IN (0, -1) THEN 1
             ELSE 0
        END AS pet_rep,
        CASE WHEN a.petnr_reprsntd_ind IN (0, -1) AND a.respndnt_reprsntd_ind = 1 THEN 1
             ELSE 0
        END AS resp_rep,
        CASE WHEN a.petnr_reprsntd_ind = 1 AND a.respndnt_reprsntd_ind = 1 THEN 1
             ELSE 0
        END AS both_rep,
        CASE WHEN a.petnr_reprsntd_ind IN (0, -1) AND a.respndnt_reprsntd_ind IN (0, -1) THEN 1
             ELSE 0
        END AS neither_rep,
        a.pet_to_nisi/7.0 AS app_to_nisi_weeks

FROM __temp__.sdp_divorce_data as a
 LEFT JOIN __temp__.div_court_dfj_region_lookup AS b
    ON a.nisi_location_key = b.location_key

WHERE a.nisi_ind = 1;
"""
pydb.start_query_execution_and_wait(create_divorce_fct_nisi_table)

In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.divorce_fct_nisi LIMIT 10")
#test

In [None]:
#test1 = pydb.read_sql_query("SELECT new_case_type,stage, count(*) as count from fcsq.divorce_fct_nisi group by new_case_type,stage ")
#test1

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="divorce_fct_nisi")

In [None]:
#Creating a table holding absolute / final order information.

In [None]:
# Clear any previous build of the absolute table before it is recreated:
# drop the Athena table definition, then delete its data files from S3
# (an Athena CTAS needs its external location to be empty).
drop_divorce_fct_abs = f"DROP TABLE IF EXISTS {fcsq_database}.divorce_fct_abs"
pydb.start_query_execution_and_wait(drop_divorce_fct_abs)
# The trailing "/" limits the delete to this table's own folder; a bare prefix would also
# match sibling keys such as "divorce_fct_abs_<anything>".
bucket.objects.filter(Prefix="fcsq_processing/Divorce/divorce_fct_abs/").delete();

In [None]:
# Build the absolute / final order fact table with an Athena CTAS, stored as Parquet.
# Rows are taken from the SDP temp table where absolute_ind = 1; every row is given the stage
# 'Decree Absolute'. Court, DFJ area and region are attached by LEFT JOINing the location lookup
# on abs_location_key (unmatched keys give NULL court details).
create_divorce_fct_abs_table = f"""
CREATE TABLE IF NOT EXISTS {fcsq_database}.divorce_fct_abs
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Divorce/divorce_fct_abs') AS
SELECT  'Decree Absolute' As stage,
        CAST(a.abs_year AS INT) AS year,
        EXTRACT(QUARTER FROM a.da_grntd_date) AS quarter,
        EXTRACT(MONTH FROM a.da_grntd_date) AS month,
        a.fm_case_cid,
        a.legal_case_id,
        a.digital_paper,
        a.da_grntd_date AS event_date,
        a.pet_to_abs,
        a.proceeding_type,
        a.proceeding_type_code,
        a.contested,
        a.abs_location_key as location_key,
        a.first_fr_hrng_ind,
        a.first_divorce_hrng_ind,
        a.law,
        a.nfd_app_type,
        a.new_case_type,
        b.location_name as court,
        b.dfj_lookup as dfj_area,
        b.region_lookup as region

FROM __temp__.sdp_divorce_data as a
LEFT JOIN __temp__.div_court_dfj_region_lookup AS b
    ON a.abs_location_key = b.location_key

WHERE a.absolute_ind = 1;
"""
pydb.start_query_execution_and_wait(create_divorce_fct_abs_table)

In [None]:
#test = pydb.read_sql_query("SELECT * from fcsq.divorce_fct_abs LIMIT 10")
#test

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="divorce_fct_abs")