# Adopt Timeliness

In [1]:
import pandas as pd  # for the data structures to store and manipulate tables
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools
import boto3  # for working with AWS

# few things for viewing dataframes better
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

database = "familyman_dev_v2"
snapshot_date = "2021-08-19"
database_derived = "familyman_derived_dev_v1"

fcsq_database = "fcsq"

s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

## Adopt_applications_data_sorted table

### Drop the adopt_applications_data_sorted table if it already exists and remove its data from the S3 bucket

In [2]:
drop_adopt_applications_data_sorted = f"""
DROP TABLE IF EXISTS fcsq.adopt_applications_data_sorted;
"""
pydb.start_query_execution_and_wait(drop_adopt_applications_data_sorted)

# clean up previous adopt_applications_data_sorted files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_applications_data_sorted/").delete()

[{'ResponseMetadata': {'RequestId': 'TSJ9E31TGV4Q51BY',
   'HostId': '7Hd1KaSg2/+knE6YgAjMBeSL9YZTDg8mhvVT9UA6g2E52IZFEcSaRNB/A1rlujzv2O8pDpywwDc=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '7Hd1KaSg2/+knE6YgAjMBeSL9YZTDg8mhvVT9UA6g2E52IZFEcSaRNB/A1rlujzv2O8pDpywwDc=',
    'x-amz-request-id': 'TSJ9E31TGV4Q51BY',
    'date': 'Wed, 06 Apr 2022 16:23:46 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_applications_data_sorted/20220406_154629_00012_dh4qi_7f714072-94a4-4353-995f-55660a867518',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'pb9mc3FPfRUoAvAw2wI36j0k6CIZVDf7'},
   {'Key': 'fcsq_processing/Adoption/adopt_applications_data_sorted/20220406_154629_00012_dh4qi_2b354396-5aee-43e0-8033-02434098502f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'TMD79xQPJbpJcQt2kPDH17n6wGIvDXHC'},
   {'

### Create the adopt_applications_data_sorted table in Athena

In [3]:
create_adopt_applications_data_sorted = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_applications_data_sorted
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_applications_data_sorted') AS
SELECT * FROM
fcsq.adopt_application_5
ORDER BY 
CASE_NUMBER, APP_DATE
"""

pydb.start_query_execution_and_wait(create_adopt_applications_data_sorted)



{'QueryExecutionId': '0fb4db2d-7d7f-44fd-bf52-8ea922c3b843',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_applications_data_sorted\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_applications_data_sorted') AS\nSELECT * FROM\nfcsq.adopt_application_5\nORDER BY \nCASE_NUMBER, APP_DATE",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/0fb4db2d-7d7f-44fd-bf52-8ea922c3b843'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 23, 47, 847000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 23, 50, 605000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 2591,
  'DataScannedInBytes': 2392569,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/0fb4db2d-7d7f-44fd-bf52-8ea922c3b843-manifest.csv',
  'TotalExe

#### adopt_applications_data_sorted validation

In [4]:
adopt_applications_data_sorted_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_applications_data_sorted")
adopt_applications_data_sorted_count

Unnamed: 0,count
0,175169


## Create temporary tables

In [5]:
create_adopt_applications_1 = f"""
SELECT *, row_number() over (order by CASE_NUMBER, APP_DATE) as SEQ_NUM
FROM fcsq.adopt_applications_data_sorted
"""
pydb.create_temp_table(create_adopt_applications_1,'adopt_applications_1')

create_adopt_applications_2 = f"""
SELECT DISTINCT case_number, app_type, min(seq_num) as min_of_seq_num
FROM __temp__.adopt_applications_1 GROUP BY case_number, app_type
"""
pydb.create_temp_table(create_adopt_applications_2,'adopt_applications_2')

create_adopt_applications_3 = f"""
SELECT
    t1.case_number,
    t2.App_date,
    t2.year,
    t2.quarter,
    t2.court,
    t2.app_type,
    t2.Case_app_type, 
    t2.Adoption, 
    t2.Contested, 
    t2.Standard, 
    t2.Convention, 
    t2.Foreign, 
    t2.Placement, 
    t2.Placement_revoke_or_vary, 
    t2.Contact_s26, 
    t2.Contact_s26_revoke_or_vary, 
    t2.Change_surname, 
    t2.Remove_child_from_UK, 
    t2.Other_order_type, 
    t2.Adoption_Cases, 
    t2.Non_Adoption_Cases
FROM __temp__.ADOPT_APPLICATIONS_2 t1 LEFT JOIN
__temp__.ADOPT_APPLICATIONS_1 t2 ON (t1.MIN_of_seq_num = t2.Seq_Num)
"""
pydb.create_temp_table(create_adopt_applications_3,'adopt_applications_3')

## adopt_orders_data_sorted table

### Drop the adopt_orders_data_sorted table if it already exists and remove its data from the S3 bucket

In [6]:
drop_adopt_orders_data_sorted = f"""
DROP TABLE IF EXISTS fcsq.adopt_orders_data_sorted;
"""
pydb.start_query_execution_and_wait(drop_adopt_orders_data_sorted)

# clean up previous adopt_orders_data_sorted files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_orders_data_sorted/").delete()

[{'ResponseMetadata': {'RequestId': 'FAQK9BZ57N16Y8EM',
   'HostId': 'ft2WigBnI1RaYR3mc30NWQPUv6wuUpAE/A27qWRuw++41n5wIRo/lR1qQFBrOJYxr3RCAeEowemXuaKo+KRo1g==',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'ft2WigBnI1RaYR3mc30NWQPUv6wuUpAE/A27qWRuw++41n5wIRo/lR1qQFBrOJYxr3RCAeEowemXuaKo+KRo1g==',
    'x-amz-request-id': 'FAQK9BZ57N16Y8EM',
    'date': 'Wed, 06 Apr 2022 16:24:25 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_orders_data_sorted/20220406_154709_00045_mqd5q_57d2365c-5faf-469d-88cc-c1e9475ce76d',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Igi8ust1USPdl4MpRctTut5hSMkjVnoW'},
   {'Key': 'fcsq_processing/Adoption/adopt_orders_data_sorted/20220406_154709_00045_mqd5q_18c8900b-a47b-4b1f-956d-4d5164b3d0e3',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'QXd9tstm4wI1S1b9hEn.TF5_0cCrA

### Create the adopt_orders_data_sorted table in Athena

In [7]:
create_adopt_orders_data_sorted = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_orders_data_sorted
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_orders_data_sorted') AS
SELECT * 
FROM fcsq.adopt_disposals5 t1
    WHERE t1.Type != 'Contact_s26' AND t1.Type != 'Contact_s26_revoke_or_vary' AND t1.Type != 
        'Placement_revoke_or_vary' AND t1.Type != 'Other_order_type'
    ORDER BY t1.CASE_NUMBER, t1.Receipt_date;
"""
pydb.start_query_execution_and_wait(create_adopt_orders_data_sorted)

{'QueryExecutionId': 'fc08734a-7063-48a7-9558-f2845830c5bd',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_orders_data_sorted\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_orders_data_sorted') AS\nSELECT * \nFROM fcsq.adopt_disposals5 t1\n    WHERE t1.Type != 'Contact_s26' AND t1.Type != 'Contact_s26_revoke_or_vary' AND t1.Type != \n        'Placement_revoke_or_vary' AND t1.Type != 'Other_order_type'\n    ORDER BY t1.CASE_NUMBER, t1.Receipt_date",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/fc08734a-7063-48a7-9558-f2845830c5bd'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 24, 27, 727000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 24, 31, 115000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 3210,
  'DataScannedIn

#### adopt_orders_data_sorted validation

In [8]:
adopt_orders_data_sorted_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_orders_data_sorted")
adopt_orders_data_sorted_count

Unnamed: 0,count
0,150507


## Create temporary tables

In [9]:
"""
NOTE, WHERE clause usually references DATE_DIFF variable, this isn't working so far so just 
copied the creation of date_diff into the where clause
"""

create_adopt_orders_0 = f"""
SELECT t1.CASE_NUMBER, 
          t1.Court, 
          t1.Year, 
          t1.Quarter, 
          t2.App_type, 
          t2.App_date, 
          t1.Receipt_date AS Disp_Date, 
          t1.EVENT_MODEL, 
          t1.FIELD_MODEL, 
          t1.Order_type, 
          t1.Country_of_birth, 
          t1.Number_applicants, 
          t1.Adopter_type, 
          t1.Adopter, 
          t1.Child_sex, /*Changed this from child_sex2 as the process has changed, check to see if an error comes up*/ 
          t1.Age_band, 
          t1.Child_age, 
          t1.Adoption, 
          t1.Type, 
          t1.Adopter_2, 
          DAY(t1.Receipt_date -t2.App_date) as DATE_DIFF
      FROM fcsq.ADOPT_ORDERS_DATA_SORTED t1
           INNER JOIN __temp__.ADOPT_APPLICATIONS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
                 WHERE (DAY(t1.Receipt_date -t2.App_date)) >= 0;
"""
pydb.create_temp_table(create_adopt_orders_0,'adopt_orders_0')

create_adopt_orders_1 = f"""
   SELECT t1.CASE_NUMBER, 
          t1.Court, 
          t1.Year, 
          t1.Quarter, 
          t1.App_type, 
          t1.EVENT_MODEL AS Disp_Type, 
          t1.App_date, 
          t1.Disp_Date, 
          t1.FIELD_MODEL, 
          t1.Order_type, 
          t1.Number_applicants, 
          t1.Child_sex, /*Changed this from child_sex2 as the process has changed, check to see if an error comes up*/ 
          t1.Adoption, 
          t1.Type, 
          row_number() over (order by CASE_NUMBER,App_type,App_date) as Seq_no
      FROM __temp__.ADOPT_ORDERS_0 t1
      ORDER BY t1.CASE_NUMBER,
               t1.App_type,
               t1.App_date;
"""

pydb.create_temp_table(create_adopt_orders_1,'adopt_orders_1')

create_adopt_orders_2 = f"""
SELECT DISTINCT CASE_NUMBER, 
          App_type, 
          App_date, 
        (MIN(Seq_No)) AS MIN_of_Seq_No
      FROM __temp__.ADOPT_ORDERS_1 t1
      GROUP BY CASE_NUMBER,
               App_type,
               App_date;
"""

pydb.create_temp_table(create_adopt_orders_2,'adopt_orders_2')

create_adopt_orders_3 = f"""
   SELECT DISTINCT t1.CASE_NUMBER, 
          t1.App_type, 
          t2.Court, 
          t1.App_date, 
          t2.Disp_Date, 
          t2.Disp_Type, 
          t2.Year, 
          t2.Quarter, 
          t2.Order_type, 
          t2.Adoption, 
          t2.Type
      FROM __temp__.ADOPT_ORDERS_2 t1
           LEFT JOIN __temp__.ADOPT_ORDERS_1 t2 ON (t1.MIN_of_Seq_No = t2.Seq_No);
"""
pydb.create_temp_table(create_adopt_orders_3,'adopt_orders_3')


## adopt_apps_and_orders_match table

### Drop the adopt_apps_and_orders_match table if it already exists and remove its data from the S3 bucket

In [10]:
drop_adopt_apps_and_orders_match = f"""
DROP TABLE IF EXISTS fcsq.adopt_apps_and_orders_match;
"""
pydb.start_query_execution_and_wait(drop_adopt_apps_and_orders_match)

# clean up previous adopt_apps_and_orders_match files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/ADOPT_APPS_AND_ORDERS_MATCH/").delete()

[{'ResponseMetadata': {'RequestId': 'M6HZKN7WT0JC8J8V',
   'HostId': 'YqOcE60nL4W6qV+a7UO/uDYIJ/q5Yu2WzZCJpcsoq+6MRC9h+rlBjpy1yBjTolZU7yt81Y+OWl0=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'YqOcE60nL4W6qV+a7UO/uDYIJ/q5Yu2WzZCJpcsoq+6MRC9h+rlBjpy1yBjTolZU7yt81Y+OWl0=',
    'x-amz-request-id': 'M6HZKN7WT0JC8J8V',
    'date': 'Wed, 06 Apr 2022 16:25:14 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/ADOPT_APPS_AND_ORDERS_MATCH/20220406_155235_00059_qk5va_0e8b0b27-afcd-403e-ba59-0175d3b315fc',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'EnoHKy1gZ7HaoBnxc0cBlEQiZikrfxxi'},
   {'Key': 'fcsq_processing/Adoption/ADOPT_APPS_AND_ORDERS_MATCH/20220406_155235_00059_qk5va_f8048a7a-052a-4a75-bf57-ceb636bcb6eb',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'TTabNr8x_vGGUJODyd7G5H_nnIMZvdLI'},
   {'Key': 

### Create the adopt_apps_and_orders_match table in Athena

In [11]:
create_adopt_apps_and_orders_match =f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPS_AND_ORDERS_MATCH
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/ADOPT_APPS_AND_ORDERS_MATCH') AS
   SELECT t2.CASE_NUMBER, 
          t2.App_type, 
          t2.Court, 
          t2.App_date, 
          t2.Disp_Date, 
          /* Wait_weeks */
            (DAY(t2.Disp_Date-t2.App_date)/7) AS Wait_weeks, 
          t2.Disp_Type, 
          t2.Year, 
          t2.Quarter, 
          t2.Order_type, 
          t2.Adoption, 
          t2.Type, 
          /* DSP_COURT */
         /*(INPUT(t2.Court, 3.0)) AS DSP_COURT*/ /*This line used to reformat to number but new SQL input is already number so can delete if 
		  															 everything works with the new line below*/
		  t2.Court AS DSP_COURT
      FROM __temp__.ADOPT_ORDERS_3 t2;


"""
pydb.start_query_execution_and_wait(create_adopt_apps_and_orders_match)

{'QueryExecutionId': 'd89a610e-84e7-44ab-a746-5fc2871858f1',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPS_AND_ORDERS_MATCH\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/ADOPT_APPS_AND_ORDERS_MATCH') AS\n   SELECT t2.CASE_NUMBER, \n          t2.App_type, \n          t2.Court, \n          t2.App_date, \n          t2.Disp_Date, \n          /* Wait_weeks */\n            (DAY(t2.Disp_Date-t2.App_date)/7) AS Wait_weeks, \n          t2.Disp_Type, \n          t2.Year, \n          t2.Quarter, \n          t2.Order_type, \n          t2.Adoption, \n          t2.Type, \n          /* DSP_COURT */\n         /*(INPUT(t2.Court, 3.0)) AS DSP_COURT*/ /*This line used to reformat to number but new SQL input is already number so can delete if \n\t\t  \t\t\t\t\t\t\t\t\t\t\t\t\t\t\t everything works with the new line below*/\n\t\t  t2.Court AS DSP_COURT\n      FROM mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_ORDERS_3 t2",
 'StatementType': 'DDL',


#### ADOPT_APPS_AND_ORDERS_MATCH validation

In [12]:
adopt_orders_data_sorted_count = pydb.read_sql_query("select count(*) as count from fcsq.ADOPT_APPS_AND_ORDERS_MATCH")
adopt_orders_data_sorted_count

Unnamed: 0,count
0,147875


In [13]:
create_adopt_case_data_v1 = f"""
SELECT T1.YEAR,
            T1.QUARTER,
            T1.COURT,
            T1.CASE_NUMBER,
            T1.APP_TYPE,
            T1.CASE_APP_TYPE,
            T1.ADOPTION,
            T1.HIGH_COURT,
            T1.CONTESTED,
            T1.NUMBER_APPLICANTS,
            T1.ADOPTER_TYPE,
            date_format(T1.APP_DATE,'%d-%m-%Y') AS APP_DATE2
    FROM fcsq.adopt_apps_6_adoptions_only AS t1
    ORDER BY case_number, app_date2, court;
    
"""

create_adopt_case_data_v2 = f"""
SELECT *,(case when row_number() over (partition by case_number order by 
        APP_DATE2) = 1 then 1 else 0 end) as case_number_id
FROM __temp__.adopt_case_data_v1
"""

create_adopt_case_data_v3 = f"""
SELECT *
FROM __temp__.adopt_case_data_v2
where case_number_id = 1 and year > 2010;
"""
pydb.create_temp_table(create_adopt_case_data_v1,'adopt_case_data_v1')

pydb.create_temp_table(create_adopt_case_data_v2,'adopt_case_data_v2')
pydb.create_temp_table(create_adopt_case_data_v3,'adopt_case_data_v3')





In [14]:
adopt_case_data_v3_check = "SELECT COUNT(*) as Count from __temp__.adopt_case_data_v3"
pydb.start_query_execution_and_wait(adopt_case_data_v3_check)

{'QueryExecutionId': 'f50aa850-6421-4400-bab1-da32bb59d0bd',
 'Query': 'SELECT COUNT(*) as Count from mojap_de_temp_alpha_user_thomasauburnmoj.adopt_case_data_v3',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/f50aa850-6421-4400-bab1-da32bb59d0bd.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 25, 48, 453000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 25, 49, 207000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 532,
  'DataScannedInBytes': 0,
  'TotalExecutionTimeInMillis': 754,
  'QueryQueueTimeInMillis': 171,
  'QueryPlanningTimeInMillis': 76,
  'ServiceProcessingTimeInMillis': 51},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

## Applicant_Info table

### Drop the Applicant_Info table if it already exists and remove its data from the S3 bucket

In [15]:
drop_Adopt_Applicant_Info = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Applicant_Info;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Applicant_Info)

# clean up previous Adopt_Applicant_Info files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Applicant_Info/").delete()

[{'ResponseMetadata': {'RequestId': 'RAV0AXVAZ50J4K1Q',
   'HostId': 'cIobBfASdz4XFmtPcKDev5tUgPkfI4JfVeHM84O8y9u0R1cvG2kKx396ITK+inZfmvdgIHrDdcc=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'cIobBfASdz4XFmtPcKDev5tUgPkfI4JfVeHM84O8y9u0R1cvG2kKx396ITK+inZfmvdgIHrDdcc=',
    'x-amz-request-id': 'RAV0AXVAZ50J4K1Q',
    'date': 'Wed, 06 Apr 2022 16:25:53 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Applicant_Info/20220406_155406_00065_p6897_98f32b66-33cd-4f93-b345-65fac5e327ba',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'WE5YhRh6NNGZDqe7p7GzDLlxhsWl1Bvr'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Applicant_Info/20220406_155406_00065_p6897_9eb205ee-f4cc-4ca5-aab9-513967b23876',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'oU0oInmYZSAXDaquez829plcICMec75_'},
   {'Key': 'fcsq_processi

### Create the Applicant_Info table in Athena

In [16]:
create_Adopt_Applicant_Info = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Applicant_Info
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Applicant_Info') AS
 SELECT DISTINCT
   {database}.roles.ROLE, 
   {database}.roles.REPRESENTATIVE_ROLE, 
   {database}.roles.ROLE_MODEL, 
   {database}.roles.PARTY, 
   {database}.roles.CASE_NUMBER, 
   {database}.parties.PERSON_GIVEN_FIRST_NAME, 
   {database}.parties.PERSON_FAMILY_NAME, 
   {database}.parties.COMPANY, 
   {database}.addresses.POSTCODE, 
   {database}.parties.GENDER, 
   {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
  LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    ((({database}.roles.ROLE_MODEL)= 'APLZ') AND (({database}.roles.DELETE_FLAG)= 'N')) 
    OR ((({database}.roles.ROLE_MODEL)= 'APLA') AND (({database}.roles.DELETE_FLAG)= 'N'))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_Adopt_Applicant_Info)



{'QueryExecutionId': '968b9782-d89e-4912-8447-ac05a0c85f4d',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Applicant_Info\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Applicant_Info') AS\n SELECT DISTINCT\n   familyman_dev_v2.roles.ROLE, \n   familyman_dev_v2.roles.REPRESENTATIVE_ROLE, \n   familyman_dev_v2.roles.ROLE_MODEL, \n   familyman_dev_v2.roles.PARTY, \n   familyman_dev_v2.roles.CASE_NUMBER, \n   familyman_dev_v2.parties.PERSON_GIVEN_FIRST_NAME, \n   familyman_dev_v2.parties.PERSON_FAMILY_NAME, \n   familyman_dev_v2.parties.COMPANY, \n   familyman_dev_v2.addresses.POSTCODE, \n   familyman_dev_v2.parties.GENDER, \n   familyman_dev_v2.roles.DELETE_FLAG\nFROM \n  (familyman_dev_v2.roles INNER JOIN familyman_dev_v2.parties ON familyman_dev_v2.roles.PARTY = familyman_dev_v2.parties.PARTY) \n  LEFT JOIN familyman_dev_v2.addresses ON familyman_dev_v2.roles.ADDRESS = familyman_dev_v2.addresses.ADDRESS\nWHERE \n    (((familyman

#### Applicant_Info validation

In [17]:
Adopt_Applicant_Info_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Applicant_Info")
Adopt_Applicant_Info_count

Unnamed: 0,count
0,252248


## adopt_respondent_info table

### Drop the adopt_respondent_info table if it already exists and remove its data from the S3 bucket

In [18]:
drop_adopt_respondent_info = f"""
DROP TABLE IF EXISTS fcsq.adopt_respondent_info;
"""
pydb.start_query_execution_and_wait(drop_adopt_respondent_info)

# clean up previous adopt_respondent_info files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_respondent_info/").delete()

[{'ResponseMetadata': {'RequestId': 'F0XT29FKK8KQ5HE0',
   'HostId': 'xpmXVpcmN/kXt4//AI+WvIeuZygqchRRWjsPmeLLQ/Md5XiuuF4aOKAa6asXOGuz+e0UQbLdUL0=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'xpmXVpcmN/kXt4//AI+WvIeuZygqchRRWjsPmeLLQ/Md5XiuuF4aOKAa6asXOGuz+e0UQbLdUL0=',
    'x-amz-request-id': 'F0XT29FKK8KQ5HE0',
    'date': 'Wed, 06 Apr 2022 16:26:39 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_respondent_info/20220406_155455_00015_ebhc7_79c47c3a-9b4f-4da0-bdcf-85d8cfbccb89',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'KZh1y1T5BDUsnpYwdHqZpcKqC9xVVVWB'},
   {'Key': 'fcsq_processing/Adoption/adopt_respondent_info/20220406_155455_00015_ebhc7_df9c9a53-1fd9-4276-930b-e689c66c24f2',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'zKe6W2irsdN9mbOUdrAmQI9I7RZ1mPBM'},
   {'Key': 'fcsq_proces

### Create the adopt_respondent_info table in Athena

In [19]:
create_adopt_respondent_info = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_respondent_info
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_respondent_info') AS
SELECT DISTINCT
  {database}.roles.ROLE, 
  {database}.roles.REPRESENTATIVE_ROLE, 
  {database}.roles.ROLE_MODEL, 
  {database}.roles.PARTY, 
  {database}.roles.CASE_NUMBER, 
  {database}.parties.GENDER, 
  {database}.addresses.POSTCODE, 
  {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
    LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    ((({database}.roles.ROLE_MODEL)='RSPA') AND (({database}.roles.DELETE_FLAG)='N')) 
    OR ((({database}.roles.ROLE_MODEL)='RSPZ') AND (({database}.roles.DELETE_FLAG)='N'))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_adopt_respondent_info)



{'QueryExecutionId': '105423d9-581e-4f6c-8fdf-34a42b6001d1',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_respondent_info\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_respondent_info') AS\nSELECT DISTINCT\n  familyman_dev_v2.roles.ROLE, \n  familyman_dev_v2.roles.REPRESENTATIVE_ROLE, \n  familyman_dev_v2.roles.ROLE_MODEL, \n  familyman_dev_v2.roles.PARTY, \n  familyman_dev_v2.roles.CASE_NUMBER, \n  familyman_dev_v2.parties.GENDER, \n  familyman_dev_v2.addresses.POSTCODE, \n  familyman_dev_v2.roles.DELETE_FLAG\nFROM \n  (familyman_dev_v2.roles INNER JOIN familyman_dev_v2.parties ON familyman_dev_v2.roles.PARTY = familyman_dev_v2.parties.PARTY) \n    LEFT JOIN familyman_dev_v2.addresses ON familyman_dev_v2.roles.ADDRESS = familyman_dev_v2.addresses.ADDRESS\nWHERE \n    (((familyman_dev_v2.roles.ROLE_MODEL)='RSPA') AND ((familyman_dev_v2.roles.DELETE_FLAG)='N')) \n    OR (((familyman_dev_v2.roles.ROLE_MODEL)='RSPZ') AND ((family

#### adopt_respondent_info validation

In [20]:
adopt_respondent_info_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_respondent_info")
adopt_respondent_info_count

Unnamed: 0,count
0,286085


## applicants 3 table

In [21]:
create_adopt_applicants_1 = f"""
SELECT 	T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM fcsq.adopt_applicant_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.start_query_execution_and_wait(create_adopt_applicants_1)
pydb.create_temp_table(create_adopt_applicants_1,'adopt_applicants_1')



create_adopt_applicants_2 = f"""
SELECT DISTINCT 
    T1.case_number,
    T1.party,
    max(T1.representative_role) as Rep_Role,
    max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_applicants_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_applicants_2,'adopt_applicants_2')
#pydb.start_query_execution_and_wait(create_adopt_applicants_2)


create_adopt_applicants_3= f"""
SELECT t1.case_number,
    t1.party as App_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as App_Rep_Cat
    
    from __temp__.adopt_applicants_2 AS t1;


"""
#pydb.start_query_execution_and_wait(create_adopt_applicants_3)
pydb.create_temp_table(create_adopt_applicants_3,'adopt_applicants_3')


In [22]:
pydb.read_sql_query("SELECT count(*) as count from __temp__.adopt_applicants_3")

Unnamed: 0,count
0,249953


In [23]:
create_adopt_respondents_1 = f"""
SELECT T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM fcsq.adopt_respondent_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.start_query_execution_and_wait(create_adopt_respondents_1)
pydb.create_temp_table(create_adopt_respondents_1,'adopt_respondents_1')



create_adopt_respondents_2 = f"""
    SELECT DISTINCT T1.case_number,
        T1.party,
        max(T1.representative_role) as Rep_Role,
        max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_respondents_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_respondents_2,'adopt_respondents_2')
#pydb.start_query_execution_and_wait(create_adopt_respondents_2)


create_adopt_respondents_3= f"""
SELECT t1.case_number,
    t1.party as Resp_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as Resp_Rep_Cat
    
    from __temp__.adopt_respondents_2 AS t1;


"""
#pydb.start_query_execution_and_wait(create_adopt_respondents_3)
pydb.create_temp_table(create_adopt_respondents_3,'adopt_respondents_3')

In [24]:
adopt_app_rep_final = f"""
SELECT t1.YEAR, 
    t1.QUARTER,
    t1.CASE_NUMBER, 
    t1.Court,
    t2.App_Party_ID,
    t2.Representation,
    t2.Gender_Max as App_Gender,
    t2.App_Rep_Cat          
FROM __temp__.ADOPT_CASE_DATA_v3 t1
    LEFT JOIN __temp__.ADOPT_APPLICANTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);

"""

pydb.create_temp_table(adopt_app_rep_final,'adopt_app_rep_final')


In [25]:
adopt_app_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_app_rep_final"
pydb.start_query_execution_and_wait(adopt_app_rep_final_check)

{'QueryExecutionId': 'bf1a7c3b-4e7f-48f9-9954-0dcf300432b9',
 'Query': 'SELECT COUNT(*) as Count from mojap_de_temp_alpha_user_thomasauburnmoj.adopt_app_rep_final',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/bf1a7c3b-4e7f-48f9-9954-0dcf300432b9.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 28, 22, 638000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 28, 23, 288000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 473,
  'DataScannedInBytes': 0,
  'TotalExecutionTimeInMillis': 650,
  'QueryQueueTimeInMillis': 140,
  'QueryPlanningTimeInMillis': 83,
  'ServiceProcessingTimeInMillis': 37},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

In [26]:
adopt_resp_rep_final = f"""
   SELECT t1.YEAR, 
        t1.QUARTER,
        t1.CASE_NUMBER, 
        t1.Court,
          t2.Resp_Party_ID,
          t2.Representation,
          t2.Gender_Max as Resp_Gender,
          t2.Resp_Rep_Cat
          
      FROM __temp__.ADOPT_CASE_DATA_v3 t1
           LEFT JOIN __temp__.ADOPT_RESPONDENTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);
"""

pydb.create_temp_table(adopt_resp_rep_final,'adopt_resp_rep_final')

In [27]:
adopt_resp_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_resp_rep_final"
pydb.start_query_execution_and_wait(adopt_resp_rep_final_check)

{'QueryExecutionId': '0e8bb0fe-84ce-451f-be7f-099deffbc645',
 'Query': 'SELECT COUNT(*) as Count from mojap_de_temp_alpha_user_thomasauburnmoj.adopt_resp_rep_final',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/0e8bb0fe-84ce-451f-be7f-099deffbc645.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 28, 33, 490000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 28, 34, 120000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 434,
  'DataScannedInBytes': 0,
  'TotalExecutionTimeInMillis': 630,
  'QueryQueueTimeInMillis': 151,
  'QueryPlanningTimeInMillis': 68,
  'ServiceProcessingTimeInMillis': 45},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

## Adopt_Hearing_Events table

### Drop the Adopt_Hearing_Events table if it already exists and remove its data from the S3 bucket

In [28]:
drop_Adopt_Hearing_Events = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Hearing_Events;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Hearing_Events)

# clean up previous Adopt_Hearing_Events files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Hearing_Events/").delete()

[{'ResponseMetadata': {'RequestId': 'C9DMW43GHBWXBSJM',
   'HostId': 'k5Arb3GF5ioNBkd8GMF18vwjE4ys+4muIO+U7oY2AW23KnCCParb67phQdDo6k7qpw7NHYcV3Ys=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'k5Arb3GF5ioNBkd8GMF18vwjE4ys+4muIO+U7oY2AW23KnCCParb67phQdDo6k7qpw7NHYcV3Ys=',
    'x-amz-request-id': 'C9DMW43GHBWXBSJM',
    'date': 'Wed, 06 Apr 2022 16:28:38 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Hearing_Events/20220406_155648_00061_rrvbz_06a25b9a-48b4-40b4-b3cd-8764b9c63fb4',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'RBd_Uo0kh.4AhZQJXsbo7F67SttDJrwX'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Hearing_Events/20220406_155648_00061_rrvbz_5df12534-8fa0-4fd8-a03d-46b9f4af4ca4',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '9GkSjhm5L6Lb1boQozElXZSFsZnv2.Jb'},
   {'Key': 'fcsq_processi

### Create the Adopt_Hearing_Events table in Athena

In [29]:
create_Adopt_Hearing_Events = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearing_Events
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearing_Events') AS
SELECT {database}.hearings.EVENT,
  {database}.hearings.VACATED_FLAG,
  {database}.hearings.HEARING_TYPE,
  {database}.hearings.HEARING_DATE,
  {database}.events.RECEIPT_DATE,
  {database}.events.ERROR,
  {database}.events.CASE_NUMBER,
  {database}.events.EVENT_MODEL
FROM {database}.hearings
INNER JOIN {database}.events
ON {database}.hearings.EVENT            = {database}.events.EVENT
WHERE {database}.hearings.VACATED_FLAG IS NULL
AND {database}.events.ERROR             = 'N'
AND HEARING_DATE > date_parse('31-12-2009 00:00:00', '%d-%m-%Y %H:%i:%s')
AND (substring(case_number,5,1)='A' OR substring(case_number,5,1)='Z')
AND {database}.hearings.mojap_snapshot_date = date '{snapshot_date}' and {database}.events.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_Adopt_Hearing_Events)



{'QueryExecutionId': '310e58de-fb96-4d75-b4fd-c37de63e4bb9',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearing_Events\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearing_Events') AS\nSELECT familyman_dev_v2.hearings.EVENT,\n  familyman_dev_v2.hearings.VACATED_FLAG,\n  familyman_dev_v2.hearings.HEARING_TYPE,\n  familyman_dev_v2.hearings.HEARING_DATE,\n  familyman_dev_v2.events.RECEIPT_DATE,\n  familyman_dev_v2.events.ERROR,\n  familyman_dev_v2.events.CASE_NUMBER,\n  familyman_dev_v2.events.EVENT_MODEL\nFROM familyman_dev_v2.hearings\nINNER JOIN familyman_dev_v2.events\nON familyman_dev_v2.hearings.EVENT            = familyman_dev_v2.events.EVENT\nWHERE familyman_dev_v2.hearings.VACATED_FLAG IS NULL\nAND familyman_dev_v2.events.ERROR             = 'N'\nAND HEARING_DATE > date_parse('31-12-2009 00:00:00', '%d-%m-%Y %H:%i:%s')\nAND (substring(case_number,5,1)='A' OR substring(case_number,5,1)='Z')\nAND familyman_dev_v2.hearin

#### Adopt_Hearing_Events validation

In [30]:
Adopt_Hearing_Events_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Hearing_Events")
Adopt_Hearing_Events_count

Unnamed: 0,count
0,510058


## Adopt_Hearings_Cases table

### Drop the Adopt_Hearings_Cases table if it already exists and remove its data from the S3 bucket

In [31]:
drop_Adopt_Hearings_Cases = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Hearings_Cases;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Hearings_Cases)

# clean up previous Adopt_Hearings_Cases files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Hearings_Cases/").delete()

[{'ResponseMetadata': {'RequestId': 'B8JH099KQDDCA51K',
   'HostId': '7nQAOL33SLqi3GMBon0klTLlQxpaSiWXpCO+4VWz1ltZeX4Bf0xgxGcUWHuai6YIw7sGaGvtjWA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '7nQAOL33SLqi3GMBon0klTLlQxpaSiWXpCO+4VWz1ltZeX4Bf0xgxGcUWHuai6YIw7sGaGvtjWA=',
    'x-amz-request-id': 'B8JH099KQDDCA51K',
    'date': 'Wed, 06 Apr 2022 16:28:58 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Hearings_Cases/20220406_155710_00075_aanh8_7c7dee8b-af31-47cb-9fa5-05f10bfe500e',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'GtvozMAj3.yeFY1.2y.f4fnzNuzGxqxc'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Hearings_Cases/20220406_155710_00075_aanh8_b0300d8a-aa63-4384-a235-82edaaf3af4f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'uNPISPkxTDuhfM8QvtMupOUBbO8_1ejd'},
   {'Key': 'fcsq_processi

### Create the Adopt_Hearings_Cases table in Athena

In [32]:
"""
Equivalent to Hearings_Adopt_V3
"""

create_Adopt_Hearings_Cases = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearings_Cases
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearings_Cases') AS
select t1.case_number,
    t1.error,
    t1.event,
    t1.event_model,
    t1.hearing_date,
    t1.hearing_type,
    t1.receipt_date,
    t1.vacated_flag,
    substring(Case_Number,5,1) AS Case_Type
    from fcsq.Adopt_Hearing_Events AS t1
    where t1.event_model in ('A8', 'A90', 'A91', 'G60')
    order by t1.case_number, t1.receipt_date;
"""

pydb.start_query_execution_and_wait(create_Adopt_Hearings_Cases)

create_Adopt_Hearings_Cases_v2 = f"""
SELECT *,
(case when row_number() over (partition by Case_Number order by receipt_date) = 1 then 1 else 0 end) as Case_Number_ID
FROM fcsq.Adopt_Hearings_Cases
"""

pydb.create_temp_table(create_Adopt_Hearings_Cases_v2,'adopt_hearings_cases_v2')
pydb.start_query_execution_and_wait(create_Adopt_Hearings_Cases_v2)


{'QueryExecutionId': 'c8701561-2c2c-498d-a668-74196a660f3f',
 'Query': 'SELECT *,\n(case when row_number() over (partition by Case_Number order by receipt_date) = 1 then 1 else 0 end) as Case_Number_ID\nFROM fcsq.Adopt_Hearings_Cases',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/c8701561-2c2c-498d-a668-74196a660f3f.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 11, 847000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 16, 140000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 4122,
  'DataScannedInBytes': 2380748,
  'TotalExecutionTimeInMillis': 4293,
  'QueryQueueTimeInMillis': 127,
  'QueryPlanningTimeInMillis': 90,
  'ServiceProcessingTimeInMillis': 44},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineV

#### Adopt_Hearings_Cases validation

In [33]:
Adopt_Hearings_Cases_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Hearings_Cases")
Adopt_Hearings_Cases_count

Unnamed: 0,count
0,323515


## Hearing_Adopt_Applicants table

### Drop the Hearing_Adopt_Applicants table if it already exists and remove its data from the S3 bucket

In [34]:
drop_Hearing_Adopt_Applicants = f"""
DROP TABLE IF EXISTS fcsq.Hearing_Adopt_Applicants;
"""
pydb.start_query_execution_and_wait(drop_Hearing_Adopt_Applicants)

# clean up previous Hearing_Adopt_Applicants files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Hearing_Adopt_Applicants/").delete()

[{'ResponseMetadata': {'RequestId': '7P15VVANQ2W8CERH',
   'HostId': 'Dt7oFvddRVQZtz0Rh9igAuKGl2ro2z4erBOH+G6DGGLbdOVFpTyAdj1OX510xfnphb1JLzai4mg=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Dt7oFvddRVQZtz0Rh9igAuKGl2ro2z4erBOH+G6DGGLbdOVFpTyAdj1OX510xfnphb1JLzai4mg=',
    'x-amz-request-id': '7P15VVANQ2W8CERH',
    'date': 'Wed, 06 Apr 2022 16:29:25 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Applicants/20220406_155738_00144_mvixq_2dd3abd2-aacb-443b-8c41-0c5bbf52477f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'j350lmsHKc0SMdmMntIRGLzrfvJ4tbXr'},
   {'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Applicants/20220406_155738_00144_mvixq_84153c1a-0968-4050-bc99-bd5e63dc6483',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'DEWH3ihE_bNOCH0daHZnPjrexj97ZuPi'},
   {'Key': 'fcsq_

### Create the Hearing_Adopt_Applicants table in Athena

In [35]:
create_Hearing_Adopt_Applicants = f"""
CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Applicants
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Applicants') AS
SELECT t1.*,
t2.Case_Number_ID AS Hearing_Count
FROM __temp__.ADOPT_APP_REP_FINAL t1
LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
where t2.Case_Number_ID > 0;
"""

pydb.start_query_execution_and_wait(create_Hearing_Adopt_Applicants)



{'QueryExecutionId': '34a16562-a17d-4301-b7ee-a39c52418e89',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Applicants\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Applicants') AS\nSELECT t1.*,\nt2.Case_Number_ID AS Hearing_Count\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_APP_REP_FINAL t1\nLEFT JOIN mojap_de_temp_alpha_user_thomasauburnmoj.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)\nwhere t2.Case_Number_ID > 0",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/34a16562-a17d-4301-b7ee-a39c52418e89'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 26, 607000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 29, 504000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 2752,
  'Da

#### Hearing_Adopt_Applicants validation

In [36]:
Hearing_Adopt_Applicants_count = pydb.read_sql_query("select count(*) as count from fcsq.Hearing_Adopt_Applicants")
Hearing_Adopt_Applicants_count

Unnamed: 0,count
0,95787


## Hearing_Adopt_Respondents table

### Drop the Hearing_Adopt_Respondents table if it already exists and remove its data from the S3 bucket

In [37]:
drop_Hearing_Adopt_Respondents = f"""
DROP TABLE IF EXISTS fcsq.Hearing_Adopt_Respondents;
"""
pydb.start_query_execution_and_wait(drop_Hearing_Adopt_Respondents)

# clean up previous Hearing_Adopt_Respondents files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Hearing_Adopt_Respondents/").delete()

[{'ResponseMetadata': {'RequestId': 'EHFPFZBRPPPWRBSK',
   'HostId': 'DH+joGwsk8EFMnQ8a9ECxeXLymDF+I68KbDft+hQMmptviv2xnrnLhKKhczkfLxWZZQv7KE0hso=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'DH+joGwsk8EFMnQ8a9ECxeXLymDF+I68KbDft+hQMmptviv2xnrnLhKKhczkfLxWZZQv7KE0hso=',
    'x-amz-request-id': 'EHFPFZBRPPPWRBSK',
    'date': 'Wed, 06 Apr 2022 16:29:38 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Respondents/20220406_155753_00014_azqkv_eb92f812-8fb0-407e-9f2c-33dc70077db8',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Q.MBV0vbgbEdupyDYJigDriNf0Z47Qns'},
   {'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Respondents/20220406_155753_00014_azqkv_b6bfadbd-d975-43ac-991d-c4fefa9fe482',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'AuA3naoPa9WfFtEQW3Qt.6Bj7FjWu.4r'},
   {'Key': 'fcs

### Create the Hearing_Adopt_Respondents table in Athena

In [38]:
create_Hearing_Adopt_Respondents = f"""
CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Respondents
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Respondents') AS
    SELECT t1.*,
    t2.Case_Number_ID AS Hearing_Count
    FROM __temp__.ADOPT_RESP_REP_FINAL t1
    LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
    where t2.Case_Number_ID > 0;
"""

pydb.start_query_execution_and_wait(create_Hearing_Adopt_Respondents)



{'QueryExecutionId': '6d25aab9-dc9c-4c04-800e-b4a03f8e18b2',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Respondents\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Respondents') AS\n    SELECT t1.*,\n    t2.Case_Number_ID AS Hearing_Count\n    FROM mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_RESP_REP_FINAL t1\n    LEFT JOIN mojap_de_temp_alpha_user_thomasauburnmoj.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)\n    where t2.Case_Number_ID > 0",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/6d25aab9-dc9c-4c04-800e-b4a03f8e18b2'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 39, 383000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 42, 381000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTim

#### Hearing_Adopt_Respondents validation

In [39]:
Hearing_Adopt_Respondents_count = pydb.read_sql_query("select count(*) as count from fcsq.Hearing_Adopt_Respondents")
Hearing_Adopt_Respondents_count

Unnamed: 0,count
0,94976


## Adopt_App table

### Drop the Adopt_App table if it already exists and remove its data from the S3 bucket

In [40]:
drop_Adopt_App = f"""
DROP TABLE IF EXISTS fcsq.Adopt_App;
"""
pydb.start_query_execution_and_wait(drop_Adopt_App)

# clean up previous Adopt_App files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_App/").delete()

[{'ResponseMetadata': {'RequestId': '24EYY1GJR5MXZBE2',
   'HostId': '3i648leJ6j+IDfvOhRf0WqPu/5PUv1K2tnWkBHcG5+5XD8FINFxJjPiealLJIoMfDtD+nCR4dq0=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '3i648leJ6j+IDfvOhRf0WqPu/5PUv1K2tnWkBHcG5+5XD8FINFxJjPiealLJIoMfDtD+nCR4dq0=',
    'x-amz-request-id': '24EYY1GJR5MXZBE2',
    'date': 'Wed, 06 Apr 2022 16:29:51 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_App/20220406_155807_00018_dvwcd_c3f308ee-8c31-455a-8ec1-6eb1c1b30b41',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'AkFKw26eVKb_ePFPhztCtraS.ja78TLa'},
   {'Key': 'fcsq_processing/Adoption/Adopt_App/20220406_155807_00018_dvwcd_bca86a9f-2746-4aa4-a81c-88e7598614be',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'HuiOX.0rLIqFlcBiYQmby68LrH0PVacu'},
   {'Key': 'fcsq_processing/Adoption/Adopt_App/

### Create the Adopt_App table in Athena

In [41]:
create_Adopt_App = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_App
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_App') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Applicant' AS PARTY,
   App_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  fcsq.HEARING_ADOPT_APPLICANTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  App_Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_App)



{'QueryExecutionId': 'be74bb43-d2e3-4ff3-b9b0-81e3b67f6cf8',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_App\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_App') AS\nSELECT\n  'Adoption' AS CASE_TYPE,\n  Year,\n  Quarter,\n  'Party' AS Category,\n  'Applicant' AS PARTY,\n   App_Gender AS Gender,\n  Representation,\n  Count (*) AS Count\nFROM\n  fcsq.HEARING_ADOPT_APPLICANTS\nWHERE \n  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/\nGROUP BY\n  'Adoption',\n  Year,\n  Quarter,\n  'Party',\n  'Applicant',\n  App_Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/be74bb43-d2e3-4ff3-b9b0-81e3b67f6cf8'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 29, 52, 621000,

#### Adopt_App validation

In [42]:
Adopt_App_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_App")
Adopt_App_count

Unnamed: 0,count
0,221


## Adopt_resp table

### Drop the Adopt_resp table if it already exists and remove its data from the S3 bucket

In [43]:
drop_Adopt_resp = f"""
DROP TABLE IF EXISTS fcsq.Adopt_resp;
"""
pydb.start_query_execution_and_wait(drop_Adopt_resp)

# clean up previous Adopt_resp files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_resp/").delete()

[{'ResponseMetadata': {'RequestId': 'XSNTWX6KPEEC00FN',
   'HostId': 'yelw9k4s0NTDvh8ouhnj2tZgb5SZSJAR/C2k3jgHL8la4p3oumuKTbGYrLZqewNYMbL3xcgOpXQ=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'yelw9k4s0NTDvh8ouhnj2tZgb5SZSJAR/C2k3jgHL8la4p3oumuKTbGYrLZqewNYMbL3xcgOpXQ=',
    'x-amz-request-id': 'XSNTWX6KPEEC00FN',
    'date': 'Wed, 06 Apr 2022 16:30:04 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_resp/20220406_155821_00014_dvdih_9e651853-f78d-479c-a92f-d8fcdc33ba7c',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'NIZyfXpKUiW5bP6oUf07VjJnMT2qo64C'},
   {'Key': 'fcsq_processing/Adoption/Adopt_resp/20220406_155821_00014_dvdih_cfa44764-1013-4ee3-a7a8-7526f055b567',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'f65qUNKsvUC_llf8DPmwtTvxljnOoiQT'},
   {'Key': 'fcsq_processing/Adoption/Adopt_re

### Create the Adopt_resp table in Athena

In [44]:
create_Adopt_resp = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_resp
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_resp') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Respondent' AS PARTY,
  Resp_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  fcsq.HEARING_ADOPT_RESPONDENTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  Resp_Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_resp)



{'QueryExecutionId': 'ec5e2cbf-6704-4c0b-b372-7d7a28927beb',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_resp\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_resp') AS\nSELECT\n  'Adoption' AS CASE_TYPE,\n  Year,\n  Quarter,\n  'Party' AS Category,\n  'Respondent' AS PARTY,\n  Resp_Gender AS Gender,\n  Representation,\n  Count (*) AS Count\nFROM\n  fcsq.HEARING_ADOPT_RESPONDENTS\nWHERE \n  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/\nGROUP BY\n  'Adoption',\n  Year,\n  Quarter,\n  'Party',\n  'Applicant',\n  Resp_Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/ec5e2cbf-6704-4c0b-b372-7d7a28927beb'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 30, 5, 223

#### Adopt_resp validation

In [45]:
Adopt_resp_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_resp")
Adopt_resp_count

Unnamed: 0,count
0,233


## adopt_case table

### Drop the adopt_case table if it already exists and remove its data from the S3 bucket

In [46]:
drop_adopt_case = f"""
DROP TABLE IF EXISTS fcsq.adopt_case;
"""
pydb.start_query_execution_and_wait(drop_adopt_case)

# clean up previous adopt_case files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_case/").delete()

[{'ResponseMetadata': {'RequestId': 'G3EDVENE41DV40KK',
   'HostId': 'tx69Tv3XISaBIdYXiy+Cjt2csNmZfENAAZGH4CzL7jqmbUYS+IBdX2zUNSMDhJeRmoGLSk87x/A=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'tx69Tv3XISaBIdYXiy+Cjt2csNmZfENAAZGH4CzL7jqmbUYS+IBdX2zUNSMDhJeRmoGLSk87x/A=',
    'x-amz-request-id': 'G3EDVENE41DV40KK',
    'date': 'Wed, 06 Apr 2022 16:30:16 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_case/20220406_155930_00014_hzgue_b4b7e8f2-ab39-4117-b691-8fbb2b5d5a62',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'f9.oPSERh0EoQvMTmzv7lDy4krDfasSU'},
   {'Key': 'fcsq_processing/Adoption/Adopt_case/20220406_155930_00014_hzgue_f10f6637-7249-4c43-996b-e1feccca53fc',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '5CxLsxj9UjEYqF0J4UexmJ_F2csfCWbM'},
   {'Key': 'fcsq_processing/Adoption/Adopt_ca

### Create the adopt_case table in Athena

In [47]:
create_adopt_case = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_case
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_case') AS
SELECT *,
    Count(*) as Count FROM
    (SELECT
        'Adoption' AS CASE_TYPE,
        Year,
        Quarter,
        'Cases' AS Category,
        'N/A' AS PARTY,
        'N/A' AS Gender,
        'N/A' AS Representation
    FROM
      __temp__.adopt_case_data_v3)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_adopt_case)



{'QueryExecutionId': '578a4306-7ed3-4b36-a787-6092aaa17b6e',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_case\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_case') AS\nSELECT *,\n    Count(*) as Count FROM\n    (SELECT\n        'Adoption' AS CASE_TYPE,\n        Year,\n        Quarter,\n        'Cases' AS Category,\n        'N/A' AS PARTY,\n        'N/A' AS Gender,\n        'N/A' AS Representation\n    FROM\n      mojap_de_temp_alpha_user_thomasauburnmoj.adopt_case_data_v3)\nGROUP BY\n  CASE_TYPE,\n  Year,\n  Quarter,\n  Category,\n  PARTY,\n  Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/578a4306-7ed3-4b36-a787-6092aaa17b6e'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 30, 17, 90000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datet

#### adopt_case validation

In [48]:
adopt_case_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_case")
adopt_case_count

Unnamed: 0,count
0,43


## Adopt_Case_Hearings table

### Drop the Adopt_Case_Hearings table if it already exists and remove its data from the S3 bucket

In [49]:
drop_Adopt_Case_Hearings = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Case_Hearings;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Case_Hearings)

# clean up previous Adopt_Case_Hearings files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Case_Hearings/").delete()

[{'ResponseMetadata': {'RequestId': 'Q0QP6Z3FMK08V17T',
   'HostId': 'tbDBaocM8gCO3XtpgDXH6UBs8nm5Mjn4Q6avb8qYVBsIMjTD7e+cy+ZNqJInUXMw6KBqgh6dxz0=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'tbDBaocM8gCO3XtpgDXH6UBs8nm5Mjn4Q6avb8qYVBsIMjTD7e+cy+ZNqJInUXMw6KBqgh6dxz0=',
    'x-amz-request-id': 'Q0QP6Z3FMK08V17T',
    'date': 'Wed, 06 Apr 2022 16:30:28 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Case_Hearings/20220406_160004_00119_5829e_141c4e2e-437c-4beb-9411-74768150aa45',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'q0UXwRPqV3IA__zCX1hxkb7lspTOyAI6'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Case_Hearings/20220406_160004_00119_5829e_ff4da768-e316-4c04-b3c3-e0e442632d59',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'tKTiiqPlvAbJYSrJZsLuKO_uVJcsZVOD'},
   {'Key': 'fcsq_processing

### Create the Adopt_Case_Hearings table in Athena

In [50]:
create_hearing_adopt_case =f"""
SELECT DISTINCT Year, Quarter, Case_Number
FROM fcsq.HEARING_ADOPT_Applicants;
"""

pydb.create_temp_table(create_hearing_adopt_case,'hearing_adopt_case')



create_Adopt_Case_Hearings = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Case_Hearings
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Case_Hearings') AS
SELECT *, Count(*) as Count FROM
    (SELECT
      'Adoption' AS CASE_TYPE,
      Year,
      Quarter,
      'Cases with a hearing' AS Category,
      'N/A' AS PARTY,
      'N/A' AS Gender,
      'N/A' AS Representation
    FROM
      __temp__.Hearing_ADOPT_Case)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_Case_Hearings)



{'QueryExecutionId': '4a145426-76ad-407d-85dd-c0985cc64278',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Case_Hearings\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Case_Hearings') AS\nSELECT *, Count(*) as Count FROM\n    (SELECT\n      'Adoption' AS CASE_TYPE,\n      Year,\n      Quarter,\n      'Cases with a hearing' AS Category,\n      'N/A' AS PARTY,\n      'N/A' AS Gender,\n      'N/A' AS Representation\n    FROM\n      mojap_de_temp_alpha_user_thomasauburnmoj.Hearing_ADOPT_Case)\nGROUP BY\n  CASE_TYPE,\n  Year,\n  Quarter,\n  Category,\n  PARTY,\n  Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/4a145426-76ad-407d-85dd-c0985cc64278'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 30, 38, 146000, tzinfo=tzlocal()),
  'CompletionDateTime':

#### Adopt_Case_Hearings validation

In [51]:
Adopt_Case_Hearings_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Case_Hearings")
Adopt_Case_Hearings_count

Unnamed: 0,count
0,43


## Adoption table

### Drop the Adoption table if it already exists and remove its data from the S3 bucket

In [52]:
drop_Adoption = f"""
DROP TABLE IF EXISTS fcsq.Adoption;
"""
pydb.start_query_execution_and_wait(drop_Adoption)

# clean up previous Adoption files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoption/").delete()

[{'ResponseMetadata': {'RequestId': 'V5HMB2KBYYBQP3C6',
   'HostId': 'FYFMtOuzoSrF96dnIcj3EkeVkr1y9GdVRUpDQ/BGFp/vxsLi3GAUD4q7g8NH65nAarx8aiNvVQI=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'FYFMtOuzoSrF96dnIcj3EkeVkr1y9GdVRUpDQ/BGFp/vxsLi3GAUD4q7g8NH65nAarx8aiNvVQI=',
    'x-amz-request-id': 'V5HMB2KBYYBQP3C6',
    'date': 'Wed, 06 Apr 2022 16:30:50 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoption/20220406_160039_00016_w9dmt_244252a5-6f83-4f42-8003-2ff23de31e8b',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'eqZIvr8M8TOYk4Lnk5D17OkQqb.zZZVf'},
   {'Key': 'fcsq_processing/Adoption/Adoption/20220406_160039_00016_w9dmt_b30fed24-9128-4696-8b70-d88036ece619',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'lTbUyKzNILSWWj1q7CedopsJakELF2uv'},
   {'Key': 'fcsq_processing/Adoption/Adoption/202

### Create the Adoption table in Athena

In [53]:
create_Adoption = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adoption
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoption') AS
SELECT
  *
FROM
 fcsq.ADOPT_APP
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_RESP
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_CASE
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_CASE_HEARINGS;
"""

pydb.start_query_execution_and_wait(create_Adoption)



{'QueryExecutionId': '113acb0a-b6e0-46b6-89e3-8f2070680d9b',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adoption\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoption') AS\nSELECT\n  *\nFROM\n fcsq.ADOPT_APP\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_RESP\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_CASE\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_CASE_HEARINGS",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/113acb0a-b6e0-46b6-89e3-8f2070680d9b'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 30, 51, 388000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 30, 53, 400000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 1835,
  'DataScannedInBytes': 10418,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1

#### Adoption validation

In [54]:
Adoption_count = pydb.read_sql_query("select count(*) as count from fcsq.Adoption")
Adoption_count

Unnamed: 0,count
0,540


## Applicant_representation table

### Drop the Applicant_representation table if it already exists and remove its data from the S3 bucket

In [55]:
drop_Applicant_representation = f"""
DROP TABLE IF EXISTS fcsq.Applicant_representation;
"""
pydb.start_query_execution_and_wait(drop_Applicant_representation)

# clean up previous Applicant_representation files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Applicant_representation/").delete()

[{'ResponseMetadata': {'RequestId': 'QDY15P2WAMZGCESD',
   'HostId': 'jly/anVdp/JOA1BJIgJuw/s1dTbKuWHdhBjfRmOYGAwHrzfBiVsHEoxadEskbQgHmt8u8MXJ7sc=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'jly/anVdp/JOA1BJIgJuw/s1dTbKuWHdhBjfRmOYGAwHrzfBiVsHEoxadEskbQgHmt8u8MXJ7sc=',
    'x-amz-request-id': 'QDY15P2WAMZGCESD',
    'date': 'Wed, 06 Apr 2022 16:31:02 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Applicant_representation/20220406_160218_00086_mctx6_10810180-f396-4b4d-9af8-44f7ac85e940',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'BnM2vktXWIv8bW4KUO8rNbR0lrFAhF5u'},
   {'Key': 'fcsq_processing/Adoption/Applicant_representation/20220406_160218_00086_mctx6_2e2f0bba-8cf5-405f-b593-8b10e074c69e',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'YDNc1qYuTV6N0bpKPzjTBVpwHGzJUSqe'},
   {'Key': 'fcsq_

### Create the Applicant_representation table in Athena

In [56]:
create_applicants_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM fcsq.Adopt_Applicant_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_applicants_1,'applicants_1')

create_applicants_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.Applicants_1 t1;
"""
pydb.create_temp_table(create_applicants_2,'applicants_2')

create_applicants_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.Applicants_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_applicants_3,'applicants_3')

create_Applicant_representation = f"""
CREATE TABLE IF NOT EXISTS fcsq.Applicant_representation
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Applicant_representation') AS
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as App_Rep_Cat
FROM __temp__.Applicants_3 t1;
"""

pydb.start_query_execution_and_wait(create_Applicant_representation)



{'QueryExecutionId': 'c81baf9e-7867-4568-a6e1-02b948d94da5',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Applicant_representation\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Applicant_representation') AS\nSELECT t1.Case_Number,\nt1.CountOfParty,\nt1.SumOfRep_IND,\nCASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'\nWHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'\nWHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as App_Rep_Cat\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.Applicants_3 t1",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/c81baf9e-7867-4568-a6e1-02b948d94da5'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 31, 27, 503000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 31, 29, 711000, tzinfo=tzlocal())},
 'Statistics': {'Engin

#### Applicant_representation validation

In [57]:
Applicant_representation_count = pydb.read_sql_query("select count(*) as count from fcsq.Applicant_representation")
Applicant_representation_count

Unnamed: 0,count
0,180013


## Respondent_Representation table

### Drop the Respondent_Representation table if it already exists and remove its data from the S3 bucket

In [58]:
drop_Respondent_Representation = f"""
DROP TABLE IF EXISTS fcsq.Respondent_Representation;
"""
pydb.start_query_execution_and_wait(drop_Respondent_Representation)

# clean up previous Respondent_Representation files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Respondent_Representation/").delete()

[{'ResponseMetadata': {'RequestId': 'R5T5JYQHC63MSYKW',
   'HostId': 'iWaQn6ThdQFBSbl2f6EUZOWhxTHX93Yi96OaWnVLrX7N2/ZVv76bqLHF36WHzpOPZ+6y1Jy58A4=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'iWaQn6ThdQFBSbl2f6EUZOWhxTHX93Yi96OaWnVLrX7N2/ZVv76bqLHF36WHzpOPZ+6y1Jy58A4=',
    'x-amz-request-id': 'R5T5JYQHC63MSYKW',
    'date': 'Wed, 06 Apr 2022 16:31:39 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Respondent_Representation/20220406_160339_00025_yheyr_57bca47a-0df5-4e3a-aba4-4607b211c24a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '94fibNHyyH0kzfpIuqX3WF3v4t3n.opy'},
   {'Key': 'fcsq_processing/Adoption/Respondent_Representation/20220406_160339_00025_yheyr_9799d36c-d44c-4bbe-b798-7b7f55966ace',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'e8fq00XSIneTMSO.WHZwzSm8GeZ2LyPh'},
   {'Key': 'fcs

### Create the Respondent_Representation table in Athena

In [59]:
create_respondents_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM fcsq.Adopt_Respondent_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_respondents_1,'respondents_1')

create_respondents_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.respondents_1 t1;
"""
pydb.create_temp_table(create_respondents_2,'respondents_2')

create_respondents_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.respondents_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_respondents_3,'respondents_3')

create_Respondent_Representation = f"""
CREATE TABLE IF NOT EXISTS fcsq.Respondent_Representation
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Respondent_Representation') AS
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as Res_Rep_Cat
FROM __temp__.Respondents_3 t1
"""

pydb.start_query_execution_and_wait(create_Respondent_Representation)



{'QueryExecutionId': '9994ff79-e10b-4226-b3b1-60aae3622362',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Respondent_Representation\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Respondent_Representation') AS\nSELECT t1.Case_Number,\nt1.CountOfParty,\nt1.SumOfRep_IND,\nCASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'\nWHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'\nWHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as Res_Rep_Cat\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.Respondents_3 t1",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/9994ff79-e10b-4226-b3b1-60aae3622362'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 32, 5, 188000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 32, 7, 204000, tzinfo=tzlocal())},
 'Statistics': {'Engi

#### Respondent_Representation validation

In [60]:
Respondent_Representation_count = pydb.read_sql_query("select count(*) as count from fcsq.Respondent_Representation")
Respondent_Representation_count

Unnamed: 0,count
0,164479


## Adopt_Disposals_Final table

In [61]:
create_ADOPT_APP_AND_ORDERS_WITH_REP = f"""
SELECT t1.*,
t2.APP_REP_CAT, 
t3.RES_REP_CAT
FROM fcsq.ADOPT_APPS_AND_ORDERS_MATCH AS t1
LEFT JOIN fcsq.Applicant_Representation AS t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
LEFT JOIN fcsq.RESPONDENT_REPRESENTATION as t3 ON t1.CASE_NUMBER = t3.CASE_NUMBER;
"""
pydb.create_temp_table(create_ADOPT_APP_AND_ORDERS_WITH_REP,'ADOPT_APP_AND_ORDERS_WITH_REP')

In [62]:
pydb.read_sql_query("SELECT Count(*) as COUNT from __temp__.adopt_app_and_orders_with_rep")

Unnamed: 0,count
0,147875


In [63]:
create_Adopt_Disposals_Final = f"""
SELECT t1.*,
cast(t1.Year as varchar(3)) || '-Q' || cast(t1.quarter as varchar(3)) AS Quarter2, 
case when (t1.APP_REP_CAT Is Null Or t1.RES_REP_CAT Is Null)  then '5 Unknown'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT ='None' then '4 Neither'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT != 'None' then '3 Respondent Only'
    when t1.APP_REP_CAT != 'None' and t1.RES_REP_CAT = 'None' then '2 Applicant Only'
Else '1 Both'
End AS REP_CAT

FROM __temp__.ADOPT_APP_AND_ORDERS_WITH_REP AS t1 LEFT JOIN (SELECT * FROM {database}.courts_mv WHERE {database}.courts_mv.mojap_snapshot_date = date '{snapshot_date}')
AS t2 
ON t1.DSP_COURT = cast(t2.Code as varchar(3));

"""

pydb.create_temp_table(create_Adopt_Disposals_Final,'Adopt_Disposals_Final')

"""
Missed out these columns for now:

t2.Region_Pre2014, 
t2.Region,
Case when t1.YEAR < 2014 then t2.Region_Pre2014
Else t2.Region
End As Final_Region

"""

create_Adopt_Disposals_Final_2 = f"""
SELECT *
FROM __temp__.ADOPT_DISPOSALS_FINAL
WHERE adoption = 'Adoption';
"""

pydb.create_temp_table(create_Adopt_Disposals_Final_2,'Adopt_Disposals_Final_2')

#### Adopt_Disposals_Final validation

In [64]:
Adopt_Disposals_Final_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Disposals_Final_2")
Adopt_Disposals_Final_count

Unnamed: 0,count
0,82011


## Adopt_Quarterly table

### Drop the Adopt_Quarterly table if it already exists and remove its data from the S3 bucket

In [65]:
drop_Adopt_Quarterly = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Quarterly;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Quarterly)

# clean up previous Adopt_Quarterly files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Quarterly/").delete()

[{'ResponseMetadata': {'RequestId': '8K3XM86RZDBQVEDF',
   'HostId': '69NepMSyEheLehLNoUPqAAdMiNHBlbc4WVIo9+SAvb61WHWX1F8kFFzmtyGqYti1nLZm0S6Te0g=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '69NepMSyEheLehLNoUPqAAdMiNHBlbc4WVIo9+SAvb61WHWX1F8kFFzmtyGqYti1nLZm0S6Te0g=',
    'x-amz-request-id': '8K3XM86RZDBQVEDF',
    'date': 'Wed, 06 Apr 2022 16:32:54 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Quarterly/20220406_160607_00029_zdudd_7e40c586-ef48-48fb-8051-eeaebb6d6d06',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'l14nlgqTlDqnjn_S0K7xutNsIzzm9jOh'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Quarterly/20220406_160607_00029_zdudd_60d2ff42-3aa0-4cc4-8e2e-7e2a5d4bbcaa',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'tjFQsoU4eA1YbfSL_UEPEWZ0vtoLne7o'},
   {'Key': 'fcsq_processing/Adoptio

### Create the Adopt_Quarterly table in Athena

In [66]:
create_Adopt_Quarterly = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Quarterly
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Quarterly') AS
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter
"""


testA = f"""
SELECT * FROM __temp__.ADOPT_DISPOSALS_FINAL_2 WHERE year=2015 AND quarter = 3;
"""
testB = f"""
SELECT * FROM fcsq.Adopt_Quarterly WHERE year=2015 AND quarter = 'Q3' ORDER BY rep_cat
"""

pydb.start_query_execution_and_wait(create_Adopt_Quarterly)
#pydb.read_sql_query(testA)
pydb.read_sql_query(testB)





Unnamed: 0,type,year,quarter,rep_cat,n,mean
0,Adoption,2015,Q3,1 Both,16,28.3125
1,Adoption,2015,Q3,2 Applicant Only,59,20.949153
2,Adoption,2015,Q3,3 Respondent Only,44,27.613636
3,Adoption,2015,Q3,4 Neither,1325,14.77283
4,Adoption,2015,Q3,5 Unknown,15,20.133333
5,Adoption,2015,Q3,All,1459,15.613434


#### Adopt_Quarterly validation

In [67]:
Adopt_Quarterly_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Quarterly")
Adopt_Quarterly_count

pydb.read_sql_query("select * from fcsq.Adopt_Quarterly WHERE year = 2014 AND quarter = \'Q2' ORDER BY rep_cat")

Unnamed: 0,type,year,quarter,rep_cat,n,mean
0,Adoption,2014,Q2,1 Both,17,35.058824
1,Adoption,2014,Q2,2 Applicant Only,58,23.672414
2,Adoption,2014,Q2,3 Respondent Only,45,24.444444
3,Adoption,2014,Q2,4 Neither,1580,14.211392
4,Adoption,2014,Q2,5 Unknown,15,11.8
5,Adoption,2014,Q2,All,1715,14.985423


## Adopt_Annual table

### Drop the Adopt_Annual table if it already exists and remove its data from the S3 bucket

In [68]:
drop_Adopt_Annual = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Annual;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Annual)

# clean up previous Adopt_Annual files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Annual/").delete()

[{'ResponseMetadata': {'RequestId': '1J3MJF9EYVVJHVQE',
   'HostId': 'ujhWjkWUEBGXoGPNsFqelJ1/oKX/N2z+74sPVJ0nCfXm8JUfMKZ3krANAK+5S1dPrb1WNDu/BEM=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'ujhWjkWUEBGXoGPNsFqelJ1/oKX/N2z+74sPVJ0nCfXm8JUfMKZ3krANAK+5S1dPrb1WNDu/BEM=',
    'x-amz-request-id': '1J3MJF9EYVVJHVQE',
    'date': 'Wed, 06 Apr 2022 16:33:16 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Annual/20220406_160654_00062_n5f94_6abf0400-167e-4e05-ac59-0a6ddcfa5add',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'z_0HFWuHgekXREKBzsIBoHmmk8Ws0u5K'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Annual/20220406_160654_00062_n5f94_f92461a6-88f6-4446-a1dc-1c6f5770a1a8',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '9NvQCEF1J4HiDphIhlM6yQM68OJz4HN6'},
   {'Key': 'fcsq_processing/Adoption/Adop

### Create the Adopt_Annual table in Athena

In [69]:
create_Adopt_Annual = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Annual
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Annual') AS
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year
"""

pydb.start_query_execution_and_wait(create_Adopt_Annual)



{'QueryExecutionId': '3e041272-6711-4042-979d-869868ea30f5',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Annual\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Annual') AS\nSELECT DISTINCT\n        'Adoption' as type,\n        year,\n        'N/A' as quarter,\n        rep_cat,\n        count(*) as n,\n        avg(wait_weeks) as mean\n    FROM \n        mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_DISPOSALS_FINAL_2\n    WHERE year > 2010 \n    AND year < 2021\n    AND adoption='Adoption'\nGROUP BY\n    year,\n    rep_cat\n    \nUNION ALL\nSELECT DISTINCT\n        'Adoption' as type,\n        year,\n        'N/A' as quarter,\n        'All' as rep_cat,\n        count(*) as n,\n        avg(wait_weeks) as mean\n    FROM \n        mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_DISPOSALS_FINAL_2\n    WHERE year > 2010 \n    AND year < 2021\n    AND adoption='Adoption'\nGROUP BY\n    year",
 'StatementType': 'DDL',
 'ResultConfigurat

#### Adopt_Annual validation

In [70]:
Adopt_Annual_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Annual")
Adopt_Annual_count



Unnamed: 0,count
0,60


In [71]:
df = pydb.read_sql_query("SELECT * FROM fcsq.Adoption")
df.to_csv(path_or_buf = 's3://alpha-family-data/CSVs/Adoption_legrep.csv',index=False)
#df.to_excel('adoption.xlsx')


In [72]:
df = pydb.read_sql_query("SELECT * FROM fcsq.Adopt_Annual UNION ALL SELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat")
df.to_csv(path_or_buf = '~/FCSQ_data/timeliness.csv',index=False)

## adopt_timeliness_combined table

### Drop the adopt_timeliness_combined table if it already exists and remove its data from the S3 bucket

In [73]:
drop_adopt_timeliness_combined = f"""
DROP TABLE IF EXISTS fcsq.adopt_timeliness_combined;
"""
pydb.start_query_execution_and_wait(drop_adopt_timeliness_combined)

# clean up previous adopt_timeliness_combined files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_timeliness_combined/").delete()

[{'ResponseMetadata': {'RequestId': 'V5YAWYFM18HH3215',
   'HostId': 'dVwx40Rzfv1eMZikeK/snof/lBX9mKWuP2F9XVDppRCb47s29ZmiNqkrguGdpHwoQeWgdLQLwAY=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'dVwx40Rzfv1eMZikeK/snof/lBX9mKWuP2F9XVDppRCb47s29ZmiNqkrguGdpHwoQeWgdLQLwAY=',
    'x-amz-request-id': 'V5YAWYFM18HH3215',
    'date': 'Wed, 06 Apr 2022 16:33:44 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_timeliness_combined/20220406_161143_00022_x9h32_23771c0b-250c-486a-8cc9-10315062ea4a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '9EWwV30uF_qEfD4UmXTvdyLdWXNZbZAk'}]}]

### Create the adopt_timeliness_combined table in Athena

In [74]:
create_adopt_timeliness_combined = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_timeliness_combined
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_timeliness_combined') AS
SELECT * FROM fcsq.Adopt_Annual 
UNION ALL 
SELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat
"""

pydb.start_query_execution_and_wait(create_adopt_timeliness_combined)



{'QueryExecutionId': 'd03a2f93-4124-4b14-bb91-539d06a04977',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_timeliness_combined\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_timeliness_combined') AS\nSELECT * FROM fcsq.Adopt_Annual \nUNION ALL \nSELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/d03a2f93-4124-4b14-bb91-539d06a04977'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 33, 45, 448000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 33, 47, 36000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 1453,
  'DataScannedInBytes': 9114,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/d03a2f93-4124-4b14-bb91-539d06a0497

#### adopt_timeliness_combined validation

In [75]:
adopt_timeliness_combined_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_timeliness_combined")
adopt_timeliness_combined_count

Unnamed: 0,count
0,318


In [76]:
df = pydb.read_sql_query("SELECT * FROM fcsq.adopt_timeliness_combined WHERE (Year<2021 OR Quarter!='Q3') AND (Year<2021 OR Quarter!='Q2')")
df = df.pivot_table(index=['type','year','quarter'],columns=['rep_cat'],values = ['n','mean'],aggfunc=sum, fill_value=0).swaplevel(axis=1).sort_index(axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,rep_cat,1 Both,1 Both,2 Applicant Only,2 Applicant Only,3 Respondent Only,3 Respondent Only,4 Neither,4 Neither,5 Unknown,5 Unknown,All,All
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,n,mean,n,mean,n,mean,n,mean,n,mean,n
type,year,quarter,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Adoption,2011,,28.56,100,18.811024,254,18.597765,179,15.139358,4083,14.797688,173,15.731259,4789
Adoption,2011,Q1,31.761905,21,21.619048,63,20.225,40,15.391906,939,14.319149,47,16.183784,1110
Adoption,2011,Q2,35.125,16,18.923077,52,16.181818,44,14.228782,1084,14.947368,38,14.789303,1234
Adoption,2011,Q3,25.392857,28,16.2,65,19.674419,43,15.819556,992,13.931034,58,16.113828,1186
Adoption,2011,Q4,26.171429,35,18.635135,74,18.5,52,15.209738,1068,17.033333,30,15.895155,1259
Adoption,2012,,29.315789,76,22.701299,231,19.245399,163,13.914185,4801,18.504762,105,14.760789,5376
Adoption,2012,Q1,26.071429,14,21.688525,61,19.692308,39,14.21978,1092,17.08,25,14.956133,1231
Adoption,2012,Q2,29.733333,15,25.888889,63,18.7,40,13.996473,1134,19.914286,35,15.069153,1287
Adoption,2012,Q3,33.0,23,20.846154,52,21.459459,37,14.154589,1242,19.125,24,15.004354,1378
Adoption,2012,Q4,27.416667,24,21.927273,55,17.595745,47,13.369842,1333,17.142857,21,14.103378,1480


In [77]:
df.to_csv(path_or_buf = '~/FCSQ_data/timeliness.csv',index=False)