In [2]:
import pandas as pd  # for the data structures to store and manipulate tables
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools
import boto3  # for working with AWS

# few things for viewing dataframes better
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

database = "familyman_dev_v2"
snapshot_date = "2021-08-19"
database_derived = "familyman_derived_dev_v1"

fcsq_database = "fcsq"

s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

In [3]:
create_adopt_case_data_v1 = f"""
SELECT T1.YEAR,
            T1.QUARTER,
            T1.COURT,
            T1.CASE_NUMBER,
            T1.APP_TYPE,
            T1.CASE_APP_TYPE,
            T1.ADOPTION,
            T1.HIGH_COURT,
            T1.CONTESTED,
            T1.NUMBER_APPLICANTS,
            T1.ADOPTER_TYPE,
            date_format(T1.APP_DATE,'%d-%m-%Y') AS APP_DATE2
    FROM fcsq.adopt_apps_6_adoptions_only AS t1
    ORDER BY case_number, app_date2, court;
    
"""

create_adopt_case_data_v2 = f"""
SELECT *,(case when row_number() over (partition by case_number order by 
        APP_DATE2) = 1 then 1 else 0 end) as case_number_id
FROM __temp__.adopt_case_data_v1
"""

create_adopt_case_data_v3 = f"""
SELECT *
FROM __temp__.adopt_case_data_v2
where case_number_id = 1 and year > 2010;
"""
pydb.create_temp_table(create_adopt_case_data_v1,'adopt_case_data_v1')

pydb.create_temp_table(create_adopt_case_data_v2,'adopt_case_data_v2')
pydb.create_temp_table(create_adopt_case_data_v3,'adopt_case_data_v3')





## Applicant_Info table

### Drop the Applicant_Info table if it already exists and remove its data from the S3 bucket

In [4]:
drop_Adopt_Applicant_Info = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Applicant_Info;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Applicant_Info)

# clean up previous Adopt_Applicant_Info files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Applicant_Info/").delete()

[{'ResponseMetadata': {'RequestId': 'AJXMGMDPGVEFK0Y4',
   'HostId': 'zJuXXDNT0QrJle1cddYdJxV5WoscwZogZm1igwvMVpfcf3Oc6aBwwutALTI3RXfgx3CDuifsHAo=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'zJuXXDNT0QrJle1cddYdJxV5WoscwZogZm1igwvMVpfcf3Oc6aBwwutALTI3RXfgx3CDuifsHAo=',
    'x-amz-request-id': 'AJXMGMDPGVEFK0Y4',
    'date': 'Thu, 07 Apr 2022 10:58:39 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Applicant_Info/20220406_162555_00068_hjees_ef5cfe5e-a778-4cec-9ff6-6e4d458d712a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'TdJ4EnoEIWWxnexN3StHX7KXc3WVWV91'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Applicant_Info/20220406_162555_00068_hjees_8e4406c1-938c-4b34-9c84-591b9290fe26',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'wT19q6kIo6lKe5pj0ZW1k6LZqiZI66wg'},
   {'Key': 'fcsq_processi

### Create the Applicant_Info table in Athena

In [5]:
create_Adopt_Applicant_Info = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Applicant_Info
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Applicant_Info') AS
 SELECT DISTINCT
   {database}.roles.ROLE, 
   {database}.roles.REPRESENTATIVE_ROLE, 
   {database}.roles.ROLE_MODEL, 
   {database}.roles.PARTY, 
   {database}.roles.CASE_NUMBER, 
   {database}.parties.PERSON_GIVEN_FIRST_NAME, 
   {database}.parties.PERSON_FAMILY_NAME, 
   {database}.parties.COMPANY, 
   {database}.addresses.POSTCODE, 
   {database}.parties.GENDER, 
   {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
  LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    (((({database}.roles.ROLE_MODEL)= 'APLZ') AND (({database}.roles.DELETE_FLAG)= 'N')) 
    OR ((({database}.roles.ROLE_MODEL)= 'APLA') AND (({database}.roles.DELETE_FLAG)= 'N'))
    OR ((({database}.roles.ROLE_MODEL)= 'APLC') AND (({database}.roles.DELETE_FLAG)= 'N')))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_Adopt_Applicant_Info)



{'QueryExecutionId': '420d9380-8801-4fe2-85fc-66d1137c1ab8',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Applicant_Info\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Applicant_Info') AS\n SELECT DISTINCT\n   familyman_dev_v2.roles.ROLE, \n   familyman_dev_v2.roles.REPRESENTATIVE_ROLE, \n   familyman_dev_v2.roles.ROLE_MODEL, \n   familyman_dev_v2.roles.PARTY, \n   familyman_dev_v2.roles.CASE_NUMBER, \n   familyman_dev_v2.parties.PERSON_GIVEN_FIRST_NAME, \n   familyman_dev_v2.parties.PERSON_FAMILY_NAME, \n   familyman_dev_v2.parties.COMPANY, \n   familyman_dev_v2.addresses.POSTCODE, \n   familyman_dev_v2.parties.GENDER, \n   familyman_dev_v2.roles.DELETE_FLAG\nFROM \n  (familyman_dev_v2.roles INNER JOIN familyman_dev_v2.parties ON familyman_dev_v2.roles.PARTY = familyman_dev_v2.parties.PARTY) \n  LEFT JOIN familyman_dev_v2.addresses ON familyman_dev_v2.roles.ADDRESS = familyman_dev_v2.addresses.ADDRESS\nWHERE \n    ((((familyma

#### Applicant_Info validation

In [6]:
Adopt_Applicant_Info_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Applicant_Info")
Adopt_Applicant_Info_count

Unnamed: 0,count
0,1986081


## adopt_respondent_info table

### Drop the adopt_respondent_info table if it already exists and remove its data from the S3 bucket

In [7]:
drop_adopt_respondent_info = f"""
DROP TABLE IF EXISTS fcsq.adopt_respondent_info;
"""
pydb.start_query_execution_and_wait(drop_adopt_respondent_info)

# clean up previous adopt_respondent_info files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_respondent_info/").delete()

[{'ResponseMetadata': {'RequestId': 'Z5P6KNTDNFPSE1TV',
   'HostId': 'ZA26eCDpG/vIyxJl0CKizN/7qQ0uEoO7+GJZSSXAktQw5dizuicjVUJwj+DrHD0/XS2dKzxNGAs=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'ZA26eCDpG/vIyxJl0CKizN/7qQ0uEoO7+GJZSSXAktQw5dizuicjVUJwj+DrHD0/XS2dKzxNGAs=',
    'x-amz-request-id': 'Z5P6KNTDNFPSE1TV',
    'date': 'Thu, 07 Apr 2022 10:59:27 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_respondent_info/20220406_162641_00052_23yqr_d5c29a60-99ec-4452-b781-1e2543d71495',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'j.TsImn5i8KmbwL4Y4W7SzxnFXxtMeNS'},
   {'Key': 'fcsq_processing/Adoption/adopt_respondent_info/20220406_162641_00052_23yqr_af05aa73-cbeb-46cc-bd78-9f4d11cb9f39',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Z0WkmUrvG0FX5sQ1ADVeKRCT8rNqV8zD'},
   {'Key': 'fcsq_proces

### Create the adopt_respondent_info table in Athena

In [8]:
create_adopt_respondent_info = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_respondent_info
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_respondent_info') AS
SELECT DISTINCT
  {database}.roles.ROLE, 
  {database}.roles.REPRESENTATIVE_ROLE, 
  {database}.roles.ROLE_MODEL, 
  {database}.roles.PARTY, 
  {database}.roles.CASE_NUMBER, 
  {database}.parties.GENDER, 
  {database}.addresses.POSTCODE, 
  {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
    LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    ((({database}.roles.ROLE_MODEL)='RSPA') AND (({database}.roles.DELETE_FLAG)='N')) 
    OR ((({database}.roles.ROLE_MODEL)='RSPZ') AND (({database}.roles.DELETE_FLAG)='N'))
    OR ((({database}.roles.ROLE_MODEL)='RSPC') AND (({database}.roles.DELETE_FLAG)='N'))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_adopt_respondent_info)



{'QueryExecutionId': 'a33e4da3-df7a-4445-8371-c97b5590f024',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_respondent_info\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_respondent_info') AS\nSELECT DISTINCT\n  familyman_dev_v2.roles.ROLE, \n  familyman_dev_v2.roles.REPRESENTATIVE_ROLE, \n  familyman_dev_v2.roles.ROLE_MODEL, \n  familyman_dev_v2.roles.PARTY, \n  familyman_dev_v2.roles.CASE_NUMBER, \n  familyman_dev_v2.parties.GENDER, \n  familyman_dev_v2.addresses.POSTCODE, \n  familyman_dev_v2.roles.DELETE_FLAG\nFROM \n  (familyman_dev_v2.roles INNER JOIN familyman_dev_v2.parties ON familyman_dev_v2.roles.PARTY = familyman_dev_v2.parties.PARTY) \n    LEFT JOIN familyman_dev_v2.addresses ON familyman_dev_v2.roles.ADDRESS = familyman_dev_v2.addresses.ADDRESS\nWHERE \n    (((familyman_dev_v2.roles.ROLE_MODEL)='RSPA') AND ((familyman_dev_v2.roles.DELETE_FLAG)='N')) \n    OR (((familyman_dev_v2.roles.ROLE_MODEL)='RSPZ') AND ((family

#### adopt_respondent_info validation

In [9]:
adopt_respondent_info_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_respondent_info")
adopt_respondent_info_count

Unnamed: 0,count
0,2385362


## applicants 3 table

In [10]:
create_adopt_applicants_1 = f"""
SELECT 	T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM fcsq.adopt_applicant_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.start_query_execution_and_wait(create_adopt_applicants_1)
pydb.create_temp_table(create_adopt_applicants_1,'adopt_applicants_1')



create_adopt_applicants_2 = f"""
SELECT DISTINCT 
    T1.case_number,
    T1.party,
    max(T1.representative_role) as Rep_Role,
    max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_applicants_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_applicants_2,'adopt_applicants_2')
#pydb.start_query_execution_and_wait(create_adopt_applicants_2)


create_adopt_applicants_3= f"""
SELECT t1.case_number,
    t1.party as App_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as App_Rep_Cat
    
    from __temp__.adopt_applicants_2 AS t1;


"""
#pydb.start_query_execution_and_wait(create_adopt_applicants_3)
pydb.create_temp_table(create_adopt_applicants_3,'adopt_applicants_3')


In [11]:
pydb.read_sql_query("SELECT count(*) as count from __temp__.adopt_applicants_3")

Unnamed: 0,count
0,1986062


In [12]:
create_adopt_respondents_1 = f"""
SELECT T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM fcsq.adopt_respondent_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.start_query_execution_and_wait(create_adopt_respondents_1)
pydb.create_temp_table(create_adopt_respondents_1,'adopt_respondents_1')



create_adopt_respondents_2 = f"""
    SELECT DISTINCT T1.case_number,
        T1.party,
        max(T1.representative_role) as Rep_Role,
        max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_respondents_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_respondents_2,'adopt_respondents_2')
#pydb.start_query_execution_and_wait(create_adopt_respondents_2)


create_adopt_respondents_3= f"""
SELECT t1.case_number,
    t1.party as Resp_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as Resp_Rep_Cat
    
    from __temp__.adopt_respondents_2 AS t1;


"""
#pydb.start_query_execution_and_wait(create_adopt_respondents_3)
pydb.create_temp_table(create_adopt_respondents_3,'adopt_respondents_3')

In [13]:
adopt_app_rep_final = f"""
SELECT t1.YEAR, 
    t1.QUARTER,
    t1.CASE_NUMBER, 
    t1.Court,
    t2.App_Party_ID,
    t2.Representation,
    t2.Gender_Max as App_Gender,
    t2.App_Rep_Cat          
FROM __temp__.ADOPT_CASE_DATA_v3 t1
    LEFT JOIN __temp__.ADOPT_APPLICANTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);

"""

pydb.create_temp_table(adopt_app_rep_final,'adopt_app_rep_final')


In [14]:
adopt_app_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_app_rep_final"
pydb.start_query_execution_and_wait(adopt_app_rep_final_check)

{'QueryExecutionId': '82949cb7-4911-4de7-a34c-25fdefe30c92',
 'Query': 'SELECT COUNT(*) as Count from mojap_de_temp_alpha_user_thomasauburnmoj.adopt_app_rep_final',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/82949cb7-4911-4de7-a34c-25fdefe30c92.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 1, 24, 820000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 1, 25, 514000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 532,
  'DataScannedInBytes': 0,
  'TotalExecutionTimeInMillis': 694,
  'QueryQueueTimeInMillis': 120,
  'QueryPlanningTimeInMillis': 80,
  'ServiceProcessingTimeInMillis': 42},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

In [15]:
adopt_resp_rep_final = f"""
   SELECT t1.YEAR, 
        t1.QUARTER,
        t1.CASE_NUMBER, 
        t1.Court,
          t2.Resp_Party_ID,
          t2.Representation,
          t2.Gender_Max as Resp_Gender,
          t2.Resp_Rep_Cat
          
      FROM __temp__.ADOPT_CASE_DATA_v3 t1
           LEFT JOIN __temp__.ADOPT_RESPONDENTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);
"""

pydb.create_temp_table(adopt_resp_rep_final,'adopt_resp_rep_final')

In [16]:
adopt_resp_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_resp_rep_final"
pydb.start_query_execution_and_wait(adopt_resp_rep_final_check)

{'QueryExecutionId': 'b0e1e03a-ebfc-4452-8847-dfeafefa522f',
 'Query': 'SELECT COUNT(*) as Count from mojap_de_temp_alpha_user_thomasauburnmoj.adopt_resp_rep_final',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/b0e1e03a-ebfc-4452-8847-dfeafefa522f.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 1, 35, 912000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 1, 36, 832000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 761,
  'DataScannedInBytes': 0,
  'TotalExecutionTimeInMillis': 920,
  'QueryQueueTimeInMillis': 116,
  'QueryPlanningTimeInMillis': 108,
  'ServiceProcessingTimeInMillis': 43},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

## Adopt_Hearing_Events table

### Drop the Adopt_Hearing_Events table if it already exists and remove its data from the S3 bucket

In [17]:
drop_Adopt_Hearing_Events = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Hearing_Events;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Hearing_Events)

# clean up previous Adopt_Hearing_Events files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Hearing_Events/").delete()

[{'ResponseMetadata': {'RequestId': '8CZY7DT4W7P7DW9W',
   'HostId': '669ysJiTSh/r4Biy8HNB/cTDXicfqj4pRY39XuiH2F7nSZBnyTL/dZ104dnqCZq1kzBzRQIog7s=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '669ysJiTSh/r4Biy8HNB/cTDXicfqj4pRY39XuiH2F7nSZBnyTL/dZ104dnqCZq1kzBzRQIog7s=',
    'x-amz-request-id': '8CZY7DT4W7P7DW9W',
    'date': 'Thu, 07 Apr 2022 11:01:41 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Hearing_Events/20220406_162839_00087_wzk86_7c1e75ff-a25c-458d-9256-f388b92b65c1',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'TuTHHFqcE8ruT3VDL6P6vsZ6ehvBKgdX'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Hearing_Events/20220406_162839_00087_wzk86_875636f6-3721-49ab-b1f7-33775eb563a0',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'E38y2K7AO3DIFyA_NveXJRs_FbO1m9AM'},
   {'Key': 'fcsq_processi

### Create the Adopt_Hearing_Events table in Athena

In [18]:
create_Adopt_Hearing_Events = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearing_Events
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearing_Events') AS
SELECT {database}.hearings.EVENT,
  {database}.hearings.VACATED_FLAG,
  {database}.hearings.HEARING_TYPE,
  {database}.hearings.HEARING_DATE,
  {database}.events.RECEIPT_DATE,
  {database}.events.ERROR,
  {database}.events.CASE_NUMBER,
  {database}.events.EVENT_MODEL
FROM {database}.hearings
INNER JOIN {database}.events
ON {database}.hearings.EVENT            = {database}.events.EVENT
WHERE {database}.hearings.VACATED_FLAG IS NULL
AND {database}.events.ERROR             = 'N'
AND HEARING_DATE > date_parse('31-12-2009 00:00:00', '%d-%m-%Y %H:%i:%s')
AND (substring(case_number,5,1)='A' OR substring(case_number,5,1)='Z' OR substring(case_number,5,1)='C')
AND {database}.hearings.mojap_snapshot_date = date '{snapshot_date}' and {database}.events.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.start_query_execution_and_wait(create_Adopt_Hearing_Events)



{'QueryExecutionId': 'f9a7ff81-03f1-4466-bea0-c321ad09673e',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearing_Events\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearing_Events') AS\nSELECT familyman_dev_v2.hearings.EVENT,\n  familyman_dev_v2.hearings.VACATED_FLAG,\n  familyman_dev_v2.hearings.HEARING_TYPE,\n  familyman_dev_v2.hearings.HEARING_DATE,\n  familyman_dev_v2.events.RECEIPT_DATE,\n  familyman_dev_v2.events.ERROR,\n  familyman_dev_v2.events.CASE_NUMBER,\n  familyman_dev_v2.events.EVENT_MODEL\nFROM familyman_dev_v2.hearings\nINNER JOIN familyman_dev_v2.events\nON familyman_dev_v2.hearings.EVENT            = familyman_dev_v2.events.EVENT\nWHERE familyman_dev_v2.hearings.VACATED_FLAG IS NULL\nAND familyman_dev_v2.events.ERROR             = 'N'\nAND HEARING_DATE > date_parse('31-12-2009 00:00:00', '%d-%m-%Y %H:%i:%s')\nAND (substring(case_number,5,1)='A' OR substring(case_number,5,1)='Z' OR substring(case_number,5,1)

#### Adopt_Hearing_Events validation

In [19]:
Adopt_Hearing_Events_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Hearing_Events")
Adopt_Hearing_Events_count

Unnamed: 0,count
0,1974912


## Adopt_Hearings_Cases table

### Drop the Adopt_Hearings_Cases table if it already exists and remove its data from the S3 bucket

In [20]:
drop_Adopt_Hearings_Cases = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Hearings_Cases;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Hearings_Cases)

# clean up previous Adopt_Hearings_Cases files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Hearings_Cases/").delete()

[{'ResponseMetadata': {'RequestId': 'VKPA6J146C33XFP7',
   'HostId': '6f8f7QAfk07Rtdwx2jtySHVoQmpcgb3RiXXO+GSISdQ8DiI+phtZ5eIu676kXvjURweCe/Tso4U=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '6f8f7QAfk07Rtdwx2jtySHVoQmpcgb3RiXXO+GSISdQ8DiI+phtZ5eIu676kXvjURweCe/Tso4U=',
    'x-amz-request-id': 'VKPA6J146C33XFP7',
    'date': 'Thu, 07 Apr 2022 11:02:03 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Hearings_Cases/20220406_162900_00074_8wft4_f64cab54-a0c5-450e-9a75-be7836f690ec',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'oXhjMlYxqvsg6E774yY89dqMhdORD0jP'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Hearings_Cases/20220406_162900_00074_8wft4_0534ebc0-ef79-4027-bc45-4d646ce726cd',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'IIKbC00woTimIKtoxId5hy87TUWiHp4.'},
   {'Key': 'fcsq_processi

### Create the Adopt_Hearings_Cases table in Athena

In [21]:
"""
Equivalent to Hearings_Adopt_V3
"""

create_Adopt_Hearings_Cases = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Hearings_Cases
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Hearings_Cases') AS
select t1.case_number,
    t1.error,
    t1.event,
    t1.event_model,
    t1.hearing_date,
    t1.hearing_type,
    t1.receipt_date,
    t1.vacated_flag,
    substring(Case_Number,5,1) AS Case_Type
    from fcsq.Adopt_Hearing_Events AS t1
    where t1.event_model in ('A8', 'A90', 'A91', 'G60')
    order by t1.case_number, t1.receipt_date;
"""

pydb.start_query_execution_and_wait(create_Adopt_Hearings_Cases)

create_Adopt_Hearings_Cases_v2 = f"""
SELECT *,
(case when row_number() over (partition by Case_Number order by receipt_date) = 1 then 1 else 0 end) as Case_Number_ID
FROM fcsq.Adopt_Hearings_Cases
"""

pydb.create_temp_table(create_Adopt_Hearings_Cases_v2,'adopt_hearings_cases_v2')
pydb.start_query_execution_and_wait(create_Adopt_Hearings_Cases_v2)


{'QueryExecutionId': '0578f395-b1d9-4d7e-a9ea-966d76ec448c',
 'Query': 'SELECT *,\n(case when row_number() over (partition by Case_Number order by receipt_date) = 1 then 1 else 0 end) as Case_Number_ID\nFROM fcsq.Adopt_Hearings_Cases',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/0578f395-b1d9-4d7e-a9ea-966d76ec448c.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 19, 987000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 23, 783000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 3618,
  'DataScannedInBytes': 2379198,
  'TotalExecutionTimeInMillis': 3796,
  'QueryQueueTimeInMillis': 123,
  'QueryPlanningTimeInMillis': 107,
  'ServiceProcessingTimeInMillis': 55},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVe

#### Adopt_Hearings_Cases validation

In [22]:
Adopt_Hearings_Cases_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Hearings_Cases")
Adopt_Hearings_Cases_count

Unnamed: 0,count
0,323515


## Hearing_Adopt_Applicants table

### Drop the Hearing_Adopt_Applicants table if it already exists and remove its data from the S3 bucket

In [23]:
drop_Hearing_Adopt_Applicants = f"""
DROP TABLE IF EXISTS fcsq.Hearing_Adopt_Applicants;
"""
pydb.start_query_execution_and_wait(drop_Hearing_Adopt_Applicants)

# clean up previous Hearing_Adopt_Applicants files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Hearing_Adopt_Applicants/").delete()

[{'ResponseMetadata': {'RequestId': 'WFNMNHYZ7QWN10E8',
   'HostId': 'MiBfgF4ti7DpZnSuthX2yjP27SJgea5F5tJOl+EI6JJCRKU+L+XClcr7jZOgLxXWipSpGHf/W84=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'MiBfgF4ti7DpZnSuthX2yjP27SJgea5F5tJOl+EI6JJCRKU+L+XClcr7jZOgLxXWipSpGHf/W84=',
    'x-amz-request-id': 'WFNMNHYZ7QWN10E8',
    'date': 'Thu, 07 Apr 2022 11:02:33 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Applicants/20220406_162926_00033_rdxi3_84695170-0953-4d56-a458-26f5cad30461',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'GpVAF7h2G5fLswBHbyGMxTquxgheBJFH'},
   {'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Applicants/20220406_162926_00033_rdxi3_9b6c361f-5f2d-4d52-a998-4fcdc94f8605',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'F.rw7Na.0Lml6Wu4rajCY0SBz4Cs9tsx'},
   {'Key': 'fcsq_

### Create the Hearing_Adopt_Applicants table in Athena

In [24]:
create_Hearing_Adopt_Applicants = f"""
CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Applicants
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Applicants') AS
SELECT t1.*,
t2.Case_Number_ID AS Hearing_Count
FROM __temp__.ADOPT_APP_REP_FINAL t1
LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
where t2.Case_Number_ID > 0;
"""

pydb.start_query_execution_and_wait(create_Hearing_Adopt_Applicants)



{'QueryExecutionId': 'a296c13f-3fde-4b5e-838e-c4feea2e9613',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Applicants\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Applicants') AS\nSELECT t1.*,\nt2.Case_Number_ID AS Hearing_Count\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_APP_REP_FINAL t1\nLEFT JOIN mojap_de_temp_alpha_user_thomasauburnmoj.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)\nwhere t2.Case_Number_ID > 0",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/a296c13f-3fde-4b5e-838e-c4feea2e9613'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 34, 973000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 39, 710000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 4402,
  'Data

#### Hearing_Adopt_Applicants validation

In [25]:
Hearing_Adopt_Applicants_count = pydb.read_sql_query("select count(*) as count from fcsq.Hearing_Adopt_Applicants")
Hearing_Adopt_Applicants_count

Unnamed: 0,count
0,95784


## Hearing_Adopt_Respondents table

### Drop the Hearing_Adopt_Respondents table if it already exists and remove its data from the S3 bucket

In [26]:
drop_Hearing_Adopt_Respondents = f"""
DROP TABLE IF EXISTS fcsq.Hearing_Adopt_Respondents;
"""
pydb.start_query_execution_and_wait(drop_Hearing_Adopt_Respondents)

# clean up previous Hearing_Adopt_Respondents files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Hearing_Adopt_Respondents/").delete()

[{'ResponseMetadata': {'RequestId': '3Q5WAKSD00PT8W1W',
   'HostId': '27qdPJp1oM0LRH41J9SFnzOxDwiVQEIOB4diIOyZWuYtoZy/4HN2k4J+2AJ+tWVDkRFzdwJMd6Y=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '27qdPJp1oM0LRH41J9SFnzOxDwiVQEIOB4diIOyZWuYtoZy/4HN2k4J+2AJ+tWVDkRFzdwJMd6Y=',
    'x-amz-request-id': '3Q5WAKSD00PT8W1W',
    'date': 'Thu, 07 Apr 2022 11:02:50 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Respondents/20220406_162939_00061_vwdke_6f7da1be-a4e2-4849-ab44-5923c3e339a3',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'SpyQzWoLb5C96DocXNKLRla8evj3dBmS'},
   {'Key': 'fcsq_processing/Adoption/Hearing_Adopt_Respondents/20220406_162939_00061_vwdke_9e2112aa-c0be-4ea6-89a5-cd83e31d6247',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'sAfIziedYi1jnRuhyulNgTi2tK2Q5U8p'},
   {'Key': 'fcs

### Create the Hearing_Adopt_Respondents table in Athena

In [27]:
create_Hearing_Adopt_Respondents = f"""
CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Respondents
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Respondents') AS
    SELECT t1.*,
    t2.Case_Number_ID AS Hearing_Count
    FROM __temp__.ADOPT_RESP_REP_FINAL t1
    LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
    where t2.Case_Number_ID > 0;
"""

pydb.start_query_execution_and_wait(create_Hearing_Adopt_Respondents)



{'QueryExecutionId': '3cec9124-a2de-4959-8d8d-6e4e5fde4f0d',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Hearing_Adopt_Respondents\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Hearing_Adopt_Respondents') AS\n    SELECT t1.*,\n    t2.Case_Number_ID AS Hearing_Count\n    FROM mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_RESP_REP_FINAL t1\n    LEFT JOIN mojap_de_temp_alpha_user_thomasauburnmoj.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)\n    where t2.Case_Number_ID > 0",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/3cec9124-a2de-4959-8d8d-6e4e5fde4f0d'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 51, 910000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 2, 55, 2000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInM

#### Hearing_Adopt_Respondents validation

In [28]:
Hearing_Adopt_Respondents_count = pydb.read_sql_query("select count(*) as count from fcsq.Hearing_Adopt_Respondents")
Hearing_Adopt_Respondents_count

Unnamed: 0,count
0,94991


## Adopt_App table

### Drop the Adopt_App table if it already exists and remove its data from the S3 bucket

In [29]:
drop_Adopt_App = f"""
DROP TABLE IF EXISTS fcsq.Adopt_App;
"""
pydb.start_query_execution_and_wait(drop_Adopt_App)

# clean up previous Adopt_App files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_App/").delete()

[{'ResponseMetadata': {'RequestId': 'R7J35SA697J7N2B1',
   'HostId': '6xVb4DRSZAFUHTLT6TLaSMBUWf1NrdR4R6FbZH4HUbb4YWJ9sHPdL9YsqrQhWK6f0hF98xMr8iE=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '6xVb4DRSZAFUHTLT6TLaSMBUWf1NrdR4R6FbZH4HUbb4YWJ9sHPdL9YsqrQhWK6f0hF98xMr8iE=',
    'x-amz-request-id': 'R7J35SA697J7N2B1',
    'date': 'Thu, 07 Apr 2022 11:03:04 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_App/20220406_162952_00026_69yp4_7abd92d3-ac6a-4f28-93e4-101428912974',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'b_gLDDuLOmmx8LYR5S06TulirJ2SJ.qb'},
   {'Key': 'fcsq_processing/Adoption/Adopt_App/20220406_162952_00026_69yp4_a912316b-ae84-4ed4-b405-6763862c38cd',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'bFpNDgJprQP.OgAQCTFtWIDfqpFmRmEg'},
   {'Key': 'fcsq_processing/Adoption/Adopt_App/

### Create the Adopt_App table in Athena

In [30]:
create_Adopt_App = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_App
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_App') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Applicant' AS PARTY,
   App_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  fcsq.HEARING_ADOPT_APPLICANTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  App_Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_App)



{'QueryExecutionId': 'ce982698-c419-4f76-bbeb-0fd7dc36eb76',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_App\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_App') AS\nSELECT\n  'Adoption' AS CASE_TYPE,\n  Year,\n  Quarter,\n  'Party' AS Category,\n  'Applicant' AS PARTY,\n   App_Gender AS Gender,\n  Representation,\n  Count (*) AS Count\nFROM\n  fcsq.HEARING_ADOPT_APPLICANTS\nWHERE \n  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/\nGROUP BY\n  'Adoption',\n  Year,\n  Quarter,\n  'Party',\n  'Applicant',\n  App_Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/ce982698-c419-4f76-bbeb-0fd7dc36eb76'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 3, 5, 845000, t

#### Adopt_App validation

In [31]:
Adopt_App_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_App")
Adopt_App_count

Unnamed: 0,count
0,221


## Adopt_resp table

### Drop the Adopt_resp table if it already exists and remove its data from the S3 bucket

In [32]:
drop_Adopt_resp = f"""
DROP TABLE IF EXISTS fcsq.Adopt_resp;
"""
pydb.start_query_execution_and_wait(drop_Adopt_resp)

# clean up previous Adopt_resp files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_resp/").delete()

[{'ResponseMetadata': {'RequestId': 'XBYYVJPEK8CZ8TWD',
   'HostId': 'mWf1itJkYdOcvndaSr7zsDqZgTNeRXL8epcr7jYy0puQmHzh4IS0TfohlobsRbHKDQzBVpStdRLfzQ21NdBezQ==',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'mWf1itJkYdOcvndaSr7zsDqZgTNeRXL8epcr7jYy0puQmHzh4IS0TfohlobsRbHKDQzBVpStdRLfzQ21NdBezQ==',
    'x-amz-request-id': 'XBYYVJPEK8CZ8TWD',
    'date': 'Thu, 07 Apr 2022 11:03:17 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_resp/20220406_163005_00056_e3ahn_9290478d-b7c8-4825-8bee-5ece6866df99',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'SvGJ6FTJgDIv4_PLMudzS5c6JRZNUwsI'},
   {'Key': 'fcsq_processing/Adoption/Adopt_resp/20220406_163005_00056_e3ahn_eea2230b-bdda-416a-ab33-1a1d98ebc286',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '0f0YGW0qAGhfzqLZle8xWQuYHgYkZhWq'},
   {'Key': 'fcsq_proc

### Create the Adopt_resp table in Athena

In [33]:
create_Adopt_resp = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_resp
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_resp') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Respondent' AS PARTY,
  Resp_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  fcsq.HEARING_ADOPT_RESPONDENTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  Resp_Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_resp)



{'QueryExecutionId': 'a29956fe-fa27-4c3f-927f-e16d45d8fce8',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_resp\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_resp') AS\nSELECT\n  'Adoption' AS CASE_TYPE,\n  Year,\n  Quarter,\n  'Party' AS Category,\n  'Respondent' AS PARTY,\n  Resp_Gender AS Gender,\n  Representation,\n  Count (*) AS Count\nFROM\n  fcsq.HEARING_ADOPT_RESPONDENTS\nWHERE \n  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/\nGROUP BY\n  'Adoption',\n  Year,\n  Quarter,\n  'Party',\n  'Applicant',\n  Resp_Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/a29956fe-fa27-4c3f-927f-e16d45d8fce8'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 3, 18, 609

#### Adopt_resp validation

In [34]:
Adopt_resp_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_resp")
Adopt_resp_count

Unnamed: 0,count
0,235


## adopt_case table

### Drop the adopt_case table if it already exists and remove its data from the S3 bucket

In [35]:
drop_adopt_case = f"""
DROP TABLE IF EXISTS fcsq.adopt_case;
"""
pydb.start_query_execution_and_wait(drop_adopt_case)

# clean up previous adopt_case files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_case/").delete()

[{'ResponseMetadata': {'RequestId': 'BYH79DNPT7ZBPV7Q',
   'HostId': 'nO35zb9roegTj6UakkyAIdJOFDAtzz1FuCnYFJiGyTM6JeoG7Qm8d8mEEfiY9NOy4WpsbioGjao=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'nO35zb9roegTj6UakkyAIdJOFDAtzz1FuCnYFJiGyTM6JeoG7Qm8d8mEEfiY9NOy4WpsbioGjao=',
    'x-amz-request-id': 'BYH79DNPT7ZBPV7Q',
    'date': 'Thu, 07 Apr 2022 11:03:30 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_case/20220406_163017_00086_hu6sp_f293ebdd-bd05-4b31-b574-cffb5dc952bf',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'ADeMsAV5uoF6sz0slAKn03ZMtEFufnZX'},
   {'Key': 'fcsq_processing/Adoption/Adopt_case/20220406_163017_00086_hu6sp_6364cdd6-9bb0-4ff7-b49d-21497a0ef45a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '5deRKhMaa7nOxRfVR.lGdOM.cZkKPrGu'}]}]

### Create the adopt_case table in Athena

In [36]:
create_adopt_case = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_case
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_case') AS
SELECT *,
    Count(*) as Count FROM
    (SELECT
        'Adoption' AS CASE_TYPE,
        Year,
        Quarter,
        'Cases' AS Category,
        'N/A' AS PARTY,
        'N/A' AS Gender,
        'N/A' AS Representation
    FROM
      __temp__.adopt_case_data_v3)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_adopt_case)



{'QueryExecutionId': '09aaf7ac-2515-4bbb-9f8a-b45149344096',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_case\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_case') AS\nSELECT *,\n    Count(*) as Count FROM\n    (SELECT\n        'Adoption' AS CASE_TYPE,\n        Year,\n        Quarter,\n        'Cases' AS Category,\n        'N/A' AS PARTY,\n        'N/A' AS Gender,\n        'N/A' AS Representation\n    FROM\n      mojap_de_temp_alpha_user_thomasauburnmoj.adopt_case_data_v3)\nGROUP BY\n  CASE_TYPE,\n  Year,\n  Quarter,\n  Category,\n  PARTY,\n  Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/09aaf7ac-2515-4bbb-9f8a-b45149344096'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 3, 31, 506000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datet

#### adopt_case validation

In [37]:
adopt_case_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_case")
adopt_case_count

Unnamed: 0,count
0,43


## Adopt_Case_Hearings table

### Drop the Adopt_Case_Hearings table if it already exists and remove its data from the S3 bucket

In [38]:
drop_Adopt_Case_Hearings = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Case_Hearings;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Case_Hearings)

# clean up previous Adopt_Case_Hearings files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Case_Hearings/").delete()

[{'ResponseMetadata': {'RequestId': 'M4GYR1F2QPBPXERD',
   'HostId': 'wZF8AMnzwSTvDnFSA7Z1AYFRXdI7F3yn03Kh3jMu+Kdo/9JSuDQD28/X33F71DlaTkveacFqnYk=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'wZF8AMnzwSTvDnFSA7Z1AYFRXdI7F3yn03Kh3jMu+Kdo/9JSuDQD28/X33F71DlaTkveacFqnYk=',
    'x-amz-request-id': 'M4GYR1F2QPBPXERD',
    'date': 'Thu, 07 Apr 2022 11:03:43 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Case_Hearings/20220406_163038_00010_hbsqe_bd6a1017-064b-4ee7-8f93-59f9231660f9',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '8xcAg5.NZjRiSfW0LCphsWo_xS6xFEyG'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Case_Hearings/20220406_163038_00010_hbsqe_ec57a0a3-a9f5-4d65-9718-a299fb979505',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '6j4F4KcDZuhYzerNH8lD5XFW88YXlLqD'}]}]

### Create the Adopt_Case_Hearings table in Athena

In [39]:
create_hearing_adopt_case =f"""
SELECT DISTINCT Year, Quarter, Case_Number
FROM fcsq.HEARING_ADOPT_Applicants;
"""

pydb.create_temp_table(create_hearing_adopt_case,'hearing_adopt_case')



create_Adopt_Case_Hearings = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Case_Hearings
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Case_Hearings') AS
SELECT *, Count(*) as Count FROM
    (SELECT
      'Adoption' AS CASE_TYPE,
      Year,
      Quarter,
      'Cases with a hearing' AS Category,
      'N/A' AS PARTY,
      'N/A' AS Gender,
      'N/A' AS Representation
    FROM
      __temp__.Hearing_ADOPT_Case)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.start_query_execution_and_wait(create_Adopt_Case_Hearings)



{'QueryExecutionId': 'e2a53f59-f9a9-4808-9085-23edc4d130fc',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Case_Hearings\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Case_Hearings') AS\nSELECT *, Count(*) as Count FROM\n    (SELECT\n      'Adoption' AS CASE_TYPE,\n      Year,\n      Quarter,\n      'Cases with a hearing' AS Category,\n      'N/A' AS PARTY,\n      'N/A' AS Gender,\n      'N/A' AS Representation\n    FROM\n      mojap_de_temp_alpha_user_thomasauburnmoj.Hearing_ADOPT_Case)\nGROUP BY\n  CASE_TYPE,\n  Year,\n  Quarter,\n  Category,\n  PARTY,\n  Gender,\n  Representation",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/e2a53f59-f9a9-4808-9085-23edc4d130fc'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 3, 52, 991000, tzinfo=tzlocal()),
  'CompletionDateTime': 

#### Adopt_Case_Hearings validation

In [40]:
Adopt_Case_Hearings_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Case_Hearings")
Adopt_Case_Hearings_count

Unnamed: 0,count
0,43


## Adoption table

### Drop the Adoption table if it already exists and remove its data from the S3 bucket

In [41]:
drop_Adoption = f"""
DROP TABLE IF EXISTS fcsq.Adoption;
"""
pydb.start_query_execution_and_wait(drop_Adoption)

# clean up previous Adoption files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoption/").delete()

[{'ResponseMetadata': {'RequestId': 'A5HSZN8ZDWSB31X7',
   'HostId': 'CcpqdTSb+jgRba6GVZkDxJNGnrzUjyLLhv5r4dhHJDmes5phMr5ofy58DzSnOVdGqqrb6W3xumo=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'CcpqdTSb+jgRba6GVZkDxJNGnrzUjyLLhv5r4dhHJDmes5phMr5ofy58DzSnOVdGqqrb6W3xumo=',
    'x-amz-request-id': 'A5HSZN8ZDWSB31X7',
    'date': 'Thu, 07 Apr 2022 11:04:04 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoption/20220406_163051_00074_neckb_91ac0d6d-4a2d-4509-9f90-95b2f5e4e8d4',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Ec3XpbLfzMgQM7uA.oVhbjBdPw3D2k26'},
   {'Key': 'fcsq_processing/Adoption/Adoption/20220406_163051_00074_neckb_324cb459-dbe3-4b89-8162-d92cb27cc80c',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'pOOwEfotdhrTRekg_AxX9kMVanua7rGZ'},
   {'Key': 'fcsq_processing/Adoption/Adoption/202

### Create the Adoption table in Athena

In [42]:
create_Adoption = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adoption
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoption') AS
SELECT
  *
FROM
 fcsq.ADOPT_APP
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_RESP
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_CASE
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPT_CASE_HEARINGS;
"""

pydb.start_query_execution_and_wait(create_Adoption)



{'QueryExecutionId': '300b1a7d-8f7c-4eb5-aed5-ff5ea81da968',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adoption\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoption') AS\nSELECT\n  *\nFROM\n fcsq.ADOPT_APP\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_RESP\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_CASE\nUNION ALL\nSELECT\n  *\nFROM\n  fcsq.ADOPT_CASE_HEARINGS",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/300b1a7d-8f7c-4eb5-aed5-ff5ea81da968'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 4, 5, 842000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 4, 8, 125000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 2129,
  'DataScannedInBytes': 12209,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tab

#### Adoption validation

In [43]:
Adoption_count = pydb.read_sql_query("select count(*) as count from fcsq.Adoption")
Adoption_count

Unnamed: 0,count
0,542


## Applicant_representation table

### Drop the Applicant_representation table if it already exists and remove its data from the S3 bucket

In [44]:
drop_Applicant_representation = f"""
DROP TABLE IF EXISTS fcsq.Applicant_representation;
"""
pydb.start_query_execution_and_wait(drop_Applicant_representation)

# clean up previous Applicant_representation files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Applicant_representation/").delete()

[{'ResponseMetadata': {'RequestId': 'BP207EFQY3N0320D',
   'HostId': 'Q/fhtAxEzQEf51MvQ9sHXHO2zxrk1cNzBkMOU9BnAcaVDNju/UC3gXCI7r/oskHfJU3g77Pt4UA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Q/fhtAxEzQEf51MvQ9sHXHO2zxrk1cNzBkMOU9BnAcaVDNju/UC3gXCI7r/oskHfJU3g77Pt4UA=',
    'x-amz-request-id': 'BP207EFQY3N0320D',
    'date': 'Thu, 07 Apr 2022 11:04:18 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Applicant_representation/20220406_163127_00086_3wfqx_f0252942-96fc-40ea-92b5-630365a0cb53',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '6ip2VXyJvrtl62BkahxllXNEKo4ZGgXt'},
   {'Key': 'fcsq_processing/Adoption/Applicant_representation/20220406_163127_00086_3wfqx_4c7a79b5-6c62-4fc9-a2d8-69d47bca5cf5',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'TFnD8EU8Mpq2ej1Gw4FaB6zgTiQlJt_X'},
   {'Key': 'fcsq_

### Create the Applicant_representation table in Athena

In [45]:
create_applicants_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM fcsq.Adopt_Applicant_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_applicants_1,'applicants_1')

create_applicants_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.Applicants_1 t1;
"""
pydb.create_temp_table(create_applicants_2,'applicants_2')

create_applicants_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.Applicants_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_applicants_3,'applicants_3')

create_Applicant_representation = f"""
CREATE TABLE IF NOT EXISTS fcsq.Applicant_representation
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Applicant_representation') AS
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as App_Rep_Cat
FROM __temp__.Applicants_3 t1;
"""

pydb.start_query_execution_and_wait(create_Applicant_representation)



{'QueryExecutionId': '7b67fc70-8eaa-4168-8598-c772b52e9610',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Applicant_representation\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Applicant_representation') AS\nSELECT t1.Case_Number,\nt1.CountOfParty,\nt1.SumOfRep_IND,\nCASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'\nWHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'\nWHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as App_Rep_Cat\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.Applicants_3 t1",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/7b67fc70-8eaa-4168-8598-c772b52e9610'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 4, 44, 331000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 4, 46, 831000, tzinfo=tzlocal())},
 'Statistics': {'EngineE

#### Applicant_representation validation

In [46]:
Applicant_representation_count = pydb.read_sql_query("select count(*) as count from fcsq.Applicant_representation")
Applicant_representation_count

Unnamed: 0,count
0,1861318


## Respondent_Representation table

### Drop the Respondent_Representation table if it already exists and remove its data from the S3 bucket

In [47]:
drop_Respondent_Representation = f"""
DROP TABLE IF EXISTS fcsq.Respondent_Representation;
"""
pydb.start_query_execution_and_wait(drop_Respondent_Representation)

# clean up previous Respondent_Representation files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Respondent_Representation/").delete()

[{'ResponseMetadata': {'RequestId': 'D9WTM2NNTV3JK8CY',
   'HostId': 'ZFGpNeYbxvlarpnPoGR/hgjo0Uyh9F4+OfWlXTmMPQhLYkzpQfAv5GnhQ4VaZnlcKVhxD528T2VyD+qUkHfEYg==',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'ZFGpNeYbxvlarpnPoGR/hgjo0Uyh9F4+OfWlXTmMPQhLYkzpQfAv5GnhQ4VaZnlcKVhxD528T2VyD+qUkHfEYg==',
    'x-amz-request-id': 'D9WTM2NNTV3JK8CY',
    'date': 'Thu, 07 Apr 2022 11:04:56 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Respondent_Representation/20220406_163205_00088_vbfvk_7b77400a-9bf6-4d36-840b-4d1f2890684f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'OqzQ_wiwYXvPGnHJtikpu_zvOlqzg3e6'},
   {'Key': 'fcsq_processing/Adoption/Respondent_Representation/20220406_163205_00088_vbfvk_fe9fadc1-dda6-40ea-be23-a9b6fd4cb9e8',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'uH7YzbsS7bMg2hnnwjXeYlJm8Sr

### Create the Respondent_Representation table in Athena

In [48]:
create_respondents_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM fcsq.Adopt_Respondent_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_respondents_1,'respondents_1')

create_respondents_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.respondents_1 t1;
"""
pydb.create_temp_table(create_respondents_2,'respondents_2')

create_respondents_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.respondents_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_respondents_3,'respondents_3')

create_Respondent_Representation = f"""
CREATE TABLE IF NOT EXISTS fcsq.Respondent_Representation
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Respondent_Representation') AS
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as Res_Rep_Cat
FROM __temp__.Respondents_3 t1
"""

pydb.start_query_execution_and_wait(create_Respondent_Representation)



{'QueryExecutionId': 'ef10ba04-6353-4648-b38c-4c9c05b13d0c',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Respondent_Representation\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Respondent_Representation') AS\nSELECT t1.Case_Number,\nt1.CountOfParty,\nt1.SumOfRep_IND,\nCASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'\nWHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'\nWHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as Res_Rep_Cat\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.Respondents_3 t1",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/ef10ba04-6353-4648-b38c-4c9c05b13d0c'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 5, 22, 480000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 5, 24, 927000, tzinfo=tzlocal())},
 'Statistics': {'Engi

#### Respondent_Representation validation

In [49]:
Respondent_Representation_count = pydb.read_sql_query("select count(*) as count from fcsq.Respondent_Representation")
Respondent_Representation_count

Unnamed: 0,count
0,1854596


## Adopt_Disposals_Final table

In [50]:
create_ADOPT_APP_AND_ORDERS_WITH_REP = f"""
SELECT t1.*,
t2.APP_REP_CAT, 
t3.RES_REP_CAT
FROM fcsq.ADOPT_APPS_AND_ORDERS_MATCH AS t1
LEFT JOIN fcsq.Applicant_Representation AS t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
LEFT JOIN fcsq.RESPONDENT_REPRESENTATION as t3 ON t1.CASE_NUMBER = t3.CASE_NUMBER;
"""
pydb.create_temp_table(create_ADOPT_APP_AND_ORDERS_WITH_REP,'ADOPT_APP_AND_ORDERS_WITH_REP')

In [51]:
pydb.read_sql_query("SELECT Count(*) as COUNT from __temp__.adopt_app_and_orders_with_rep")

Unnamed: 0,count
0,147875


In [52]:
create_Adopt_Disposals_Final = f"""
SELECT t1.*,
cast(t1.Year as varchar(3)) || '-Q' || cast(t1.quarter as varchar(3)) AS Quarter2, 
case when (t1.APP_REP_CAT Is Null Or t1.RES_REP_CAT Is Null)  then '5 Unknown'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT ='None' then '4 Neither'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT != 'None' then '3 Respondent Only'
    when t1.APP_REP_CAT != 'None' and t1.RES_REP_CAT = 'None' then '2 Applicant Only'
Else '1 Both'
End AS REP_CAT

FROM __temp__.ADOPT_APP_AND_ORDERS_WITH_REP AS t1 LEFT JOIN (SELECT * FROM {database}.courts_mv WHERE {database}.courts_mv.mojap_snapshot_date = date '{snapshot_date}')
AS t2 
ON t1.DSP_COURT = cast(t2.Code as varchar(3));

"""

pydb.create_temp_table(create_Adopt_Disposals_Final,'Adopt_Disposals_Final')

"""
Missed out these columns for now:

t2.Region_Pre2014, 
t2.Region,
Case when t1.YEAR < 2014 then t2.Region_Pre2014
Else t2.Region
End As Final_Region

"""

create_Adopt_Disposals_Final_2 = f"""
SELECT *
FROM __temp__.ADOPT_DISPOSALS_FINAL
WHERE adoption = 'Adoption';
"""

pydb.create_temp_table(create_Adopt_Disposals_Final_2,'Adopt_Disposals_Final_2')

#### Adopt_Disposals_Final validation

In [53]:
Adopt_Disposals_Final_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Disposals_Final_2")
Adopt_Disposals_Final_count

Unnamed: 0,count
0,82011


## Adopt_Quarterly table

### Drop the Adopt_Quarterly table if it already exists and remove its data from the S3 bucket

In [54]:
drop_Adopt_Quarterly = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Quarterly;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Quarterly)

# clean up previous Adopt_Quarterly files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Quarterly/").delete()

[{'ResponseMetadata': {'RequestId': 'S3KRYH3J7EX13D7T',
   'HostId': 'AIAoVI+MaDLDXi8t1ZIqKAS/vXk9ngEakTjfVoJmT0WqnDG7jpb6XtAfpwrYDYVclpHFyUtV3Ho=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'AIAoVI+MaDLDXi8t1ZIqKAS/vXk9ngEakTjfVoJmT0WqnDG7jpb6XtAfpwrYDYVclpHFyUtV3Ho=',
    'x-amz-request-id': 'S3KRYH3J7EX13D7T',
    'date': 'Thu, 07 Apr 2022 11:06:11 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Quarterly/20220406_163255_00048_6mumg_c9fe2920-8ad1-40bd-9c4d-a81680d2ae62',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'n46Ur2YAGbgUpYSgIQjoS1gGyigO1I5r'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Quarterly/20220406_163255_00048_6mumg_52a69ac9-3da3-44ba-8ead-c826bd93625f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'CTOgGo.vkH_KrkbL_8uYYkIaMrxgSn8h'},
   {'Key': 'fcsq_processing/Adoptio

### Create the Adopt_Quarterly table in Athena

In [55]:
create_Adopt_Quarterly = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Quarterly
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Quarterly') AS
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter
"""


testA = f"""
SELECT * FROM __temp__.ADOPT_DISPOSALS_FINAL_2 WHERE year=2015 AND quarter = 3;
"""
testB = f"""
SELECT * FROM fcsq.Adopt_Quarterly WHERE year=2015 AND quarter = 'Q3' ORDER BY rep_cat
"""

pydb.start_query_execution_and_wait(create_Adopt_Quarterly)
#pydb.read_sql_query(testA)
pydb.read_sql_query(testB)





Unnamed: 0,type,year,quarter,rep_cat,n,mean
0,Adoption,2015,Q3,1 Both,16,28.3125
1,Adoption,2015,Q3,2 Applicant Only,59,20.949153
2,Adoption,2015,Q3,3 Respondent Only,44,27.613636
3,Adoption,2015,Q3,4 Neither,1325,14.77283
4,Adoption,2015,Q3,5 Unknown,15,20.133333
5,Adoption,2015,Q3,All,1459,15.613434


#### Adopt_Quarterly validation

In [56]:
Adopt_Quarterly_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Quarterly")
Adopt_Quarterly_count

pydb.read_sql_query("select * from fcsq.Adopt_Quarterly WHERE year = 2014 AND quarter = \'Q2' ORDER BY rep_cat")

Unnamed: 0,type,year,quarter,rep_cat,n,mean
0,Adoption,2014,Q2,1 Both,17,35.058824
1,Adoption,2014,Q2,2 Applicant Only,58,23.672414
2,Adoption,2014,Q2,3 Respondent Only,45,24.444444
3,Adoption,2014,Q2,4 Neither,1580,14.211392
4,Adoption,2014,Q2,5 Unknown,15,11.8
5,Adoption,2014,Q2,All,1715,14.985423


## Adopt_Annual table

### Drop the Adopt_Annual table if it already exists and remove its data from the S3 bucket

In [57]:
drop_Adopt_Annual = f"""
DROP TABLE IF EXISTS fcsq.Adopt_Annual;
"""
pydb.start_query_execution_and_wait(drop_Adopt_Annual)

# clean up previous Adopt_Annual files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adopt_Annual/").delete()

[{'ResponseMetadata': {'RequestId': '218WQMT2C56V9PG7',
   'HostId': 'L7Te+uGOip3m4jMQN3/H8gudaU5bDI2QRcuEsHvexEMLkUnJltc2ZaY//Eq/nsIW5cIMfibi94g=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'L7Te+uGOip3m4jMQN3/H8gudaU5bDI2QRcuEsHvexEMLkUnJltc2ZaY//Eq/nsIW5cIMfibi94g=',
    'x-amz-request-id': '218WQMT2C56V9PG7',
    'date': 'Thu, 07 Apr 2022 11:06:35 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adopt_Annual/20220406_163318_00092_9fx83_077dec32-8ed5-4e00-8f3d-7c2a3cad494f',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'qXOAeNcndjFjocuXNOt.YUljmOMNsSmg'},
   {'Key': 'fcsq_processing/Adoption/Adopt_Annual/20220406_163318_00092_9fx83_298b0f7f-f431-4cf7-a9eb-28240b7843d8',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'NSnWWGkiOdGdEZiynp7OMmEDb8zXtaZV'},
   {'Key': 'fcsq_processing/Adoption/Adop

### Create the Adopt_Annual table in Athena

In [58]:
create_Adopt_Annual = f"""
CREATE TABLE IF NOT EXISTS fcsq.Adopt_Annual
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Annual') AS
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year
"""

pydb.start_query_execution_and_wait(create_Adopt_Annual)



{'QueryExecutionId': '4acbd691-ac97-4198-ad27-0f242c4121b6',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.Adopt_Annual\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adopt_Annual') AS\nSELECT DISTINCT\n        'Adoption' as type,\n        year,\n        'N/A' as quarter,\n        rep_cat,\n        count(*) as n,\n        avg(wait_weeks) as mean\n    FROM \n        mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_DISPOSALS_FINAL_2\n    WHERE year > 2010 \n    AND year < 2021\n    AND adoption='Adoption'\nGROUP BY\n    year,\n    rep_cat\n    \nUNION ALL\nSELECT DISTINCT\n        'Adoption' as type,\n        year,\n        'N/A' as quarter,\n        'All' as rep_cat,\n        count(*) as n,\n        avg(wait_weeks) as mean\n    FROM \n        mojap_de_temp_alpha_user_thomasauburnmoj.ADOPT_DISPOSALS_FINAL_2\n    WHERE year > 2010 \n    AND year < 2021\n    AND adoption='Adoption'\nGROUP BY\n    year",
 'StatementType': 'DDL',
 'ResultConfigurat

#### Adopt_Annual validation

In [59]:
Adopt_Annual_count = pydb.read_sql_query("select count(*) as count from fcsq.Adopt_Annual")
Adopt_Annual_count



Unnamed: 0,count
0,60


In [60]:
df = pydb.read_sql_query("SELECT * FROM fcsq.Adoption")
df.to_csv(path_or_buf = 's3://alpha-family-data/CSVs/Adoption_legrep.csv',index=False)
#df.to_excel('adoption.xlsx')


In [61]:
df = pydb.read_sql_query("SELECT * FROM fcsq.Adopt_Annual UNION ALL SELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat")
df.to_csv(path_or_buf = '~/FCSQ_data/timeliness.csv',index=False)

## adopt_timeliness_combined table

### Drop the adopt_timeliness_combined table if it already exists and remove its data from the S3 bucket

In [62]:
drop_adopt_timeliness_combined = f"""
DROP TABLE IF EXISTS fcsq.adopt_timeliness_combined;
"""
pydb.start_query_execution_and_wait(drop_adopt_timeliness_combined)

# clean up previous adopt_timeliness_combined files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_timeliness_combined/").delete()

[{'ResponseMetadata': {'RequestId': 'H4GSCFWHYXMK14DN',
   'HostId': 'Tm4eeYQXxiv35JOAIqxpEcqQzsPBQKinvFxg012J3n5uYFZor9+JV6uMIUoYt46uj61KJLCyhqQ=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Tm4eeYQXxiv35JOAIqxpEcqQzsPBQKinvFxg012J3n5uYFZor9+JV6uMIUoYt46uj61KJLCyhqQ=',
    'x-amz-request-id': 'H4GSCFWHYXMK14DN',
    'date': 'Thu, 07 Apr 2022 11:07:01 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_timeliness_combined/20220406_163345_00098_w2gwr_641c9532-49dc-4cd1-a80b-a588edb94abd',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'aBhgNr3OHc3wLVoX6bsv1R7BUYCoi26S'}]}]

### Create the adopt_timeliness_combined table in Athena

In [63]:
create_adopt_timeliness_combined = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_timeliness_combined
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_timeliness_combined') AS
SELECT * FROM fcsq.Adopt_Annual 
UNION ALL 
SELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat
"""

pydb.start_query_execution_and_wait(create_adopt_timeliness_combined)



{'QueryExecutionId': 'f4df8bb6-3f5a-4e4b-bdf0-27c73f102519',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_timeliness_combined\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_timeliness_combined') AS\nSELECT * FROM fcsq.Adopt_Annual \nUNION ALL \nSELECT * FROM fcsq.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/f4df8bb6-3f5a-4e4b-bdf0-27c73f102519'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 11, 7, 2, 742000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 11, 7, 4, 973000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 2062,
  'DataScannedInBytes': 7823,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/f4df8bb6-3f5a-4e4b-bdf0-27c73f102519-m

#### adopt_timeliness_combined validation

In [64]:
adopt_timeliness_combined_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_timeliness_combined")
adopt_timeliness_combined_count

Unnamed: 0,count
0,318


In [65]:
df = pydb.read_sql_query("SELECT * FROM fcsq.adopt_timeliness_combined WHERE (Year<2021 OR Quarter!='Q3') AND (Year<2021 OR Quarter!='Q2')")
df = df.pivot_table(index=['type','year','quarter'],columns=['rep_cat'],values = ['n','mean'],aggfunc=sum, fill_value=0).swaplevel(axis=1).sort_index(axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,rep_cat,1 Both,1 Both,2 Applicant Only,2 Applicant Only,3 Respondent Only,3 Respondent Only,4 Neither,4 Neither,5 Unknown,5 Unknown,All,All
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,n,mean,n,mean,n,mean,n,mean,n,mean,n
type,year,quarter,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Adoption,2011,,28.56,100,18.811024,254,18.597765,179,15.139358,4083,14.797688,173,15.731259,4789
Adoption,2011,Q1,31.761905,21,21.619048,63,20.225,40,15.391906,939,14.319149,47,16.183784,1110
Adoption,2011,Q2,35.125,16,18.923077,52,16.181818,44,14.228782,1084,14.947368,38,14.789303,1234
Adoption,2011,Q3,25.392857,28,16.2,65,19.674419,43,15.819556,992,13.931034,58,16.113828,1186
Adoption,2011,Q4,26.171429,35,18.635135,74,18.5,52,15.209738,1068,17.033333,30,15.895155,1259
Adoption,2012,,29.315789,76,22.701299,231,19.245399,163,13.914185,4801,18.504762,105,14.760789,5376
Adoption,2012,Q1,26.071429,14,21.688525,61,19.692308,39,14.21978,1092,17.08,25,14.956133,1231
Adoption,2012,Q2,29.733333,15,25.888889,63,18.7,40,13.996473,1134,19.914286,35,15.069153,1287
Adoption,2012,Q3,33.0,23,20.846154,52,21.459459,37,14.154589,1242,19.125,24,15.004354,1378
Adoption,2012,Q4,27.416667,24,21.927273,55,17.595745,47,13.369842,1333,17.142857,21,14.103378,1480


In [66]:
df.to_csv(path_or_buf = '~/FCSQ_data/timeliness.csv',index=False)