# Adoption extraction tables

# Contents
1. [Import packages and options](#import_packages)
2. [Define key variables](#define_key_variables)
3. [Adoption_parties](#adoption_parties)
4. [Adoptions_applications](#adoptions_applications)
5. [Adoptions_cases](#adoptions_cases)
6. [Adoptions_disposals](#adoptions_disposals)
7. [Adoptions_disposal_fields](#adoptions_disposal_fields)
8. [adopt_app_count](#adopt_app_count)
9. [adopt_appl_rtc](#adopt_appl_rtc)
10. [adopt_applicant_type](#adopt_applicant_type)
11. [adopt_child_parties](#adopt_child_parties)
12. [adopt_application_4](#adopt_application_4)
13. [adopt_application_5](#adopt_application_5)
14. [adopt_apps_6_adoptions_only](#adopt_apps_6_adoptions_only)
15. [adopt_apps_6_non_adoptions](#adopt_apps_6_non_adoptions)
16. [summaries](#summaries)
17. [adopt_adopt_only_case_count](#adopt_adopt_only_case_count)
18. [disposals](#disposals)
19. [adopt_order_type](#adopt_order_type)
20. [adopt_disposals_type](#adopt_disposals_type)
21. [adopt_country_of_birth](#adopt_country_of_birth)
22. [adopt_disposals_with_child](#adopt_disposals_with_child)
23. [adopt_disposals5](#adopt_disposals5)
24. [adopt_disposals5_adoption](#adopt_disposals5_adoption)
25. [adopt_disposals5_non_adoption](#adopt_disposals5_non_adoption)
26. [adopt_disposals5_non_orders](#adopt_disposals5_non_orders)
27. [adopt_disposals5_2011](#adopt_disposals5_2011)
28. [Country of birth lookup update](#Country of birth lookup update)
29. [adopt_birth_country_unknown](#adopt_birth_country_unknown)
30. [adopt_birth_country_added](#adopt_birth_country_added)
31. [adopt_country_lookup_update](#adopt_country_lookup_update)
32. [adopt_birth_country_lookup](#adopt_birth_country_lookup)
33. [Stage 5 - Preparing the final output](#Stage 5 - Preparing the final output)
34. [adopt_adoptions_only_standard](#adopt_adoptions_only_standard)
35. [adopt_adoption_only_convention](#adopt_adoption_only_convention)
36. [adopt_adoption_only_foreign](#adopt_adoption_only_foreign)
37. [adopt_adoptions_other_only](#adopt_adoptions_other_only)
38. [adopt_adoption_only](#adopt_adoption_only)
39. [adopt_adoption_placement](#adopt_adoption_placement)
40. [adopt_placement_revoke_or_vary](#adopt_placement_revoke_or_vary)
41. [adopt_adoption_contact_s26](#adopt_adoption_contact_s26)
42. [adopt_contact_s26_revoke_vary](#adopt_contact_s26_revoke_vary)
43. [adopt_change_surname](#adopt_change_surname)
44. [adopt_remove_child_from_uk](#adopt_remove_child_from_uk)
45. [adopt_other_order_type](#adopt_other_order_type)
46. [adopt_non_adopt_types](#adopt_non_adopt_types)
47. [adopt_non_adoptions](#adopt_non_adoptions)
48. [adopt_applications](#adopt_applications)
49. [adopt_disp_details](#adopt_disp_details)
50. [adopt_disp_csv_data](#adopt_disp_csv_data)
51. [adopt_disp_order_type](#adopt_disp_order_type)
52. [adopt_disposals](#adopt_disposals)
53. [adopt_application_case_count](#adopt_application_case_count)
54. [adopt_disposal_case_count](#adopt_disposal_case_count)
55. [adopt_csv](#adopt_csv)
56. [adopt_csv2](#adopt_csv2)




## Import packages and set options 
<a name="import_packages"></a>

In [1]:
import pandas as pd  # for the data structures to store and manipulate tables
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools
import boto3  # for working with AWS

# few things for viewing dataframes better
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

## Define some variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [2]:
database = "familyman_dev_v2"
snapshot_date = "2021-08-19"
database_derived = "familyman_derived_dev_v1"

fcsq_database = "fcsq"

s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

## Adoption_parties table
<a name="adoption_parties"></a>

### Drop the adoptions_parties table if it already exists and remove its data from the S3 bucket

In [3]:
drop_adoptions_parties = f"""
DROP TABLE IF EXISTS fcsq.ADOPTIONS_PARTIES;
"""
pydb.start_query_execution_and_wait(drop_adoptions_parties)

# clean up previous adoptions_parties files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoptions_Parties/").delete()

[{'ResponseMetadata': {'RequestId': 'NQY4B0MTVSG6310N',
   'HostId': 'vCYDQRN07SBr/xXn4eOe3J6x4KTrB2ItXSgwKF8wZF38lhIhpTNMvGrtMM0Mw66HZJyNwg14QZA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'vCYDQRN07SBr/xXn4eOe3J6x4KTrB2ItXSgwKF8wZF38lhIhpTNMvGrtMM0Mw66HZJyNwg14QZA=',
    'x-amz-request-id': 'NQY4B0MTVSG6310N',
    'date': 'Wed, 06 Apr 2022 16:46:50 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoptions_Parties/20220406_153523_00112_6sk6p_b78b99a3-1298-47b8-84e7-d455ea6af855',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'lubNV6eA_JjJUF9FjlsnRVx2IC1p6IJ8'},
   {'Key': 'fcsq_processing/Adoption/Adoptions_Parties/20220406_153523_00112_6sk6p_2675c408-82af-4538-b8fa-a979ca25b245',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'vgBXn54ABU0dV9mabQpSI798uOaLIsSk'},
   {'Key': 'fcsq_processing/Ado

### Create the adoptions_parties table in Athena

In [4]:
create_adoptions_parties = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_PARTIES
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_Parties') AS
SELECT R.CASE_NUMBER,
  R.ROLE,
  R.ROLE_MODEL,
  R.REPRESENTATIVE_ROLE AS Representative,
  F.FIELD_MODEL,
  F.VALUE AS Rel_to_child,
  R.PARTY,
  P.DOB,
  P.GENDER,
  P.MOJAP_SNAPSHOT_DATE
FROM {database}.parties P
INNER JOIN {database}.roles R
ON P.PARTY = R.PARTY
INNER JOIN {database}.role_fields F
ON R.ROLE               = F.ROLE
WHERE ((R.ROLE_MODEL     = 'APLZ'
AND F.FIELD_MODEL = 'APLZ_RTC')
OR (R.ROLE_MODEL        = 'CHLDZ'
AND F.FIELD_MODEL = 'CHLDZ_CP'))
AND R.mojap_snapshot_date = date '{snapshot_date}'
AND F.mojap_snapshot_date = date '{snapshot_date}'
AND P.mojap_snapshot_date = date '{snapshot_date}'
"""

pydb.start_query_execution_and_wait(create_adoptions_parties)



{'QueryExecutionId': 'e179b15c-b0b9-45e4-9939-70c98ac6e466',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_PARTIES\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_Parties') AS\nSELECT R.CASE_NUMBER,\n  R.ROLE,\n  R.ROLE_MODEL,\n  R.REPRESENTATIVE_ROLE AS Representative,\n  F.FIELD_MODEL,\n  F.VALUE AS Rel_to_child,\n  R.PARTY,\n  P.DOB,\n  P.GENDER,\n  P.MOJAP_SNAPSHOT_DATE\nFROM familyman_dev_v2.parties P\nINNER JOIN familyman_dev_v2.roles R\nON P.PARTY = R.PARTY\nINNER JOIN familyman_dev_v2.role_fields F\nON R.ROLE               = F.ROLE\nWHERE ((R.ROLE_MODEL     = 'APLZ'\nAND F.FIELD_MODEL = 'APLZ_RTC')\nOR (R.ROLE_MODEL        = 'CHLDZ'\nAND F.FIELD_MODEL = 'CHLDZ_CP'))\nAND R.mojap_snapshot_date = date '2021-08-19'\nAND F.mojap_snapshot_date = date '2021-08-19'\nAND P.mojap_snapshot_date = date '2021-08-19'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-5932916327

#### Adoption_parties validation

In [5]:
# Query the table just created. The database to query is called __temp__, this is
# an alias for a sandbox database that is created for each user. For more details, see
# the pydbtools docs

parties_count = pydb.read_sql_query("select count(*) as count from fcsq.adoptions_parties")

parties_count

Unnamed: 0,count
0,311659


## Adoptions_applications table
<a name="adoptions_applications"></a>

### Drop the adoptions_applications table if it already exists and remove its data from the S3 bucket

In [6]:
drop_adoptions_applications = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_APPLICATIONS;"
pydb.start_query_execution_and_wait(drop_adoptions_applications)

bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_applications/"
).delete()

[{'ResponseMetadata': {'RequestId': '0VDRH9TA5K3EXD32',
   'HostId': 'Q7vw2t9AE2I3k0NmfUkpORPyQHYF44guPT02MYK8+fBjD/OzpqqLNvWyYXbCWra0Tr81/uUhB0E=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Q7vw2t9AE2I3k0NmfUkpORPyQHYF44guPT02MYK8+fBjD/OzpqqLNvWyYXbCWra0Tr81/uUhB0E=',
    'x-amz-request-id': '0VDRH9TA5K3EXD32',
    'date': 'Wed, 06 Apr 2022 16:47:21 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoptions_applications/20220208_114022_00049_65igx_59ce96c2-c085-4364-8ddf-9d83c6ab0389',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'j9bOQyshVuUqGHTomNsFBEymuH73TVUO'},
   {'Key': 'fcsq_processing/Adoption/Adoptions_applications/20220208_114022_00049_65igx_f86125df-4e54-4f3b-96a3-8d1f77587242',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '9SJCm8dmuEzqI9X9pe9Pj5MMLTA2rCqf'},
   {'Key': 'fcsq_proc

### Create the adoptions_applications table in Athena

In [7]:
create_adoptions_applications_u21 = f"""
SELECT E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  CASE E.RECEIPT_DATE
      WHEN NULL THEN E.ENTRY_DATE
      ELSE E.RECEIPT_DATE
  END AS Receipt_date2,
  E.EVENT_MODEL,
  F.VALUE  AS App_Type,
  date_parse(F1.VALUE, '%Y-%m-%d') AS Date_of_issue,
  F2.VALUE AS High_court
FROM {database}.events E
INNER JOIN {database}.event_fields F2
ON E.EVENT = F2.EVENT
INNER JOIN {database}.event_fields F1
ON E.EVENT = F1.EVENT
INNER JOIN {database}.event_fields F
ON E.EVENT               = F.EVENT
WHERE (E.EVENT_MODEL      = 'U21'
AND F.FIELD_MODEL  = 'U21_1'
AND F1.FIELD_MODEL = 'U21_2'
AND F2.FIELD_MODEL = 'U21_HC')
AND E.mojap_snapshot_date = date '{snapshot_date}'
AND F.mojap_snapshot_date = date '{snapshot_date}'
AND F1.mojap_snapshot_date = date '{snapshot_date}'
AND F2.mojap_snapshot_date = date '{snapshot_date}';
"""


pydb.create_temp_table(create_adoptions_applications_u21, "adoptions_applications_u21")
print("u21 applications done")

create_adoptions_applications_G50 = f"""
SELECT
  E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  CASE E.RECEIPT_DATE
      WHEN NULL THEN E.ENTRY_DATE
      ELSE E.RECEIPT_DATE
  END AS Receipt_date2,
  E.EVENT_MODEL,
  CASE WHEN F.VALUE like '%RUK%' and F.VALUE like '%CCS%' THEN 'RUK,CCS'
       WHEN F.VALUE like '%RUK%' THEN 'RUK'
       WHEN F.VALUE like '%CCS%' THEN 'CCS'
        END AS App_Type, /*Have done it like this so that we do not include non adoption app types*/
  CASE E.RECEIPT_DATE
      WHEN NULL THEN E.ENTRY_DATE
      ELSE E.RECEIPT_DATE
  END AS Date_of_issue,
  'N' AS High_court
FROM
  {database}.events E
  INNER JOIN {database}.event_fields F
    ON E.EVENT = F.EVENT
WHERE
  F.FIELD_MODEL = 'G50_AT'
  AND (F.VALUE like '%RUK%'
   OR F.VALUE like '%CCS%')
   AND E.mojap_snapshot_date = date '{snapshot_date}'
   AND F.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.create_temp_table(create_adoptions_applications_G50, "adoptions_applications_G50")


print("G50 cases done")

create_adoptions_applications = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_APPLICATIONS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_applications') AS
SELECT
  *
FROM
  __temp__.ADOPTIONS_APPLICATIONS_U21
WHERE case_number <> 'CV11Z00105'
UNION ALL
SELECT
  *
FROM
  __temp__.ADOPTIONS_APPLICATIONS_G50
WHERE case_number <> 'CV11Z00105';
"""

pydb.start_query_execution_and_wait(create_adoptions_applications)

print("adoptions_applications table created")


u21 applications done
G50 cases done
adoptions_applications table created


#### Adoptions_applications validation

In [8]:
applications_count = pydb.read_sql_query("select count(*) as count from fcsq.adoptions_applications")
applications_count

Unnamed: 0,count
0,175169


## Adoptions_cases table
<a name="adoptions_cases"></a>

### Drop the adoptions_cases table if it already exists and remove its data from the S3 bucket

In [9]:
drop_adoptions_cases = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_CASES;"
pydb.start_query_execution_and_wait(drop_adoptions_cases)

bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoptions_cases/").delete()

[{'ResponseMetadata': {'RequestId': 'N4BSGGGDPASEGYT3',
   'HostId': '3uL2MF56H1q+oouzLMboczj44ScRICWI2pdwJHfOLO1LSipSi+N+9TYJMNbrL8SEg24fW/GltUk=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '3uL2MF56H1q+oouzLMboczj44ScRICWI2pdwJHfOLO1LSipSi+N+9TYJMNbrL8SEg24fW/GltUk=',
    'x-amz-request-id': 'N4BSGGGDPASEGYT3',
    'date': 'Wed, 06 Apr 2022 16:48:04 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoptions_cases/20220208_114028_00014_6fsnz_bc120fa9-0499-4b62-a28b-e16e5a5297f2',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'inzaBOKVJadzOgCPWDKMU.ir13QJWz.U'},
   {'Key': 'fcsq_processing/Adoption/Adoptions_cases/20220208_114028_00014_6fsnz_8a109ade-7d51-4541-af35-566b96531d6a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'yp6EYjJcMa8mvqO7qk3UhiHXcFt1zHPp'},
   {'Key': 'fcsq_processing/Adoptio

### Create the adoptions_cases table in Athena

In [10]:
create_adoptions_cases = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_CASES
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_cases') AS
SELECT F.CASE_NUMBER,
  F.VALUE AS Contested,
  F1.VALUE AS Case_issue_date,
  F2.VALUE AS Case_app_type,
  CASE F.CASE_NUMBER
      WHEN 'BM08Z09028' THEN '2008-09-19'
      ELSE F1.VALUE
  END AS Case_issue_date2
FROM {database}.case_fields F
INNER JOIN {database}.case_fields F2
ON F.CASE_NUMBER = F2.CASE_NUMBER
INNER JOIN {database}.case_fields F1
ON F.CASE_NUMBER    = F1.CASE_NUMBER
WHERE F.FIELD_MODEL = 'FM3A_AC'
AND F1.FIELD_MODEL  = 'FM3A_DOI'
AND F2.FIELD_MODEL  = 'FM3A_AT'
AND F.case_number <> 'CV11Z00105'
AND F.mojap_snapshot_date = date '{snapshot_date}'
AND F1.mojap_snapshot_date = date '{snapshot_date}'
AND F2.mojap_snapshot_date = date '{snapshot_date}';

"""
pydb.start_query_execution_and_wait(create_adoptions_cases)

{'QueryExecutionId': '5b603aad-048d-4ccc-a4aa-687e1119cbf7',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_CASES\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_cases') AS\nSELECT F.CASE_NUMBER,\n  F.VALUE AS Contested,\n  F1.VALUE AS Case_issue_date,\n  F2.VALUE AS Case_app_type,\n  CASE F.CASE_NUMBER\n      WHEN 'BM08Z09028' THEN '2008-09-19'\n      ELSE F1.VALUE\n  END AS Case_issue_date2\nFROM familyman_dev_v2.case_fields F\nINNER JOIN familyman_dev_v2.case_fields F2\nON F.CASE_NUMBER = F2.CASE_NUMBER\nINNER JOIN familyman_dev_v2.case_fields F1\nON F.CASE_NUMBER    = F1.CASE_NUMBER\nWHERE F.FIELD_MODEL = 'FM3A_AC'\nAND F1.FIELD_MODEL  = 'FM3A_DOI'\nAND F2.FIELD_MODEL  = 'FM3A_AT'\nAND F.case_number <> 'CV11Z00105'\nAND F.mojap_snapshot_date = date '2021-08-19'\nAND F1.mojap_snapshot_date = date '2021-08-19'\nAND F2.mojap_snapshot_date = date '2021-08-19'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocat

#### Adoptions_cases validation

In [11]:
cases_count = pydb.read_sql_query("select count(*) as count from fcsq.adoptions_cases")
cases_count

Unnamed: 0,count
0,133333


## Adoptions_disposals table
<a name="adoptions_disposals"></a>

### Drop the adoptions_disposals table if it already exists and remove its data from the S3 bucket

In [12]:
drop_adoptions_disposals = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_DISPOSALS;"
pydb.start_query_execution_and_wait(drop_adoptions_disposals)

bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoptions_disposals/").delete()

[{'ResponseMetadata': {'RequestId': 'P7JKFTAVRXMBFGJW',
   'HostId': '+h6B5zJREyanDmV04m3EV/NAkUbl8rZS5OwgsS23TzSMPugtFjwAN65ZBxYN/OmOiL5qBPB0RA8=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '+h6B5zJREyanDmV04m3EV/NAkUbl8rZS5OwgsS23TzSMPugtFjwAN65ZBxYN/OmOiL5qBPB0RA8=',
    'x-amz-request-id': 'P7JKFTAVRXMBFGJW',
    'date': 'Wed, 06 Apr 2022 16:48:19 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoptions_disposals/20220208_114038_00046_wsbdw_69055f4d-37f5-4c20-9c3e-bfb16a9bd1e1',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'ywFe5H22fti8RC1YZiA659dbZkBR6lnM'},
   {'Key': 'fcsq_processing/Adoption/Adoptions_disposals/20220208_114038_00046_wsbdw_6f5fd5a6-b0a2-4fcd-809e-bec6d18e2894',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'DLUfLgOenE6efbQ_SgHTvrlJGa04wD0x'},
   {'Key': 'fcsq_processing

### Create the adoptions_disposals table in Athena

In [13]:
create_adoptions_disposals = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSALS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposals') AS
SELECT E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  CASE E.RECEIPT_DATE
      WHEN NULL THEN E.ENTRY_DATE
      ELSE E.RECEIPT_DATE  
  END AS Receipt_date2,
  E.EVENT_MODEL
FROM {database}.events E
WHERE ((E.EVENT_MODEL IN ('A70', 'A71', 'A72', 'A73', 'A74', 'A75', 'A76', 'A77', 'A78', 'A79', 'A80', 'A81', 'A12', 'A13', 'A15')
AND E.ERROR          = 'N')
OR (E.EVENT_MODEL    IN ('G63', 'ORDREF', 'ORDNOM')
AND E.ERROR          = 'N')
OR (E.EVENT_MODEL    IN ('A12', 'A13', 'A15')
AND E.ERROR          = 'N'))
AND E.case_number <> 'CV11Z00105'
AND E.mojap_snapshot_date = date '{snapshot_date}'; 
"""
pydb.start_query_execution_and_wait(create_adoptions_disposals)

{'QueryExecutionId': '6077748d-8609-4b34-a4de-5c4641464b8e',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSALS\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposals') AS\nSELECT E.CASE_NUMBER,\n  E.EVENT,\n  E.RECEIPT_DATE,\n  E.ENTRY_DATE,\n  CASE E.RECEIPT_DATE\n      WHEN NULL THEN E.ENTRY_DATE\n      ELSE E.RECEIPT_DATE  \n  END AS Receipt_date2,\n  E.EVENT_MODEL\nFROM familyman_dev_v2.events E\nWHERE ((E.EVENT_MODEL IN ('A70', 'A71', 'A72', 'A73', 'A74', 'A75', 'A76', 'A77', 'A78', 'A79', 'A80', 'A81', 'A12', 'A13', 'A15')\nAND E.ERROR          = 'N')\nOR (E.EVENT_MODEL    IN ('G63', 'ORDREF', 'ORDNOM')\nAND E.ERROR          = 'N')\nOR (E.EVENT_MODEL    IN ('A12', 'A13', 'A15')\nAND E.ERROR          = 'N'))\nAND E.case_number <> 'CV11Z00105'\nAND E.mojap_snapshot_date = date '2021-08-19'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west

#### Adoptions_disposals validation

In [14]:
disposals_count = pydb.read_sql_query("select count(*) as count from fcsq.adoptions_disposals")
disposals_count

Unnamed: 0,count
0,478447


## Adoptions_disposal_fields table
<a name="adoptions_disposal_fields"></a>

### Drop the adoptions_disposal_fields table if it already exists and remove its data from the S3 bucket

In [15]:
drop_adoptions_disposal_fields = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_DISPOSAL_FIELDS;"
pydb.start_query_execution_and_wait(drop_adoptions_disposal_fields)

bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_disposal_fields/"
).delete()

[{'ResponseMetadata': {'RequestId': 'FG5EHARP84CQWS3Y',
   'HostId': 'crreztcU0qskrxQj6FO/2n6msfLCdVU8b+5Ps4bDc4RcIr1nBNwB1Hau7/UTMbKkkiVty+84Q6w=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'crreztcU0qskrxQj6FO/2n6msfLCdVU8b+5Ps4bDc4RcIr1nBNwB1Hau7/UTMbKkkiVty+84Q6w=',
    'x-amz-request-id': 'FG5EHARP84CQWS3Y',
    'date': 'Wed, 06 Apr 2022 16:48:33 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/Adoptions_disposal_fields/20220208_114047_00040_z3s2z_1addf592-9e82-4ed5-a16b-4d32c230eb52',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'GBnk3VdE65dCQzqJ1qPiL_flQx8hK48s'},
   {'Key': 'fcsq_processing/Adoption/Adoptions_disposal_fields/20220208_114047_00040_z3s2z_5d91c866-4f2c-4a02-9c69-8f5467bec4bb',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '180gj5HFng6VwtRPuPoK9KXv6af36AlO'},
   {'Key': 'fcs

### Create the adoptions_disposal_fields table in Athena

In [16]:
create_adoptions_disposal_fields = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSAL_FIELDS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposal_fields') AS
SELECT EVENT,
FIELD_MODEL,
VALUE
FROM {database}.event_fields E
WHERE (FIELD_MODEL LIKE 'A1%'
  OR FIELD_MODEL LIKE 'A7%'
  OR FIELD_MODEL IN ('G63_1', 'ORDNOM_5', 'ORDREF_5', 'A80_4', 'A81_5'))
  AND E.mojap_snapshot_date = date '{snapshot_date}';
"""
pydb.start_query_execution_and_wait(create_adoptions_disposal_fields)

{'QueryExecutionId': 'b5764b19-6e16-4f6d-8b52-ebba8ae4cc92',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSAL_FIELDS\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposal_fields') AS\nSELECT EVENT,\nFIELD_MODEL,\nVALUE\nFROM familyman_dev_v2.event_fields E\nWHERE (FIELD_MODEL LIKE 'A1%'\n  OR FIELD_MODEL LIKE 'A7%'\n  OR FIELD_MODEL IN ('G63_1', 'ORDNOM_5', 'ORDREF_5', 'A80_4', 'A81_5'))\n  AND E.mojap_snapshot_date = date '2021-08-19'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/b5764b19-6e16-4f6d-8b52-ebba8ae4cc92'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 48, 35, 269000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 48, 46, 204000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 10791,
  'DataSca

#### Adoptions_disposal_fields validation

In [17]:
disposal_fields_count = pydb.read_sql_query("select count(*) as count from fcsq.adoptions_disposal_fields")
disposal_fields_count

Unnamed: 0,count
0,1216487


# Stage 1 - Create The Main Parties Data

## Adopt_app_count table
<a name="adopt_app_count"></a>

### Drop the adopt_app_count table if it already exists and remove its data from the S3 bucket

In [18]:
drop_adopt_app_count = f"""
DROP TABLE IF EXISTS fcsq.ADOPT_APP_COUNT;
"""
pydb.start_query_execution_and_wait(drop_adopt_app_count)

# clean up previous adopt_app_count files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_app_count/").delete()

[{'ResponseMetadata': {'RequestId': '0818T1PJJFDDY8BH',
   'HostId': 'og4dS47hXrGjyGFRnw6pp603CdSdvcOVRq2RKt34ESBI5fdH2RcX+H6EvlWxCbZGmEvmO3FVbto=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'og4dS47hXrGjyGFRnw6pp603CdSdvcOVRq2RKt34ESBI5fdH2RcX+H6EvlWxCbZGmEvmO3FVbto=',
    'x-amz-request-id': '0818T1PJJFDDY8BH',
    'date': 'Wed, 06 Apr 2022 16:48:55 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_app_count/20220208_114056_00016_49pf3_db600184-9725-4d23-9a55-c4d29ef7ca69',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'bLTFD1d7i8Iv6n54T6nv2iti0fa6hGJd'},
   {'Key': 'fcsq_processing/Adoption/adopt_app_count/20220208_114056_00016_49pf3_a79e2789-c73e-4721-af8a-bb848e86016a',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '.qXaIySfe_ZWFeslwPk8CyUxlUn7ZQkq'},
   {'Key': 'fcsq_processing/Adoptio

### Create the adopt_app_count table in Athena

In [19]:
create_adopt_app_count = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APP_COUNT
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_app_count') AS
SELECT case_number, role_model, COUNT(role) AS countofrole
FROM fcsq.adoptions_parties
GROUP BY case_number, role_model
HAVING role_model = 'APLZ';
"""

pydb.start_query_execution_and_wait(create_adopt_app_count)



{'QueryExecutionId': 'fa60ccbc-5d8c-46a7-bef7-790cb3627869',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APP_COUNT\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_app_count') AS\nSELECT case_number, role_model, COUNT(role) AS countofrole\nFROM fcsq.adoptions_parties\nGROUP BY case_number, role_model\nHAVING role_model = 'APLZ'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/fa60ccbc-5d8c-46a7-bef7-790cb3627869'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 48, 57, 270000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 48, 59, 545000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 2123,
  'DataScannedInBytes': 2616251,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/fa60ccbc-5d8c-46a

#### adopt_app_count validation

In [20]:
create_adopt_app_count_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_app_count")
create_adopt_app_count_count

Unnamed: 0,count
0,133193


## Adopt_appl_sex table
<a name="adopt_appl_sex"></a>

### Drop the adopt_appl_sex table if it already exists and remove its data from the S3 bucket

In [21]:
drop_adopt_appl_sex = f"""
DROP TABLE IF EXISTS fcsq.ADOPT_APPL_SEX;
"""
pydb.start_query_execution_and_wait(drop_adopt_appl_sex)

# clean up previous adopt_appl_sex files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_appl_sex/").delete()

[{'ResponseMetadata': {'RequestId': 'D6G9506XN7RADW73',
   'HostId': 'caT0HW1ncVcFMexGZWYeWycqzq5+nHAG2yufWokfFE12QAJXnYmdNC0IAg9+3si4P9E4UilRFKU=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'caT0HW1ncVcFMexGZWYeWycqzq5+nHAG2yufWokfFE12QAJXnYmdNC0IAg9+3si4P9E4UilRFKU=',
    'x-amz-request-id': 'D6G9506XN7RADW73',
    'date': 'Wed, 06 Apr 2022 16:49:09 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_appl_sex/20220208_114108_00030_n8n9w_3c9111cf-a6fb-4b6c-b8d3-64fc02bd51d8',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '9G3Uyldb57Qq99Yowx8ETEe_L3PUXDDl'},
   {'Key': 'fcsq_processing/Adoption/adopt_appl_sex/20220208_114108_00030_n8n9w_45158b4c-8367-4bca-bbc6-6a8db260666c',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '5d1UEVyAlwIV3S7hNThbUW6lRt2EFH6U'},
   {'Key': 'fcsq_processing/Adoption/

### Create the adopt_appl_sex table in Athena

In [22]:
create_adopt_appl_sex = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPL_SEX
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_appl_sex') AS
SELECT app_count.case_number, app_count.role_model, 
app_count.countofrole, 
MIN(parties.gender) min_sex,
MAX(parties_1.gender) max_sex
FROM fcsq.adopt_app_count app_count INNER JOIN fcsq.adoptions_parties parties ON app_count.case_number = parties.case_number
INNER JOIN fcsq.adoptions_parties parties_1 ON app_count.case_number = parties_1.case_number
GROUP BY app_count.case_number, app_count.role_model, app_count.countofrole, parties.role_model, parties_1.role_model
HAVING parties.role_model='APLZ' AND parties_1.role_model='APLZ';
"""

pydb.start_query_execution_and_wait(create_adopt_appl_sex)



{'QueryExecutionId': 'a96e9b21-d240-44c4-845c-6861066b96d6',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPL_SEX\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_appl_sex') AS\nSELECT app_count.case_number, app_count.role_model, \napp_count.countofrole, \nMIN(parties.gender) min_sex,\nMAX(parties_1.gender) max_sex\nFROM fcsq.adopt_app_count app_count INNER JOIN fcsq.adoptions_parties parties ON app_count.case_number = parties.case_number\nINNER JOIN fcsq.adoptions_parties parties_1 ON app_count.case_number = parties_1.case_number\nGROUP BY app_count.case_number, app_count.role_model, app_count.countofrole, parties.role_model, parties_1.role_model\nHAVING parties.role_model='APLZ' AND parties_1.role_model='APLZ'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/a96e9b21-d240-44c4-845c-6861066b96d6'},
 'QueryExecutionContext': {},
 'Status': {'State'

#### adopt_appl_sex validation

In [23]:
adopt_appl_sex_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_appl_sex")
adopt_appl_sex_count

Unnamed: 0,count
0,133193


## adopt_appl_rtc table
<a name="adopt_appl_rtc"></a>

### Drop the adopt_appl_rtc table if it already exists and remove its data from the S3 bucket

In [24]:
drop_adopt_appl_rtc = f"""
DROP TABLE IF EXISTS fcsq.adopt_appl_rtc;
"""
pydb.start_query_execution_and_wait(drop_adopt_appl_rtc)

# clean up previous adopt_appl_rtc files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_appl_rtc/").delete()

[{'ResponseMetadata': {'RequestId': 'NEM2Z3XD24ZGJBHG',
   'HostId': 'XZDvV+1snDIkh1QnquT65/zwtRgEf0EHEqWwlW3JSVQcQleEtLNxmBQ+adrK2mcLpgONoQ09VOM=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'XZDvV+1snDIkh1QnquT65/zwtRgEf0EHEqWwlW3JSVQcQleEtLNxmBQ+adrK2mcLpgONoQ09VOM=',
    'x-amz-request-id': 'NEM2Z3XD24ZGJBHG',
    'date': 'Wed, 06 Apr 2022 16:49:23 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_appl_rtc/20220208_114116_00013_j4ryk_03ed8bd3-154b-489f-b583-81ad5d06db58',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'tBPw6Av7QbCgf7KWTbbtCoI1j7QgvV2a'},
   {'Key': 'fcsq_processing/Adoption/adopt_appl_rtc/20220208_114116_00013_j4ryk_2f28717d-fca8-4ef3-bc24-25e251226799',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'bwBTFeKanyyf2Za0FbdtiPAXKiD8hsj.'},
   {'Key': 'fcsq_processing/Adoption/

### Create the adopt_appl_rtc table in Athena

In [25]:
create_adopt_appl_rtc = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPL_RTC
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_appl_rtc') AS
SELECT appl_sex.case_number, 
        appl_sex.role_model, 
        appl_sex.countofrole, 
        appl_sex.min_sex, 
        appl_sex.max_sex, 
        MIN(parties.rel_to_child) min_rtc, 
        MAX(parties_1.rel_to_child) max_rtc
FROM fcsq.adopt_appl_sex appl_sex
INNER JOIN fcsq.adoptions_parties parties 
        ON appl_sex.case_number = parties.case_number 
INNER JOIN fcsq.adoptions_parties parties_1 
        ON appl_sex.case_number = parties_1.case_number
GROUP BY appl_sex.case_number, 
          appl_sex.role_model, 
          appl_sex.countofrole, 
          appl_sex.min_sex, 
          appl_sex.max_sex, 
          parties.role_model, 
          parties_1.role_model
HAVING parties.role_model='APLZ' AND parties_1.role_model='APLZ';
"""

pydb.start_query_execution_and_wait(create_adopt_appl_rtc)



{'QueryExecutionId': '12b17a0c-21dd-492f-a80e-5840e8b33079',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.ADOPT_APPL_RTC\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_appl_rtc') AS\nSELECT appl_sex.case_number, \n        appl_sex.role_model, \n        appl_sex.countofrole, \n        appl_sex.min_sex, \n        appl_sex.max_sex, \n        MIN(parties.rel_to_child) min_rtc, \n        MAX(parties_1.rel_to_child) max_rtc\nFROM fcsq.adopt_appl_sex appl_sex\nINNER JOIN fcsq.adoptions_parties parties \n        ON appl_sex.case_number = parties.case_number \nINNER JOIN fcsq.adoptions_parties parties_1 \n        ON appl_sex.case_number = parties_1.case_number\nGROUP BY appl_sex.case_number, \n          appl_sex.role_model, \n          appl_sex.countofrole, \n          appl_sex.min_sex, \n          appl_sex.max_sex, \n          parties.role_model, \n          parties_1.role_model\nHAVING parties.role_model='APLZ' AND parties_1.role_model='APL

#### adopt_appl_rtc validation

In [26]:
adopt_appl_rtc_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_appl_rtc")
adopt_appl_rtc_count

Unnamed: 0,count
0,133193


## adopt_applicant_type table
<a name="adopt_applicant_type"></a>

### Drop the adopt_applicant_type table if it already exists and remove its data from the S3 bucket

In [27]:
drop_adopt_applicant_type = f"""
DROP TABLE IF EXISTS fcsq.adopt_applicant_type;
"""
pydb.start_query_execution_and_wait(drop_adopt_applicant_type)

# clean up previous adopt_applicant_type files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_applicant_type/").delete()

[{'ResponseMetadata': {'RequestId': 'QD78MQMKZ1C1Q9DX',
   'HostId': 'Akum2Qbhqa07ODnxVdEGLjqBfTLxxuesj5K7/m9vP5JZGHR2P5M+j77X8yDSYapzoSwCv892yyI=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Akum2Qbhqa07ODnxVdEGLjqBfTLxxuesj5K7/m9vP5JZGHR2P5M+j77X8yDSYapzoSwCv892yyI=',
    'x-amz-request-id': 'QD78MQMKZ1C1Q9DX',
    'date': 'Wed, 06 Apr 2022 16:49:38 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_applicant_type/20220208_114141_00042_yafe3_d5b03d31-79fa-4021-b729-6de0b1bccc42',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'NZQW3OmAnaXfam.Bpr2dgE0QUhpJ8p7r'},
   {'Key': 'fcsq_processing/Adoption/adopt_applicant_type/20220208_114141_00042_yafe3_1352bcac-478a-4ace-b363-f4f5b9b2c002',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'A461WhfPO4lf9_lia3WP_J5b1HvKeVG2'},
   {'Key': 'fcsq_processi

### Create the adopt_applicant_type table in Athena

In [28]:
create_adopt_applicant_type_a = f"""
SELECT case_number, 
        role_model, 
        countofrole number_applicants, 
        min_sex, 
        max_sex, 
        min_rtc, 
        max_rtc,
CASE  WHEN countofrole=1 THEN 'single' 
      WHEN min_sex=1 AND max_sex =1 THEN 'same-sex couple' 
      WHEN min_sex=2 AND max_sex=2  THEN 'same-sex couple' 
      WHEN min_sex=1 AND max_sex =2 THEN 'mixed-sex couple'
                ELSE 'Other or not stated'       
END adopter_type
FROM fcsq.adopt_appl_rtc;
"""
pydb.create_temp_table(create_adopt_applicant_type_a, "adopt_applicant_type_a")

create_adopt_applicant_type_b = f"""
SELECT case_number, 
        role_model, 
        number_applicants, 
        min_sex, 
        max_sex, 
        min_rtc,
        max_rtc,
        adopter_type,
CASE WHEN (min_rtc Like 'Step%' OR max_rtc Like 'Step%') THEN 'Step parent'
      WHEN (min_rtc Like 'Natural%' OR max_rtc like 'Natural%') THEN 'Other or not stated'  
      WHEN number_applicants > 2 THEN 'Other or not stated'
      WHEN (min_rtc = 'Other' OR max_rtc = 'Other') THEN 'Other or not stated'
      WHEN (min_rtc IN ('Child','Adoption Agency','Central Authority','Home Office','Local Authority','Voluntary Organisation') AND max_rtc IN ('Child','Adoption Agency','Central Authority','Home Office','Local Authority','Voluntary Organisation')) THEN 'Other or not stated'
      WHEN (min_rtc IN ('Child','Adoption Agency','Central Authority','Home Office','Local Authority','Voluntary Organisation') OR max_rtc IN ('Child','Adoption Agency','Central Authority','Home Office','Local Authority','Voluntary Organisation')) THEN 'Sole applicant'
      WHEN number_applicants >1 THEN adopter_type 
      ELSE 'Sole applicant' 
      END adopter 
FROM __temp__.adopt_applicant_type_a;
"""

pydb.create_temp_table(create_adopt_applicant_type_b, "adopt_applicant_type_b")

create_adopt_applicant_type = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_applicant_type
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_applicant_type') AS
SELECT
case_number, 
  role_model, 
  number_applicants, 
  min_sex, 
  max_sex, 
  min_rtc, 
  max_rtc, 
  adopter_type,
CASE WHEN (min_rtc = 'Foster Carer' AND max_rtc = 'Sibling') THEN 'Other or not stated'
        WHEN (adopter_type = 'mixed-sex couple' AND min_rtc = 'Uncle' AND max_rtc = 'Uncle') THEN 'Other or not stated'
        ELSE adopter
        END adopter
FROM __temp__.adopt_applicant_type_b;
"""

pydb.start_query_execution_and_wait(create_adopt_applicant_type)



{'QueryExecutionId': 'ca40bc3a-b785-4af2-a048-586cdc53f3ae',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_applicant_type\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_applicant_type') AS\nSELECT\ncase_number, \n  role_model, \n  number_applicants, \n  min_sex, \n  max_sex, \n  min_rtc, \n  max_rtc, \n  adopter_type,\nCASE WHEN (min_rtc = 'Foster Carer' AND max_rtc = 'Sibling') THEN 'Other or not stated'\n        WHEN (adopter_type = 'mixed-sex couple' AND min_rtc = 'Uncle' AND max_rtc = 'Uncle') THEN 'Other or not stated'\n        ELSE adopter\n        END adopter\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.adopt_applicant_type_b",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/ca40bc3a-b785-4af2-a048-586cdc53f3ae'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 49, 57

#### adopt_applicant_type validation

In [29]:
adopt_applicant_type_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_applicant_type")
adopt_applicant_type_count

Unnamed: 0,count
0,133193


## adopt_child_parties table
<a name="adopt_child_parties"></a>

### Drop the adopt_child_parties table if it already exists and remove its data from the S3 bucket

In [30]:
drop_adopt_child_parties = f"""
DROP TABLE IF EXISTS fcsq.adopt_child_parties;
"""
pydb.start_query_execution_and_wait(drop_adopt_child_parties)

# clean up previous adopt_child_parties files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_child_parties/").delete()

[{'ResponseMetadata': {'RequestId': '4B9S56E67CF8E38F',
   'HostId': 'TGV6AyS+Jxz9GomutcvEYOoJ2uaRbUF3KmMOIMACdEHnhUCywiU5pgavdXfEmgYF3catjwu9NHU=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'TGV6AyS+Jxz9GomutcvEYOoJ2uaRbUF3KmMOIMACdEHnhUCywiU5pgavdXfEmgYF3catjwu9NHU=',
    'x-amz-request-id': '4B9S56E67CF8E38F',
    'date': 'Wed, 06 Apr 2022 16:50:12 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_child_parties/20220208_114148_00030_snct4_94f3c990-aad6-43ee-8987-ba1a84149c12',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'CWtnkDi2K.xqHbTz0snpQ.WJqmn5dvHC'},
   {'Key': 'fcsq_processing/Adoption/adopt_child_parties/20220208_114148_00030_snct4_f479a18d-8b68-41cf-aa01-6984fffba1ae',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'SXDFWO19zzn9KMq0NWiuEQ4HjKWPIzuK'},
   {'Key': 'fcsq_processing

### Create the adopt_child_parties table in Athena

In [31]:
create_adopt_child_parties = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_child_parties
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_child_parties') AS
SELECT case_number, 
        role child_role, 
        party child_party, 
        dob, 
        gender child_sex
FROM fcsq.adoptions_parties
WHERE role_model='CHLDZ';
"""


"""
Note:
Removed dob TO_DATE as not supported and left dob as original timestamp

"""
pydb.start_query_execution_and_wait(create_adopt_child_parties)



{'QueryExecutionId': '049f465b-6c76-487a-bf0f-561c4dc7e906',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_child_parties\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_child_parties') AS\nSELECT case_number, \n        role child_role, \n        party child_party, \n        dob, \n        gender child_sex\nFROM fcsq.adoptions_parties\nWHERE role_model='CHLDZ'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/049f465b-6c76-487a-bf0f-561c4dc7e906'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 16, 50, 13, 64000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 16, 50, 15, 95000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 1888,
  'DataScannedInBytes': 4244603,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-we

#### adopt_child_parties validation

In [32]:
adopt_child_parties_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_child_parties")
adopt_child_parties_count

Unnamed: 0,count
0,130580


# Stage 2 - Applications

## adopt_application_4 table
<a name="adopt_application_4"></a>

### Drop the adopt_application_4 table if it already exists and remove its data from the S3 bucket

In [33]:
drop_adopt_application_4 = f"""
DROP TABLE IF EXISTS fcsq.adopt_application_4;
"""
pydb.start_query_execution_and_wait(drop_adopt_application_4)

# clean up previous adopt_application_4 files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_application_4/").delete()

[{'ResponseMetadata': {'RequestId': 'ZCQFMSDN62WC2EAR',
   'HostId': 'Qrny/X1qwdodTCfAR7/okK7fhNGbIrNzG+kdUSZI00O1Vjx2H/hBdUsljpNX8NGTZM1+y9sX6OA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Qrny/X1qwdodTCfAR7/okK7fhNGbIrNzG+kdUSZI00O1Vjx2H/hBdUsljpNX8NGTZM1+y9sX6OA=',
    'x-amz-request-id': 'ZCQFMSDN62WC2EAR',
    'date': 'Wed, 06 Apr 2022 16:50:23 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_application_4/20220208_115022_00038_3g97r_10d468b4-0e58-4385-b1da-7d77ffc0e812',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'E3zNdTFeUCGKu5F4cgpy98JSzwssdrn5'},
   {'Key': 'fcsq_processing/Adoption/adopt_application_4/20220208_115022_00038_3g97r_48861de7-b7ee-4746-95c7-3099cade45ef',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Ei2u2bYqxncJtPwp53ARO.2R719eH7Ir'},
   {'Key': 'fcsq_processing

### Create the adopt_application_4 table in Athena

In [34]:
create_adopt_application1 = f"""
SELECT     a1.case_number, 
        a1.event, 
        a1.receipt_date2, 
        a1.app_type, 
        c1.case_app_type, 
        a1.date_of_issue, 
        date_parse(c1.case_issue_date2, '%Y-%m-%d') AS case_issue_date2,
        a1.high_court, 
        c1.contested, 
        b1.number_applicants, 
        b1.min_sex, 
        b1.max_sex, 
        b1.adopter_type, 
        b1.min_rtc,
        b1.max_rtc, 
        b1.adopter, 
        SUBSTR(cast(event as varchar(50)), 1,3) court, 
        ', '||  app_type ||  ',' app_type2,
    
        CASE WHEN a1.date_of_issue <= a1.receipt_date2 
                THEN a1.date_of_issue
              ELSE a1.receipt_date2
              END min_date,

        CASE WHEN a1.app_type='AO' OR a1.app_type='CA' OR a1.app_type='PFO' OR a1.app_type='AD' OR a1.app_type='PF' OR a1.app_type='SP' THEN 'Adoption'
              WHEN a1.app_type LIKE '%AO%' OR a1.app_type LIKE  '%CA%' OR a1.app_type LIKE '%PF%' OR a1.app_type LIKE '%AD%' OR a1.app_type LIKE '%SP%' THEN 'Adoption+other'
              ELSE 'Non-adoption'
              END adoption

FROM (fcsq.adoptions_applications a1 
LEFT JOIN fcsq.adopt_applicant_type b1 ON a1.case_number = b1.case_number) 
LEFT JOIN fcsq.adoptions_cases c1 ON a1.case_number = c1.case_number;
"""

pydb.create_temp_table(create_adopt_application1,"adopt_application1")

create_adopt_application2 = f"""
SELECT case_number, 
        event, 
        min_date, 
        EXTRACT(YEAR FROM min_date) year,

        CASE WHEN  min_date IS NULL THEN NULL
              WHEN EXTRACT(MONTH FROM min_date) between 1 and 3 THEN 1
              WHEN EXTRACT(MONTH from min_date) between 4 and 6 THEN 2
              WHEN EXTRACT(month from min_date) between 7 and 9 THEN 3
              WHEN EXTRACT(month from min_date) between 10 and 12 THEN 4
              END quarter,

        court, 
        app_type, 
        app_type2, 
        adoption, 
        case_app_type, 
        high_court, 
        contested, 
        number_applicants, 
        min_sex, 
        max_sex, 
        adopter_type, 
        min_rtc, 
        max_rtc, 
        adopter, 
        receipt_date2, 
        date_of_issue, 
        case_issue_date2,

        CASE WHEN app_type2 LIKE ', AO%' OR app_type2 LIKE ', AD%' OR app_type2 LIKE ', CA%' OR app_type2 LIKE ', PFO%' OR app_type2 LIKE ', PF%' THEN 'Adoption' 
              ELSE 'N'
              END adoption1 ,

        CASE WHEN app_type2 LIKE ', AO%' OR app_type2 LIKE ', AD%' OR app_type2 LIKE ', SP%' THEN 1
              ELSE 0
              END standard ,

        CASE WHEN app_type2 LIKE '%, CA%' OR app_type2 LIKE '%, CA%'
              THEN 1 
              ELSE 0
              END convention , 

        CASE WHEN app_type2 LIKE '%, PF%'
              THEN 1
                ELSE 0
                END foreign , 

        CASE WHEN app_type2 LIKE '%, PLA%' OR app_type2 LIKE ', FO%'
              THEN 1
              ELSE 0
              END placement , 

        CASE WHEN app_type2 LIKE '%, RPLA%'
              THEN 1
              ELSE 0
              END placement_revoke  , 


        CASE WHEN app_type2 LIKE '%, VPLA%'
              THEN 1
              ELSE 0
              END  placement_vary , 

        CASE WHEN app_type2 LIKE '%, CNO%'
              THEN 1
              ELSE 0
              END contact_s26 , 

        CASE WHEN app_type2 LIKE '%, RCNO%'
              THEN 1
              ELSE 0
              END contact_s26_revoke, 

        CASE WHEN app_type2 LIKE '%, VCNO%'
              THEN 1
              ELSE 0
              END contact_s26_vary , 

        CASE WHEN app_type2 LIKE '%, CCS%' 
              THEN 1
              ELSE 0
              END change_surname , 

        CASE WHEN app_type2 LIKE '%, RUK%'
              THEN 1
              ELSE 0
              END  remove_child_from_uk, 

        CASE WHEN app_type2 LIKE '%, OR%'
              THEN 1
              ELSE 0
              END other_recovery,

        CASE WHEN app_type2 LIKE '%, PT10%'
              THEN 1
              ELSE 0
              END other_part_10, 

        CASE WHEN app_type2 LIKE '%, PT9%'
              THEN 1
              ELSE 0
              END other_part_9, 

        CASE WHEN app_type2 LIKE '%, S84%'
              THEN 1
              ELSE 0
              END other_s84_order, 

        CASE WHEN app_type2 LIKE '%, S88%'
              THEN 1
              ELSE 0
              END other_s88_direction , 

        CASE WHEN app_type2 LIKE '%, S89%'
              THEN 1
              ELSE 0
              END other_s89_order 

FROM __temp__.adopt_application1;
"""

pydb.create_temp_table(create_adopt_application2,"adopt_application2")

create_adopt_application3 = f"""
SELECT application2.case_number, 
        MIN(application2.min_date) app_date, 
        application2.year, 
        application2.quarter, 
        application2.court, 
        application2.app_type, 
        application2.app_type2, 
        application2.case_app_type, 
        application2.adoption, 
        application2.high_court, 
        application2.contested, 
        application2.number_applicants, 
        application2.min_sex, 
        application2.max_sex, 
        application2.adopter_type, 
        application2.min_rtc, 
        application2.max_rtc, 
        application2.adopter
FROM __temp__.adopt_application2 application2
GROUP BY application2.case_number, 
          application2.year, 
          application2.quarter, 
          application2.court, 
          application2.app_type, 
          application2.app_type2, 
          application2.case_app_type, 
          application2.adoption, 
          application2.high_court, 
          application2.contested, 
          application2.number_applicants, 
          application2.min_sex, 
          application2.max_sex, 
          application2.adopter_type, 
          application2.min_rtc, 
          application2.max_rtc, 
          application2.adopter
HAVING (application2.case_number)<>'CV11Z00105';
"""

pydb.create_temp_table(create_adopt_application3,"adopt_application3")

create_adopt_application4 = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_application_4
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_application_4') AS
SELECT application2.case_number, 
        application2.min_date app_date, 
        application2.year, 
        application2.quarter, 
        application2.court, 
        application2.app_type, 
        application2.case_app_type, 
        application2.adoption, 
        application2.high_court, 
        application2.contested, 
        application2.number_applicants, 
        application2.min_sex, 
        application2.max_sex, 
        application2.adopter_type, 
        application2.min_rtc, 
        application2.max_rtc, 
        application2.adopter, 
        application2.standard, 
        application2.convention, 
        application2.foreign, 
        application2.placement, 

        application2.placement_revoke + application2.placement_vary  placement_revoke_or_vary, 

        application2.contact_s26, 

        application2.contact_s26_revoke + application2.contact_s26_vary contact_s26_revoke_or_vary, 
        
        application2.change_surname, 
        application2.remove_child_from_uk, 

        application2.other_recovery+application2.other_part_10+application2.other_part_9+application2.other_s84_order+application2.other_s88_direction+application2.other_s89_order other_order_type


FROM __temp__.adopt_application2 application2;
"""
pydb.start_query_execution_and_wait(create_adopt_application4)

{'QueryExecutionId': '7e06b3bc-fa37-4ae6-b720-4f97ede8b219',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_application_4\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_application_4') AS\nSELECT application2.case_number, \n        application2.min_date app_date, \n        application2.year, \n        application2.quarter, \n        application2.court, \n        application2.app_type, \n        application2.case_app_type, \n        application2.adoption, \n        application2.high_court, \n        application2.contested, \n        application2.number_applicants, \n        application2.min_sex, \n        application2.max_sex, \n        application2.adopter_type, \n        application2.min_rtc, \n        application2.max_rtc, \n        application2.adopter, \n        application2.standard, \n        application2.convention, \n        application2.foreign, \n        application2.placement, \n\n        application2.placement_revoke 

#### adopt_application_4 validation

In [35]:
adopt_application_4_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_application_4")
adopt_application_4_count

Unnamed: 0,count
0,175169


## adopt_application_5 table
<a name="adopt_application_5"></a>

### Drop the adopt_application_5 table if it already exists and remove its data from the S3 bucket

In [36]:
drop_adopt_application_5 = f"""
DROP TABLE IF EXISTS fcsq.adopt_application_5;
"""
pydb.start_query_execution_and_wait(drop_adopt_application_5)

# clean up previous adopt_application_5 files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_application_5/").delete()

[{'ResponseMetadata': {'RequestId': '7MCGW4V0EVQE2W8B',
   'HostId': 'x0wJpzG/a7k807h1U0LOHLxsR3dfMPDMdNb0OnihAEe6r9Z4ZWPrL/6LpM2rXf1acPb0v9oESQk=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'x0wJpzG/a7k807h1U0LOHLxsR3dfMPDMdNb0OnihAEe6r9Z4ZWPrL/6LpM2rXf1acPb0v9oESQk=',
    'x-amz-request-id': '7MCGW4V0EVQE2W8B',
    'date': 'Wed, 06 Apr 2022 16:51:04 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_application_5/20211027_134916_00033_wgnzr_d5b8f581-94c1-46d0-98d1-732d9bf13ec5',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Y6KaRd3fT2Y11MddB7EEaKm7tWb00Iys'},
   {'Key': 'fcsq_processing/Adoption/adopt_application_5/20211027_134916_00033_wgnzr_088f649f-13b0-4f8a-b88a-0b256ce7040b',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'kBa69bGl8KWXITLmDILiSz2GNjvNRDSV'},
   {'Key': 'fcsq_processing

### Create the adopt_application_5 table in Athena

In [38]:
create_adopt_application_5 = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_application_5
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_application_5') AS
SELECT application4.*, 

        CASE WHEN adoption='Adoption' OR adoption='Adoption+other' 
              THEN 'Adoption'
              ELSE ' '
              END AS adoption_cases, 

        CASE WHEN adoption='Non-adoption' OR adoption='Adoption+other'
              THEN 'Non-adoption'
              ELSE '' 
              END AS non_adoption_cases, 

        CASE WHEN adopter IS NULL
              THEN 'Other or not stated'
              ELSE adopter
              END AS adopter_2

FROM fcsq.adopt_application_4 application4;
"""

pydb.start_query_execution_and_wait(create_adopt_application_5)



{'QueryExecutionId': 'a857fa41-9c29-4088-856e-a3e1f77a6dde',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_application_5\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_application_5') AS\nSELECT application4.*, \n\n        CASE WHEN adoption='Adoption' OR adoption='Adoption+other' \n              THEN 'Adoption'\n              ELSE ' '\n              END AS adoption_cases, \n\n        CASE WHEN adoption='Non-adoption' OR adoption='Adoption+other'\n              THEN 'Non-adoption'\n              ELSE '' \n              END AS non_adoption_cases, \n\n        CASE WHEN adopter IS NULL\n              THEN 'Other or not stated'\n              ELSE adopter\n              END AS adopter_2\n\nFROM fcsq.adopt_application_4 application4",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/a857fa41-9c29-4088-856e-a3e1f77a6dde'},
 'QueryExecutionContext': {},
 'S

#### adopt_application_5 validation

In [39]:
adopt_application_5_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_application_5")
adopt_application_5_count

Unnamed: 0,count
0,175169


## adopt_apps_6_adoptions_only table
<a name="adopt_apps_6_adoptions_only"></a>

### Drop the adopt_apps_6_adoptions_only table if it already exists and remove its data from the S3 bucket

In [40]:
drop_adopt_apps_6_adoptions_only = f"""
DROP TABLE IF EXISTS fcsq.adopt_apps_6_adoptions_only;
"""
pydb.start_query_execution_and_wait(drop_adopt_apps_6_adoptions_only)

# clean up previous adopt_apps_6_adoptions_only files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_apps_6_adoptions_only/").delete()

[{'ResponseMetadata': {'RequestId': '3A37CNFSBCHVAPB0',
   'HostId': 'unvTQtAChawZY2dcRrUCw+FewS/hSe7vn7ZoeOYP5726ae+lNiExA7a1r/QXrjSp4vWLxQk19QY=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'unvTQtAChawZY2dcRrUCw+FewS/hSe7vn7ZoeOYP5726ae+lNiExA7a1r/QXrjSp4vWLxQk19QY=',
    'x-amz-request-id': '3A37CNFSBCHVAPB0',
    'date': 'Wed, 06 Apr 2022 17:00:30 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_apps_6_adoptions_only/20211027_123203_00062_mzyrh_99cf6179-4539-4632-bdc4-d16b5669eb1b',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '1g0Qs4fgRIcpdLfNZBzsXrVzGp.T7GOy'},
   {'Key': 'fcsq_processing/Adoption/adopt_apps_6_adoptions_only/20211027_123203_00062_mzyrh_b0d8a7b6-5265-4440-b898-f47bf851a158',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'sY544_NoBIh_BliCQP438JgvzaUx8D7_'},
   {'Key': 

### Create the adopt_apps_6_adoptions_only table in Athena

In [41]:
create_adopt_apps_6_adoptions_only = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_apps_6_adoptions_only
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_apps_6_adoptions_only')
AS
SELECT application_5.case_number, 
        application_5.app_date, 
        application_5.year, 
        application_5.quarter,
        application_5.court, 
        application_5.app_type, 
        application_5.case_app_type, 
        application_5.adoption, 
        application_5.high_court, 
        application_5.contested, 
        application_5.number_applicants, 
        application_5.min_sex, 
        application_5.max_sex, 
        application_5.adopter_type,
        application_5.min_rtc, 
        application_5.max_rtc, 
        application_5.adopter, 
        application_5.standard, 
        application_5.convention, 
        application_5.foreign,
        application_5.adoption_cases, 
        application_5.adopter_2,
        
        (application_5.standard + application_5.convention + application_5.foreign) AS adoptions_total 
        
FROM fcsq.adopt_application_5 application_5
WHERE application_5.adoption_cases='Adoption';
"""

pydb.start_query_execution_and_wait(create_adopt_apps_6_adoptions_only)



{'QueryExecutionId': 'ac87a2e4-4ab5-45b2-924f-3f5605a6fd50',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_apps_6_adoptions_only\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_apps_6_adoptions_only')\nAS\nSELECT application_5.case_number, \n        application_5.app_date, \n        application_5.year, \n        application_5.quarter,\n        application_5.court, \n        application_5.app_type, \n        application_5.case_app_type, \n        application_5.adoption, \n        application_5.high_court, \n        application_5.contested, \n        application_5.number_applicants, \n        application_5.min_sex, \n        application_5.max_sex, \n        application_5.adopter_type,\n        application_5.min_rtc, \n        application_5.max_rtc, \n        application_5.adopter, \n        application_5.standard, \n        application_5.convention, \n        application_5.foreign,\n        application_5.adoption_cases, \n        a

#### adopt_apps_6_adoptions_only validation

In [42]:
adopt_apps_6_adoptions_only_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_apps_6_adoptions_only")
adopt_apps_6_adoptions_only_count

Unnamed: 0,count
0,68360


## adopt_apps_6_non_adoptions table
<a name="adopt_apps_6_non_adoptions"></a>

### Drop the adopt_apps_6_non_adoptions table if it already exists and remove its data from the S3 bucket

In [43]:
drop_adopt_apps_6_non_adoptions = f"""
DROP TABLE IF EXISTS fcsq.adopt_apps_6_non_adoptions;
"""
pydb.start_query_execution_and_wait(drop_adopt_apps_6_non_adoptions)

# clean up previous adopt_apps_6_non_adoptions files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_apps_6_non_adoptions/").delete()

[{'ResponseMetadata': {'RequestId': 'Z23NPPATHBG8S42N',
   'HostId': 'qX6ij1FR62iXbLpSrmKpba1Qey6Qn/atiJ4Gaox+Geue0r3jTHFl+FuagYnNsdbmOJ0AtoQEIpU=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'qX6ij1FR62iXbLpSrmKpba1Qey6Qn/atiJ4Gaox+Geue0r3jTHFl+FuagYnNsdbmOJ0AtoQEIpU=',
    'x-amz-request-id': 'Z23NPPATHBG8S42N',
    'date': 'Wed, 06 Apr 2022 17:00:46 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_apps_6_non_adoptions/20211027_123351_00041_e24a4_419062db-472d-4987-8b44-074273f4255c',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'Jb6iSuWAQVV2Epk51XxaQZv1xKYodZPk'},
   {'Key': 'fcsq_processing/Adoption/adopt_apps_6_non_adoptions/20211027_123351_00041_e24a4_0a441a78-56d0-461c-aa66-4d64e25311ec',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '2GdCaF9r.X.fL_tEnjpUW8x.vcf66vdt'},
   {'Key': 'f

### Create the adopt_apps_6_non_adoptions table in Athena

In [44]:
create_adopt_apps_6_non_adoptions = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_apps_6_non_adoptions
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_apps_6_non_adoptions') AS
SELECT application_5.case_number, 
        application_5.app_date, 
        application_5.year, 
        application_5.quarter, 
        application_5.court, 
        application_5.app_type, 
        application_5.case_app_type, 
        application_5.adoption, 
        application_5.high_court, 
        application_5.contested, 
        application_5.number_applicants, 
        application_5.min_sex, 
        application_5.max_sex, 
        application_5.adopter_type, 
        application_5.min_rtc, 
        application_5.max_rtc, 
        application_5.adopter, 
        application_5.placement, 
        application_5.placement_revoke_or_vary, 
        application_5.contact_s26, 
        application_5.contact_s26_revoke_or_vary, 
        application_5.change_surname, 
        application_5.remove_child_from_uk, 
        application_5.other_order_type, 
        application_5.non_adoption_cases, 
        application_5.adopter_2, 
        
        placement+placement_revoke_or_vary+contact_s26+contact_s26_revoke_or_vary+change_surname+remove_child_from_uk+other_order_type AS non_adoptions_total,
        
        REPLACE(app_type,'AO, ','') AS remove_ao
        
FROM fcsq.adopt_application_5 application_5
WHERE application_5.non_adoption_cases='Non-adoption' AND 
(placement+placement_revoke_or_vary+contact_s26+contact_s26_revoke_or_vary+change_surname+remove_child_from_uk+other_order_type)>0;
"""

pydb.start_query_execution_and_wait(create_adopt_apps_6_non_adoptions)



{'QueryExecutionId': '7321f8d3-f01b-4421-8660-3eb768bc81e5',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_apps_6_non_adoptions\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_apps_6_non_adoptions') AS\nSELECT application_5.case_number, \n        application_5.app_date, \n        application_5.year, \n        application_5.quarter, \n        application_5.court, \n        application_5.app_type, \n        application_5.case_app_type, \n        application_5.adoption, \n        application_5.high_court, \n        application_5.contested, \n        application_5.number_applicants, \n        application_5.min_sex, \n        application_5.max_sex, \n        application_5.adopter_type, \n        application_5.min_rtc, \n        application_5.max_rtc, \n        application_5.adopter, \n        application_5.placement, \n        application_5.placement_revoke_or_vary, \n        application_5.contact_s26, \n        application_5.contact_

#### adopt_apps_6_non_adoptions validation

In [45]:
adopt_apps_6_non_adoptions_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_apps_6_non_adoptions")
adopt_apps_6_non_adoptions_count

Unnamed: 0,count
0,85983


## Summary 1: Adoption applications by adopter
<a name="summaries"></a>

In [46]:
distinct_adopter_2 = f"""
SELECT DISTINCT adopter_2  FROM fcsq.adopt_apps_6_adoptions_only;
"""
table1 = pydb.start_query_execution_and_wait(distinct_adopter_2)
table1

{'QueryExecutionId': '7866aecc-d248-4738-9ca5-2048a4e78482',
 'Query': 'SELECT DISTINCT adopter_2  FROM fcsq.adopt_apps_6_adoptions_only',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/7866aecc-d248-4738-9ca5-2048a4e78482.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 17, 0, 57, 959000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 17, 0, 59, 69000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 815,
  'DataScannedInBytes': 23593,
  'TotalExecutionTimeInMillis': 1110,
  'QueryQueueTimeInMillis': 187,
  'QueryPlanningTimeInMillis': 177,
  'ServiceProcessingTimeInMillis': 108},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVersion': 'Athena engine version 2'}}

In [48]:
sum_adopter_types = f"""
SELECT year, 
        quarter,
        SUM(CASE WHEN adopter_2='Other or not stated' THEN adoptions_total ELSE 0 END) other_or_not_stated,
        SUM(CASE WHEN adopter_2='Sole applicant' THEN adoptions_total ELSE 0 END) Sole_applicant,
        SUM(CASE WHEN adopter_2='Step parent' THEN adoptions_total ELSE 0  END) Step_Parent,
        SUM(CASE WHEN adopter_2='mixed-sex couple' THEN adoptions_total ELSE 0 END) mix_sex_couple,
        SUM(CASE WHEN adopter_2='same-sex couple' THEN adoptions_total ELSE 0 END) same_sex_couple,
        SUM(adoptions_total) as adoptions_total
FROM fcsq.adopt_apps_6_adoptions_only WHERE year > 2010 
GROUP BY year, 
          quarter
ORDER BY 1,2;
"""
table1 = pydb.read_sql_query(sum_adopter_types)
table1

Unnamed: 0,year,quarter,other_or_not_stated,sole_applicant,step_parent,mix_sex_couple,same_sex_couple,adoptions_total
0,2011,1,28,195,143,945,50,1361
1,2011,2,27,155,143,777,51,1153
2,2011,3,11,189,138,783,42,1163
3,2011,4,19,207,129,780,60,1195
4,2012,1,16,251,129,965,67,1428
5,2012,2,8,199,132,882,73,1294
6,2012,3,18,233,121,883,69,1324
7,2012,4,17,213,94,1033,91,1448
8,2013,1,26,255,104,1126,117,1628
9,2013,2,10,246,123,1167,99,1645


## Summary 2: non-adoption applications by adopter

In [49]:
adopt_non_adopt_summary = f"""
SELECT year, 
        quarter,
        SUM (placement) AS placement_total,
        SUM (placement_revoke_or_vary) AS placement_revoke_or_vary,
        SUM (contact_s26 ) AS contact_s26,
        SUM (contact_s26_revoke_or_vary) AS contact_s26_revoke_or_vary,
        SUM (change_surname)AS change_surname,
        SUM (remove_child_from_uk) AS remove_child_from_uk,
        SUM (other_order_type)AS other_order_type,
        SUM (non_adoptions_total) AS non_adoptions_total_sum

FROM fcsq.adopt_apps_6_non_adoptions 
WHERE year > 2010 /*RB Note (18/11/15): Added this qualifier in as presently results before 2010 not used.*/
GROUP BY year, 
          quarter
ORDER BY year,
          quarter;
"""

table1 = pydb.start_query_execution_and_wait(adopt_non_adopt_summary)
table1

Unnamed: 0,year,quarter,placement_total,placement_revoke_or_vary,contact_s26,contact_s26_revoke_or_vary,change_surname,remove_child_from_uk,other_order_type,non_adoptions_total_sum
0,2011,1,1496,72,7,1,11,1,9,1597
1,2011,2,1353,79,15,1,6,2,4,1460
2,2011,3,1509,74,6,1,16,2,5,1613
3,2011,4,1466,99,5,1,24,2,9,1606
4,2012,1,1731,116,2,2,10,2,3,1866
5,2012,2,1712,94,5,4,26,0,12,1853
6,2012,3,1838,166,10,2,16,3,7,2042
7,2012,4,1804,146,4,2,6,5,5,1972
8,2013,1,1716,231,15,4,17,5,5,1993
9,2013,2,1892,259,9,2,20,4,3,2189


## Summary 3 - produces a case level table for purposes of case level counts

In [50]:
adopt_application_case_count_a = f"""
SELECT case_number,
        court, 
        MIN(app_date) AS minofapp_date
FROM fcsq.adopt_application_4 
WHERE year > 2010
GROUP BY case_number,court
ORDER BY 1,2;
"""
pydb.create_temp_table(adopt_application_case_count_a,"adopt_application_case_count_a")

In [51]:
adopt_application_case_count_b = f"""
SELECT case_number, 
        minofapp_date, 
        CAST(EXTRACT(YEAR FROM minofapp_date) as varchar(3)) || ' - ' ||
        
        (CASE WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 1 AND 3  THEN CAST(1 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 4 AND 6 THEN CAST(2 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 7 AND 9 THEN CAST(3 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 10 AND 12 THEN CAST(4 as varchar(3))
              END) quarter

FROM __temp__.adopt_application_case_count_a;
"""
pydb.create_temp_table(adopt_application_case_count_b,"adopt_application_case_count_b")


In [52]:
adopt_apps_case_count_total = f"""
SELECT quarter, 
        COUNT(case_number) count_of_case
FROM __temp__.adopt_application_case_count_b
GROUP BY quarter
ORDER BY quarter; 
"""


pydb.create_temp_table(adopt_apps_case_count_total,"adopt_apps_case_count_total")

In [53]:
table1 = pydb.read_sql_query("select * from __temp__.adopt_application_case_count_a")
table1

Unnamed: 0,case_number,court,minofapp_date
0,TA19Z00032,347,2019-04-24
1,TA19Z00033,347,2019-04-26
2,TA19Z00034,347,2019-05-03
3,TA19Z00035,347,2019-05-08
4,TA19Z00036,347,2019-05-10
...,...,...,...
125163,PD14Z00353,299,2014-06-17
125164,PD14Z00356,299,2014-06-24
125165,PD14Z00357,299,2014-06-24
125166,PD14Z00358,299,2014-06-24


In [54]:
table2 = pydb.read_sql_query("select * from __temp__.adopt_application_case_count_b")
table2

Unnamed: 0,case_number,minofapp_date,quarter
0,GC11Z00042,2011-11-09,2011 - 4
1,GC11Z00043,2011-11-11,2011 - 4
2,GC11Z00044,2011-12-07,2011 - 4
3,GC11Z00045,2011-12-07,2011 - 4
4,GC12Z00001,2012-01-23,2012 - 1
...,...,...,...
125163,SE11Z00736,2011-09-27,2011 - 3
125164,SE11Z00737,2011-09-27,2011 - 3
125165,SE11Z00739,2011-09-27,2011 - 3
125166,SE11Z00741,2011-09-28,2011 - 3


In [55]:
table3 = pydb.read_sql_query("select * from __temp__.adopt_apps_case_count_total")
table3

Unnamed: 0,quarter,count_of_case
0,2011 - 1,2946
1,2011 - 2,2606
2,2011 - 3,2762
3,2011 - 4,2777
4,2012 - 1,3286
5,2012 - 2,3123
6,2012 - 3,3348
7,2012 - 4,3410
8,2013 - 1,3602
9,2013 - 2,3810


## adopt_adopt_only_case_count summary
<a name="adopt_adopt_only_case_count"></a>

In [56]:
adopt_adopt_only_case_count_a = f"""
SELECT case_number, 
        MIN(app_date) AS minofapp_date 
FROM fcsq.adopt_apps_6_adoptions_only
WHERE year > 2010
GROUP BY case_number, court;
"""
pydb.create_temp_table(adopt_adopt_only_case_count_a,"adopt_adopt_only_case_count_a")

adopt_adopt_only_case_count_b = f"""
SELECT case_number, 
        minofapp_date, 
        CAST(EXTRACT(YEAR FROM (minofapp_date)) as varchar(3)) || ' - ' ||

        (CASE WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 1 AND 3  THEN CAST(1 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 4 AND 6 THEN CAST(2 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 7 AND 9 THEN CAST(3 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 10 AND 12 THEN CAST(4 as varchar(3))
              END) quarter

FROM __temp__.adopt_adopt_only_case_count_a;
"""
pydb.create_temp_table(adopt_adopt_only_case_count_b,"adopt_adopt_only_case_count_b")

adopt_only_case_count_total = f"""
SELECT quarter, 
        COUNT (case_number) AS count_of_case
FROM __temp__.adopt_adopt_only_case_count_b
GROUP BY quarter
ORDER BY quarter;
"""
pydb.create_temp_table(adopt_only_case_count_total,"adopt_adopt_only_case_count_total")

## The next couple of queries are here to find the court level data for the court level/DFJ CSV file. Currently these are not used for FCSQ

In [57]:
applications_court_level = f"""
SELECT year, 
        quarter, 
        court, 
        COUNT(*) n 
FROM fcsq.adopt_apps_6_adoptions_only
WHERE year>2010
GROUP BY year, 
          quarter, 
          court
ORDER BY 1,2,3;
"""

pydb.start_query_execution_and_wait(applications_court_level)

Unnamed: 0,year,quarter,court,n
0,2011,1,100,90
1,2011,1,127,37
2,2011,1,130,14
3,2011,1,139,15
4,2011,1,150,47
...,...,...,...,...
3417,2021,3,380,1
3418,2021,3,384,3
3419,2021,3,386,5
3420,2021,3,388,4


In [58]:
#Secondly the applications case level court level file
adopt_application_case_count_c = f"""
SELECT 
  case_number, 
  MIN(app_date) AS minofapp_date,
  court 
FROM fcsq.adopt_apps_6_adoptions_only 
GROUP BY case_number,
          court;
"""

pydb.create_temp_table(adopt_application_case_count_c,"adopt_application_case_count_c")

adopt_application_case_count_d = f"""
SELECT case_number, 
        EXTRACT(YEAR FROM (minofapp_date)) year,
        CAST(EXTRACT(YEAR FROM (minofapp_date)) as varchar(3)) || ' - ' ||

        (CASE WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 1 AND 3  THEN CAST(1 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 4 AND 6 THEN CAST(2 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 7 AND 9 THEN CAST(3 as varchar(3))
              WHEN (EXTRACT (MONTH FROM minofapp_date)) BETWEEN 10 AND 12 THEN CAST(4 as varchar(3))
              END) quarter,
        minofapp_date,
        court 
FROM __temp__.adopt_application_case_count_c;
"""

pydb.create_temp_table(adopt_application_case_count_c,"adopt_application_case_count_c")
pydb.create_temp_table(adopt_application_case_count_d,"adopt_application_case_count_d")


final_query = f"""
SELECT year, 
        quarter, 
        court, 
        COUNT(*) n 
FROM __temp__.adopt_application_case_count_d
WHERE year > 2010
GROUP BY year, 
          quarter, 
          court
ORDER BY year, 
          quarter, 
          court;
"""

pydb.read_sql_query(final_query)

Unnamed: 0,year,quarter,court,n
0,2011,2011 - 1,100,90
1,2011,2011 - 1,127,37
2,2011,2011 - 1,130,14
3,2011,2011 - 1,139,15
4,2011,2011 - 1,150,47
...,...,...,...,...
3417,2021,2021 - 3,380,1
3418,2021,2021 - 3,384,3
3419,2021,2021 - 3,386,5
3420,2021,2021 - 3,388,4


# Stage 3 - Disposals
<a name="disposals"></a>

## adopt_order_type table
<a name="adopt_order_type"></a>

In [59]:
#Create new disposal_fields table while old one not working
data = pd.read_csv('s3://alpha-family-data/fcsq_processing/Adoption/disposal_fields_csv/ADOPTIONS_DISPOSAL_FIELDS.csv')

import awswrangler as wr
wr.s3.to_parquet(  # Storing the data and metadata to Data Lake
    df=data,
    path="s3://alpha-family-data/fcsq_processing/Adoption/disposal_fields_csv/fields_parquet/",
    dataset=True, database = "default", table = "temporary_df"
)



{'paths': ['s3://alpha-family-data/fcsq_processing/Adoption/disposal_fields_csv/fields_parquet/61a23ee6419645e99b47f0a97e797c56.snappy.parquet'],
 'partitions_values': {}}

### Drop the adopt_order_type table if it already exists and remove its data from the S3 bucket

In [60]:

drop_adopt_order_type = f"""
DROP TABLE IF EXISTS fcsq.adopt_order_type;
"""
pydb.start_query_execution_and_wait(drop_adopt_order_type)

# clean up previous adopt_order_type files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_order_type/").delete()

[{'ResponseMetadata': {'RequestId': 'MPCMZEJ56BZKNGAD',
   'HostId': 'Dp0XFpl1RO6WvZ9+oiXfQRSar0rgLwN0N8R8N3L+zu7/E9bHPSpUU5dwZ/jXR+u776JEoUBHmSGg5rNBIaYOow==',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'Dp0XFpl1RO6WvZ9+oiXfQRSar0rgLwN0N8R8N3L+zu7/E9bHPSpUU5dwZ/jXR+u776JEoUBHmSGg5rNBIaYOow==',
    'x-amz-request-id': 'MPCMZEJ56BZKNGAD',
    'date': 'Wed, 06 Apr 2022 17:10:25 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_order_type/20211108_164347_00035_ahtvy_679f6828-8308-464c-8022-24ea57439d07',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'lzAj6j5wai3Fogtd2nCP8DjE6lpzIKtb'},
   {'Key': 'fcsq_processing/Adoption/adopt_order_type/20211108_164347_00035_ahtvy_e785589b-3bb2-4038-b1e7-591334552608',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'JwoZPoZfMARi4WBYotpE5KecI.66U1X_'},
   {'Key'

### Create the adopt_order_type table in Athena

In [61]:
create_adopt_order_type = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_order_type
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_order_type') AS
SELECT event, 
        field_model, 
        value 
FROM fcsq.adoptions_disposal_fields_temporary
WHERE field_model='A73_1'
  OR field_model='A74_2' 
  OR field_model='A80_4' 
  OR field_model='G63_1' 
  OR field_model='ORDNOM_5' 
  OR field_model='ORDREF_5' 
  OR field_model='A81_5';
"""

pydb.start_query_execution_and_wait(create_adopt_order_type)



{'QueryExecutionId': '3f1209d2-85a6-48d5-8372-73dda8980211',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_order_type\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_order_type') AS\nSELECT event, \n        field_model, \n        value \nFROM fcsq.adoptions_disposal_fields_temporary\nWHERE field_model='A73_1'\n  OR field_model='A74_2' \n  OR field_model='A80_4' \n  OR field_model='G63_1' \n  OR field_model='ORDNOM_5' \n  OR field_model='ORDREF_5' \n  OR field_model='A81_5'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/3f1209d2-85a6-48d5-8372-73dda8980211'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 17, 10, 27, 214000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 6, 17, 10, 30, 819000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMilli

#### adopt_order_type validation

In [62]:
adopt_order_type_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_order_type")
adopt_order_type_count

Unnamed: 0,count
0,683448


## adopt_disposals_type table
<a name="adopt_disposals_type"></a>

### Drop the adopt_disposals_type table if it already exists and remove its data from the S3 bucket

In [63]:
drop_adopt_disposals_type = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals_type;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals_type)

# clean up previous adopt_disposals_type files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals_type/").delete()

[{'ResponseMetadata': {'RequestId': 'VMPQ82KSEPY25S8Q',
   'HostId': 'jl8rqHM85DgaAXBbOBvDd0Md/WrK0jPEZmNlECKKuMaHMAe5ojXKVdUxznYNegwfOoOaCKWkhIE=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'jl8rqHM85DgaAXBbOBvDd0Md/WrK0jPEZmNlECKKuMaHMAe5ojXKVdUxznYNegwfOoOaCKWkhIE=',
    'x-amz-request-id': 'VMPQ82KSEPY25S8Q',
    'date': 'Wed, 06 Apr 2022 17:10:40 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals_type/20211108_164416_00047_2nnh4_828eb710-7c70-43cc-ab80-b5e7969ded9e',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'biBASyh8uG92ztqBCA1IGFnHwFuRogzN'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals_type/20211108_164416_00047_2nnh4_9f193e59-305e-4499-8682-1f0c4e80f202',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'uKU3PiH.ZzhpvsTieAJztpRNYGFLzpsf'},
   {'Key': 'fcsq_processi

### Create the adopt_disposals_type table in Athena

In [64]:
create_adopt_disposals_type = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals_type
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals_type') AS

SELECT disposals.case_number, 
        disposals.event, 
        disposals.receipt_date2, 
        disposals.event_model, 
        disposal_fields.field_model, 
        disposal_fields.value AS order_type, 
        EXTRACT(YEAR FROM (disposals.receipt_date2)) AS year, 

        CASE WHEN EXTRACT(MONTH FROM (disposals.receipt_date2))<4 THEN 1
              WHEN EXTRACT(MONTH FROM(disposals.receipt_date2)) <7 THEN 2
              WHEN EXTRACT( MONTH FROM (disposals.receipt_date2))<10 THEN 3
              ELSE 4
              END AS quarter

FROM fcsq.adoptions_disposals disposals 
  LEFT JOIN fcsq.ADOPTIONS_DISPOSAL_FIELDS_TEMPORARY disposal_fields 
  ON disposals.event = disposal_fields.event

WHERE (((disposals.event_model)='A70' 
    OR (disposals.event_model)='A71' 
    OR (disposals.event_model)='A72' 
    OR (disposals.event_model)='A75' 
    OR (disposals.event_model)='A79') 
    AND ((disposal_fields.field_model) LIKE '%_JG')) 
  OR (((disposals.event_model)='A76') 
    AND ((disposal_fields.field_model)='A76_12')) 
  OR (((disposals.event_model)='A77') 
    AND ((disposal_fields.field_model)='A77_12')) 
  OR (((disposals.event_model)='A78') 
    AND ((disposal_fields.field_model)='A78_15')) 
  OR (((disposals.event_model)='A12' 
    OR (disposals.event_model)='A13' 
    OR (disposals.event_model)='A15') 
    AND ((disposal_fields.field_model) LIKE '%_FO')) 
  OR (((disposals.event_model)='A73' 
    OR (disposals.event_model)='G63') 
    AND ((disposal_fields.field_model) LIKE '%_1')) 
  OR (((disposals.event_model)='A74') 
    AND ((disposal_fields.field_model)='A74_2')) 
  OR (((disposals.event_model)='A80') 
    AND ((disposal_fields.field_model)='A80_4')) 
  OR (((disposals.event_model)='ORDNOM' 
    OR (disposals.event_model)='ORDREF') 
    AND ((disposal_fields.field_model) LIKE '%_5'));
"""

pydb.start_query_execution_and_wait(create_adopt_disposals_type)



{'QueryExecutionId': 'be187696-c3da-4c39-9088-b3af4997760e',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals_type\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals_type') AS\n\nSELECT disposals.case_number, \n        disposals.event, \n        disposals.receipt_date2, \n        disposals.event_model, \n        disposal_fields.field_model, \n        disposal_fields.value AS order_type, \n        EXTRACT(YEAR FROM (disposals.receipt_date2)) AS year, \n\n        CASE WHEN EXTRACT(MONTH FROM (disposals.receipt_date2))<4 THEN 1\n              WHEN EXTRACT(MONTH FROM(disposals.receipt_date2)) <7 THEN 2\n              WHEN EXTRACT( MONTH FROM (disposals.receipt_date2))<10 THEN 3\n              ELSE 4\n              END AS quarter\n\nFROM fcsq.adoptions_disposals disposals \n  LEFT JOIN fcsq.ADOPTIONS_DISPOSAL_FIELDS_TEMPORARY disposal_fields \n  ON disposals.event = disposal_fields.event\n\nWHERE (((disposals.event_model

#### adopt_disposals_type validation

In [65]:
adopt_disposals_type_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals_type")
adopt_disposals_type_count

Unnamed: 0,count
0,462616


## adopt_country_of_birth table
<a name="adopt_country_of_birth"></a>

### Drop the adopt_country_of_birth table if it already exists and remove its data from the S3 bucket

In [66]:
drop_adopt_country_of_birth = f"""
DROP TABLE IF EXISTS fcsq.adopt_country_of_birth;
"""
pydb.start_query_execution_and_wait(drop_adopt_country_of_birth)

# clean up previous adopt_country_of_birth files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_country_of_birth/").delete()

[{'ResponseMetadata': {'RequestId': '0JZAYTKZ5R7G7QCQ',
   'HostId': 'BH1v607+vVowxry2q0lPM5ia9BVkWeNUNkEK3PWoN5UESi6kCsAPTHA6pIQtTFv8iaekleuqnNw=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'BH1v607+vVowxry2q0lPM5ia9BVkWeNUNkEK3PWoN5UESi6kCsAPTHA6pIQtTFv8iaekleuqnNw=',
    'x-amz-request-id': '0JZAYTKZ5R7G7QCQ',
    'date': 'Wed, 06 Apr 2022 17:10:57 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_country_of_birth/20211111_145521_00138_6tiqd_dcb96bb4-37ed-49db-be89-b7a15341d606',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'eSRFEq.4D4iP0RKUvkgTg_KnddTGkD.3'}]}]

### Create the adopt_country_of_birth table in Athena

In [67]:
create_adopt_country_of_birth = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_country_of_birth
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_country_of_birth') AS
SELECT disposal_fields.event, 
        disposal_fields.field_model, 
        disposal_fields.value
FROM fcsq.ADOPTIONS_DISPOSAL_FIELDS_TEMPORARY disposal_fields
WHERE (((disposal_fields.field_model)='A70_5' 
    OR (disposal_fields.field_model)='A76_2' 
    OR (disposal_fields.field_model)='A77_10' 
    OR (disposal_fields.field_model)='A78_13' 
    OR ((disposal_fields.field_model)='A12_1' 
    OR (disposal_fields.field_model)='A15_1')));
"""

pydb.start_query_execution_and_wait(create_adopt_country_of_birth)



{'QueryExecutionId': 'e3f15b3e-de11-47d9-8ec0-cf3ccae5c41d',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_country_of_birth\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_country_of_birth') AS\nSELECT disposal_fields.event, \n        disposal_fields.field_model, \n        disposal_fields.value\nFROM fcsq.ADOPTIONS_DISPOSAL_FIELDS_TEMPORARY disposal_fields\nWHERE (((disposal_fields.field_model)='A70_5' \n    OR (disposal_fields.field_model)='A76_2' \n    OR (disposal_fields.field_model)='A77_10' \n    OR (disposal_fields.field_model)='A78_13' \n    OR ((disposal_fields.field_model)='A12_1' \n    OR (disposal_fields.field_model)='A15_1')))",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/e3f15b3e-de11-47d9-8ec0-cf3ccae5c41d'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 6, 17, 10,

#### adopt_country_of_birth validation

In [68]:
adopt_country_of_birth_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_country_of_birth")
adopt_country_of_birth_count

Unnamed: 0,count
0,8494


# NOTE: LEFT OUT AGE FIX

## adopt_disposals_with_child table
<a name="adopt_disposals_with_child"></a>

### Drop the adopt_disposals_with_child table if it already exists and remove its data from the S3 bucket

In [69]:
drop_adopt_disposals_with_child = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals_with_child;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals_with_child)

# clean up previous adopt_disposals_with_child files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals_with_child/").delete()

[{'ResponseMetadata': {'RequestId': 'KHE107XE93084C40',
   'HostId': 'KxashoPY/MEO70FWv+Mb+rr7mxikpt/yZ0Su92ItHiq+B0F1WhJd6Wb7eAxlz7rRSJyt4YeTmRs=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'KxashoPY/MEO70FWv+Mb+rr7mxikpt/yZ0Su92ItHiq+B0F1WhJd6Wb7eAxlz7rRSJyt4YeTmRs=',
    'x-amz-request-id': 'KHE107XE93084C40',
    'date': 'Wed, 06 Apr 2022 17:11:10 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals_with_child/20211115_144054_00104_p8agt_728d6fd2-2c72-4909-90d5-6ddc1698c4ee',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'nXVAVzv1gJGvGqiNqgs7UKam8XEgFun0'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals_with_child/20211115_144054_00104_p8agt_c2ffe527-c415-462c-ba58-f7698fa201c9',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'drDq.NL1az_rsO._tBIVz76mHh8t70Nj'},
   {'Key': 'f

### Create the adopt_disposals_with_child table in Athena

In [72]:
create_adopt_disposals2 =f"""
SELECT disposals1.case_number, 
        disposals1.event, 
        disposals1.receipt_date2, 
        disposals1.event_model, 
        order_type.field_model, 
        order_type.value AS order_type, 
        country_of_birth.value AS country_of_birth, 
        EXTRACT(YEAR FROM (disposals1.receipt_date2)) AS year, 

        CASE WHEN EXTRACT(Month FROM (disposals1.receipt_date2)) <4 THEN 1
              WHEN EXTRACT(Month FROM (disposals1.receipt_date2)) <7 THEN 2
              WHEN EXTRACT(Month FROM (disposals1.receipt_date2))<10 THEN 3
              ELSE 4
              END AS quarter
              
FROM fcsq.adoptions_disposals disposals1 
  LEFT JOIN fcsq.adopt_order_type order_type 
    ON disposals1.event = Order_type.event 
  LEFT JOIN fcsq.adopt_country_of_birth country_of_birth 
    ON disposals1.event = country_of_birth.event
WHERE (((disposals1.event_model)<>'G63' 
    AND (disposals1.event_model)<>'ORDNOM' 
    AND (disposals1.event_model)<>'ORDREF')) 
  OR (((disposals1.case_number) LIKE '____A%' 
      OR (disposals1.case_number) LIKE '____Z%') 
    AND ((disposals1.event_model)='G63' 
      OR (disposals1.event_model)='ORDNOM' 
      OR (disposals1.event_model)='ORDREF'));
"""

pydb.create_temp_table(create_adopt_disposals2,"adopt_disposals2")


create_adopt_disposals3 = f"""
SELECT disposals2.case_number, 
        disposals2.year, 
        disposals2.quarter, 
        disposals2.event, 
        disposals2.receipt_date2, 
        disposals2.event_model, 
        disposals2.field_model, 
        disposals2.order_type, 
        lower(disposals2.country_of_birth) as country_of_birth, 
        applicant_type.number_applicants,
        applicant_type.min_sex, 
        applicant_type.max_sex, 
        applicant_type.adopter_type, 
        applicant_type.min_rtc, 
        applicant_type.max_rtc, 
        applicant_type.adopter 
        
FROM __temp__.adopt_disposals2 disposals2 
  LEFT JOIN fcsq.adopt_applicant_type applicant_type
    ON disposals2.case_number = applicant_type.case_number;
"""
pydb.create_temp_table(create_adopt_disposals3,"adopt_disposals3")

create_adopt_disposals4 = f"""
SELECT disposals3.case_number, 
        substr(CAST(disposals3.event as varchar(3)),1,3) AS court, 
        disposals3.year, 
        disposals3.quarter,
        MIN(disposals3.receipt_date2) AS receipt_date, 
        disposals3.event_model, 
        disposals3.field_model, 
        disposals3.order_type, 
        MAX(disposals3.country_of_birth) AS country_of_birth_new,
        disposals3.number_applicants,
        disposals3.min_sex, 
        disposals3.max_sex, 
        disposals3.adopter_type, 
        disposals3.min_rtc, 
        disposals3.max_rtc, 
        disposals3.adopter
        
FROM __temp__.adopt_disposals3 disposals3
GROUP BY disposals3.case_number, 
          substr(CAST(disposals3.event as varchar(3)),1,3), 
          disposals3.year, 
          disposals3.quarter, 
          disposals3.event_model, 
          disposals3.field_model,
          disposals3.order_type, 
          disposals3.number_applicants, 
          disposals3.min_sex, 
          disposals3.max_sex, 
          disposals3.adopter_type, 
          disposals3.min_rtc, 
          disposals3.max_rtc, 
          disposals3.adopter
HAVING (((disposals3.case_number)<>'CV11Z00105'))
ORDER BY disposals3.case_number;
"""

pydb.create_temp_table(create_adopt_disposals4,"adopt_disposals4")

create_adopt_disposals4a = f"""
SELECT disposals4.case_number, 
        MIN(disposals4.court)AS court_new, 
        disposals4.year, 
        disposals4.quarter,
        disposals4.receipt_date, 
        disposals4.event_model, 
        disposals4.field_model, 
        disposals4.order_type, 
        disposals4.country_of_birth_new,
        disposals4.number_applicants,
        disposals4.min_sex, 
        disposals4.max_sex, 
        disposals4.adopter_type, 
        disposals4.min_rtc, 
        disposals4.max_rtc, 
        disposals4.adopter
        
FROM __temp__.adopt_disposals4 disposals4
GROUP BY disposals4.case_number,  
        disposals4.year, 
        disposals4.quarter,
        disposals4.receipt_date, 
        disposals4.event_model, 
        disposals4.field_model, 
        disposals4.order_type, 
        disposals4.country_of_birth_new,
        disposals4.number_applicants,
        disposals4.min_sex, 
        disposals4.max_sex, 
        disposals4.adopter_type, 
        disposals4.min_rtc, 
        disposals4.max_rtc, 
        disposals4.adopter
ORDER BY disposals4.case_number;
"""

pydb.create_temp_table(create_adopt_disposals4a,"adopt_disposals4a")

create_adopt_disposals_with_child = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals_with_child
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals_with_child') AS
SELECT disposals4a.*, 
        child_parties.dob, 
        child_parties.child_sex, 
        date_diff('day',dob,receipt_date)/365.25 AS child_age 
FROM __temp__.adopt_disposals4a Disposals4a
  LEFT JOIN fcsq.adopt_child_parties child_parties
    ON disposals4a.case_number = child_parties.case_number;
"""

pydb.start_query_execution_and_wait(create_adopt_disposals_with_child)




{'QueryExecutionId': '8f3d9b5e-357c-4cb7-a7be-99ae64eaec9c',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals_with_child\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals_with_child') AS\nSELECT disposals4a.*, \n        child_parties.dob, \n        child_parties.child_sex, \n        date_diff('day',dob,receipt_date)/365.25 AS child_age \nFROM mojap_de_temp_alpha_user_thomasauburnmoj.adopt_disposals4a Disposals4a\n  LEFT JOIN fcsq.adopt_child_parties child_parties\n    ON disposals4a.case_number = child_parties.case_number",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/8f3d9b5e-357c-4cb7-a7be-99ae64eaec9c'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 1, 31, 70000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 10, 1, 34, 201000, t

#### adopt_disposals_with_child validation

In [73]:
adopt_disposals_with_child_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals_with_child")
adopt_disposals_with_child_count

Unnamed: 0,count
0,329483


## adopt_disposals5 table
<a name="adopt_disposals5"></a>

### Drop the adopt_disposals5 table if it already exists and remove its data from the S3 bucket

In [74]:
drop_adopt_disposals5 = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals5;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals5)

# clean up previous adopt_disposals5 files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals5/").delete()

[{'ResponseMetadata': {'RequestId': '82D687RNTHH4SA15',
   'HostId': 'H3zWBzFSSOEOaKsQ+NWc24FHUQ/sGXQHzN+5Whf+eH8gK5l5HGW7FIQG7THuQSmouF7zsIHpJN8=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'H3zWBzFSSOEOaKsQ+NWc24FHUQ/sGXQHzN+5Whf+eH8gK5l5HGW7FIQG7THuQSmouF7zsIHpJN8=',
    'x-amz-request-id': '82D687RNTHH4SA15',
    'date': 'Thu, 07 Apr 2022 10:02:18 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals5/20211115_144414_00034_qq5b3_58bc2f8e-1dd6-4383-8707-cc79086ab845',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'utMDYjqaMcUm6QpRmbYMJdrcEOeWhh2M'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals5/20211115_144414_00034_qq5b3_cc300b19-6c56-4220-b17c-9e14029c6fcc',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'oNwbZaZ5kmLWWqQG9RCD7dv0XAw4M4L9'},
   {'Key': 'fcsq_processing/Adopt

### Create the adopt_disposals5 table in Athena

In [75]:
create_adopt_disposals5 = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5') AS
SELECT disposals_with_child.case_number, 
        disposals_with_child.court_new AS court, 
        disposals_with_child.year, 
        disposals_with_child.quarter, 
        disposals_with_child.receipt_date, 
        disposals_with_child.event_model, 
        disposals_with_child.field_model, 
        disposals_with_child.order_type, 
        disposals_with_child.country_of_birth_new AS country_of_birth, 
        disposals_with_child.number_applicants, 
        disposals_with_child.adopter_type, 
        disposals_with_child.adopter,
        max_rtc, 
        max_sex, 
        min_rtc, 
        min_sex, 
        child_sex,

        CASE WHEN disposals_with_child.child_age IS NULL THEN 'Unknown'
              WHEN disposals_with_child.child_age<0 THEN 'Other'
              WHEN disposals_with_child.child_age<1 THEN '<1 year'
              WHEN disposals_with_child.child_age<5 THEN '1-4 years'
              WHEN disposals_with_child.child_age<10 THEN'5-9 years'
              WHEN disposals_with_child.child_age<15 THEN '10-14 years'
              WHEN disposals_with_child.child_age<18 THEN '15-17 years'
              ELSE 'Other'
              END AS age_band, 

        disposals_with_child.child_age, 

        CASE WHEN event_model='A15' 
                OR event_model='A76' 
                OR event_model='A77' 
                THEN 'Adoption'
              WHEN event_model='G63' 
                OR event_model='ORDREF' 
                OR event_model='ORDNOM' 
                THEN 'No order made'
              ELSE 'Non-adoption'
              END  AS adoption, 

        CASE WHEN event_model='A77'     
                THEN 'Convention'
              WHEN country_of_birth_new <>'' 
                And event_model='A76' 
                THEN '?Foreign?'
              WHEN event_model='A76' 
                OR event_model='A15' 
                THEN 'Standard'
              WHEN event_model='A12' 
                OR event_model='A70' 
                THEN 'Placement'
              WHEN event_model='A13' 
                OR event_model='A71' 
                OR event_model='A72' 
                THEN 'Placement_revoke_or_vary'
              WHEN order_type='CNO' 
                THEN 'Contact_s26'
              WHEN order_type='RCNO' 
                OR order_type='VCNO' 
                OR order_type='OFNC' 
                THEN 'Contact_s26_revoke_or_vary'
              WHEN order_type='CCS' 
                THEN 'Change_surname'
              WHEN order_type='RUK' 
                THEN 'Remove_child_from_UK'
              WHEN event_model='A75' 
                OR event_model='A78' 
                OR event_model='A79' 
                OR event_model='A80' 
                THEN 'Other_order_type'
              ELSE ''
              END AS Type,

        CASE WHEN adopter IS NULL 
              THEN 'Other or not stated'
              ELSE adopter
              END AS adopter_2

FROM fcsq.adopt_disposals_with_child disposals_with_child
WHERE (disposals_with_child.event_model<>'A81') 
ORDER BY disposals_with_child.receipt_date;
"""

pydb.start_query_execution_and_wait(create_adopt_disposals5)



{'QueryExecutionId': 'ee434bae-5a7c-4c97-a6d6-863e4227644e',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5') AS\nSELECT disposals_with_child.case_number, \n        disposals_with_child.court_new AS court, \n        disposals_with_child.year, \n        disposals_with_child.quarter, \n        disposals_with_child.receipt_date, \n        disposals_with_child.event_model, \n        disposals_with_child.field_model, \n        disposals_with_child.order_type, \n        disposals_with_child.country_of_birth_new AS country_of_birth, \n        disposals_with_child.number_applicants, \n        disposals_with_child.adopter_type, \n        disposals_with_child.adopter,\n        max_rtc, \n        max_sex, \n        min_rtc, \n        min_sex, \n        child_sex,\n\n        CASE WHEN disposals_with_child.child_age IS NULL THEN 'Unknown'\n              WHEN disposals_with_c

#### adopt_disposals5 validation

In [76]:
adopt_disposals5_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals5")
adopt_disposals5_count

Unnamed: 0,count
0,156802


## adopt_disposals5_adoption table
<a name="adopt_disposals5_adoption"></a>

### Drop the adopt_disposals5_adoption table if it already exists and remove its data from the S3 bucket

In [77]:
drop_adopt_disposals5_adoption = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals5_adoption;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals5_adoption)

# clean up previous adopt_disposals5_adoption files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals5_adoption/").delete()

[{'ResponseMetadata': {'RequestId': '9AZ11RDXN9MM8KXQ',
   'HostId': 'KxzMutTgoA2H3Oej5pWcvkBCzxpjbxQjnCOXWzl/ocsJuuiOnUcYS0XnxW1utc+JeVFav1ty/oA=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'KxzMutTgoA2H3Oej5pWcvkBCzxpjbxQjnCOXWzl/ocsJuuiOnUcYS0XnxW1utc+JeVFav1ty/oA=',
    'x-amz-request-id': '9AZ11RDXN9MM8KXQ',
    'date': 'Thu, 07 Apr 2022 10:02:33 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals5_adoption/20211108_190310_00013_q792v_85e028bd-7912-43da-9542-34b1c3434799',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'XHbwPCp8ba.Hetw.6rF.0tvqQvJGOmnw'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals5_adoption/20211108_190310_00013_q792v_86587597-9a3f-4b9f-a8c8-1a00dc4bdc01',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '7gxs4ou0czUS0E9kL2JYiZZr01Y_RoJF'},
   {'Key': 'fcs

### Create the adopt_disposals5_adoption table in Athena

In [78]:
create_adopt_disposals5_adoption = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_adoption
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_adoption') AS
SELECT disposals5.case_number, 
        disposals5.court, 
        disposals5.year, 
        disposals5.quarter, 
        disposals5.receipt_date, 
        disposals5.event_model, 
        disposals5.country_of_birth, 
        disposals5.number_applicants, 
        disposals5.adopter_type, 
        disposals5.adopter, 

        CASE WHEN child_sex=1 THEN'M'
              WHEN child_sex=2 THEN'F'
              ELSE 'U'
              END AS ch_sex, 

        disposals5.age_band, 
        disposals5.adoption, 
        disposals5.child_sex, 
        disposals5.type, 
        disposals5.adopter_2 

FROM fcsq.adopt_disposals5 disposals5
WHERE disposals5.adoption='Adoption';
"""

pydb.start_query_execution_and_wait(create_adopt_disposals5_adoption)



{'QueryExecutionId': '46d473e7-0a1e-4841-bf42-21019d831a59',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_adoption\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_adoption') AS\nSELECT disposals5.case_number, \n        disposals5.court, \n        disposals5.year, \n        disposals5.quarter, \n        disposals5.receipt_date, \n        disposals5.event_model, \n        disposals5.country_of_birth, \n        disposals5.number_applicants, \n        disposals5.adopter_type, \n        disposals5.adopter, \n\n        CASE WHEN child_sex=1 THEN'M'\n              WHEN child_sex=2 THEN'F'\n              ELSE 'U'\n              END AS ch_sex, \n\n        disposals5.age_band, \n        disposals5.adoption, \n        disposals5.child_sex, \n        disposals5.type, \n        disposals5.adopter_2 \n\nFROM fcsq.adopt_disposals5 disposals5\nWHERE disposals5.adoption='Adoption'",
 'StatementType': 'DDL',
 'ResultConfigur

#### adopt_disposals5_adoption validation

In [79]:
adopt_disposals5_adoption_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals5_adoption")
adopt_disposals5_adoption_count

Unnamed: 0,count
0,81437


## adopt_disposals5_non_adoption table
<a name="adopt_disposals5_non_adoption"></a>

### Drop the adopt_disposals5_non_adoption table if it already exists and remove its data from the S3 bucket

In [80]:
drop_adopt_disposals5_non_adoption = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals5_non_adoption;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals5_non_adoption)

# clean up previous adopt_disposals5_non_adoption files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals5_non_adoption/").delete()

[{'ResponseMetadata': {'RequestId': 'THP68E0C449KEBEF',
   'HostId': 'QADGjCum2Zq65Y4dQ0jfNp0A2RTEMZPnw30HBAgV6GLZpLXddDRAXs+lYlOtpqY2FHKCznGJQ2U=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'QADGjCum2Zq65Y4dQ0jfNp0A2RTEMZPnw30HBAgV6GLZpLXddDRAXs+lYlOtpqY2FHKCznGJQ2U=',
    'x-amz-request-id': 'THP68E0C449KEBEF',
    'date': 'Thu, 07 Apr 2022 10:02:47 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals5_non_adoption/20211108_190401_00094_26y5d_456cc1d8-a717-4fd1-98b3-ca9e27624bc1',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': '1bxyD7N0PrPEg2u3EgT4xwyBs8ts2Ge2'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals5_non_adoption/20211108_190401_00094_26y5d_46131d1a-6248-465d-9899-9c045d24dc20',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'dx1sCUgbtqkW276en5b.oN6_LMVNl8ez'},
   {'Ke

### Create the adopt_disposals5_non_adoption table in Athena

In [81]:
create_adopt_disposals5_non_adoption = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_non_adoption
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_non_adoption') AS
SELECT disposals5.case_number, 
        disposals5.court, 
        disposals5.year, 
        disposals5.quarter, 
        disposals5.receipt_date,
        disposals5.event_model, 
        disposals5.order_type, 
        disposals5.adoption, 
        disposals5.type, 
        disposals5.adopter_2,

        CASE WHEN type='Placement' THEN type
              ELSE 'Other Order'
              END  placement

FROM fcsq.adopt_disposals5 disposals5
WHERE (disposals5.event_model<>'A81') 
  AND (disposals5.adoption='Non-adoption');
"""

pydb.start_query_execution_and_wait(create_adopt_disposals5_non_adoption)



{'QueryExecutionId': 'fc757bec-35d9-4f2f-b7c9-9afc96ba0fbe',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_non_adoption\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_non_adoption') AS\nSELECT disposals5.case_number, \n        disposals5.court, \n        disposals5.year, \n        disposals5.quarter, \n        disposals5.receipt_date,\n        disposals5.event_model, \n        disposals5.order_type, \n        disposals5.adoption, \n        disposals5.type, \n        disposals5.adopter_2,\n\n        CASE WHEN type='Placement' THEN type\n              ELSE 'Other Order'\n              END  placement\n\nFROM fcsq.adopt_disposals5 disposals5\nWHERE (disposals5.event_model<>'A81') \n  AND (disposals5.adoption='Non-adoption')",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/fc757bec-35d9-4f2f-b7c9-9afc96ba0fbe'},
 'QueryExecutionCon

#### adopt_disposals5_non_adoption validation

In [82]:
adopt_disposals5_non_adoption_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals5_non_adoption")
adopt_disposals5_non_adoption_count

Unnamed: 0,count
0,69187


## adopt_disposals5_non_orders table
<a name="adopt_disposals5_non_orders"></a>

### Drop the adopt_disposals5_non_orders table if it already exists and remove its data from the S3 bucket

In [83]:
drop_adopt_disposals5_non_orders = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals5_non_orders;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals5_non_orders)

# clean up previous adopt_disposals5_non_orders files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals5_non_orders/").delete()

[{'ResponseMetadata': {'RequestId': 'TMQJFDW3CZ949K7R',
   'HostId': 'lLtf3FxZKZAGXkBrqNETVKePXZSLmhUtpua3dQluzp7Qn8QnQ91QAU6YEAIcgJ2Hi2dQplCIj7o=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': 'lLtf3FxZKZAGXkBrqNETVKePXZSLmhUtpua3dQluzp7Qn8QnQ91QAU6YEAIcgJ2Hi2dQplCIj7o=',
    'x-amz-request-id': 'TMQJFDW3CZ949K7R',
    'date': 'Thu, 07 Apr 2022 10:03:02 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals5_non_orders/20211108_190459_00016_kukkn_e98cdffa-0a77-4436-a58b-6c4110eed64d',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'fdiXI4mCLLisq8DEVDue8BCRVUhUYBhT'}]}]

### Create the adopt_disposals5_non_orders table in Athena

In [84]:
create_adopt_disposals5_non_orders = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_non_orders
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_non_orders') AS
SELECT disposals5.case_number, 
        disposals5.court, 
        disposals5.year, 
        disposals5.quarter, 
        disposals5.receipt_date, 
        disposals5.event_model,
        disposals5.adoption, 
        disposals5.adopter_2 
FROM fcsq.adopt_disposals5 disposals5
WHERE disposals5.adoption='No order made';
"""

pydb.start_query_execution_and_wait(create_adopt_disposals5_non_orders)



{'QueryExecutionId': '48a98c38-f1e6-4e86-8cae-383a973fd5fb',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_non_orders\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_non_orders') AS\nSELECT disposals5.case_number, \n        disposals5.court, \n        disposals5.year, \n        disposals5.quarter, \n        disposals5.receipt_date, \n        disposals5.event_model,\n        disposals5.adoption, \n        disposals5.adopter_2 \nFROM fcsq.adopt_disposals5 disposals5\nWHERE disposals5.adoption='No order made'",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/48a98c38-f1e6-4e86-8cae-383a973fd5fb'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 3, 3, 472000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 10, 3, 5, 696000, tzinfo=tzlo

#### adopt_disposals5_non_orders validation

In [85]:
adopt_disposals5_non_orders_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals5_non_orders")
adopt_disposals5_non_orders_count

Unnamed: 0,count
0,6178


In [86]:
#Summary 4
summary4 = f"""
SELECT year, 
        quarter, 
        SUM(CASE WHEN adopter_2 ='Other or not stated' THEN 1 ELSE 0 END) other_or_not_stated,
        SUM(CASE WHEN adopter_2 ='Sole applicant' THEN 1 ELSE 0 END) sole_applicant,
        SUM(CASE WHEN adopter_2 ='Step parent' THEN 1 ELSE 0 END) step_parent,
        SUM(CASE WHEN adopter_2 ='mixed-sex couple' THEN 1 ELSE 0 END) mixed_sex_couple,
        SUM(CASE WHEN adopter_2 ='same-sex couple' THEN 1 ELSE 0 END) same_sex_couple,
        COUNT(*) adopter_2_total
from fcsq.adopt_disposals5_adoption
where year > 2010
GROUP BY year, 
          quarter
ORDER BY year,
          quarter;
"""

pydb.start_query_execution_and_wait(summary4)


{'QueryExecutionId': '47ec8edb-abad-4a2c-b5e0-57b11a0bf3a8',
 'Query': "SELECT year, \n        quarter, \n        SUM(CASE WHEN adopter_2 ='Other or not stated' THEN 1 ELSE 0 END) other_or_not_stated,\n        SUM(CASE WHEN adopter_2 ='Sole applicant' THEN 1 ELSE 0 END) sole_applicant,\n        SUM(CASE WHEN adopter_2 ='Step parent' THEN 1 ELSE 0 END) step_parent,\n        SUM(CASE WHEN adopter_2 ='mixed-sex couple' THEN 1 ELSE 0 END) mixed_sex_couple,\n        SUM(CASE WHEN adopter_2 ='same-sex couple' THEN 1 ELSE 0 END) same_sex_couple,\n        COUNT(*) adopter_2_total\nfrom fcsq.adopt_disposals5_adoption\nwhere year > 2010\nGROUP BY year, \n          quarter\nORDER BY year,\n          quarter",
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/47ec8edb-abad-4a2c-b5e0-57b11a0bf3a8.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 3, 1

In [87]:
summary5 = f"""
SELECT year, 
        quarter, 
        COUNT (case_number) AS count_of_case

FROM fcsq.adopt_disposals5_adoption
GROUP BY year, quarter
ORDER BY year, quarter;
"""
pydb.start_query_execution_and_wait(summary5)

{'QueryExecutionId': '80d4845d-1cbe-4a3c-8889-6f60241365e1',
 'Query': 'SELECT year, \n        quarter, \n        COUNT (case_number) AS count_of_case\n\nFROM fcsq.adopt_disposals5_adoption\nGROUP BY year, quarter\nORDER BY year, quarter',
 'StatementType': 'DML',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/80d4845d-1cbe-4a3c-8889-6f60241365e1.csv'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 3, 17, 849000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 10, 3, 18, 510000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 497,
  'DataScannedInBytes': 291298,
  'TotalExecutionTimeInMillis': 661,
  'QueryQueueTimeInMillis': 96,
  'QueryPlanningTimeInMillis': 92,
  'ServiceProcessingTimeInMillis': 68},
 'WorkGroup': 'primary',
 'EngineVersion': {'SelectedEngineVersion': 'Athena engine version 2',
  'EffectiveEngineVer

In [90]:
summary6 = f"""
SELECT year, 
        quarter,
        SUM(CASE WHEN ch_sex='F' THEN 1 ELSE 0 END) f,
        SUM(CASE WHEN ch_sex='M' THEN 1 ELSE 0 END) m,
        SUM(CASE WHEN ch_sex='U' THEN 1 ELSE 0 END) u,
        COUNT(*) total
from fcsq.adopt_disposals5_adoption
WHERE year > 2010
GROUP BY year, quarter
ORDER BY year,quarter;
"""
pydb.read_sql_query(summary6)

Unnamed: 0,year,quarter,f,m,u,total
0,2011,1,538,539,12,1089
1,2011,2,611,600,0,1211
2,2011,3,583,584,2,1169
3,2011,4,630,604,0,1234
4,2012,1,585,624,1,1210
5,2012,2,623,633,0,1256
6,2012,3,663,679,0,1342
7,2012,4,736,715,0,1451
8,2013,1,668,666,3,1337
9,2013,2,823,807,0,1630


In [91]:
summary7 = f"""
SELECT year, 
        quarter,
        SUM(CASE WHEN age_band ='<1 year' THEN 1 ELSE 0 END) less_1_year,
        SUM(CASE WHEN age_band='1-4 years' THEN 1 ELSE 0 END) years_1_4,
        SUM(CASE WHEN age_band ='5-9 years' THEN 1 ELSE 0 END) years_5_9,
        SUM(CASE WHEN age_band ='10-14 years' THEN 1 ELSE 0 END) years_10_14,
        SUM(CASE WHEN age_band ='15-17 years' THEN 1 ELSE 0 END) year_15_17,
        SUM(CASE WHEN age_band = 'Other' THEN 1 ELSE 0 END) other,
        SUM(CASE WHEN age_band = 'Unknown' THEN 1 ELSE 0 END) unknown,
        COUNT(*) total
FROM fcsq.adopt_disposals5_adoption
WHERE year > 2010
GROUP BY year, quarter
ORDER BY year,quarter;

"""
pydb.read_sql_query(summary7)

Unnamed: 0,year,quarter,less_1_year,years_1_4,years_5_9,years_10_14,year_15_17,other,unknown,total
0,2011,1,26,685,247,85,31,3,12,1089
1,2011,2,20,777,268,120,22,4,0,1211
2,2011,3,23,671,304,129,36,4,2,1169
3,2011,4,21,790,255,121,42,5,0,1234
4,2012,1,33,745,285,98,45,3,1,1210
5,2012,2,25,777,288,124,35,7,0,1256
6,2012,3,20,834,323,122,39,4,0,1342
7,2012,4,43,928,344,96,37,3,0,1451
8,2013,1,22,895,309,76,27,5,3,1337
9,2013,2,53,1081,353,105,33,5,0,1630


In [92]:
summary8 = f"""
SELECT year, 
        quarter,
        SUM(CASE WHEN placement ='Other Order' THEN 1 ELSE 0 END) other_order,
        SUM(CASE WHEN placement ='Placement' THEN 1 ELSE 0 END) placement,
        COUNT(*) total
FROM  fcsq.adopt_disposals5_non_adoption
WHERE year>2010
GROUP BY year, quarter
ORDER BY year, quarter;
"""
pydb.read_sql_query(summary8)

Unnamed: 0,year,quarter,other_order,placement,total
0,2011,1,36,1204,1240
1,2011,2,49,1182,1231
2,2011,3,66,1307,1373
3,2011,4,81,1424,1505
4,2012,1,76,1472,1548
5,2012,2,75,1514,1589
6,2012,3,117,1738,1855
7,2012,4,98,1753,1851
8,2013,1,139,1520,1659
9,2013,2,190,1651,1841


In [93]:
summary9 = f"""
SELECT year,
        quarter, 
        COUNT(*) total_non_orders
FROM fcsq.adopt_disposals5_non_orders
WHERE year>2010
GROUP BY year, quarter
ORDER BY year, quarter;

"""
pydb.read_sql_query(summary9)

Unnamed: 0,year,quarter,total_non_orders
0,2011,1,44
1,2011,2,52
2,2011,3,50
3,2011,4,40
4,2012,1,70
5,2012,2,43
6,2012,3,61
7,2012,4,80
8,2013,1,100
9,2013,2,113


In [94]:
#Summary 10: of disposal case counts


create_adopt_disposal_cases = f"""
SELECT disposals5.case_number, 
        MIN(disposals5.receipt_date) AS minofreceipt_date 
FROM fcsq.adopt_disposals5 disposals5
GROUP BY disposals5.case_number;
"""

pydb.create_temp_table(create_adopt_disposal_cases, "adopt_disposal_cases")

create_adopt_disposal_cases_b = f"""
SELECT disposal_cases.minofreceipt_date,
        disposal_cases.case_number,
        cast(EXTRACT(YEAR FROM (disposal_cases.minofreceipt_date)) as varchar(3)) ||' - '||

        CASE WHEN EXTRACT(MONTH FROM (disposal_cases.minofreceipt_date))<4 THEN '1'
              WHEN EXTRACT(MONTH FROM (disposal_cases.minofreceipt_date))<7 THEN '2'
              WHEN EXTRACT(MONTH FROM (disposal_cases.minofreceipt_date))<10 THEN '3'
              ELSE '4'
              END AS quarter

FROM __temp__.adopt_disposal_cases disposal_cases;
"""
pydb.create_temp_table(create_adopt_disposal_cases_b, "adopt_disposal_cases_b")

create_adopt_disp_case_count_total ="""
SELECT quarter, 
        COUNT(case_number) as count_of_case
FROM __temp__.adopt_disposal_cases_b
GROUP BY  quarter;

"""
pydb.read_sql_query(create_adopt_disp_case_count_total)


Unnamed: 0,quarter,count_of_case
0,1998 - 4,1
1,2019 - 2,2485
2,2013 - 3,3596
3,2003 - 1,1133
4,2003 - 2,1414
...,...,...
86,2017 - 4,2592
87,,32
88,2020 - 4,2288
89,2008 - 4,183


## adopt_disposals5_2011 table
<a name="adopt_disposals5_2011"></a>

### Drop the adopt_disposals5_2011 table if it already exists and remove its data from the S3 bucket

In [95]:
drop_adopt_disposals5_2011 = f"""
DROP TABLE IF EXISTS fcsq.adopt_disposals5_2011;
"""
pydb.start_query_execution_and_wait(drop_adopt_disposals5_2011)

# clean up previous adopt_disposals5_2011 files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_disposals5_2011/").delete()

[{'ResponseMetadata': {'RequestId': 'DFNERRH9T3TTG43R',
   'HostId': '0YGjR6am5NlaLh9B1zBnWSZyAYILZAkfM2fn/Wp6JSQ5f7mbnAPsen9r0KMNMVR2Bxrcy915ojs=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '0YGjR6am5NlaLh9B1zBnWSZyAYILZAkfM2fn/Wp6JSQ5f7mbnAPsen9r0KMNMVR2Bxrcy915ojs=',
    'x-amz-request-id': 'DFNERRH9T3TTG43R',
    'date': 'Thu, 07 Apr 2022 10:08:39 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_disposals5_2011/20211109_135140_00014_swnaz_7a8e0f6b-9114-4b92-8f18-3c27e5abd6da',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'RmLSiE3Wny8fSePw47y.Nv8tQIrHpkR8'},
   {'Key': 'fcsq_processing/Adoption/adopt_disposals5_2011/20211109_135140_00014_swnaz_893a71c8-a9df-4152-986f-59c8f28c13d6',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'o4IgICkKBofKE_ixnon7VFg4RR80eHA2'},
   {'Key': 'fcsq_proces

### Create the adopt_disposals5_2011 table in Athena

In [96]:
create_adopt_disposals5_2011 = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_2011
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_2011') AS
SELECT t1.*
FROM fcsq.adopt_Disposals5 t1
WHERE year >2010;
"""

pydb.start_query_execution_and_wait(create_adopt_disposals5_2011)



{'QueryExecutionId': '0ab60fa7-15b9-41fd-8a8f-a1da33e4dfd8',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_disposals5_2011\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_disposals5_2011') AS\nSELECT t1.*\nFROM fcsq.adopt_Disposals5 t1\nWHERE year >2010",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/0ab60fa7-15b9-41fd-8a8f-a1da33e4dfd8'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 8, 41, 30000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 10, 8, 43, 147000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 1929,
  'DataScannedInBytes': 1711845,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/0ab60fa7-15b9-41fd-8a8f-a1da33e4dfd8-manifest.csv',
  'TotalExecutionTimeInMillis': 2117,
  'Quer

#### adopt_disposals5_2011 validation

In [97]:
adopt_disposals5_2011_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_disposals5_2011")
adopt_disposals5_2011_count

Unnamed: 0,count
0,117623


In [98]:
"""
/* The next couple of queries are here to find the court level data for the 
court level/DFJ file. We do not use the below for FCSQ anymore - table names will be wrong*/

/*Query 18
Firstly the disposals court level file*/

/*

select year, 
        quarter, 
        court, 
        count(*) court_Total
from Adoptions_Disposals5_adoption
where year>2010
group by year, quarter, court
order by year, quarter, court;
     
/*Secondly the applications case level court level file*/

/*
drop table adopt_Disposal_case_count_A; 
Create table adopt_Disposal_case_count_A AS

SELECT t1.CASE_NUMBER, 
        Min(t1.Receipt_date) AS MinOfReceipt_date,
        t1.Court 
FROM adopt_Disposals5 t1 
GROUP BY t1.CASE_NUMBER, t1.Court;

drop table adopt_Disposal_case_count_B;
Create table adopt_Disposal_case_count_B AS

SELECT CASE_NUMBER, 
        extract (Year from (MinOfReceipt_date)) AS YEAR,

        CASE WHEN extract(Month from(MinOfReceipt_date))<4 THEN 1
              WHEN extract(Month from(MinOfReceipt_date))<7 THEN 2
              WHEN extract(Month from(MinOfReceipt_date))<10 THEN 3
              ELSE 4
              END AS Quarter,
        
        MinOfReceipt_date,
        Court 
        
FROM adopt_Disposal_case_count_A;
"""

'\n/* The next couple of queries are here to find the court level data for the \ncourt level/DFJ file. We do not use the below for FCSQ anymore - table names will be wrong*/\n\n/*Query 18\nFirstly the disposals court level file*/\n\n/*\n\nselect year, \n        quarter, \n        court, \n        count(*) court_Total\nfrom Adoptions_Disposals5_adoption\nwhere year>2010\ngroup by year, quarter, court\norder by year, quarter, court;\n     \n/*Secondly the applications case level court level file*/\n\n/*\ndrop table adopt_Disposal_case_count_A; \nCreate table adopt_Disposal_case_count_A AS\n\nSELECT t1.CASE_NUMBER, \n        Min(t1.Receipt_date) AS MinOfReceipt_date,\n        t1.Court \nFROM adopt_Disposals5 t1 \nGROUP BY t1.CASE_NUMBER, t1.Court;\n\ndrop table adopt_Disposal_case_count_B;\nCreate table adopt_Disposal_case_count_B AS\n\nSELECT CASE_NUMBER, \n        extract (Year from (MinOfReceipt_date)) AS YEAR,\n\n        CASE WHEN extract(Month from(MinOfReceipt_date))<4 THEN 1\n     

# Stage 4 - Now to update the country of birth lookup
<a name="Country of birth lookup update"></a>

In [99]:
#Create parquet file to add into athena for birth_country_lookup table
data = pd.read_csv('s3://alpha-family-data/fcsq_processing/Adoption/birth_country_lookup_csv/Adopt_birth_country_lookup.csv')
import os
os.environ['AWS_DEFAULT_REGION'] = 'eu-west-2'
import awswrangler as wr
wr.s3.to_parquet(  # Storing the data and metadata to Data Lake
    df=data,
    path="s3://alpha-family-data/fcsq_processing/Adoption/birth_country_lookup_csv/birth_country_lookup_parquet/",
    dataset=True, database = "default", table = "birth_country_lookup"
)



{'paths': ['s3://alpha-family-data/fcsq_processing/Adoption/birth_country_lookup_csv/birth_country_lookup_parquet/6e99137e307a475fbe560b3d0698e2b2.snappy.parquet'],
 'partitions_values': {}}

In [100]:
#Create parquet file to add into athena for event_model_desc table
data = pd.read_csv('s3://alpha-family-data/fcsq_processing/Adoption/event_model_desc_csv/Event_model_desc.csv')
import os
os.environ['AWS_DEFAULT_REGION'] = 'eu-west-2'
import awswrangler as wr
wr.s3.to_parquet(  # Storing the data and metadata to Data Lake
    df=data,
    path="s3://alpha-family-data/fcsq_processing/Adoption/event_model_desc_csv/Event_model_desc_parquet/",
    dataset=True, database = "default", table = "adoption_event_model_desc"
)


{'paths': ['s3://alpha-family-data/fcsq_processing/Adoption/event_model_desc_csv/Event_model_desc_parquet/c1f5ec052aa24d229d8bd82ead6d08af.snappy.parquet'],
 'partitions_values': {}}

## adopt_birth_country_unknown table
<a name="adopt_birth_country_unknown"></a>

### Create the adopt_birth_country_unknown temp table

In [101]:
create_adopt_birth_country_unknown = f"""
SELECT DISTINCT
        a.country_of_birth,
        b.nationality
FROM fcsq.adopt_disposals5_2011 a

LEFT JOIN fcsq.adopt_birth_country_lookup b
  ON a.country_of_birth = b.country_of_birth
  
WHERE a.country_of_birth IS NOT NULL
  AND b.nationality IS NULL;
"""

pydb.create_temp_table(create_adopt_birth_country_unknown,"adopt_birth_country_unknown")



#### adopt_birth_country_unknown validation

In [102]:
adopt_birth_country_unknown_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_birth_country_unknown")
adopt_birth_country_unknown_count

Unnamed: 0,count
0,4


## adopt_birth_country_added table
<a name="adopt_birth_country_added"></a>

### Create the adopt_birth_country_added temp table

In [103]:
create_adopt_birth_country_added = f"""
SELECT country_of_birth,
        CASE WHEN country_of_birth IN ('lithunia', 'republic of the philipines', 'united states of america (the city of new york)', 'morroco', 'saint lucia') /*update countries listed according to list in 'adopt_birth_country_unknown'*/
                THEN 2 /* For foreign countries only */
              /*WHEN country_of_birth in ('...')
                THEN 1*/ /*UK countries only - add to when necessary*/
              END AS nationality

FROM __temp__.adopt_birth_country_unknown;
"""

pydb.create_temp_table(create_adopt_birth_country_added,'adopt_birth_country_added')



#### adopt_birth_country_added validation

In [104]:
adopt_birth_country_added_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_birth_country_added")
adopt_birth_country_added_count

Unnamed: 0,count
0,4


## adopt_country_lookup_update table
<a name="adopt_country_lookup_update"></a>

### Create the adopt_country_lookup_update temp table

In [105]:
create_adopt_country_lookup_update = f"""
SELECT country_of_birth, nationality      /* To account for the columns being in a different order from the '_added' table */
FROM fcsq.adopt_birth_country_lookup

UNION
SELECT *
FROM __temp__.adopt_birth_country_added

ORDER BY country_of_birth;

"""

pydb.create_temp_table(create_adopt_country_lookup_update,'adopt_country_lookup_update')



#### adopt_country_lookup_update validation

In [106]:
adopt_country_lookup_update_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_country_lookup_update")
adopt_country_lookup_update_count

Unnamed: 0,count
0,599


## adopt_birth_country_lookup table
<a name="adopt_birth_country_lookup"></a>

### Drop the adopt_birth_country_lookup table if it already exists and remove its data from the S3 bucket

In [107]:
drop_adopt_birth_country_lookup = f"""
DROP TABLE IF EXISTS fcsq.adopt_birth_country_lookup;
"""
pydb.start_query_execution_and_wait(drop_adopt_birth_country_lookup)

# clean up previous adopt_birth_country_lookup files
bucket.objects.filter(Prefix="fcsq_processing/Adoption/adopt_birth_country_lookup/").delete()

[{'ResponseMetadata': {'RequestId': 'WZ1MZZ0ZZCPXEDDF',
   'HostId': '3ByYf72jV1eS4lcKitQjNNsai17Xi5r1eBFIOAj6uwpGw8EWSjyyIcPGctrwyBId4Diej4URSJ8=',
   'HTTPStatusCode': 200,
   'HTTPHeaders': {'x-amz-id-2': '3ByYf72jV1eS4lcKitQjNNsai17Xi5r1eBFIOAj6uwpGw8EWSjyyIcPGctrwyBId4Diej4URSJ8=',
    'x-amz-request-id': 'WZ1MZZ0ZZCPXEDDF',
    'date': 'Thu, 07 Apr 2022 10:09:32 GMT',
    'content-type': 'application/xml',
    'transfer-encoding': 'chunked',
    'server': 'AmazonS3',
    'connection': 'close'},
   'RetryAttempts': 0},
  'Deleted': [{'Key': 'fcsq_processing/Adoption/adopt_birth_country_lookup/20211118_104656_00011_yb9hc_bac4fedc-28d9-4367-bc63-8f499b3e5818',
    'DeleteMarker': True,
    'DeleteMarkerVersionId': 'hIVolRYM5hZuj68nC68icThRtNoZohbI'}]}]

### Update the adopt_birth_country_lookup table in Athena

In [108]:
create_adopt_birth_country_lookup = f"""
CREATE TABLE IF NOT EXISTS fcsq.adopt_birth_country_lookup
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_birth_country_lookup') AS
SELECT *
FROM __temp__.adopt_country_lookup_update;
"""

pydb.start_query_execution_and_wait(create_adopt_birth_country_lookup)



{'QueryExecutionId': '895cbf20-f5ee-4399-8f94-7bd4b657b2d7',
 'Query': "CREATE TABLE IF NOT EXISTS fcsq.adopt_birth_country_lookup\nWITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/adopt_birth_country_lookup') AS\nSELECT *\nFROM mojap_de_temp_alpha_user_thomasauburnmoj.adopt_country_lookup_update",
 'StatementType': 'DDL',
 'ResultConfiguration': {'OutputLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/895cbf20-f5ee-4399-8f94-7bd4b657b2d7'},
 'QueryExecutionContext': {},
 'Status': {'State': 'SUCCEEDED',
  'SubmissionDateTime': datetime.datetime(2022, 4, 7, 10, 9, 33, 247000, tzinfo=tzlocal()),
  'CompletionDateTime': datetime.datetime(2022, 4, 7, 10, 9, 34, 801000, tzinfo=tzlocal())},
 'Statistics': {'EngineExecutionTimeInMillis': 1402,
  'DataScannedInBytes': 8547,
  'DataManifestLocation': 's3://aws-athena-query-results-593291632749-eu-west-1/tables/895cbf20-f5ee-4399-8f94-7bd4b657b2d7-manifest.csv',
  'TotalExecut

#### adopt_birth_country_lookup validation

In [109]:
adopt_birth_country_lookup_count = pydb.read_sql_query("select count(*) as count from fcsq.adopt_birth_country_lookup")
adopt_birth_country_lookup_count

Unnamed: 0,count
0,599


# Stage 5 - Now to prepare the final output
<a name="Stage 5 - Preparing the final output"></a>

## adopt_adoptions_only_standard table
<a name="adopt_adoptions_only_standard"></a>

### Create the adopt_adoptions_only_standard temp table

In [110]:
create_adopt_adoptions_only_standard = f"""
SELECT type,
        year,
        quarter,
        application,
        order_type,
        adopter_3,
        COUNT(*) AS count
FROM(

SELECT
  'Application' as type,
  year,
  quarter,
  adoption_cases AS application, /*see where clause below*/
  'Standard' AS order_type,
  CASE WHEN adopter_2 IN ('','Other')
         THEN 'Other or not stated'
       WHEN adopter_2 = 'mixed-sex couple'    
         THEN 'M/F couple'
       WHEN adopter_2 = 'same-sex couple'
         THEN 'Same sex couple'
     ELSE adopter_2
    END AS adopter_3
FROM
  fcsq.adopt_apps_6_adoptions_only
WHERE
  standard = 1 /*and adoption_cases = 'Adoption'*//*Adopt+other and foreign use adoption, standard and convention use adoption cases (copied from excel process*/
)

GROUP BY
  type,
  year,
  quarter,
  application,
  order_type,
  adopter_3;
"""

pydb.create_temp_table(create_adopt_adoptions_only_standard,"adopt_adoptions_only_standard")



#### adopt_adoptions_only_standard validation

In [111]:
adopt_adoptions_only_standard_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoptions_only_standard")
adopt_adoptions_only_standard_count

Unnamed: 0,count
0,351


## adopt_adoption_only_convention table
<a name="adopt_adoption_only_convention"></a>

### Create the adopt_adoption_only_convention temp table

In [112]:
create_adopt_adoptions_only_convention = f"""
SELECT type,
        year,
        quarter,
        application,
        order_type,
        adopter_3,
        COUNT(*) AS count
FROM(
  
SELECT
  'Application' as type,
  year,
  quarter,
  adoption_cases AS application, /*see where clause below*/
  'Convention' AS order_type,
  CASE WHEN adopter_2 IN ('','Other')
         THEN 'Other or not stated'
       WHEN adopter_2 = 'mixed-sex couple'
         THEN 'M/F couple'
       WHEN adopter_2 = 'same-sex couple'
         THEN 'Same sex couple'
     ELSE adopter_2
    END AS adopter_3
FROM
  fcsq.adopt_apps_6_adoptions_only
WHERE
  convention = 1 /*and adoption_cases = 'Adoption'*//*Adopt+other and foreign use adoption, standard and convention use adoption cases (copied from excel process*/
)

GROUP BY
  type,
  year,
  quarter,
  application,
  order_type,
  adopter_3;
"""

pydb.create_temp_table(create_adopt_adoptions_only_convention,'adopt_adoptions_only_convention')

#### adopt_adoption_only_convention validation

In [113]:
pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoptions_only_convention")


Unnamed: 0,count
0,92


## adopt_adoptions_only_foreign table
<a name="adopt_adoption_only_foreign"></a>

### Create the adopt_adoptions_only_foreign temp table

In [114]:
create_adopt_adoptions_only_foreign = f"""
SELECT type,
        year,
        quarter,
        application,
        order_type,
        adopter_3,
        COUNT(*) AS count
FROM(  
  
SELECT
  'Application' as type,
  year,
  quarter,
  adoption AS application, /*see where clause below*/
  'Foreign' AS order_type,
  CASE WHEN adopter_2 IN ('','Other')
         THEN 'Other or not stated'
       WHEN adopter_2 = 'mixed-sex couple'    
         THEN 'M/F couple'
       WHEN adopter_2 = 'same-sex couple'
         THEN 'Same sex couple'
     ELSE adopter_2
    END AS adopter_3
FROM
  fcsq.adopt_apps_6_adoptions_only
WHERE
  foreign = 1 and adoption = 'Adoption'/*Adopt+other and foreign use adoption, standard and convention use adoption cases (copied from excel process*/
)  
  
GROUP BY
  type,
  year,
  quarter,
  application,
  order_type,
  adopter_3;
"""

pydb.create_temp_table(create_adopt_adoptions_only_foreign,'adopt_adoptions_only_foreign')



#### adopt_adoptions_only_foreign validation

In [115]:
adopt_adoptions_only_foreign_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoptions_only_foreign")
adopt_adoptions_only_foreign_count

Unnamed: 0,count
0,139


## adopt_adoptions_other_only table
<a name="adopt_adoptions_other_only"></a>

### Create the adopt_adoptions_other_only temp table

In [116]:
create_adopt_adoptions_other_only = f"""
SELECT type,
        year,
        quarter,
        application,
        order_type,
        adopter_3,
        COUNT(*) AS COUNT
FROM(  
  
SELECT
  'Application' as type,
  year,
  quarter,
  adoption AS application, /*see where clause below*/
  'Foreign' AS order_type,
  CASE WHEN adopter_2 IN ('','Other')
         THEN 'Other or not stated'
       WHEN adopter_2 = 'mixed-sex couple'    
         THEN 'M/F couple'
       WHEN adopter_2 = 'same-sex couple'
         THEN 'Same sex couple'
     ELSE adopter_2
    END AS adopter_3
FROM
  fcsq.adopt_apps_6_adoptions_only
WHERE
  foreign = 1 AND adoption = 'Adoption+other'/*Adopt+other and foreign use adoption, standard and convention use adoption cases (copied from excel process*/
)

GROUP BY
        type,
        year,
        quarter,
        application,
        order_type,
        adopter_3;
"""

pydb.create_temp_table(create_adopt_adoptions_other_only,'adopt_adoptions_other_only')



#### adopt_adoptions_other_only validation

In [117]:
adopt_adoptions_other_only_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoptions_other_only")
adopt_adoptions_other_only_count

Unnamed: 0,count
0,19


## adopt_adoption_only table
<a name="adopt_adoption_only"></a>

### Create the adopt_adoption_only temp table

In [118]:
create_adopt_adoption_only = f"""
SELECT
  *
FROM
  __temp__.adopt_adoptions_only_standard
UNION ALL 
SELECT
  *
FROM
  __temp__.adopt_adoptions_only_convention
UNION ALL 
SELECT
  *
FROM
  __temp__.adopt_adoptions_only_foreign
UNION ALL 
SELECT
  *
FROM
  __temp__.adopt_adoptions_other_only;
"""

pydb.create_temp_table(create_adopt_adoption_only,'adopt_adoption_only')



#### adopt_adoption_only validation

In [119]:
adopt_adoption_only_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoption_only")
adopt_adoption_only_count

Unnamed: 0,count
0,601


## adopt_adoption_placement table
<a name="adopt_adoption_placement"></a>

### Create the adopt_adoption_placement temp table

In [120]:
create_adopt_adoption_placement = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Placement' AS order_type,
  'n/a' AS adopter,
   placement AS count
FROM
 fcsq.adopt_apps_6_non_adoptions
WHERE
  placement > 0;

"""

pydb.create_temp_table(create_adopt_adoption_placement,'adopt_adoption_placement')



#### adopt_adoption_placement validation

In [121]:
adopt_adoption_placement_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoption_placement")
adopt_adoption_placement_count

Unnamed: 0,count
0,76376


## adopt_placement_revoke_or_vary table
<a name="adopt_placement_revoke_or_vary"></a>

### Create the adopt_placement_revoke_or_vary temp table

In [122]:
create_adopt_placement_revoke_or_vary = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Placement_revoke_or_vary' AS order_type,
  'n/a' AS adopter,
  placement_revoke_or_vary AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  placement_revoke_or_vary > 0;
"""

pydb.create_temp_table(create_adopt_placement_revoke_or_vary,'adopt_placement_revoke_or_vary')



#### adopt_placement_revoke_or_vary validation

In [123]:
adopt_placement_revoke_or_vary_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_placement_revoke_or_vary")
adopt_placement_revoke_or_vary_count

Unnamed: 0,count
0,8657


## adopt_adoption_contact_s26 table
<a name="adopt_adoption_contact_s26"></a>

### Create the adopt_adoption_contact_s26 temp table

In [124]:
create_adopt_adoption_contact_s26 = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Contact_s26' AS order_type,
  'n/a' AS adopter,
  contact_s26 AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  contact_s26 > 0;
"""

pydb.create_temp_table(create_adopt_adoption_contact_s26,'adopt_adoption_contact_s26')



#### adopt_adoption_contact_s26 validation

In [125]:
adopt_adoption_contact_s26_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_adoption_contact_s26")
adopt_adoption_contact_s26_count

Unnamed: 0,count
0,341


## adopt_contact_s26_revoke_vary table
<a name="adopt_contact_s26_revoke_vary"></a>

### Create the adopt_contact_s26_revoke_vary temp table

In [126]:
create_adopt_contact_s26_revoke_vary = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Contact_s26_revoke_or_vary' AS order_type,
  'n/a' AS adopter,
  contact_s26_revoke_or_vary AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  contact_s26_revoke_or_vary > 0;
"""

pydb.create_temp_table(create_adopt_contact_s26_revoke_vary,'adopt_contact_s26_revoke_vary')



#### adopt_contact_s26_revoke_vary validation

In [127]:
adopt_contact_s26_revoke_vary_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_contact_s26_revoke_vary")
adopt_contact_s26_revoke_vary_count

Unnamed: 0,count
0,46


## adopt_change_surname table
<a name="adopt_change_surname"></a>

### Create the adopt_change_surname temp table

In [128]:
create_adopt_change_surname = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Change_surname' AS order_type,
  'n/a' AS adopter,
  change_surname AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  change_surname > 0;
"""

pydb.create_temp_table(create_adopt_change_surname,'adopt_change_surname')



#### adopt_change_surname validation

In [129]:
adopt_change_surname_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_change_surname")
adopt_change_surname_count

Unnamed: 0,count
0,486


## adopt_remove_child_from_uk table
<a name="adopt_remove_child_from_uk"></a>

### Create the adopt_remove_child_from_uk temp table

In [130]:
create_adopt_remove_child_from_uk = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Remove_child_from_UK' AS order_type,
  'n/a' AS adopter,
  remove_child_from_uk AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  remove_child_from_uk > 0;
"""

pydb.create_temp_table(create_adopt_remove_child_from_uk,'adopt_remove_child_from_uk')



#### adopt_remove_child_from_uk validation

In [131]:
adopt_remove_child_from_uk_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_remove_child_from_uk")
adopt_remove_child_from_uk_count

Unnamed: 0,count
0,81


## adopt_other_order_type table
<a name="adopt_other_order_type"></a>

### Create the adopt_other_order_type temp table

In [132]:
create_adopt_other_order_type = f"""
SELECT
  'Application' AS type,
  year,
  quarter,
  'Non-adoption' as application,
  'Other_order_type' AS order_type,
  'n/a' AS adopter,
  other_order_type AS count
FROM
  fcsq.adopt_apps_6_non_adoptions
WHERE
  other_order_type > 0;
"""

pydb.create_temp_table(create_adopt_other_order_type,'adopt_other_order_type')



#### adopt_other_order_type validation

In [133]:
adopt_other_order_type_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_other_order_type")
adopt_other_order_type_count

Unnamed: 0,count
0,589


## adopt_non_adopt_types table
<a name="adopt_non_adopt_types"></a>

### Create the adopt_non_adopt_types temp table

In [134]:
create_adopt_non_adopt_types = f"""
SELECT
  *
FROM
  __temp__.adopt_adoption_placement
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_placement_revoke_or_vary
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_adoption_contact_s26
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_contact_s26_revoke_vary
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_change_surname
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_remove_child_from_uk
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_other_order_type;
"""

pydb.create_temp_table(create_adopt_non_adopt_types,'adopt_non_adopt_types')



#### adopt_non_adopt_types validation

In [135]:
adopt_non_adopt_types_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_non_adopt_types")
adopt_non_adopt_types_count

Unnamed: 0,count
0,86576


## adopt_non_adoptions table
<a name="adopt_non_adoptions"></a>

### Create the adopt_non_adoptions temp table

In [136]:
create_adopt_non_adoptions = f"""
SELECT
  type,
  year,
  quarter,
  application,
  order_type,
  adopter,
  SUM (count) AS count
FROM
  __temp__.adopt_non_adopt_types
GROUP BY
  type,
  year,
  quarter,
  application,
  order_type,
  adopter;
"""

pydb.create_temp_table(create_adopt_non_adoptions,'adopt_non_adoptions')



#### adopt_non_adoptions validation

In [137]:
adopt_non_adoptions_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_non_adoptions")
adopt_non_adoptions_count

Unnamed: 0,count
0,365


## adopt_applications table
<a name="adopt_applications"></a>

### Create the adopt_applications temp table

In [138]:
create_adopt_applications = f"""

SELECT
  type,
  year,
  quarter,
  application,
  order_type,
  adopter_3 AS adopter,
  'n/a' as adopted_child_sex,    
  'n/a' as adopted_child_age,
  count
FROM
  __temp__.adopt_adoption_only
UNION ALL
SELECT
  type,
  year,
  quarter,
  application,
  order_type,
  adopter,
  'n/a' as adopted_child_sex,    
  'n/a' as adopted_child_age,
  count
FROM
  __temp__.adopt_non_adoptions;

"""

pydb.create_temp_table(create_adopt_applications,'adopt_applications')



#### adopt_applications validation

In [139]:
adopt_applications_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_applications")
adopt_applications_count

Unnamed: 0,count
0,966


# Disposals

## adopt_disp_details table
<a name="adopt_disp_details"></a>

### Create the adopt_disp_details temp table

In [140]:
create_adopt_disp_details = f"""
SELECT
  a.*,
  CASE WHEN b.nationality = 2
    THEN 'Foreign'
    ELSE 'N'
  END AS foreign,
  c.event_model_desc
FROM
  fcsq.adopt_disposals5_2011 a
   LEFT JOIN fcsq.adopt_birth_country_lookup b
   ON a.country_of_birth = b.country_of_birth
   LEFT JOIN fcsq.adoption_event_model_desc c
    ON a.event_model = c.event_model;
"""

pydb.create_temp_table(create_adopt_disp_details,'adopt_disp_details')



#### adopt_disp_details validation

In [141]:
adopt_disp_details_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_disp_details")
adopt_disp_details_count

Unnamed: 0,count
0,235246


## adopt_disp_csv_data table
<a name="adopt_disp_csv_data"></a>

### Create the adopt_disp_csv_data temp table

In [142]:
create_adopt_disp_csv_data = f"""
SELECT
  CASE WHEN adoption IN ('Adoption', 'Non-adoption') THEN 'Order granted'
       WHEN adoption = 'No order made' THEN 'No order granted'
     ELSE 'Check'
    END AS type,
  year,
  quarter,
  adoption,
  CASE WHEN adoption = 'Adoption'
         AND event_model = 'A77'
      THEN 'Convention'
       WHEN adoption = 'Adoption'
         AND foreign = 'Foreign'
      THEN 'Foreign'
       WHEN adoption = 'Adoption'
      THEN 'Standard'
     ELSE type
    END AS type_of_adoption,
  event_model_desc,
  CASE WHEN adoption IN ('No order made','Non-adoption')
         THEN 'n/a'
       WHEN adopter_2 IN ('','Other')
         THEN 'Other or not stated'
       WHEN adopter_2 = 'mixed-sex couple'    
         THEN 'M/F couple'
       WHEN adopter_2 = 'same-sex couple'
         THEN 'Same sex couple'
     ELSE adopter_2
    END AS adopter,
  CASE WHEN adoption IN ('No order made','Non-adoption')
         THEN 'n/a'
       WHEN child_sex = 1 THEN 'Male'
       WHEN child_sex = 2 THEN 'Female'
       ELSE 'Unknown'
     END AS adopted_child_sex,
  CASE WHEN adoption IN ('No order made','Non-adoption')
         THEN 'n/a'
       WHEN age_band IN ('<1 year', '10-14 years', '1-4 years', '15-17 years', '5-9 years', 'Unknown')
        THEN age_band
      ELSE 'Other'
   END AS adopted_child_age
 FROM
   __temp__.adopt_disp_details;
"""

pydb.create_temp_table(create_adopt_disp_csv_data,'adopt_disp_csv_data')



#### adopt_disp_csv_data validation

In [143]:
adopt_disp_csv_data_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_disp_csv_data")
adopt_disp_csv_data_count

Unnamed: 0,count
0,235246


## adopt_disp_order_type table
<a name="adopt_disp_order_type"></a>

### Create the adopt_disp_order_type temp table

In [144]:
create_adopt_disp_order_type = f"""
SELECT
  a.*,
  CASE WHEN a.adoption IN ('Adoption', 'Non-adoption')
      THEN type_of_adoption
       WHEN a.adoption = 'No order made'
      THEN event_model_desc
     END AS order_type
FROM
  __temp__.adopt_disp_csv_data a;
"""

pydb.create_temp_table(create_adopt_disp_order_type,'adopt_disp_order_type')



#### adopt_disp_order_type validation

In [145]:
adopt_disp_order_type_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_disp_order_type")
adopt_disp_order_type_count

Unnamed: 0,count
0,235246


## adopt_disposals table
<a name="adopt_disposals"></a>

### Create the adopt_disposals temp table

In [146]:
create_adopt_disposals = f"""
SELECT
  type,
  year,
  quarter,
  adoption,
  order_type,
  adopter,
  adopted_child_sex,
  adopted_child_age,
  COUNT (*) AS count
FROM
  __temp__.adopt_disp_order_type
GROUP BY
  type,
  year,
  quarter,
  adoption,
  order_type,
  adopter,
  adopted_child_sex,
  adopted_child_age;
"""

pydb.create_temp_table(create_adopt_disposals,'adopt_disposals')



#### adopt_disposals validation

In [147]:
adopt_disposals_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_disposals")
adopt_disposals_count

Unnamed: 0,count
0,2986


# Cases

## adopt_application_case_count table
<a name="adopt_application_case_count"></a>

### Create the adopt_application_case_count temp table

In [148]:
adopt_application_case_count_a = f"""
SELECT case_number,
        court, 
        MIN(app_date) AS minofapp_date
FROM fcsq.adopt_application_4 
WHERE year > 2010
GROUP BY case_number,court
ORDER BY 1,2;
"""
pydb.create_temp_table(adopt_application_case_count_a,"adopt_application_case_count_a")

In [149]:
create_adopt_application_case_count_table= f"""
SELECT type,
        year,
        quarter,
        adoption,
        order_type,
        adopter,
        adopted_child_sex,
        adopted_child_age,
        COUNT(*) AS count
FROM(        

SELECT 
  'Cases started' AS type,
  EXTRACT(YEAR FROM minofapp_date) year,

  CASE WHEN  Minofapp_date is null THEN null
        WHEN EXTRACT(MONTH FROM minofapp_date) BETWEEN 1 AND 3 THEN 1
        WHEN EXTRACT(MONTH FROM minofapp_date) BETWEEN 4 AND 6 THEN 2
        WHEN EXTRACT(MONTH FROM minofapp_date) BETWEEN 7 AND 9 THEN 3
        WHEN EXTRACT(MONTH FROM minofapp_date) BETWEEN 10 AND 12 THEN 4
        END quarter,
  
  'n/a' AS adoption,
  'n/a' AS order_type,
  'n/a' AS adopter,
  'n/a' AS adopted_child_sex,
  'n/a' AS adopted_child_age
FROM 
  __temp__.adopt_application_case_count_a
)  
  
GROUP BY
  type,
  year,
  quarter,
  adoption,
  order_type,
  adopter,
  adopted_child_sex,
  adopted_child_age;
"""
#table1 = pydb.start_query_execution_and_wait(create_adopt_application_case_count)
pydb.create_temp_table(create_adopt_application_case_count_table,"adopt_application_case_count_table")


#### adopt_application_case_count validation

In [150]:
adopt_application_case_count_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_application_case_count_table")
adopt_application_case_count_count

Unnamed: 0,count
0,43


## adopt_disposal_case_count table
<a name="adopt_disposal_case_count"></a>

### Create the adopt_disposal_case_count temp table

In [151]:
create_adopt_disposal_case_count = f"""
SELECT type,
        year,
        quarter,
        adoption,
        order_type,
        adopter,
        adopted_child_sex,
        adopted_child_age,
        COUNT(*) AS count
FROM( 

SELECT 
  'Cases closed' as type,
  EXTRACT(YEAR FROM minofreceipt_date) year,

  CASE WHEN  minofreceipt_date IS NULL THEN NULL
        WHEN EXTRACT(Month FROM minofreceipt_date) BETWEEN 1 AND 3 THEN 1
        WHEN EXTRACT(Month FROM minofreceipt_date) BETWEEN 4 AND 6 THEN 2
        WHEN EXTRACT(Month FROM minofreceipt_date) BETWEEN 7 AND 9 THEN 3
        WHEN EXTRACT(Month FROM minofreceipt_date) BETWEEN 10 AND 12 THEN 4
        END quarter,
  'n/a' AS adoption,
  'n/a' AS order_type,
  'n/a' AS adopter,
  'n/a' AS adopted_child_sex,
  'n/a' AS adopted_child_age
FROM 
  __temp__.adopt_disposal_cases
WHERE
  (EXTRACT(YEAR FROM minofreceipt_date)) > 2010
)  
  
GROUP BY
  type,
  year,
  quarter,
  adoption,
  order_type,
  adopter,
  adopted_child_sex,
  adopted_child_age;
"""

pydb.create_temp_table(create_adopt_disposal_case_count,'adopt_disposal_case_count')



#### adopt_disposal_case_count validation

In [152]:
adopt_disposal_case_count_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_disposal_case_count")
adopt_disposal_case_count_count

Unnamed: 0,count
0,43


## adopt_csv table
<a name="adopt_csv"></a>

### Create the adopt_csv temp table

In [153]:
create_adopt_csv = f"""
SELECT
  *
FROM
  __temp__.adopt_applications
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_disposals
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_application_case_count_table
UNION ALL
SELECT
  *
FROM
  __temp__.adopt_disposal_case_count;
"""

pydb.create_temp_table(create_adopt_csv,'adopt_csv')



In [154]:
pydb.read_sql_query("SELECT distinct type from __temp__.adopt_csv")

Unnamed: 0,type
0,Cases closed
1,Order granted
2,Cases started
3,Application
4,No order granted


#### adopt_csv validation

In [155]:
pydb.read_sql_query("select count(*) as count from __temp__.adopt_csv")

Unnamed: 0,count
0,4038


## adopt_csv2 table
<a name="adopt_csv2"></a>

### Create the adopt_csv2 table csv

In [159]:
create_adopt_csv2 = f"""
SELECT
  *
FROM
  __temp__.adopt_csv
WHERE
  year > 2010
  AND NOT (year = 2021 AND quarter = 3)
ORDER BY
  type,
  application,
  order_type,
  year,
  quarter,
  adopter,
  adopted_child_sex,
  adopted_child_age;
"""
temp_table = pydb.create_temp_table(create_adopt_csv2,'adopt_csv2')
table = pydb.read_sql_query(create_adopt_csv2)
table.to_csv(path_or_buf = '~/FCSQ_data/adopt_csv2.csv')
table

Unnamed: 0,type,year,quarter,application,order_type,adopter,adopted_child_sex,adopted_child_age,count
0,Application,2011,1,Adoption,Convention,M/F couple,,,5
1,Application,2011,1,Adoption,Convention,Sole applicant,,,3
2,Application,2011,2,Adoption,Convention,M/F couple,,,3
3,Application,2011,3,Adoption,Convention,M/F couple,,,1
4,Application,2011,4,Adoption,Convention,M/F couple,,,1
...,...,...,...,...,...,...,...,...,...
3652,Order granted,2016,3,Non-adoption,Remove_child_from_UK,,,,2
3653,Order granted,2016,4,Non-adoption,Remove_child_from_UK,,,,4
3654,Order granted,2017,3,Non-adoption,Remove_child_from_UK,,,,4
3655,Order granted,2017,4,Non-adoption,Remove_child_from_UK,,,,2


In [160]:
adopt_csv2_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_csv2")
adopt_csv2_count

Unnamed: 0,count
0,3657
