In [None]:
# Adoption extraction tables

In [None]:
## Import packages and set options

In [None]:
import pandas as pd #for the data structures to store and manipulate tables
import pydbtools as pydb # see https://github.com/moj-analytical-services/pydbtools
import boto3 #for working with AWS

# Widen pandas' display limits so wide query results stay readable in the notebook.
for option_name, option_value in (
    ('display.max_columns', 100),
    ('display.width', 900),
    ('display.max_colwidth', 200),
):
    pd.set_option(option_name, option_value)

In [None]:
## Define some variables to be used throughout the notebook

In [None]:
# --- Athena databases ---
# Source database; interpolated into the FROM/JOIN clauses of the queries below.
database = "familyman_dev_v2"
# NOTE(review): database_derived and fcsq_database are not referenced by the cells
# visible here (the queries hard-code "fcsq") - confirm whether they are still needed.
database_derived = "familyman_derived_dev_v1"
fcsq_database = "fcsq"

# --- Snapshot ---
# Compared against mojap_snapshot_date in the adoptions_parties query.
snapshot_date = "2021-08-19"

# --- S3 ---
# Bucket handle used to clear out a table's files before the table is rebuilt.
s3 = boto3.resource('s3')
bucket = s3.Bucket('alpha-family-data')

In [None]:
## Drop the adoptions_parties table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_parties = "\nDROP TABLE IF EXISTS fcsq.ADOPTIONS_PARTIES;\n"
pydb.read_sql_query(drop_adoptions_parties)

# Delete the files left under the table's S3 prefix by any previous run.
stale_parties_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_Parties/"
)
stale_parties_objects.delete()

In [None]:
## Create the adoptions_parties table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_PARTIES as Parquet under the
# Adoptions_Parties S3 prefix.
# It joins parties -> roles -> role_fields and keeps two role/field pairings:
#   * ROLE_MODEL 'APLZ'  with FIELD_MODEL 'APLZ_RTC'
#   * ROLE_MODEL 'CHLDZ' with FIELD_MODEL 'CHLDZ_CP'
# The field value is exposed as Rel_to_child. All three source tables are
# restricted to the snapshot given by snapshot_date.
# NOTE(review): APLZ/CHLDZ presumably denote applicant/child roles - confirm.
create_adoptions_parties = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_PARTIES
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_Parties') AS
SELECT R.CASE_NUMBER,
  R.ROLE,
  R.ROLE_MODEL,
  R.REPRESENTATIVE_ROLE AS Representative,
  F.FIELD_MODEL,
  F.VALUE AS Rel_to_child,
  R.PARTY,
  P.DOB,
  P.GENDER,
  P.MOJAP_SNAPSHOT_DATE
FROM {database}.parties P
INNER JOIN {database}.roles R
ON P.PARTY = R.PARTY
INNER JOIN {database}.role_fields F
ON R.ROLE               = F.ROLE
WHERE ((R.ROLE_MODEL     = 'APLZ'
AND F.FIELD_MODEL = 'APLZ_RTC')
OR (R.ROLE_MODEL        = 'CHLDZ'
AND F.FIELD_MODEL = 'CHLDZ_CP'))
AND R.mojap_snapshot_date = date '{snapshot_date}'
AND F.mojap_snapshot_date = date '{snapshot_date}'
AND P.mojap_snapshot_date = date '{snapshot_date}'
"""

# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_parties)

# Sanity check: count the rows in the fcsq.adoptions_parties table created above
# and show the result as the cell output.
row_count_query = "select count(*) as count from fcsq.adoptions_parties"
temp_table = pydb.read_sql_query(row_count_query)
temp_table

In [None]:
## Drop the adoptions_applications_u21 table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_applications_u21 = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_APPLICATIONS_U21;"
pydb.read_sql_query(drop_adoptions_applications_u21)

# Delete the files left under the table's S3 prefix by any previous run.
stale_u21_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_applications_u21/"
)
stale_u21_objects.delete()

In [None]:
## Create the adoptions_applications_u21 table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_APPLICATIONS_U21 as Parquet.
# One row per U21 event, with three of its event_fields pivoted into columns:
#   U21_1 -> App_Type, U21_2 -> Date_of_issue (parsed as %Y-%m-%d), U21_HC -> High_court.
# Receipt_date2 is the receipt date, falling back to the entry date when the
# receipt date is NULL.
# NOTE(review): unlike the adoptions_parties query, nothing here filters on
# mojap_snapshot_date - confirm whether events/event_fields hold several snapshots.
create_adoptions_applications_u21 = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_APPLICATIONS_U21
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_applications_u21') AS
SELECT E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  COALESCE(E.RECEIPT_DATE, E.ENTRY_DATE) AS Receipt_date2,
  E.EVENT_MODEL,
  F.VALUE  AS App_Type,
  date_parse(F1.VALUE, '%Y-%m-%d') AS Date_of_issue,
  F2.VALUE AS High_court
FROM {database}.events E
INNER JOIN {database}.event_fields F2
ON E.EVENT = F2.EVENT
INNER JOIN {database}.event_fields F1
ON E.EVENT = F1.EVENT
INNER JOIN {database}.event_fields F
ON E.EVENT               = F.EVENT
WHERE (E.EVENT_MODEL      = 'U21'
AND F.FIELD_MODEL  = 'U21_1'
AND F1.FIELD_MODEL = 'U21_2'
AND F2.FIELD_MODEL = 'U21_HC');
"""

# Changes made to the original code (not including variable name changes):
#
# * decode is not supported, so the receipt-date fallback is written with COALESCE.
#   (A simple `CASE x WHEN NULL THEN ...` must not be used for this: it compares
#   with `=`, and `NULL = NULL` is never true, so the fallback would never fire.)
# * TO_DATE is not supported, so date_parse is used instead.
#   (date_parse currently gives the time as well; this is still to be changed.)

# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_applications_u21)


In [None]:
## Drop the adoptions_applications_G50 table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_applications_G50 = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_APPLICATIONS_G50;"
pydb.read_sql_query(drop_adoptions_applications_G50)

# Delete the files left under the table's S3 prefix by any previous run.
stale_g50_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_applications_G50/"
)
stale_g50_objects.delete()

In [None]:
## Create the adoptions_applications_G50 table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_APPLICATIONS_G50 as Parquet.
# Keeps events whose G50_AT field value contains 'RUK' and/or 'CCS' and emits
# the same column layout as ADOPTIONS_APPLICATIONS_U21, so that the two tables
# can be combined with UNION ALL.
# Receipt_date2 and Date_of_issue are both the receipt date, falling back to the
# entry date when the receipt date is NULL. COALESCE is used because a simple
# `CASE x WHEN NULL THEN ...` never matches (NULL = NULL is not true).
# High_court is the constant 'N'.
# NOTE(review): unlike the adoptions_parties query, nothing here filters on
# mojap_snapshot_date - confirm whether events/event_fields hold several snapshots.
create_adoptions_applications_G50 = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_APPLICATIONS_G50
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_applications_G50') AS
SELECT
  E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  COALESCE(E.RECEIPT_DATE, E.ENTRY_DATE) AS Receipt_date2,
  E.EVENT_MODEL,
  CASE WHEN F.VALUE like '%RUK%' and F.VALUE like '%CCS%' THEN 'RUK,CCS'
       WHEN F.VALUE like '%RUK%' THEN 'RUK'
       WHEN F.VALUE like '%CCS%' THEN 'CCS'
        END AS App_Type, /*Have done it like this so that we do not include non adoption app types*/
  COALESCE(E.RECEIPT_DATE, E.ENTRY_DATE) AS Date_of_issue,
  'N' AS High_court
FROM
  {database}.events E
  INNER JOIN {database}.event_fields F
    ON E.EVENT = F.EVENT
WHERE
  F.FIELD_MODEL = 'G50_AT'
  AND (F.VALUE like '%RUK%'
   OR F.VALUE like '%CCS%');
"""

# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_applications_G50)

In [None]:
## Drop the adoptions_applications table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_applications = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_APPLICATIONS;"
pydb.read_sql_query(drop_adoptions_applications)

# Delete the files left under the table's S3 prefix by any previous run.
stale_applications_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_applications/"
)
stale_applications_objects.delete()

In [None]:
## Create the adoptions_applications table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_APPLICATIONS as Parquet by stacking
# the U21 and G50 application tables with UNION ALL (duplicates are kept).
# Both halves use SELECT *, so the result relies on the two source tables
# exposing their columns in the same order and with compatible types.
# Case number 'CV11Z00105' is excluded from both halves.
# NOTE(review): the reason for excluding that case is not recorded here - confirm.
create_adoptions_applications = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_APPLICATIONS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_applications') AS
SELECT
  *
FROM
  fcsq.ADOPTIONS_APPLICATIONS_U21
WHERE case_number <> 'CV11Z00105'
UNION ALL
SELECT
  *
FROM
  fcsq.ADOPTIONS_APPLICATIONS_G50
WHERE case_number <> 'CV11Z00105';
"""

# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_applications)

In [None]:
## Drop the adoptions_cases table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_cases = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_CASES;"
pydb.read_sql_query(drop_adoptions_cases)

# Delete the files left under the table's S3 prefix by any previous run.
stale_cases_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_cases/"
)
stale_cases_objects.delete()

In [None]:
## Create the adoptions_cases table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_CASES as Parquet.
# Self-joins case_fields on CASE_NUMBER to pivot three fields into columns:
#   FM3A_AC -> Contested, FM3A_DOI -> Case_issue_date, FM3A_AT -> Case_app_type.
# Case_issue_date2 repeats Case_issue_date, except that case 'BM08Z09028' is
# given the fixed value '2008-09-19'.
# Case number 'CV11Z00105' is excluded.
# Because the joins are INNER, a case missing any of the three fields is dropped.
# NOTE(review): the reasons for the hard-coded override and the excluded case
# are not recorded here - confirm.
create_adoptions_cases = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_CASES
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_cases') AS
SELECT F.CASE_NUMBER,
  F.VALUE AS Contested,
  F1.VALUE AS Case_issue_date,
  F2.VALUE AS Case_app_type,
  CASE F.CASE_NUMBER
      WHEN 'BM08Z09028' THEN '2008-09-19'
      ELSE F1.VALUE
  END AS Case_issue_date2
FROM {database}.case_fields F
INNER JOIN {database}.case_fields F2
ON F.CASE_NUMBER = F2.CASE_NUMBER
INNER JOIN {database}.case_fields F1
ON F.CASE_NUMBER    = F1.CASE_NUMBER
WHERE F.FIELD_MODEL = 'FM3A_AC'
AND F1.FIELD_MODEL  = 'FM3A_DOI'
AND F2.FIELD_MODEL  = 'FM3A_AT'
AND F.case_number <> 'CV11Z00105';
"""
# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_cases)

In [None]:
## Drop the adoptions_disposals table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
drop_adoptions_disposals = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_DISPOSALS;"
pydb.read_sql_query(drop_adoptions_disposals)

# Delete the files left under the table's S3 prefix by any previous run.
stale_disposals_objects = bucket.objects.filter(
    Prefix="fcsq_processing/Adoption/Adoptions_disposals/"
)
stale_disposals_objects.delete()

In [None]:
## Create the adoptions_disposals table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_DISPOSALS as Parquet.
# Keeps events whose EVENT_MODEL is in the list below and whose ERROR flag is
# 'N', excluding case 'CV11Z00105'.
# Receipt_date2 is the receipt date, falling back to the entry date when the
# receipt date is NULL. COALESCE is used because a simple
# `CASE x WHEN NULL THEN ...` never matches (NULL = NULL is not true).
# The event-model filter is a single IN list. It selects the same rows as the
# earlier three OR-ed groups, which repeated ERROR = 'N' and listed A12/A13/A15
# twice.
# NOTE(review): unlike the adoptions_parties query, nothing here filters on
# mojap_snapshot_date - confirm whether events holds several snapshots.
create_adoptions_disposals = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSALS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposals') AS
SELECT E.CASE_NUMBER,
  E.EVENT,
  E.RECEIPT_DATE,
  E.ENTRY_DATE,
  COALESCE(E.RECEIPT_DATE, E.ENTRY_DATE) AS Receipt_date2,
  E.EVENT_MODEL
FROM {database}.events E
WHERE E.EVENT_MODEL IN ('A70', 'A71', 'A72', 'A73', 'A74', 'A75', 'A76', 'A77', 'A78', 'A79', 'A80', 'A81',
                        'A12', 'A13', 'A15',
                        'G63', 'ORDREF', 'ORDNOM')
AND E.ERROR = 'N'
AND E.case_number <> 'CV11Z00105';
"""
# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_disposals)

In [None]:
## Drop the adoptions_disposal_fields table if it already exists and remove its data from the S3 bucket

In [None]:
# Remove the table definition from Athena (a no-op if the table does not exist).
# The name must match the table created below (ADOPTIONS_DISPOSAL_FIELDS);
# dropping a differently spelled name would leave the real table in place and
# turn the following CREATE TABLE IF NOT EXISTS into a silent no-op.
drop_adoptions_disposal_fields = "DROP TABLE IF EXISTS fcsq.ADOPTIONS_DISPOSAL_FIELDS;"
pydb.read_sql_query(drop_adoptions_disposal_fields)

# Delete the files left under the table's S3 prefix by any previous run, as the
# other drop cells do, so the CTAS statement writes into an empty location.
bucket.objects.filter(Prefix="fcsq_processing/Adoption/Adoptions_disposal_fields/").delete()

In [None]:
## Create the adoptions_disposal_fields table in Athena

In [None]:
# CTAS statement that builds fcsq.ADOPTIONS_DISPOSAL_FIELDS as Parquet.
# Copies EVENT, FIELD_MODEL and VALUE from event_fields for every field whose
# FIELD_MODEL starts with 'A1' or 'A7', or is one of G63_1, ORDNOM_5, ORDREF_5,
# A80_4 or A81_5.
# NOTE(review): the LIKE 'A1%' pattern matches any model beginning with A1, not
# only A12/A13/A15 - confirm that this breadth is intended.
create_adoptions_disposal_fields = f"""
CREATE TABLE IF NOT EXISTS fcsq.ADOPTIONS_DISPOSAL_FIELDS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Adoption/Adoptions_disposal_fields') AS
SELECT EVENT,
FIELD_MODEL,
VALUE
FROM {database}.event_fields
WHERE (FIELD_MODEL LIKE 'A1%'
  OR FIELD_MODEL LIKE 'A7%'
  OR FIELD_MODEL IN ('G63_1', 'ORDNOM_5', 'ORDREF_5', 'A80_4', 'A81_5'));
"""
# Submit the CTAS statement to Athena.
pydb.read_sql_query(create_adoptions_disposal_fields)