In [1]:
import pandas as pd  # for the data structures to store and manipulate tables
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools
import boto3  # for working with AWS

# few things for viewing dataframes better
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

database = "familyman_dev_v2"
snapshot_date = "2021-08-19"
database_derived = "familyman_derived_dev_v1"

fcsq_database = "fcsq"

s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

In [4]:
create_adopt_case_data_v1 = f"""
SELECT T1.YEAR,
            T1.QUARTER,
            T1.COURT,
            T1.CASE_NUMBER,
            T1.APP_TYPE,
            T1.CASE_APP_TYPE,
            T1.ADOPTION,
            T1.HIGH_COURT,
            T1.CONTESTED,
            T1.NUMBER_APPLICANTS,
            T1.ADOPTER_TYPE,
            date_format(T1.APP_DATE,'%d-%m-%Y') AS APP_DATE2
    FROM fcsq.adopt_apps_6_adoptions_only AS t1
    ORDER BY case_number, app_date2, court;
    
"""

create_adopt_case_data_v2 = f"""
SELECT *,(case when row_number() over (partition by case_number order by 
        APP_DATE2) = 1 then 1 else 0 end) as case_number_id
FROM __temp__.adopt_case_data_v1
"""

create_adopt_case_data_v3 = f"""
SELECT *
FROM __temp__.adopt_case_data_v2
where case_number_id = 1 and year > 2010;
"""
pydb.create_temp_table(create_adopt_case_data_v1,'adopt_case_data_v1')

pydb.create_temp_table(create_adopt_case_data_v2,'adopt_case_data_v2')
pydb.create_temp_table(create_adopt_case_data_v3,'adopt_case_data_v3')


## Applicant_Info table

### Create the Applicant_Info table in Athena

In [3]:
create_Adopt_Applicant_Info = f"""
SELECT DISTINCT
   {database}.roles.ROLE, 
   {database}.roles.REPRESENTATIVE_ROLE, 
   {database}.roles.ROLE_MODEL, 
   {database}.roles.PARTY, 
   {database}.roles.CASE_NUMBER, 
   {database}.parties.PERSON_GIVEN_FIRST_NAME, 
   {database}.parties.PERSON_FAMILY_NAME, 
   {database}.parties.COMPANY, 
   {database}.addresses.POSTCODE, 
   {database}.parties.GENDER, 
   {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
  LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    (((({database}.roles.ROLE_MODEL)= 'APLZ') AND (({database}.roles.DELETE_FLAG)= 'N')) 
    OR ((({database}.roles.ROLE_MODEL)= 'APLA') AND (({database}.roles.DELETE_FLAG)= 'N'))
    OR ((({database}.roles.ROLE_MODEL)= 'APLC') AND (({database}.roles.DELETE_FLAG)= 'N')))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.create_temp_table(create_Adopt_Applicant_Info,'Adopt_Applicant_Info')



#### Applicant_Info validation

In [2]:
Adopt_Applicant_Info_count = pydb.read_sql_query("select count(*) from __temp__.Adopt_Applicant_Info")
Adopt_Applicant_Info_count

Unnamed: 0,_col0
0,1986081


## adopt_respondent_info table

### Create the adopt_respondent_info table in Athena

In [5]:
create_adopt_respondent_info = f"""
CREATE TABLE IF NOT EXISTS __temp__.adopt_respondent_info
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/adopt_respondent_info') AS
SELECT DISTINCT
  {database}.roles.ROLE, 
  {database}.roles.REPRESENTATIVE_ROLE, 
  {database}.roles.ROLE_MODEL, 
  {database}.roles.PARTY, 
  {database}.roles.CASE_NUMBER, 
  {database}.parties.GENDER, 
  {database}.addresses.POSTCODE, 
  {database}.roles.DELETE_FLAG
FROM 
  ({database}.roles INNER JOIN {database}.parties ON {database}.roles.PARTY = {database}.parties.PARTY) 
    LEFT JOIN {database}.addresses ON {database}.roles.ADDRESS = {database}.addresses.ADDRESS
WHERE 
    ((({database}.roles.ROLE_MODEL)='RSPA') AND (({database}.roles.DELETE_FLAG)='N')) 
    OR ((({database}.roles.ROLE_MODEL)='RSPZ') AND (({database}.roles.DELETE_FLAG)='N'))
    OR ((({database}.roles.ROLE_MODEL)='RSPC') AND (({database}.roles.DELETE_FLAG)='N'))
    AND {database}.roles.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.parties.mojap_snapshot_date = date '{snapshot_date}'
    AND {database}.addresses.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.read_sql_query(create_adopt_respondent_info)



#### adopt_respondent_info validation

In [3]:
adopt_respondent_info_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_respondent_info")
adopt_respondent_info_count

Unnamed: 0,count
0,2382225


## applicants 3 table

In [7]:
create_adopt_applicants_1 = f"""
SELECT 	T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM __temp__.adopt_applicant_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.read_sql_query(create_adopt_applicants_1)
pydb.create_temp_table(create_adopt_applicants_1,'adopt_applicants_1')



create_adopt_applicants_2 = f"""
SELECT DISTINCT 
    T1.case_number,
    T1.party,
    max(T1.representative_role) as Rep_Role,
    max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_applicants_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_applicants_2,'adopt_applicants_2')
#pydb.read_sql_query(create_adopt_applicants_2)


create_adopt_applicants_3= f"""
SELECT t1.case_number,
    t1.party as App_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as App_Rep_Cat
    
    from __temp__.adopt_applicants_2 AS t1;


"""
#pydb.read_sql_query(create_adopt_applicants_3)
pydb.create_temp_table(create_adopt_applicants_3,'adopt_applicants_3')


In [8]:
pydb.read_sql_query("SELECT count(*) as count from __temp__.adopt_applicants_3")

Unnamed: 0,count
0,1986062


In [9]:
create_adopt_respondents_1 = f"""
SELECT T1.role,
    T1.representative_role,
    T1.role_model,
    T1.party,
    T1.case_number,
    T1.gender,
    case when cast(gender as varchar(1)) = '1' then 'Male'
    when cast(gender as varchar(1)) = '2' then 'Female'
    else 'Unknown' end as Gender_Decode

    FROM __temp__.adopt_respondent_info AS t1
    ORDER BY t1.Case_Number;
"""
#pydb.read_sql_query(create_adopt_respondents_1)
pydb.create_temp_table(create_adopt_respondents_1,'adopt_respondents_1')



create_adopt_respondents_2 = f"""
    SELECT DISTINCT T1.case_number,
        T1.party,
        max(T1.representative_role) as Rep_Role,
        max(T1.gender_decode) as Gender_Max
    from __temp__.adopt_respondents_1 AS t1
    group by Case_number, party;
"""

pydb.create_temp_table(create_adopt_respondents_2,'adopt_respondents_2')
#pydb.read_sql_query(create_adopt_respondents_2)


create_adopt_respondents_3= f"""
SELECT t1.case_number,
    t1.party as Resp_Party_ID,
    t1.Rep_Role,
    t1.Gender_Max,
    case when t1.Rep_Role IS NULL then 'N'
    when t1.Rep_Role IS NOT NULL then 'Y'
    End as REPRESENTATION,
    case when Rep_Role IS NULL AND Gender_Max = 'Female' then 'Unrep_Female'
    when Rep_Role IS NULL AND Gender_Max = 'Male' then 'Unrep_Male'
    when Rep_Role IS NULL AND Gender_Max = 'Unknown' then 'Unrep_Unknown'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Female' then 'Rep_Female'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Male' then 'Rep_Male'
    when Rep_Role IS NOT NULL AND Gender_Max = 'Unknown' then 'Rep_Unknown'
    else '' end as Resp_Rep_Cat
    
    from __temp__.adopt_respondents_2 AS t1;


"""
#pydb.read_sql_query(create_adopt_respondents_3)
pydb.create_temp_table(create_adopt_respondents_3,'adopt_respondents_3')

In [5]:
adopt_app_rep_final = f"""
SELECT t1.YEAR, 
    t1.QUARTER,
    t1.CASE_NUMBER, 
    t1.Court,
    t2.App_Party_ID,
    t2.Representation,
    t2.Gender_Max as App_Gender,
    t2.App_Rep_Cat          
FROM __temp__.ADOPT_CASE_DATA_v3 t1
    LEFT JOIN __temp__.ADOPT_APPLICANTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);

"""

pydb.create_temp_table(adopt_app_rep_final,'adopt_app_rep_final')


In [6]:
adopt_app_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_app_rep_final"
pydb.read_sql_query(adopt_app_rep_final_check)

Unnamed: 0,Count
0,101789


In [7]:
adopt_resp_rep_final = f"""
   SELECT t1.YEAR, 
        t1.QUARTER,
        t1.CASE_NUMBER, 
        t1.Court,
          t2.Resp_Party_ID,
          t2.Representation,
          t2.Gender_Max as Resp_Gender,
          t2.Resp_Rep_Cat
          
      FROM __temp__.ADOPT_CASE_DATA_v3 t1
           LEFT JOIN __temp__.ADOPT_RESPONDENTS_3 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER);
"""

pydb.create_temp_table(adopt_resp_rep_final,'adopt_resp_rep_final')

In [8]:
adopt_resp_rep_final_check = "SELECT COUNT(*) as Count from __temp__.adopt_resp_rep_final"
pydb.read_sql_query(adopt_resp_rep_final_check)

Unnamed: 0,Count
0,101199


## Adopt_Hearing_Events table

### Create the Adopt_Hearing_Events table in Athena

In [12]:
create_Adopt_Hearing_Events = f"""
SELECT {database}.hearings.EVENT,
  {database}.hearings.VACATED_FLAG,
  {database}.hearings.HEARING_TYPE,
  {database}.hearings.HEARING_DATE,
  {database}.events.RECEIPT_DATE,
  {database}.events.ERROR,
  {database}.events.CASE_NUMBER,
  {database}.events.EVENT_MODEL
FROM {database}.hearings
INNER JOIN {database}.events
ON {database}.hearings.EVENT            = {database}.events.EVENT
WHERE {database}.hearings.VACATED_FLAG IS NULL
AND {database}.events.ERROR             = 'N'
AND HEARING_DATE > date_parse('31-12-2009 00:00:00', '%d-%m-%Y %H:%i:%s')
AND (substring(case_number,5,1)='A' OR substring(case_number,5,1)='Z' OR substring(case_number,5,1)='C')
AND {database}.hearings.mojap_snapshot_date = date '{snapshot_date}' and {database}.events.mojap_snapshot_date = date '{snapshot_date}';
"""

pydb.create_temp_table(create_Adopt_Hearing_Events,'Adopt_Hearing_Events')



#### Adopt_Hearing_Events validation

In [13]:
Adopt_Hearing_Events_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Hearing_Events")
Adopt_Hearing_Events_count

Unnamed: 0,count
0,1974912


## Adopt_Hearings_Cases table

### Create the Adopt_Hearings_Cases table in Athena

In [14]:
"""
Equivalent to Hearings_Adopt_V3
"""

create_Adopt_Hearings_Cases = f"""
CREATE TABLE IF NOT EXISTS __temp__.Adopt_Hearings_Cases
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Adopt_Hearings_Cases') AS
select t1.case_number,
    t1.error,
    t1.event,
    t1.event_model,
    t1.hearing_date,
    t1.hearing_type,
    t1.receipt_date,
    t1.vacated_flag,
    substring(Case_Number,5,1) AS Case_Type
    from __temp__.Adopt_Hearing_Events AS t1
    where t1.event_model in ('A8', 'A90', 'A91', 'G60')
    order by t1.case_number, t1.receipt_date;
"""

pydb.read_sql_query(create_Adopt_Hearings_Cases)

create_Adopt_Hearings_Cases_v2 = f"""
SELECT *,
(case when row_number() over (partition by Case_Number order by receipt_date) = 1 then 1 else 0 end) as Case_Number_ID
FROM __temp__.Adopt_Hearings_Cases
"""

pydb.create_temp_table(create_Adopt_Hearings_Cases_v2,'adopt_hearings_cases_v2')
pydb.read_sql_query(create_Adopt_Hearings_Cases_v2)


Unnamed: 0,case_number,error,event,event_model,hearing_date,hearing_type,receipt_date,vacated_flag,case_type,Case_Number_ID
0,AB17Z00008,N,10200182554,G60,2017-08-17,First directions appt,2017-07-13,,Z,1
1,AB17Z00008,N,10200182673,A91,2017-08-17,,2017-07-28,,Z,0
2,AB17Z00008,N,10200182674,A91,2017-08-17,,2017-07-28,,Z,0
3,AB17Z00008,N,10200182672,A91,2017-08-17,,2017-07-28,,Z,0
4,AB17Z00008,N,10200188056,G60,2017-09-19,Directions,2017-09-08,,Z,0
...,...,...,...,...,...,...,...,...,...,...
323510,ZW21Z00039,N,33200806477,G60,2021-09-15,Other,2021-07-02,,Z,0
323511,ZW21Z00050,N,33200815465,G60,2021-07-28,Other,2021-06-30,,Z,1
323512,ZW21Z00050,N,33200815468,G60,2021-09-22,Full,2021-06-30,,Z,0
323513,ZW21Z00053,N,33200811280,G60,2021-11-09,Other,2021-07-14,,Z,1


#### Adopt_Hearings_Cases validation

In [15]:
Adopt_Hearings_Cases_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Hearings_Cases")
Adopt_Hearings_Cases_count

Unnamed: 0,count
0,323515


## Hearing_Adopt_Applicants table

### Create the Hearing_Adopt_Applicants table in Athena

In [17]:
create_Hearing_Adopt_Applicants = f"""
SELECT t1.*,
t2.Case_Number_ID AS Hearing_Count
FROM __temp__.ADOPT_APP_REP_FINAL t1
LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
where t2.Case_Number_ID > 0;
"""

pydb.create_temp_table(create_Hearing_Adopt_Applicants,'Hearing_Adopt_Applicants')



#### Hearing_Adopt_Applicants validation

In [18]:
Hearing_Adopt_Applicants_count = pydb.read_sql_query("select count(*) as count from __temp__.Hearing_Adopt_Applicants")
Hearing_Adopt_Applicants_count

Unnamed: 0,count
0,95784


## Hearing_Adopt_Respondents table

### Create the Hearing_Adopt_Respondents table in Athena

In [19]:
create_Hearing_Adopt_Respondents = f"""
SELECT t1.*,
    t2.Case_Number_ID AS Hearing_Count
    FROM __temp__.ADOPT_RESP_REP_FINAL t1
    LEFT JOIN __temp__.Adopt_Hearings_Cases_v2 t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
    where t2.Case_Number_ID > 0;
"""

pydb.create_temp_table(create_Hearing_Adopt_Respondents,'Hearing_Adopt_Respondents')



#### Hearing_Adopt_Respondents validation

In [20]:
Hearing_Adopt_Respondents_count = pydb.read_sql_query("select count(*) as count from __temp__.Hearing_Adopt_Respondents")
Hearing_Adopt_Respondents_count

Unnamed: 0,count
0,94991


## Adopt_App table

### Create the Adopt_App table in Athena

In [21]:
create_Adopt_App = f"""
CREATE TABLE IF NOT EXISTS __temp__.Adopt_App
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Adopt_App') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Applicant' AS PARTY,
   App_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  __temp__.HEARING_ADOPT_APPLICANTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  App_Gender,
  Representation;
"""

pydb.read_sql_query(create_Adopt_App)



#### Adopt_App validation

In [22]:
Adopt_App_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_App")
Adopt_App_count

Unnamed: 0,count
0,221


## Adopt_resp table

### Create the Adopt_resp table in Athena

In [23]:
create_Adopt_resp = f"""
CREATE TABLE IF NOT EXISTS __temp__.Adopt_resp
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Adopt_resp') AS
SELECT
  'Adoption' AS CASE_TYPE,
  Year,
  Quarter,
  'Party' AS Category,
  'Respondent' AS PARTY,
  Resp_Gender AS Gender,
  Representation,
  Count (*) AS Count
FROM
  __temp__.HEARING_ADOPT_RESPONDENTS
WHERE 
  Representation <> '' /*A very small number of cases from 2011/12 look into whether these should be recoded as N (gender is also blank)*/
GROUP BY
  'Adoption',
  Year,
  Quarter,
  'Party',
  'Applicant',
  Resp_Gender,
  Representation;
"""

pydb.read_sql_query(create_Adopt_resp)



#### Adopt_resp validation

In [24]:
Adopt_resp_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_resp")
Adopt_resp_count

Unnamed: 0,count
0,235


## adopt_case table

### Create the adopt_case table in Athena

In [25]:
create_adopt_case = f"""
CREATE TABLE IF NOT EXISTS __temp__.Adopt_case
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Adopt_case') AS
SELECT *,
    Count(*) as Count FROM
    (SELECT
        'Adoption' AS CASE_TYPE,
        Year,
        Quarter,
        'Cases' AS Category,
        'N/A' AS PARTY,
        'N/A' AS Gender,
        'N/A' AS Representation
    FROM
      __temp__.adopt_case_data_v3)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.read_sql_query(create_adopt_case)



#### adopt_case validation

In [26]:
adopt_case_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_case")
adopt_case_count

Unnamed: 0,count
0,43


## Adopt_Case_Hearings table

### Create the Adopt_Case_Hearings table in Athena

In [27]:
create_hearing_adopt_case =f"""
SELECT DISTINCT Year, Quarter, Case_Number
FROM __temp__.HEARING_ADOPT_Applicants;
"""

pydb.create_temp_table(create_hearing_adopt_case,'hearing_adopt_case')



create_Adopt_Case_Hearings = f"""
CREATE TABLE IF NOT EXISTS __temp__.Adopt_Case_Hearings
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Adopt_Case_Hearings') AS
SELECT *, Count(*) as Count FROM
    (SELECT
      'Adoption' AS CASE_TYPE,
      Year,
      Quarter,
      'Cases with a hearing' AS Category,
      'N/A' AS PARTY,
      'N/A' AS Gender,
      'N/A' AS Representation
    FROM
      __temp__.Hearing_ADOPT_Case)
GROUP BY
  CASE_TYPE,
  Year,
  Quarter,
  Category,
  PARTY,
  Gender,
  Representation;
"""

pydb.read_sql_query(create_Adopt_Case_Hearings)



#### Adopt_Case_Hearings validation

In [28]:
Adopt_Case_Hearings_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Case_Hearings")
Adopt_Case_Hearings_count

Unnamed: 0,count
0,43


## Adoption table

### Create the Adoption table in Athena

In [29]:
create_Adoption = f"""
SELECT
  *
FROM
 __temp__.ADOPT_APP
UNION ALL
SELECT
  *
FROM
  __temp__.ADOPT_RESP
UNION ALL
SELECT
  *
FROM
  __temp__.ADOPT_CASE
UNION ALL
SELECT
  *
FROM
  __temp__.ADOPT_CASE_HEARINGS;
"""

pydb.create_temp_table(create_Adoption,'Adoption')



#### Adoption validation

In [59]:
Adoption_count = pydb.read_sql_query("select count(*) from __temp__.Adoption")
Adoption_count

Unnamed: 0,_col0
0,542


## Applicant_representation table

### Create the Applicant_representation table in Athena

In [31]:
create_applicants_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM __temp__.Adopt_Applicant_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_applicants_1,'applicants_1')

create_applicants_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.Applicants_1 t1;
"""
pydb.create_temp_table(create_applicants_2,'applicants_2')

create_applicants_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.Applicants_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_applicants_3,'applicants_3')

create_Applicant_representation = f"""
CREATE TABLE IF NOT EXISTS __temp__.Applicant_representation
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/__temp___processing/Adoption/Applicant_representation') AS
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as App_Rep_Cat
FROM __temp__.Applicants_3 t1;
"""

pydb.read_sql_query(create_Applicant_representation)



#### Applicant_representation validation

In [7]:
Applicant_representation_count = pydb.read_sql_query("select count(*) as count from __temp__.Applicant_representation")
Applicant_representation_count

Unnamed: 0,case_number,countofparty,sumofrep_ind,app_rep_cat
0,FD10Z00385,1,1,All
1,MT14Z00153,1,1,All
2,RG20Z00118,2,0,
3,WI99A00414,1,1,All
4,PO00A00641,2,2,All
...,...,...,...,...
178489,LS20Z00237,2,0,
178490,MA07A00316,2,0,
178491,AP12Z00031,2,0,
178492,GU01A00218,2,2,All


## Respondent_Representation table

### Create the Respondent_Representation table in Athena

In [32]:
create_respondents_1 = f"""
SELECT Distinct t1.Case_Number, t1.Party, MAX(t1.Representative_Role) as Max_Rep_Role
FROM __temp__.Adopt_Respondent_Info t1
Group by Case_Number, Party;
"""

pydb.create_temp_table(create_respondents_1,'respondents_1')

create_respondents_2 = f"""
SELECT  t1.*,
case when Max_Rep_Role IS NULL then 0
else 1
end as Rep_IND
FROM __temp__.respondents_1 t1;
"""
pydb.create_temp_table(create_respondents_2,'respondents_2')

create_respondents_3 = f"""
SELECT Distinct t1.Case_Number,
Count(t1.Party) as CountOfParty,
SUM(t1.Rep_Ind) as SumOfRep_IND
FROM __temp__.respondents_2 t1
Group by Case_Number;
"""
pydb.create_temp_table(create_respondents_3,'respondents_3')

create_Respondent_Representation = f"""
SELECT t1.Case_Number,
t1.CountOfParty,
t1.SumOfRep_IND,
CASE WHEN t1.SumOfRep_Ind > t1.CountOfParty then 'Error'
WHEN t1.SumOfRep_Ind = t1.CountOfParty then 'All'
WHEN t1.SumOfRep_Ind =0 then 'None' else 'Some'  end as Res_Rep_Cat
FROM __temp__.Respondents_3 t1
"""

pydb.create_temp_table(create_Respondent_Representation,'Respondent_Representation')



#### Respondent_Representation validation

In [33]:
Respondent_Representation_count = pydb.read_sql_query("select count(*) as count from __temp__.Respondent_Representation")
Respondent_Representation_count

Unnamed: 0,count
0,1853090


## Adopt_Disposals_Final table

In [35]:
create_ADOPT_APP_AND_ORDERS_WITH_REP = f"""
SELECT t1.*,
t2.APP_REP_CAT, 
t3.RES_REP_CAT
FROM fcsq.ADOPT_APPS_AND_ORDERS_MATCH AS t1
LEFT JOIN __temp__.Applicant_Representation AS t2 ON (t1.CASE_NUMBER = t2.CASE_NUMBER)
LEFT JOIN __temp__.RESPONDENT_REPRESENTATION as t3 ON t1.CASE_NUMBER = t3.CASE_NUMBER;
"""
pydb.create_temp_table(create_ADOPT_APP_AND_ORDERS_WITH_REP,'ADOPT_APP_AND_ORDERS_WITH_REP')

In [36]:
pydb.read_sql_query("SELECT Count(*) as COUNT from __temp__.adopt_app_and_orders_with_rep")

Unnamed: 0,COUNT
0,147875


In [37]:
create_Adopt_Disposals_Final = f"""
SELECT t1.*,
cast(t1.Year as varchar(3)) || '-Q' || cast(t1.quarter as varchar(3)) AS Quarter2, 
case when (t1.APP_REP_CAT Is Null Or t1.RES_REP_CAT Is Null)  then '5 Unknown'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT ='None' then '4 Neither'
    when t1.APP_REP_CAT='None' and t1.RES_REP_CAT != 'None' then '3 Respondent Only'
    when t1.APP_REP_CAT != 'None' and t1.RES_REP_CAT = 'None' then '2 Applicant Only'
Else '1 Both'
End AS REP_CAT

FROM __temp__.ADOPT_APP_AND_ORDERS_WITH_REP AS t1 LEFT JOIN (SELECT * FROM {database}.courts_mv WHERE {database}.courts_mv.mojap_snapshot_date = date '{snapshot_date}')
AS t2 
ON t1.DSP_COURT = cast(t2.Code as varchar(3));

"""

pydb.create_temp_table(create_Adopt_Disposals_Final,'Adopt_Disposals_Final')

"""
Missed out these columns for now:

t2.Region_Pre2014, 
t2.Region,
Case when t1.YEAR < 2014 then t2.Region_Pre2014
Else t2.Region
End As Final_Region

"""

create_Adopt_Disposals_Final_2 = f"""
SELECT *
FROM __temp__.ADOPT_DISPOSALS_FINAL
WHERE adoption = 'Adoption';
"""

pydb.create_temp_table(create_Adopt_Disposals_Final_2,'Adopt_Disposals_Final_2')

#### Adopt_Disposals_Final validation

In [38]:
Adopt_Disposals_Final_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Disposals_Final_2")
Adopt_Disposals_Final_count

Unnamed: 0,count
0,82005


## Adopt_Quarterly table

### Create the Adopt_Quarterly table in Athena

In [39]:
create_Adopt_Quarterly = f"""
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'Q' || cast(quarter as varchar(3)) AS quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND quarter2 <> '2021-Q4'
    AND adoption='Adoption'
GROUP BY
    year,
    quarter
"""


pydb.create_temp_table(create_Adopt_Quarterly,'Adopt_Quarterly')




#### Adopt_Quarterly validation

In [40]:
Adopt_Quarterly_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Quarterly")
Adopt_Quarterly_count

Unnamed: 0,count
0,258


## Adopt_Annual table

### Create the Adopt_Annual table in Athena

In [42]:
create_Adopt_Annual = f"""
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year,
    rep_cat
    
UNION ALL
SELECT DISTINCT
        'Adoption' as type,
        year,
        'N/A' as quarter,
        'All' as rep_cat,
        count(*) as n,
        avg(wait_weeks) as mean
    FROM 
        __temp__.ADOPT_DISPOSALS_FINAL_2
    WHERE year > 2010 
    AND year < 2021
    AND adoption='Adoption'
GROUP BY
    year
"""

pydb.create_temp_table(create_Adopt_Annual,'Adopt_Annual')



#### Adopt_Annual validation

In [43]:
Adopt_Annual_count = pydb.read_sql_query("select count(*) as count from __temp__.Adopt_Annual")
Adopt_Annual_count



Unnamed: 0,count
0,60


In [44]:
df = pydb.read_sql_query("SELECT * FROM __temp__.Adoption")
#df.to_csv(path_or_buf = 's3://alpha-family-data/CSVs/Adoption_legrep.csv',index=False)
df.to_csv('CAlegreptest.csv')


## adopt_timeliness_combined table

### Create the adopt_timeliness_combined table in Athena

In [46]:
create_adopt_timeliness_combined = f"""
SELECT * FROM __temp__.Adopt_Annual 
UNION ALL 
SELECT * FROM __temp__.Adopt_Quarterly ORDER BY type,year,quarter,rep_cat
"""

pydb.create_temp_table(create_adopt_timeliness_combined,'adopt_timeliness_combined')



#### adopt_timeliness_combined validation

In [48]:
adopt_timeliness_combined_count = pydb.read_sql_query("select count(*) as count from __temp__.adopt_timeliness_combined")
adopt_timeliness_combined_count

Unnamed: 0,count
0,318


In [58]:
df = pydb.read_sql_query("SELECT * FROM __temp__.adopt_timeliness_combined")
column_order = ['n','mean']
df = df.pivot_table(index=['type','year','quarter'],columns=['rep_cat'],values = ['n','mean'],aggfunc=sum, fill_value=0).swaplevel(axis=1).sort_index(axis=1)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,rep_cat,1 Both,1 Both,2 Applicant Only,2 Applicant Only,3 Respondent Only,3 Respondent Only,4 Neither,4 Neither,5 Unknown,5 Unknown,All,All
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,n,mean,n,mean,n,mean,n,mean,n,mean,n
type,year,quarter,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
Adoption,2011,,28.405941,101,18.853755,253,18.583333,180,15.042532,4091,14.797688,173,15.648812,4798
Adoption,2011,Q1,30.909091,22,21.619048,63,20.225,40,15.474576,944,14.319149,47,16.247312,1116
Adoption,2011,Q2,35.125,16,18.923077,52,16.181818,44,14.240331,1086,14.947368,38,14.798544,1236
Adoption,2011,Q3,25.392857,28,16.2,65,19.590909,44,15.320888,991,13.931034,58,15.697302,1186
Adoption,2011,Q4,26.171429,35,18.780822,73,18.5,52,15.217757,1070,17.033333,30,15.907143,1260
Adoption,2012,,29.315789,76,22.883621,232,18.552795,161,13.916424,4798,18.52381,105,14.750558,5372
Adoption,2012,Q1,26.071429,14,21.688525,61,19.692308,39,14.20841,1094,17.08,25,14.94485,1233
Adoption,2012,Q2,29.733333,15,25.888889,63,17.589744,39,14.015929,1130,20.205882,34,15.056987,1281
Adoption,2012,Q3,33.0,23,20.745098,51,21.459459,37,14.152979,1242,18.84,25,14.992743,1378
Adoption,2012,Q4,27.416667,24,22.754386,57,16.065217,46,13.371622,1332,17.142857,21,14.097973,1480
