# SAS extraction & Manipulation


In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re

In [None]:
# Raise pandas' display limits so wide query previews are not truncated.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(option_name, option_value)

In [None]:
# String constants: database names and the snapshot date.
db1, fcsq_db = "familyman_dev_v2", "fcsq"
snapshot_date = "2021-08-19"

# boto3 handle on the S3 bucket (used further down to clear old table files).
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

# Query 1.1:
# ORD_EVENTS_ALL
Created as a temp table.

In [None]:

# Stack the two order-event extracts into one temp table. The second branch
# (CH_ACT_ORDS_EVENTS_1) supplies NULL for field_model and value so that its
# column list lines up with CH_ACT_ORDS_EVENTS_2 for the UNION ALL.
ord_events_all_sql = """
SELECT event,
    case_number,
    event_model,
    receipt_date,
    entry_date,
    Event_error,
    creating_court,
    Court_code,
    field_model,
    value
from fcsq.CH_ACT_ORDS_EVENTS_2
UNION ALL
SELECT event,
    case_number,
    event_model,
    receipt_date,
    entry_date,
    Event_error,
    creating_court,
    Court_code,  
    null as field_model,
    null as value
from fcsq.CH_ACT_ORDS_EVENTS_1
"""

pydb.create_temp_table(ord_events_all_sql, "ORD_EVENTS_ALL")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_EVENTS_ALL limit 5")
df.head()

In [None]:
# Number of rows in the ORD_EVENTS_ALL temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_EVENTS_ALL"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 1.2:
# ORD_EVENTS_WITH_EXTRA_INFO

In [None]:
# Attach child, consent and final-order field values to every order event by
# left-joining three fcsq lookups on EVENT. Grouping by every selected column
# collapses exact duplicate rows.
# Name mapping recorded by the original author:
#   In_respect_of_which_children = CH_ACT_CHILDREN_INV
#   Consent_data = CH_CONSENT_INF
#   Code = Court_code
ord_events_extra_info_sql = """
SELECT a.EVENT,                                       
       a.CASE_NUMBER, 
       a.EVENT_MODEL, 
       a.RECEIPT_DATE, 
       a.ENTRY_DATE, 
       a.CREATING_COURT, 
       a.Court_code, 
       a.FIELD_MODEL AS EVENT_FIELD_MODEL, 
       a.VALUE  AS EVENT_VALUE, 
       b.FIELD_MODEL AS CHILD_FIELD_MODEL, 
       b.VALUE AS CHILD_VALUE, 
       c.FIELD_MODEL AS CONSENT_FIELD_MODEL, 
       c.VALUE  AS CONSENT_VALUE, 
       d.FIELD_MODEL AS FIN_ORD_FIELD_MODEL, 
       d.VALUE AS FIN_ORD_VALUE
FROM   __temp__.ORD_EVENTS_ALL a
LEFT JOIN fcsq.In_respect_of_which_children b ON a.EVENT = b.EVENT
LEFT JOIN fcsq.Consent_data c ON  a.EVENT = c.EVENT
LEFT JOIN fcsq.final_order_data d ON a.EVENT = d.EVENT
GROUP BY a.EVENT, a.CASE_NUMBER, a.EVENT_MODEL, a.RECEIPT_DATE, 
         a.ENTRY_DATE, a.CREATING_COURT, a.Court_code, a.FIELD_MODEL, 
         a.VALUE, b.FIELD_MODEL, b.VALUE, c.FIELD_MODEL, c.VALUE,
         d.FIELD_MODEL, d.VALUE;
"""

pydb.create_temp_table(ord_events_extra_info_sql, "ORD_EVENTS_WITH_EXTRA_INFO")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_EVENTS_WITH_EXTRA_INFO limit 5")
df.head()

In [None]:
# Number of rows in the ORD_EVENTS_WITH_EXTRA_INFO temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_EVENTS_WITH_EXTRA_INFO"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 1.3:
# ORD_EVENTS_WITH_CALC_FIELDS

In [None]:
# Derive the calculated fields for each order event:
#   CASE_TYPE  - 5th character of CASE_NUMBER
#   DATE       - RECEIPT_DATE, falling back to ENTRY_DATE when it is missing
#   EVENT_CODE - EVENT divided by 100000000, cast to INT
# The previous CASE tested "RECEIPT_DATE IS NOT NULL" and then returned
# ENTRY_DATE, so its ELSE branch could only ever yield NULL and the fallback
# never happened. COALESCE expresses the intended fallback directly.
aa = """
SELECT DISTINCT
       SUBSTR(CASE_NUMBER, 5, 1) AS CASE_TYPE,
       CASE_NUMBER,
       COALESCE(RECEIPT_DATE, ENTRY_DATE) AS DATE,
       CAST(EVENT/100000000 AS INT) AS EVENT_CODE,
       EVENT,
       EVENT_MODEL,
       EVENT_VALUE,
       CHILD_VALUE,
       CONSENT_VALUE,
       FIN_ORD_VALUE
FROM __temp__.ORD_EVENTS_WITH_EXTRA_INFO
"""
pydb.create_temp_table(aa, "ORD_EVENTS_WITH_CALC_FIELDS")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_EVENTS_WITH_CALC_FIELDS limit 5")
df.head()

In [None]:
# Number of rows in the ORD_EVENTS_WITH_CALC_FIELDS temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_EVENTS_WITH_CALC_FIELDS"
counter = pydb.read_sql_query(row_count_sql)
counter


# Query 1.4:

# ORD_FINAL_SUPS_DATA

In [None]:
# Decode each event's order type by left-joining the fcsq.order_type_decode
# lookup on (event_model, event value). Per the original author's note, that
# lookup table was created in Athena.
ord_final_sups_sql = """
SELECT a.Date, 
       a.CASE_TYPE, 
       a.CASE_NUMBER, 
       a.EVENT_CODE, 
       a.EVENT, 
       a.EVENT_MODEL, 
       a.EVENT_VALUE, 
       a.CHILD_VALUE, 
       a.CONSENT_VALUE, 
       a.FIN_ORD_VALUE, 
       b.order_type
FROM __temp__.ORD_EVENTS_WITH_CALC_FIELDS a
LEFT JOIN fcsq.order_type_decode b ON b.event_model = a.event_model 
AND a.EVENT_VALUE = b.Value;
"""
pydb.create_temp_table(ord_final_sups_sql, "ORD_FINAL_SUPS_DATA")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_FINAL_SUPS_DATA limit 5")
df.head()

In [None]:
# Number of rows in the ORD_FINAL_SUPS_DATA temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_FINAL_SUPS_DATA"
counter = pydb.read_sql_query(row_count_sql)
counter


# ORD_APP_WTHD_EVENTS_EXTRA_INFO

# Query 2.1:

In [None]:
# Enrich the application-withdrawal events with child, final-order and
# order-type values from three fcsq lookups; the GROUP BY over every selected
# column removes exact duplicate rows.
# NOTE(review): the order-type lookup (t5) is joined on t1.VALUE2 = t5.EVENT,
# whereas the other two lookups join on t1.EVENT - confirm this is intended.
wthd_events_extra_info_sql = """
SELECT t1.EVENT, 
       t1.CASE_NUMBER, 
       t1.EVENT_MODEL, 
       t1.RECEIPT_DATE,
       t1.ENTRY_DATE,
       t1.CREATING_COURT, 
       t1.Court_code, 
       t1.FIELD_MODEL AS EVENT_FIELD_MODEL,
       t5.VALUE AS EVENT_VALUE, 
       t2.FIELD_MODEL AS CHILD_FIELD_MODEL, 
       t2.VALUE AS CHILD_VALUE, 
       t3.FIELD_MODEL AS FIN_ORD_FIELD_MODEL, 
       t3.VALUE AS FIN_ORD_VALUE  
         
FROM fcsq.CH_ACT_APP_WITHDRAW_EVENTS t1 
LEFT JOIN fcsq.In_respect_of_which_children t2 ON t1.EVENT = t2.EVENT
LEFT JOIN fcsq.Application_wrt_final_order t3 ON t1.EVENT = t3.EVENT
LEFT JOIN fcsq.Application_wrt_order_type t5 ON t1.VALUE2 = t5.EVENT
GROUP BY t1.EVENT,t1.CASE_NUMBER,t1.EVENT_MODEL,t1.RECEIPT_DATE,t1.ENTRY_DATE,t1.CREATING_COURT,
t1.court_code,t1.FIELD_MODEL,t5.VALUE,t2.FIELD_MODEL,t2.VALUE,t3.FIELD_MODEL,t3.VALUE;
"""
pydb.create_temp_table(wthd_events_extra_info_sql, "ORD_APP_WTHD_EVENTS_EXTRA_INFO")
    

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_APP_WTHD_EVENTS_EXTRA_INFO limit 5")
df.head()

In [None]:
# Number of rows in the ORD_APP_WTHD_EVENTS_EXTRA_INFO temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_APP_WTHD_EVENTS_EXTRA_INFO"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 2.2:
# ORD_WTHD_EVENTS_WITH_CALC_FIELDS

In [None]:
# Derive the calculated fields for the withdrawal events:
#   Date       - receipt_date, falling back to entry_date (cast to DATE) when missing
#   CASE_TYPE  - 5th character of CASE_NUMBER
#   EVENT_CODE - EVENT divided by 100000000, cast to INT
# The previous CASE tested "receipt_date is not null" and then returned
# entry_date, so its ELSE branch could only ever yield NULL and the fallback
# never happened. COALESCE expresses the intended fallback directly.
tt = """
SELECT
     COALESCE(receipt_date, CAST(entry_date AS DATE)) AS Date,
     SUBSTR(CASE_NUMBER, 5, 1) AS CASE_TYPE,
     CASE_NUMBER,
     CAST(EVENT/100000000 AS INT) AS EVENT_CODE,
     EVENT,
     EVENT_FIELD_MODEL,
     EVENT_VALUE,
     CHILD_VALUE,
     FIN_ORD_VALUE
FROM __temp__.ORD_APP_WTHD_EVENTS_EXTRA_INFO
"""
pydb.create_temp_table(tt, "ORD_WTHD_EVENTS_WITH_CALC_FIELDS")

In [None]:
# Preview a few rows only (the other preview cells do the same); LIMIT stops
# the whole temp table being pulled into pandas and dumped to the output.
df = pydb.read_sql_query("select * from __temp__.ORD_WTHD_EVENTS_WITH_CALC_FIELDS limit 5")
df.head()

In [None]:
# Number of rows in the ORD_WTHD_EVENTS_WITH_CALC_FIELDS temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_WTHD_EVENTS_WITH_CALC_FIELDS"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 2.3:
# ORD_APP_WTHD_FINAL_SUPS_DATA

In [None]:
# Reshape the withdrawal events to the column names used by ORD_FINAL_SUPS_DATA:
# EVENT_FIELD_MODEL is exposed as EVENT_MODEL; all other columns pass through.
wthd_final_sups_sql = """
SELECT Date, 
       CASE_TYPE, 
       CASE_NUMBER, 
       EVENT_CODE, 
       EVENT, 
       EVENT_FIELD_MODEL AS EVENT_MODEL, 
       EVENT_VALUE, 
       CHILD_VALUE, 
       FIN_ORD_VALUE
FROM __temp__.ORD_WTHD_EVENTS_WITH_CALC_FIELDS
"""
pydb.create_temp_table(wthd_final_sups_sql, "ORD_APP_WTHD_FINAL_SUPS_DATA")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.ORD_APP_WTHD_FINAL_SUPS_DATA limit 5")
df.head()

In [None]:
# Number of rows in the ORD_APP_WTHD_FINAL_SUPS_DATA temp table.
row_count_sql = "select count(*) as count from __temp__.ORD_APP_WTHD_FINAL_SUPS_DATA"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.1:
# DISPS_ALL

In [None]:
# Combine order disposals and withdrawal disposals into one temp table.
# The withdrawal branch supplies NULL for CONSENT_VALUE and order_type so the
# two column lists match for the UNION ALL.
disps_all_sql = """
SELECT Date, 
       CASE_TYPE, 
       CASE_NUMBER, 
       EVENT_CODE, 
       EVENT, 
       EVENT_MODEL, 
       EVENT_VALUE, 
       CHILD_VALUE, 
       CONSENT_VALUE, 
       FIN_ORD_VALUE, 
       order_type
FROM  __temp__.ORD_FINAL_SUPS_DATA
UNION ALL
SELECT Date, 
       CASE_TYPE, 
       CASE_NUMBER, 
       EVENT_CODE, 
       EVENT, 
       EVENT_MODEL, 
       EVENT_VALUE, 
       CHILD_VALUE, 
       null as CONSENT_VALUE,
       FIN_ORD_VALUE,
       null as order_type
FROM __temp__.ORD_APP_WTHD_FINAL_SUPS_DATA
"""
pydb.create_temp_table(disps_all_sql, "DISPS_ALL")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.DISPS_ALL limit 5")
df.head()

In [None]:
# Number of rows in the DISPS_ALL temp table.
row_count_sql = "select count(*) as count from __temp__.DISPS_ALL"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.2:
# CHILD_DETAILS_AMENDED

In [None]:
# Copy fcsq.child_party_details into a temp table, exposing ROLE as ROLE_ID.
child_details_sql = """
SELECT 
      ROLE AS ROLE_ID, 
      CASE_NUMBER, 
      PARTY, 
      ROLE_MODEL, 
      GENDER, 
      DOB, 
      DELETE_FLAG
      FROM fcsq.child_party_details ;
"""
pydb.create_temp_table(child_details_sql, "CHILD_DETAILS_AMENDED")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.CHILD_DETAILS_AMENDED limit 5")
df.head()

In [None]:
# Number of rows in the CHILD_DETAILS_AMENDED temp table.
row_count_sql = "select count(*) as count from __temp__.CHILD_DETAILS_AMENDED"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.3:
# DISPS_CHILD_SPLIT

In [None]:
# Join every disposal in DISPS_ALL to the child rows of the same case
# (CHILD_DETAILS_AMENDED, matched on CASE_NUMBER) and add GENDER / DOB.
# The WHERE on t2.role_id discards disposals with no matching child row, so the
# LEFT JOIN effectively behaves as an inner join.
# NOTE(review): Child_id is t2.role_id when t1.child_value is present, and
# otherwise t1.child_value - which is always NULL on that branch. This looks
# inverted (presumably the intent is "use child_value when given, else fall
# back to every role_id of the case"); correcting it probably also needs a
# filter matching child_value to role_id. Confirm against the SAS original
# before changing.
qq = f"""
SELECT t1.Date, 
       t1.CASE_TYPE, 
       t1.CASE_NUMBER, 
       t1.EVENT_CODE, 
       t1.EVENT, 
       t1.EVENT_MODEL, 
       t1.EVENT_VALUE, 
       t1.CONSENT_VALUE, 
       t1.FIN_ORD_VALUE, 
       t1.Order_Type, 
       case when cast(t1.child_value as varchar(50)) is not null then cast(t2.role_id as varchar(50)) 
       else cast(t1.child_value as varchar(50)) end as Child_id,
       t2.GENDER, 
       t2.DOB
FROM __temp__.DISPS_ALL t1 
LEFT JOIN __temp__.CHILD_DETAILS_AMENDED t2 ON t1.CASE_NUMBER = t2.CASE_NUMBER
where t2.role_id is not null;
"""

pydb.create_temp_table(qq,"DISPS_CHILD_SPLIT")


In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.DISPS_CHILD_SPLIT limit 5")
df.head()

In [None]:
# Number of rows in the DISPS_CHILD_SPLIT temp table.
row_count_sql = "select count(*) as count from __temp__.DISPS_CHILD_SPLIT"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.4:
# NONSEC8DISPS

In [None]:
# Keep the disposals whose EVENT_MODEL is not one of the four models that the
# SEC8DISPS step selects ('C43', 'G63_1', 'ORDNOM', 'ORDREF').
# In SQL, "NULL NOT IN (...)" evaluates to NULL, so rows with a missing
# EVENT_MODEL were previously dropped here and are also not picked up by the
# complementary "IN (...)" filter - they vanished from the pipeline. The
# explicit IS NULL test keeps them on this side of the split.
uu = """
SELECT t1.Date,
       t1.CASE_TYPE,
       t1.CASE_NUMBER,
       t1.EVENT_CODE,
       t1.EVENT,
       t1.EVENT_MODEL,
       t1.EVENT_VALUE,
       t1.CONSENT_VALUE,
       t1.FIN_ORD_VALUE,
       t1.order_type,
       t1.CHILD_ID,
       t1.GENDER,
       t1.DOB
FROM __temp__.DISPS_CHILD_SPLIT t1
WHERE t1.EVENT_MODEL IS NULL
   OR t1.EVENT_MODEL NOT IN ('C43', 'G63_1', 'ORDNOM', 'ORDREF')
"""
pydb.create_temp_table(uu, "NONSEC8DISPS")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.NONSEC8DISPS limit 5")
df.head()

In [None]:
# Number of rows in the NONSEC8DISPS temp table.
row_count_sql = "select count(*) as count from __temp__.NONSEC8DISPS"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.5:
# SEC8DISPS

In [None]:
# Select the disposals whose EVENT_MODEL is one of 'C43', 'G63_1', 'ORDNOM' or
# 'ORDREF'; EVENT_VALUE is exposed as EVENT_VALUE2 in this table.
sec8_disps_sql = """
SELECT Date,
       CASE_TYPE, 
       CASE_NUMBER, 
       EVENT_CODE, 
       EVENT, 
       EVENT_MODEL, 
       EVENT_VALUE AS EVENT_VALUE2, 
       CONSENT_VALUE, 
       FIN_ORD_VALUE, 
       order_type, 
       CHILD_ID, 
       GENDER, 
       DOB
FROM __temp__.DISPS_CHILD_SPLIT 
WHERE EVENT_MODEL IN ('C43', 'G63_1', 'ORDNOM', 'ORDREF');
"""
pydb.create_temp_table(sec8_disps_sql, "SEC8DISPS")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.SEC8DISPS limit 5")
df.head()

In [None]:
# Number of rows in the SEC8DISPS temp table.
row_count_sql = "select count(*) as count from __temp__.SEC8DISPS"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.6
# SEC8DISPS_TYPESPLIT

In [None]:
 fg = f"""
SELECT t1.Date, 
       t1.CASE_TYPE, 
       t1.CASE_NUMBER, 
       t1.EVENT_CODE, 
       t1.EVENT, 
       t1.EVENT_MODEL, 
       t1.EVENT_VALUE2, 
       t1.CONSENT_VALUE, 
       t1.FIN_ORD_VALUE, 
       t1.CHILD_ID, 
       t1.GENDER, 
       t1.DOB,
       case when cast(t1.event_value2 as varchar(50)) is not null then cast(t2.order_code as varchar(50)) 
           else cast(t2.order_name as varchar(50)) end as order_type1
FROM __temp__.SEC8DISPS t1
LEFT JOIN fcsq.sec8ordtypelookup t2 ON t1.CASE_TYPE = t2.CASE_TYPE
where 
(case when cast(t1.event_value2 as varchar(50)) is not null then cast(t2.order_code as varchar(50)) 
           else cast(t2.order_name as varchar(50)) end ) is not null
"""
pydb.create_temp_table(fg,"SEC8DISPS_TYPESPLIT")   

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.SEC8DISPS_TYPESPLIT limit 5")
df.head()

In [None]:
# Number of rows in the SEC8DISPS_TYPESPLIT temp table.
row_count_sql = "select count(*) as count from __temp__.SEC8DISPS_TYPESPLIT"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.7:
# SEC8DISPS_TYPESPLIT_AMEND

In [None]:
# Rename the split columns back to the shared names (EVENT_VALUE2 -> EVENT_VALUE,
# ORDER_TYPE1 -> order_type) so this table can be unioned with NONSEC8DISPS.
sec8_typesplit_amend_sql = """
SELECT t1.Date, 
    t1.CASE_TYPE, 
          t1.CASE_NUMBER, 
          t1.EVENT_CODE, 
          t1.EVENT, 
          t1.EVENT_MODEL, 
          t1.EVENT_VALUE2 AS EVENT_VALUE, 
          t1.CONSENT_VALUE, 
          t1.FIN_ORD_VALUE, 
          t1.CHILD_ID, 
          t1.GENDER, 
          t1.DOB, 
          t1.ORDER_TYPE1 AS order_type
      FROM __temp__.SEC8DISPS_TYPESPLIT t1;
"""
pydb.create_temp_table(sec8_typesplit_amend_sql, "SEC8DISPS_TYPESPLIT_AMEND")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.SEC8DISPS_TYPESPLIT_AMEND limit 5")
df.head()

In [None]:
# Number of rows in the SEC8DISPS_TYPESPLIT_AMEND temp table.
row_count_sql = "select count(*) as count from __temp__.SEC8DISPS_TYPESPLIT_AMEND"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.8:
# DISPS_ALL_WITH_SPLITS

In [None]:
# Recombine the two halves of the split (NONSEC8DISPS and the amended
# section-8 disposals) into a single temp table with identical column lists.
disps_all_with_splits_sql = """
SELECT Date, 
       CASE_TYPE, 
       CASE_NUMBER, 
       EVENT_CODE, 
       EVENT, 
       EVENT_MODEL, 
       EVENT_VALUE,
       CONSENT_VALUE, 
       FIN_ORD_VALUE, 
       CHILD_ID, 
       GENDER, 
       DOB,
       order_type
FROM  __temp__.NONSEC8DISPS
UNION ALL
SELECT Date, 
        CASE_TYPE, 
          CASE_NUMBER, 
          EVENT_CODE, 
          EVENT, 
          EVENT_MODEL, 
          EVENT_VALUE, 
          CONSENT_VALUE, 
          FIN_ORD_VALUE, 
          CHILD_ID, 
          GENDER, 
          DOB, 
          order_type
FROM __temp__.SEC8DISPS_TYPESPLIT_AMEND
"""
pydb.create_temp_table(disps_all_with_splits_sql, "DISPS_ALL_WITH_SPLITS")

In [None]:
# Preview a few rows only; LIMIT stops the whole temp table being pulled into pandas.
df = pydb.read_sql_query("select * from __temp__.DISPS_ALL_WITH_SPLITS limit 5")
df.head()

In [None]:
# Number of rows in the DISPS_ALL_WITH_SPLITS temp table.
row_count_sql = "select count(*) as count from __temp__.DISPS_ALL_WITH_SPLITS"
counter = pydb.read_sql_query(row_count_sql)
counter

# Query 3.9:
# DISP_CHILD_COUNT_DATA

In [None]:
# Drop any existing fcsq.DISP_CHILD_COUNT_DATA table so it can be rebuilt.
drop_DISP_CHILD_COUNT_DATA = f"""
DROP TABLE IF EXISTS fcsq.DISP_CHILD_COUNT_DATA;
"""
# DROP TABLE is DDL and returns no result set, so execute it as a plain
# statement instead of reading it into a DataFrame with read_sql_query.
pydb.start_query_execution_and_wait(drop_DISP_CHILD_COUNT_DATA)

# Remove the data files left behind by the previous table. The trailing "/"
# limits the delete to this table's own folder; without it the prefix would
# also match sibling keys such as ".../DISP_CHILD_COUNT_DATA_backup/...".
bucket.objects.filter(Prefix="fcsq_processing/childrens_act/DISP_CHILD_COUNT_DATA/").delete()


In [None]:
# CTAS statement that materialises DISPS_ALL_WITH_SPLITS as the Parquet table
# fcsq.DISP_CHILD_COUNT_DATA with YEAR / Quarter / CASE_TYPE2 / DISP_TYPE added.
# The earlier draft still contained SAS syntax (IFC, INPUT, FIND, 'Order Type'n),
# lacked the comma after Quarter, read a receipt_date column that
# DISPS_ALL_WITH_SPLITS does not have, and selected from the SAS library
# CHILDINT. It is ported here to Athena SQL:
#   IFC(cond, a, b)          -> CASE WHEN cond THEN a ELSE b END
#   INPUT(x, 2.0)            -> TRY_CAST(TRIM(SUBSTR(x, 1, 2)) AS INTEGER)
#   FIND(x, 'Interim') > 0   -> STRPOS(x, 'Interim') > 0
#   'Order Type'n            -> order_type (the column name used upstream)
# NOTE(review): assumes order_type is a varchar whose first two characters are
# a numeric code - confirm. In SAS a missing INPUT() result compares as less
# than 19 (giving 'C'); here a NULL falls through to the CASE_TYPE test instead.
# TODO: still work in progress - the statement is only built, not executed.
# Once validated, run it with pydb.start_query_execution_and_wait (confirm that
# pydbtools substitutes __temp__ for this call as it does for read_sql_query).
Creating_DISP_CHILD_COUNT_DATA = """
CREATE TABLE IF NOT EXISTS fcsq.DISP_CHILD_COUNT_DATA
WITH (format = 'PARQUET', external_location =
's3://alpha-family-data/fcsq_processing/childrens_act/DISP_CHILD_COUNT_DATA') AS
SELECT
    EXTRACT(YEAR FROM t1.Date) AS YEAR,
    CASE
        WHEN EXTRACT(MONTH FROM t1.Date) IN (1, 2, 3) THEN 1
        WHEN EXTRACT(MONTH FROM t1.Date) IN (4, 5, 6) THEN 2
        WHEN EXTRACT(MONTH FROM t1.Date) IN (7, 8, 9) THEN 3
        WHEN EXTRACT(MONTH FROM t1.Date) IN (10, 11, 12) THEN 4
    END AS Quarter,
    CASE
        WHEN TRY_CAST(TRIM(SUBSTR(t1.order_type, 1, 2)) AS INTEGER) < 19 THEN 'C'
        WHEN t1.CASE_TYPE = 'C' THEN 'C'
        ELSE 'P'
    END AS CASE_TYPE2,
    t1.CASE_NUMBER,
    t1.EVENT_CODE,
    t1.EVENT_MODEL,
    t1.EVENT,
    /* DISP_TYPE */
    CASE
        WHEN t1.EVENT_MODEL = 'G63_1' THEN '1 Withdrawn'
        WHEN t1.EVENT_MODEL = 'ORDREF' THEN '2 Refused'
        WHEN t1.EVENT_MODEL = 'ORDNOM' THEN '3 No Order'
        WHEN STRPOS(t1.order_type, 'Interim') > 0 THEN '5 Interim Order'
        ELSE '4 Order'
    END AS DISP_TYPE,
    t1.order_type,
    SUBSTR(t1.order_type, 1, 2) AS Order_Type2,
    t1.CONSENT_VALUE,
    t1.FIN_ORD_VALUE,
    t1.CHILD_ID,
    t1.GENDER,
    t1.DOB
FROM __temp__.DISPS_ALL_WITH_SPLITS AS t1
"""