# SAS extraction & Manipulation


In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re

In [None]:
# Display settings: raise pandas' column/width limits so wide query results
# are shown in full in cell output.
for _option, _setting in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(_option, _setting)

In [None]:
# Athena database names.
db1 = "familyman_dev_v2"
fcsq_db = "fcsq"

# NOTE(review): snapshot_date is not referenced by any cell visible in this
# section — confirm where it is consumed.
snapshot_date = "2021-08-19"

# S3 resource and the bucket whose table folders are cleared before each CTAS.
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

# Query 1.1: ORD_EVENTS_ALL

Created as a temp table.

In [None]:

# Query 1.1: stack the two order-event extracts into a single temp table.
# The CH_ACT_ORDS_EVENTS_1 branch selects NULL for field_model and value so
# that both branches of the UNION ALL expose the same ten columns.
# The schema comes from `fcsq_db` (config cell) rather than being hard-coded.
ss = f"""
SELECT event,
    case_number,
    event_model,
    receipt_date,
    entry_date,
    Event_error,
    creating_court,
    Court_code,
    field_model,
    value
from {fcsq_db}.CH_ACT_ORDS_EVENTS_2
UNION ALL
SELECT event,
    case_number,
    event_model,
    receipt_date,
    entry_date,
    Event_error,
    creating_court,
    Court_code,  
    null as field_model,
    null as value
from {fcsq_db}.CH_ACT_ORDS_EVENTS_1
"""

pydb.create_temp_table(ss, "ORD_EVENTS_ALL")

In [None]:
# Load the temp table into pandas and preview its first rows.
# NOTE(review): this pulls every row just to show a preview; a LIMIT in the
# query would be cheaper if the full `df` is not needed afterwards.
df = pydb.read_sql_query(
    "select * from __temp__.ORD_EVENTS_ALL"
)
df.head()

In [None]:
# Row count of the ORD_EVENTS_ALL temp table, returned in a column named `count`.
counter = pydb.read_sql_query(
    "select count(*) as count from __temp__.ORD_EVENTS_ALL"
)
counter

# Query 1.2: ORD_EVENTS_WITH_EXTRA_INFO

In [None]:
# Query 1.2: attach child, consent and final-order details to every event.
#
# Name mapping kept from the original notes (presumably Athena name = name in
# the source SAS code — TODO confirm):
#   In_respect_of_which_children = CH_ACT_CHILDREN_INV
#   Consent_data                 = CH_CONSENT_INF
#   Code                         = Court_code
#
# Each lookup is LEFT JOINed on EVENT, so events without a match are kept.
# The GROUP BY lists every selected column and there are no aggregates, so it
# only removes duplicate rows.
# The schema comes from `fcsq_db` (config cell) rather than being hard-coded.
dd = f"""
SELECT a.EVENT,                                       
       a.CASE_NUMBER, 
       a.EVENT_MODEL, 
       a.RECEIPT_DATE, 
       a.ENTRY_DATE, 
       a.CREATING_COURT, 
       a.Court_code, 
       a.FIELD_MODEL AS EVENT_FIELD_MODEL, 
       a.VALUE  AS EVENT_VALUE, 
       b.FIELD_MODEL AS CHILD_FIELD_MODEL, 
       b.VALUE AS CHILD_VALUE, 
       c.FIELD_MODEL AS CONSENT_FIELD_MODEL, 
       c.VALUE  AS CONSENT_VALUE, 
       d.FIELD_MODEL AS FIN_ORD_FIELD_MODEL, 
       d.VALUE AS FIN_ORD_VALUE
FROM   __temp__.ORD_EVENTS_ALL a
LEFT JOIN {fcsq_db}.In_respect_of_which_children b ON a.EVENT = b.EVENT
LEFT JOIN {fcsq_db}.Consent_data c ON  a.EVENT = c.EVENT
LEFT JOIN {fcsq_db}.final_order_data d ON a.EVENT = d.EVENT
GROUP BY a.EVENT, a.CASE_NUMBER, a.EVENT_MODEL, a.RECEIPT_DATE, 
         a.ENTRY_DATE, a.CREATING_COURT, a.Court_code, a.FIELD_MODEL, 
         a.VALUE, b.FIELD_MODEL, b.VALUE, c.FIELD_MODEL, c.VALUE,
         d.FIELD_MODEL, d.VALUE;
"""

pydb.create_temp_table(dd, "ORD_EVENTS_WITH_EXTRA_INFO")

In [None]:
# Load the joined temp table into pandas and preview its first rows.
# NOTE(review): this pulls every row just to show a preview; a LIMIT in the
# query would be cheaper if the full `df` is not needed afterwards.
df = pydb.read_sql_query(
    "select * from __temp__.ORD_EVENTS_WITH_EXTRA_INFO"
)
df.head()

In [None]:
# Row count of the ORD_EVENTS_WITH_EXTRA_INFO temp table, returned in a column named `count`.
counter = pydb.read_sql_query(
    "select count(*) as count from __temp__.ORD_EVENTS_WITH_EXTRA_INFO"
)
counter

# Query 1.3: ORD_EVENTS_WITH_CALC_FIELDS

In [None]:
# Start from a clean slate before the table is rebuilt in the next cell:
# drop the Athena table if it exists, then delete every object under the
# table's S3 prefix so no data files from a previous run are left behind.
# The schema comes from `fcsq_db` (config cell) rather than being hard-coded.
drop_ORD_EVENTS_WITH_CALC_FIELDS = f"""
DROP TABLE IF EXISTS {fcsq_db}.ORD_EVENTS_WITH_CALC_FIELDS;
"""
pydb.read_sql_query(drop_ORD_EVENTS_WITH_CALC_FIELDS)
bucket.objects.filter(Prefix="fcsq_processing/childrens_act/ORD_EVENTS_WITH_CALC_FIELDS/").delete()

In [None]:
# Query 1.3: materialise ORD_EVENTS_WITH_CALC_FIELDS as a Parquet table (CTAS).
# Derived columns:
#   CASE_TYPE  - the 5th character of CASE_NUMBER
#   DATE       - RECEIPT_DATE, falling back to ENTRY_DATE when it is NULL
#   EVENT_CODE - EVENT divided by 100000000, cast to INT
# SELECT DISTINCT removes duplicate rows.
# The schema and the bucket are taken from the config cell (`fcsq_db`,
# `bucket`) so the table is written to the same bucket that the preceding
# clean-up cell empties.
create_ORD_EVENTS_WITH_CALC_FIELDS = f""" CREATE TABLE IF NOT EXISTS {fcsq_db}.ORD_EVENTS_WITH_CALC_FIELDS
WITH (format = 'PARQUET', external_location =
's3://{bucket.name}/fcsq_processing/childrens_act/ORD_EVENTS_WITH_CALC_FIELDS') AS
SELECT DISTINCT 
          (Substr(CASE_NUMBER,5,1)) AS CASE_TYPE, 
          CASE_NUMBER, 
          CASE WHEN RECEIPT_DATE  IS NULL then ENTRY_DATE 
          else RECEIPT_DATE end as DATE, 
          CAST(EVENT/100000000 AS INT) AS EVENT_CODE, 
          EVENT, 
          EVENT_MODEL, 
          EVENT_VALUE, 
          CHILD_VALUE, 
          CONSENT_VALUE, 
          FIN_ORD_VALUE
FROM __temp__.ORD_EVENTS_WITH_EXTRA_INFO
"""
pydb.read_sql_query(create_ORD_EVENTS_WITH_CALC_FIELDS)
# Alternative that is not in use: build this as a temp table with
# pydb.create_temp_table(<select statement>, "ORD_EVENTS_WITH_CALC_FIELDS").

In [None]:
# Note: kept in case ORD_EVENTS_WITH_CALC_FIELDS goes back to being a temp table; this reads from the temp table instead.

#df = pydb.read_sql_query("select * from __temp__.ORD_EVENTS_WITH_CALC_FIELDS")
#df.head()

In [None]:
# Row count of the persisted fcsq.ORD_EVENTS_WITH_CALC_FIELDS table, returned
# in a column named `count`.
# (If the table is ever built as a temp table instead, count
# __temp__.ORD_EVENTS_WITH_CALC_FIELDS.)
counter = pydb.read_sql_query(
    "select count(*) as count from fcsq.ORD_EVENTS_WITH_CALC_FIELDS"
)
counter

# Query 1.4: ORD_FINAL_SUPS_DATA

In [None]:
# Start from a clean slate before ORD_FINAL_SUPS_DATA is rebuilt:
# drop the Athena table if it exists, then delete every object under the
# table's S3 prefix so no data files from a previous run are left behind.
# The schema comes from `fcsq_db` (config cell) rather than being hard-coded.
drop_ORD_FINAL_SUPS_DATA = f"""
DROP TABLE IF EXISTS {fcsq_db}.ORD_FINAL_SUPS_DATA;
"""
pydb.read_sql_query(drop_ORD_FINAL_SUPS_DATA)
bucket.objects.filter(Prefix="fcsq_processing/childrens_act/ORD_FINAL_SUPS_DATA/").delete()

In [None]:
# Query 1.4 (DRAFT - deliberately not executed yet).
# CTAS for ORD_FINAL_SUPS_DATA built from ORD_EVENTS_WITH_CALC_FIELDS.
# The original cell left the string literal unterminated and selected
# `b.order_type` without any table aliased as `b`. The string is now closed
# and the unresolved order_type column/join is marked as a TODO inside the SQL
# so the statement is at least well-formed.
# The schema and the bucket are taken from the config cell (`fcsq_db`, `bucket`).
create_ORD_FINAL_SUPS_DATA = f""" CREATE TABLE IF NOT EXISTS {fcsq_db}.ORD_FINAL_SUPS_DATA
WITH (format = 'PARQUET', external_location =
's3://{bucket.name}/fcsq_processing/childrens_act/ORD_FINAL_SUPS_DATA') AS
SELECT a.Date, 
       a.CASE_TYPE, 
       a.CASE_NUMBER, 
       a.EVENT_CODE, 
       a.EVENT, 
       a.EVENT_MODEL, 
       a.EVENT_VALUE, 
       a.CHILD_VALUE, 
       a.CONSENT_VALUE, 
       a.FIN_ORD_VALUE
       -- TODO: add b.order_type once the join below is defined
FROM {fcsq_db}.ORD_EVENTS_WITH_CALC_FIELDS a
-- TODO: LEFT JOIN <order type lookup> b ON <join key still to be agreed>
"""
# TODO: run pydb.read_sql_query(create_ORD_FINAL_SUPS_DATA) once the order_type
# join is resolved; running it now would create the table without order_type.



# Note (Query 1.4): the lookup order_type_lookup does not have event_model as a column, so the join that should supply order_type still needs to be defined.

# Query 1.5:

In [None]:
# Query 1.5 (DRAFT): application-withdrawal events with child and final-order
# details. The statement is only stored in `ff`; nothing in this cell runs it.
# `xxxxxxxxxxxxxx` is a placeholder for a table that does not exist yet.
# The GROUP BY lists every selected column and there are no aggregates, so it
# only removes duplicate rows.
ff = """
SELECT a.EVENT, 
          a.CASE_NUMBER, 
          a.EVENT_MODEL, 
          a.RECEIPT_DATE, 
          a.ENTRY_DATE, 
          a.CREATING_COURT, 
          a.court_code, 
          a.FIELD_MODEL AS EVENT_FIELD_MODEL, 
          b.VALUE AS EVENT_VALUE, 
          c.FIELD_MODEL AS CHILD_FIELD_MODEL, 
          c.VALUE AS CHILD_VALUE, 
          d.FIELD_MODEL AS FIN_ORD_FIELD_MODEL, 
          d.VALUE AS FIN_ORD_VALUE
          
FROM fcsq.Appilication_withdrawal_events a
LEFT JOIN fcsq.Appilication_withdrawal_WRT_children c ON  a.EVENT = c.EVENT
LEFT JOIN fcsq.Application_wrt_final_order d ON a.EVENT = d.EVENT
LEFT JOIN xxxxxxxxxxxxxx b ON a.VALUE2 = b.EVENT
GROUP BY a.EVENT, a.CASE_NUMBER, a.EVENT_MODEL, a.RECEIPT_DATE, 
a.ENTRY_DATE, a.CREATING_COURT, a.court_code,a.FIELD_MODEL, b.VALUE, 
c.FIELD_MODEL, c.VALUE, d.FIELD_MODEL, d.VALUE;
"""
# Blocker: CHILDIN.ORD_APP_WTHD_TYPE_APP_FOR_INFO has not been previously created
# (presumably the table the placeholder above stands for — TODO confirm).