# Domestic Violence Extractions

## 1. Import packages and set options 
<a name="import_packages"></a>

In [2]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# sets parameters to view dataframes for tables easier
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)


## 2. Define key variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [3]:
#this is the database we will be extracting from
database = "familyman_dev_v2"

#this is the snapshot date of familyman we will be extracting from
snapshot_date = "2022-05-23"
#snapshot_date = "2021-08-19"
#this is the athena database we will be storing our tables in
fcsq_database = "fcsq"

#this is the s3 bucket we will be saving data to
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

## 3. DV_APPS1 table - extracts the domestic violence application details from the event and events_fields table <a name=DV_APPS1></a>

### Drop the DV_APPS1 table if it already exists and remove its data from the S3 bucket

In [5]:
drop_DV_APPS1 = "DROP TABLE IF EXISTS fcsq.DV_APPS1"
pydb.start_query_execution_and_wait(drop_DV_APPS1)
bucket.objects.filter(Prefix="fcsq_processing/Domestic_Violence/DV_APPS1").delete();

### Create the DV_APPS1 table

In [11]:
create_DV_APPS1_table =f"""
CREATE TABLE IF NOT EXISTS fcsq.DV_APPS1
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/DV_APPS1') AS
SELECT /*csv*/ 
  TTE.RECEIPT_DATE, 
  TTE.CASE_NUMBER, 
  TTE.EVENT, 
  TTE.CREATING_COURT, 
  TTF.FIELD_MODEL, 
  TTF.VALUE, 
  TTE.Error
FROM 
  {database}.events TTE
  INNER JOIN {database}.event_fields TTF
     ON TTE.EVENT = TTF.EVENT
WHERE 
   TTE.Error= 'N' 
     AND TTF.FIELD_MODEL In ('U22_AT','G50_AT')
     AND (TTE.mojap_snapshot_date = date'{snapshot_date}' AND TTF.mojap_snapshot_date= date'{snapshot_date}');
"""
pydb.start_query_execution_and_wait(create_DV_APPS1_table);

#### DV_APPS1 validation

In [12]:
DV_APPS1_count = pydb.read_sql_query("SELECT count(*) as count from fcsq.DV_APPS1")
DV_APPS1_count

Unnamed: 0,count
0,2730396


## 4. DV_Ords1 table -  extracts the domestic violence order details from the event and events_fields table ¶ <a name=DV_Ords1></a>

### Drop the DV_Ords1 table if it already exists and remove its data from the S3 bucket

In [None]:
drop_DV_Ords1 = "DROP TABLE IF EXISTS fcsq.DV_Ords1"
pydb.start_query_execution_and_wait(drop_DV_Ords1)
bucket.objects.filter(Prefix="fcsq_processing/Domestic_Violence/DV_Ords1").delete();

### Create the DV_Ords1 table

In [13]:
create_DV_Ords1_table =f"""
CREATE TABLE IF NOT EXISTS fcsq.DV_Ords1
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/DV_Ords1') AS
SELECT /*csv*/
  TTE.RECEIPT_DATE, 
  TTE.CASE_NUMBER, 
  TTE.EVENT, 
  TTE.CREATING_COURT, 
  TTF.FIELD_MODEL, 
  TTF.VALUE, 
  TTE.Error
FROM 
  {database}.events TTE
  INNER JOIN {database}.event_fields  TTF
    ON TTE.EVENT = TTF.EVENT
WHERE 
  TTE.Error= 'N' 
   AND TTF.FIELD_MODEL In ('FL404B_7','FL404_79')
    AND (TTE.mojap_snapshot_date = date'{snapshot_date}' AND TTF.mojap_snapshot_date= date'{snapshot_date}');
"""
pydb.start_query_execution_and_wait(create_DV_Ords1_table);

#### DV_Ords1 validation

In [None]:
DV_Ords1_count = pydb.read_sql_query("SELECT count(*) as count from fcsq.DV_Ords1")
DV_Ords1_count

## 5. RES_ATTENDANCE_INFO table - Extra orders info on Respondent attendance <a name=RES_ATTENDANCE_INFO></a>

### Drop the RES_ATTENDANCE_INFO table if it already exists and remove its data from the S3 bucket

In [None]:
drop_RES_ATTENDANCE_INFO = "DROP TABLE IF EXISTS fcsq.RES_ATTENDANCE_INFO"
pydb.start_query_execution_and_wait(drop_RES_ATTENDANCE_INFO)
bucket.objects.filter(Prefix="fcsq_processing/Domestic_Violence/RES_ATTENDANCE_INFO").delete();

### Create the RES_ATTENDANCE_INFO table

In [15]:
create_RES_ATTENDANCE_INFO_table =f"""
CREATE TABLE IF NOT EXISTS fcsq.RES_ATTENDANCE_INFO
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/RES_ATTENDANCE_INFO') AS
SELECT /*csv*/
  TTE.EVENT, 
  TTE.RECEIPT_DATE, 
  TTE.ENTRY_DATE, 
  TTE.Error, 
  TTE.CASE_NUMBER, 
  TTE.EVENT_MODEL, 
  TTF.FIELD_MODEL, 
  TTF.VALUE        
FROM 
  {database}.events TTE
  INNER JOIN {database}.event_fields TTF
    ON TTE.EVENT = TTF.EVENT
WHERE 
   TTE.Error='N' 
   AND TTF.FIELD_MODEL In ('FL404_5','FL404B_5')
   AND (TTE.mojap_snapshot_date = date'{snapshot_date}' AND TTF.mojap_snapshot_date= date'{snapshot_date}'); 
"""
pydb.start_query_execution_and_wait(create_RES_ATTENDANCE_INFO_table);

#### RES_ATTENDANCE_INFO validation

In [16]:
RES_ATTENDANCE_INFO_count = pydb.read_sql_query("SELECT count(*) as count from fcsq.RES_ATTENDANCE_INFO")
RES_ATTENDANCE_INFO_count

Unnamed: 0,count
0,889929


## 6. DV_Applications_1 table - adds a comma to the start and the end of the value variable for later queries <a name=DV_Applications_1></a>

### Create the DV_Applications_1 table

In [4]:
create_DV_Applications_1_table =f"""
SELECT
receipt_date,
case_number,
event,
creating_court,
field_model,
','|| value || ',' as Adjusted_Value,
error
from FCSQ.DV_Apps1
"""
pydb.create_temp_table(create_DV_Applications_1_table,'DV_Applications_1')

#### DV_Applications_1 validation

In [25]:
DV_Applications_1_count = pydb.read_sql_query("SELECT count(*) as count from __temp__.DV_Applications_1")
DV_Applications_1_count

Unnamed: 0,count
0,2730396


## 7. DV_Applications_2 table - Limits the data to only Domestic Violence applications <a name=DV_Applications_2></a>

### Create the DV_Applications_2 table

In [22]:
create_DV_Applications_2_table =f"""
SELECT 
receipt_date,
case_number,
event,
creating_court,
field_model,
Adjusted_Value,
error
FROM __temp__.DV_Applications_1
Where strpos(Adjusted_Value, ', ENM') <> 0
Or strpos(Adjusted_Value,', ONM') <> 0
Or strpos(Adjusted_Value,', EO,') <> 0
Or strpos(Adjusted_Value,', EO ,') <> 0
Or strpos(Adjusted_Value,', ONO')<> 0;
"""

pydb.create_temp_table(create_DV_Applications_2_table,'DV_Applications_2')

#### DV_Applications_2 validation

In [23]:
DV_Applications_2_count = pydb.read_sql_query("SELECT * from __temp__.DV_Applications_2")
DV_Applications_2_count

Unnamed: 0,receipt_date,case_number,event,creating_court,field_model,adjusted_value,error
0,2009-07-14,WD09F01356,36200763008,WD,U22_AT,",ENM, EO,",N
1,2013-04-30,TF13F03166,36400598129,TF,U22_AT,",ENM, EO,",N
2,2016-11-22,SO16F00756,32801890602,SO,U22_AT,",ONM, ONO,",N
3,2010-03-17,SK10F00198,33600379961,SK,U22_AT,",ENM, EO,",N
4,2008-07-31,WF08F00413,35700226385,WF,U22_AT,",ENM, EO,",N
...,...,...,...,...,...,...,...
71666,2007-09-13,WA07F00751,36000224814,WA,U22_AT,",ONM, ONO,",N
71667,2012-05-29,WD12F00998,36201099835,WD,U22_AT,",EO, ENM,",N
71668,2010-09-03,WI10F00923,37500411052,WI,U22_AT,",ENM, EO,",N
71669,2007-04-03,WV07F00146,37800428566,WV,U22_AT,",ONM, ONO,",N


## 8. DV_APPLICATION_EVENTS table - Calculates year and quarter of receipt date, deletes duplicates and calculates whether each case is a domestic violence or a childrens act case <a name=DV_APPLICATION_EVENTS></a>

### Drop the DV_APPLICATION_EVENTS table if it already exists and remove its data from the S3 bucket

In [None]:
drop_DV_APPLICATION_EVENTS = "DROP TABLE IF EXISTS fcsq.DV_APPLICATION_EVENTS"
pydb.start_query_execution_and_wait(drop_DV_APPLICATION_EVENTS)
bucket.objects.filter(Prefix="fcsq_processing/Domestic_Violence/DV_APPLICATION_EVENTS").delete();

### Create the DV_APPLICATION_EVENTS table

In [32]:
create_DV_APPLICATION_EVENTS_table =f"""
CREATE TABLE IF NOT EXISTS fcsq.DV_APPLICATION_EVENTS
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/DV_APPLICATION_EVENTS') AS
SELECT DISTINCT /*YEAR*/
                EXTRACT(YEAR FROM (t1.receipt_date)) AS year,
				/*QUARTER*/
        CASE WHEN EXTRACT(Month FROM (t1.receipt_date)) <4 THEN 1
              WHEN EXTRACT(Month FROM (t1.receipt_date)) <7 THEN 2
              WHEN EXTRACT(Month FROM (t1.receipt_date))<10 THEN 3
              ELSE 4
              END AS quarter,
                t1.RECEIPT_DATE,
                t1.CASE_NUMBER,
	            t1.EVENT,
				/*EVENT_COURT*/
                cast((t1.EVENT / 100000000) as int)  AS EVENT_COURT,
	            t1.FIELD_model,
	            t1.ADJUSTED_VALUE,
			    CASE WHEN (substr(t1.CASE_NUMBER,5,1)) = 'F'
                THEN 'Domestic Violence'
                WHEN (Substr(t1.CASE_NUMBER,5,1)) IN ('C', 'P')
				THEN 'Childrens Act'
				WHEN (Substr(t1.CASE_NUMBER,5,1)) IN ('A', 'Z')
				THEN 'Adoption'
                ELSE 'Other' END AS CASE_TYPE
FROM __temp__.DV_APPLICATIONS_2 AS t1;
"""
pydb.start_query_execution_and_wait(create_DV_APPLICATION_EVENTS_table);

#### DV_APPLICATION_EVENTS validation

In [33]:
DV_APPLICATION_EVENTS_count = pydb.read_sql_query("SELECT count(*) as count from fcsq.DV_APPLICATION_EVENTS")
DV_APPLICATION_EVENTS_count

Unnamed: 0,count
0,71671


## 8. APP_LOOKUP1 table - groups the adjusted values so that there's one record per value <a name=APP_LOOKUP1></a>

### Create the APP_LOOKUP1 table

In [38]:
create_APP_LOOKUP1_table =f"""
SELECT DISTINCT t1.ADJUSTED_VALUE
FROM fcsq.DV_APPLICATION_EVENTS AS t1;
"""
pydb.create_temp_table(create_APP_LOOKUP1_table,'App_lookup1')

#### APP_LOOKUP1 validation

In [39]:
APP_LOOKUP1_count = pydb.read_sql_query("SELECT count(*) as count from __temp__.APP_LOOKUP1")
APP_LOOKUP1_count

Unnamed: 0,count
0,140


## 9. APP_LOOKUP2 table - This query looks at the adjusted value to see whether it can find any of the four order types in the value string. If so, it puts the order type description. <a name=APP_LOOKUP2></a>

### Create the APP_LOOKUP2 table

In [41]:
create_APP_LOOKUP2_table =f"""
SELECT t1.ADJUSTED_VALUE,
       /*ENM*/
       CASE strpos(Adjusted_Value,'ENM') <> 0 then 'Exparte Non-Molestation' else '' as ENM,
       /*ONM*/
       CASE strpos(Adjusted_Value,'ONM') <> 0 then 'On Notice Non-Molestation' else '' as ONM,
       /*EO*/
       CASE (strpos(Adjusted_Value,', EO') <> 0 OR strpos(Adjusted_Value,', EO ,') <> 0)
       then 'Exparte Occupation' else '' as EO,
       /*ONO*/
       CASE strpos(Adjusted_Value,'ONO') <> 0 then 'On Notice Occupation' else '' as ONO
FROM __temp__.APP_LOOKUP1 AS t1;
"""
pydb.create_temp_table(create_APP_LOOKUP2_table,'APP_LOOKUP2')

InvalidRequestException: An error occurred (InvalidRequestException) when calling the StartQueryExecution operation: line 9:42: mismatched input '<>'. Expecting: 'WHEN'

#### APP_LOOKUP2 validation

In [None]:
APP_LOOKUP2_count = pydb.read_sql_query("SELECT count(*) as count from __temp__.APP_LOOKUP2")
APP_LOOKUP2_count