# Domestic Violence DFJ

## Contents
#### Setup
1. [import_packages](#import_packages) 
2. [define_key_variables](#define_key_variables) 

## 1. Import packages and set options 
<a name="import_packages"></a>

In [1]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# Widen pandas' display limits so wide query results are easier to read in the notebook
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(option_name, option_value)

## 2. Define key variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [2]:
# Database we will be extracting from
database = 'familyman_dev_v3'

# To pick up the latest snapshot from Athena instead of a pinned date, uncomment:
#get_snapshot_date = f"SELECT mojap_snapshot_date from {database}.events order by mojap_snapshot_date desc limit 1"
#snapshot_date = str(pydb.read_sql_query(get_snapshot_date)['mojap_snapshot_date'].values[0])

# Snapshot date pinned to the August 2022 snapshot
snapshot_date = "2022-08-04"

# Athena database our tables are stored in
fcsq_database = 'fcsq'

# Handle on the S3 bucket we save data to
s3 = boto3.resource('s3')
bucket = s3.Bucket('alpha-family-data')

# Set these to the CURRENT quarter and year, not the quarter being published;
# the merge queries below exclude rows for this year/quarter.
latest_quarter, latest_year = 3, 2022

## 3. Dom_violence_Orders table 

In [3]:
# Row count per Year, Quarter and Event_court for fcsq.DV_ORDS_FINAL rows with Year > 2010
create_Dom_Violence_Orders = """
SELECT Year, Quarter, Event_court, count(*) as count
FROM fcsq.DV_ORDS_FINAL
WHERE Year > 2010
GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

# Register the result as __temp__.Dom_Violence_Orders for the merge step
pydb.create_temp_table(create_Dom_Violence_Orders, "Dom_Violence_Orders");

### Create the orders_case_count_C temporary table

In [7]:
# Number the rows within each CASE_NUMBER, latest RECEIPT_DATE (then highest EVENT) first,
# so that SEQ_NUM = 1 marks the most recent row for each case
create_orders_case_count_C = """
SELECT *,
ROW_NUMBER() OVER(PARTITION BY CASE_NUMBER
ORDER BY CASE_NUMBER, RECEIPT_DATE DESC, EVENT DESC) AS SEQ_NUM
FROM fcsq.DV_ORDS_FINAL
"""

pydb.create_temp_table(create_orders_case_count_C, "orders_case_count_C");

### Create the orders_case_count_D temporary table

In [12]:
# Keep only the SEQ_NUM = 1 row of each case and derive the calendar year and
# quarter (months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, otherwise 4) from its Receipt_Date
create_orders_case_count_D = """
SELECT
t1.CASE_NUMBER, 
Year (t1.Receipt_Date) AS YEAR,
CASE WHEN Month(t1.Receipt_Date)<4
    THEN 1
        WHEN Month(t1.Receipt_Date)<7
        THEN 2
            WHEN Month(t1.Receipt_Date)<10
            THEN 3
ELSE 4
END AS Quarter,
t1.Receipt_Date,
t1.event_court

FROM __temp__.orders_case_count_C as t1

WHERE SEQ_NUM = 1

GROUP BY t1.CASE_NUMBER,
t1.Receipt_Date,
t1.event_court;

"""

pydb.create_temp_table(create_orders_case_count_D, "orders_case_count_D");

## 4. Dom_violence_Cases table 

In [9]:
# Count the per-case rows by Year, Quarter and Event_court, restricted to Year > 2010
# and to case numbers whose fifth character is 'F'
create_Dom_Violence_Cases = """
SELECT Year, Quarter, Event_court, count(*) as cases
FROM __temp__.orders_case_count_D
WHERE Year > 2010 and substring(case_number,5,1) = 'F'
GROUP BY Year, Event_court, Quarter
ORDER BY Year, Event_court, Quarter;
"""

pydb.create_temp_table(create_Dom_Violence_Cases, "Dom_Violence_Cases");

## 5. Dom_violence_Merge table 

In [10]:
# Combine the order counts (t1) with the case counts (t2) per Year / Quarter / Event_court
# and tag every row as Category 'Domestic Violence', Stage 'End'.
#
# The join keys are COALESCEd across both sides: with a FULL OUTER JOIN a row present
# only in t2 has NULL t1 keys, so selecting t1.Year etc. would return NULL keys, and a
# filter written against t1 alone (NOT (NULL = ... AND NULL = ...)) evaluates to NULL and
# silently drops those rows, turning the outer join into a left join.
# Rows for latest_year / latest_quarter (the current quarter) are excluded.
create_Dom_Violence_Merge = f"""
SELECT COALESCE(t1.Year, t2.Year) AS Year,
COALESCE(t1.Quarter, t2.Quarter) AS Quarter,
COALESCE(t1.Event_court, t2.Event_court) AS Event_court,
COALESCE(t1.Count, 0) AS Count,
COALESCE(t2.Cases, 0) AS Cases,
'Domestic Violence' AS Category,
'End' AS Stage
FROM __temp__.Dom_Violence_Orders t1
FULL OUTER JOIN __temp__.Dom_Violence_Cases t2
ON t1.Year = t2.Year AND t1.Quarter = t2.Quarter AND t1.Event_court = t2.Event_court
WHERE NOT (COALESCE(t1.Year, t2.Year) = {latest_year}
           AND COALESCE(t1.Quarter, t2.Quarter) = {latest_quarter})
"""

pydb.create_temp_table(create_Dom_Violence_Merge, "Dom_Violence_Merge");

In [11]:
#check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Merge"
#pydb.read_sql_query(check)

## 6. Dom_violence_Format table 

In [12]:
# Put the merged columns into the output order and rename Event_court to Court
create_Dom_Violence_Format = """
SELECT Category, 
Year, 
Quarter, 
Event_court as Court, 
Stage, 
Count,
Cases
FROM __temp__.Dom_Violence_Merge;
"""

pydb.create_temp_table(create_Dom_Violence_Format, "Dom_Violence_Format");

# Run the same query into a dataframe and save it to S3 as a CSV, without the index column
df = pydb.read_sql_query(create_Dom_Violence_Format)
df.to_csv(
    "s3://alpha-family-data/CSVs/Domestic_Violence/dom_violence_format.csv",
    index=False,
)

In [13]:
#check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Format"
#pydb.read_sql_query(check)

## 7. Dom_violence_Apps_Orders table 

In [14]:
# Row count per Year, Quarter and Event_court for fcsq.DV_APPS_FINAL rows with Year > 2010
create_Dom_Violence_Apps_Orders = """
SELECT Year, Quarter, Event_court, count(*) as count
FROM fcsq.DV_APPS_FINAL
WHERE Year > 2010
GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

# Register the result as __temp__.Dom_Violence_Apps_Orders for the merge step
pydb.create_temp_table(create_Dom_Violence_Apps_Orders, "Dom_Violence_Apps_Orders");

In [15]:
#check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Apps_Orders"
#pydb.read_sql_query(check)

### Create the apps_orders_case_count_C temporary table

In [40]:
# Number the rows within each CASE_NUMBER, earliest RECEIPT_DATE (then lowest EVENT) first,
# so that SEQ_NUM = 1 marks the first row for each case
create_apps_orders_case_count_C = """
SELECT *,
ROW_NUMBER() OVER(PARTITION BY CASE_NUMBER
ORDER BY CASE_NUMBER, RECEIPT_DATE ASC, EVENT ASC) AS SEQ_NUM
FROM fcsq.DV_APPS_FINAL;
"""

pydb.create_temp_table(create_apps_orders_case_count_C, "apps_orders_case_count_C");

### Create the apps_orders_case_count_D temporary table

In [42]:
# Keep only the SEQ_NUM = 1 row of each case and derive the calendar year and
# quarter (months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, otherwise 4) from its Receipt_Date
create_apps_orders_case_count_D = """
SELECT
t1.CASE_NUMBER, 
Year (t1.Receipt_Date) AS YEAR,
CASE WHEN Month(t1.Receipt_Date)<4
    THEN 1
        WHEN Month(t1.Receipt_Date)<7
        THEN 2
            WHEN Month(t1.Receipt_Date)<10
            THEN 3
ELSE 4
END AS Quarter,
t1.Receipt_Date,
t1.event_court

FROM __temp__.apps_orders_case_count_C as t1

WHERE SEQ_NUM = 1

GROUP BY t1.CASE_NUMBER,
t1.Receipt_Date,
t1.event_court;

"""

pydb.create_temp_table(create_apps_orders_case_count_D, "apps_orders_case_count_D");

## 8. Dom_violence_Apps_Cases table 

In [18]:
# Count the per-case rows by Year, Quarter and Event_court, restricted to Year > 2010
# and to case numbers whose fifth character is 'F'
create_Dom_Violence_Apps_Cases = """
SELECT Year, Quarter, Event_court, count(*) as cases
FROM __temp__.apps_orders_case_count_D
WHERE Year > 2010 and substring(case_number,5,1) = 'F'
GROUP BY Year, Event_court, Quarter
ORDER BY Year, Event_court, Quarter;
"""

pydb.create_temp_table(create_Dom_Violence_Apps_Cases, "Dom_Violence_Apps_Cases");

## 9. Dom_violence_Apps_Merge table 

In [19]:
# Combine the application counts (t1) with the case counts (t2) per Year / Quarter /
# Event_court and tag every row as Category 'Domestic Violence', Stage 'Start'.
#
# The join keys are COALESCEd across both sides: with a FULL OUTER JOIN a row present
# only in t2 has NULL t1 keys, so selecting t1.Year etc. would return NULL keys, and a
# filter written against t1 alone (NOT (NULL = ... AND NULL = ...)) evaluates to NULL and
# silently drops those rows, turning the outer join into a left join.
# Rows for latest_year / latest_quarter (the current quarter) are excluded.
create_Dom_Violence_Apps_Merge = f"""
SELECT COALESCE(t1.Year, t2.Year) AS Year,
COALESCE(t1.Quarter, t2.Quarter) AS Quarter,
COALESCE(t1.Event_court, t2.Event_court) AS Event_court,
COALESCE(t1.Count, 0) AS Count,
COALESCE(t2.Cases, 0) AS Cases,
'Domestic Violence' AS Category,
'Start' AS Stage
FROM __temp__.Dom_Violence_Apps_Orders t1
FULL OUTER JOIN __temp__.Dom_Violence_Apps_Cases t2
ON t1.Year = t2.Year AND t1.Quarter = t2.Quarter AND t1.Event_court = t2.Event_court
WHERE NOT (COALESCE(t1.Year, t2.Year) = {latest_year}
           AND COALESCE(t1.Quarter, t2.Quarter) = {latest_quarter})
"""

pydb.create_temp_table(create_Dom_Violence_Apps_Merge, "Dom_Violence_Apps_Merge");

In [20]:
#check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Apps_Merge"
#pydb.read_sql_query(check)

## 10. Dom_violence_Apps_Format table 

In [21]:
# Put the merged columns into the output order and rename Event_court to Court
create_Dom_Violence_Apps_Format = """
SELECT Category, 
Year, 
Quarter, 
Event_court as Court, 
Stage, 
Count,
Cases
FROM __temp__.Dom_Violence_Apps_Merge;
"""

pydb.create_temp_table(create_Dom_Violence_Apps_Format, "Dom_Violence_Apps_Format");

# Run the same query into a dataframe and save it to S3 as a CSV, without the index column
df = pydb.read_sql_query(create_Dom_Violence_Apps_Format)
df.to_csv(
    "s3://alpha-family-data/CSVs/Domestic_Violence/dom_violence_Apps_format.csv",
    index=False,
)

In [22]:
#check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Apps_Format"
#pydb.read_sql_query(check)

# Stage 3 - Preparing the final output
<a name="prepare_final_output"></a>

## 12. dv_court_level_append table - combines both the Dom_Violence_Apps_Format and Dom_Violence_Format tables
<a name="dv_court_level_append"></a>

### Drop the dv_court_level_append table if it already exists and remove its data from the S3 bucket

In [23]:
# Remove the Athena table definition first ...
drop_dv_court_level_append = """
DROP TABLE IF EXISTS fcsq.dv_court_level_append;
"""
pydb.start_query_execution_and_wait(drop_dv_court_level_append)

# ... then delete the files a previous run left under the table's S3 prefix
old_append_files = bucket.objects.filter(
    Prefix="fcsq_processing/Domestic_Violence/dv_court_level_append/"
)
old_append_files.delete();

### Create the dv_court_level_append table in Athena

In [24]:
# Stack the Dom_Violence_Apps_Format and Dom_Violence_Format temp tables into one
# Parquet-backed Athena table stored under the dv_court_level_append S3 location
create_dv_court_level_append = """
CREATE TABLE IF NOT EXISTS fcsq.dv_court_level_append 
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/dv_court_level_append') AS
SELECT * FROM __temp__.Dom_Violence_Apps_Format 
UNION 
SELECT * FROM __temp__.Dom_Violence_Format 
ORDER BY Year,Quarter,Court
"""

pydb.start_query_execution_and_wait(create_dv_court_level_append);

#### dv_court_level_append validation

In [25]:
#dv_court_level_append_count = pydb.read_sql_query("select * from fcsq.dv_court_level_append")
#dv_court_level_append_count

## 16. court_lookup table - creates a table with court information (e.g. court codes and region)
<a name="court_lookup"></a>

### Create the court_lookup temporary table

In [26]:
# Pull the court code, both region columns and the DFJ column from
# fcsq.court_mv_feb21_dfj into a temp table used by the court-level join
create_court_lookup = """
SELECT 
code,
Region,
Region_Pre2014,
DFJ_New
FROM fcsq.court_mv_feb21_dfj;
"""

pydb.create_temp_table(create_court_lookup, "court_lookup");

#### court_lookup validation

In [27]:
#court_lookup_count = pydb.read_sql_query("select * from __temp__.court_lookup")
#court_lookup_count

## 17. court_level_merge table - joins both the dv_court_level_append and court_lookup tables
<a name="court_level_merge"></a>

### Drop the court_level_merge table if it already exists and remove its data from the S3 bucket

In [28]:
# Remove the Athena table definition first ...
drop_court_level_merge = """
DROP TABLE IF EXISTS fcsq.court_level_merge;
"""
pydb.start_query_execution_and_wait(drop_court_level_merge)

# ... then delete the files a previous run left under the table's S3 prefix
old_merge_files = bucket.objects.filter(
    Prefix="fcsq_processing/Domestic_Violence/court_level_merge/"
)
old_merge_files.delete();

### Create the court_level_merge table in Athena

In [29]:
# Attach DFJ and region to every court-level row by joining the court code
# (Court cast to integer) to the court_lookup temp table.
# Final_Region uses Region_Pre2014 for years before 2014 and Region otherwise.
#
# The INNER JOIN already keeps only rows whose court code exists in the lookup, so
# the former "WHERE ... IN (SELECT code FROM court_lookup)" filter repeated the join
# condition and only added a second scan of the lookup; it has been removed.
create_court_level_merge = """
CREATE TABLE IF NOT EXISTS fcsq.court_level_merge
WITH (format = 'PARQUET', external_location = 's3://alpha-family-data/fcsq_processing/Domestic_Violence/court_level_merge') AS
SELECT
t1.Category,
t1.Stage,
t1.Year,
t1.Quarter,
t1.Count,
t1.Cases,
t2.DFJ_New,
CASE WHEN t1.Year < 2014 THEN t2.Region_Pre2014
ELSE t2.Region
END AS Final_Region
FROM
fcsq.dv_court_level_append t1
INNER JOIN
__temp__.court_lookup t2
ON CAST(t1.court AS integer) = t2.code;
"""

pydb.start_query_execution_and_wait(create_court_level_merge);

#### court_level_merge validation

In [30]:
#court_level_merge_count = pydb.read_sql_query("select * from fcsq.court_level_merge where year=2020 and quarter=3 and dfj_new='Carlisle DFJ' order by year,quarter,dfj_new")
#court_level_merge_count

## 18. court_level_sum table - this query calculates the total number of counts and cases in each quarter and region to produce the final DFJ csv output
<a name="court_level_sum"></a>

### Create the court_level_sum temporary table 

In [31]:
# Total count and cases per Category / Stage / Year / Quarter / DFJ / region,
# read from fcsq.court_level_merge; this is the final DFJ output.
csv_output_table = """
SELECT
Category,
Stage,
Year,
Quarter,
DFJ_New,
Final_Region as region,
SUM(count) as count,
SUM(cases) as cases
FROM fcsq.court_level_merge
group by
Category,
Stage,
Year,
Quarter,
DFJ_New,
Final_Region;
"""

# Register the result as __temp__.court_level_sum: the section heading promises this
# temp table and the validation query below reads from it, but it was never created.
pydb.create_temp_table(csv_output_table, "court_level_sum");

# Run the same query into a dataframe and save the final DFJ CSV to S3, without the index column
df = pydb.read_sql_query(csv_output_table)
df.to_csv(path_or_buf="s3://alpha-family-data/CSVs/Domestic_Violence/domestic_violence_dfj.csv", index=False)

#### court_level_sum validation

In [None]:
# Validation: read the court_level_sum temp table back in a fixed order for inspection.
# Requires __temp__.court_level_sum to have been created earlier in this session.
court_level_sum_query = "select * from __temp__.court_level_sum ORDER BY Category,Year,Quarter,region,DFJ_New,Stage"
court_level_sum_count = pydb.read_sql_query(court_level_sum_query)
#court_level_sum_count