# Domestic Violence DFJ

## Contents
#### Setup
1. [import_packages](#import_packages) 
2. [define_key_variables](#define_key_variables) 

## 1. Import packages and set options 
<a name="import_packages"></a>

In [1]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts

# sets parameters to view dataframes for tables easier
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

## 2. Define key variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [2]:
#this is the database we will be extracting from
database = "familyman_dev_v3" 

#this extracts the August snapshot from athena
snapshot_date = '2022-08-04'

#this is the athena database we will be storing our tables in
fcsq_database = "fcsq"

#this is the s3 bucket we will be saving data to
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

#change these to the current quarter and year not the quarter being published
latest_quarter = 3
latest_year = 2022

## 3.Dom_violence_Orders table 

In [11]:
create_Dom_Violence_Orders = f"""
SELECT Year, Quarter, Event_court, count(*) as Disposals
FROM fcsq.DV_ORDS_FINAL
WHERE Year > 2010
GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

pydb.create_temp_table(create_Dom_Violence_Orders,'Dom_Violence_Orders');

In [12]:
check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Orders"
pydb.read_sql_query(check)

Unnamed: 0,count
0,7027


In [13]:
create_Dom_Violence_Cases = f"""
SELECT Year, Quarter, Event_court, count(*) as Case_Count
FROM fcsq.DV_ORDS_FINAL
WHERE Year > 2010 and substring(case_number,5,1) = 'F'
GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

pydb.create_temp_table(create_Dom_Violence_Cases,'Dom_Violence_Cases');

In [14]:
check = "SELECT COUNT(*) as Count from __temp__.Dom_Violence_Cases"
pydb.read_sql_query(check)

Unnamed: 0,count
0,6881


In [None]:
data Dom_Violence_Merge (drop =  _type_ _page_ _table_);
	merge Dom_Violence_Orders Dom_Violence_Cases;
	by year quarter EventCourt;
	Category = "Domestic Violence";
	Stage = "End";
	if Disposals = '.' then Disposals = 0;
	if Case_Count = '.' then Case_Count = 0;

In [15]:
create_Dom_Violence_Merge = f"""
SELECT Year, Quarter, Event_court, count(*) as Case_Count,
CASE WHEN Disposals = '.' THEN 0
     WHEN Case_Count = '.' THEN 0
else '' end as Resp_Rep_Cat

FROM __temp__.Dom_Violence_Orders

GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

pydb.create_temp_table(create_Dom_Violence_Merge,'Dom_Violence_Merge');

QueryFailed: SYNTAX_ERROR: line 13:7: Column 'di' cannot be resolved. You may need to manually clean the data at location 's3://aws-athena-query-results-593291632749-eu-west-1/tables/efdd4bbc-be2b-4b8c-bca7-aa8dc78ba0c6' before retrying. Athena will not delete data in your account.

In [None]:
data Dom_Violence_Clean;
	set Dom_Violence_Merge;
	where year <> 2022 or quarter <> 3; /*Needs to be updated every quarter.*/
run;

In [None]:
create_Dom_Violence_Clean = f"""
SELECT Year, Quarter, Event_court, count(*) as Case_Count
FROM __temp__.Dom_Violence_Merge
WHERE Di
GROUP BY Year, Quarter, Event_court
ORDER BY Year, Quarter, Event_court;
"""

pydb.create_temp_table(create_Dom_Violence_Clean,'Dom_Violence_Clean');

In [None]:
data Dom_Violence_format 
		(keep =  Category Year Quarter Court Stage Count Cases);
		length Category $20 Stage $15;
	set Dom_Violence_Clean
		(rename = (	Category = OldCategory Year = OldYear Quarter = OldQuarter
					Stage = OldStage));
		Category = OldCategory;
		format Category $20.;
		Year = OldYear;
		format Year 8.;
		Quarter = OldQuarter;
		format Quarter 8.;
		Court = EventCourt;
		format Court 8.;
		Stage = OldStage;
		format Stage $15.;
		Count = Disposals;
		format Count 8.;
		Cases = Case_Count;
		format Cases 8.;
run;