# Table 10 and CSV Timeliness 

## 1. Import packages and set options 
<a name="import_packages"></a>

In [1]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts
import numpy as np
import re

# sets parameters to view dataframes for tables easier
pd.set_option("display.max_columns", 100)
pd.set_option("display.width", 900)
pd.set_option("display.max_colwidth", 200)

## 2. Define key variables to be used throughout the notebook 
<a name="define_key_variables"></a>

In [None]:
#this is the database we will be extracting from
database = "familyman_dev_v3"
derived = "familyman_derived_dev_v3"

#this is the athena database we will be storing our tables in
fcsq_database = "fcsq"

#this is the s3 bucket we will be saving data to
s3 = boto3.resource("s3")
bucket = s3.Bucket("alpha-family-data")

#setting current year and quarter
pub_quarter = 4
pub_year = 2022

## Stage 1 - CSV Timeliness

### Import Domestic Violence CSV Timeliness

#### Create the dv_csv_timeliness table

In [None]:
#imports DV CSV timeliness data from S3 bucket into a temporary table
dv_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/dv_csv_timeliness.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(dv_csv_timeliness_table, "dv_csv_timeliness")

##### dv_csv_timeliness validation

In [None]:
#dv_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.dv_csv_timeliness limit 10")
#dv_csv_timeliness_count

### Import Adoption CSV Timeliness

#### Create the adopt_csv_timeliness table

In [None]:
#imports Adoption CSV timeliness data from S3 bucket into a temporary table
adopt_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/adopt_csv_timeliness.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(adopt_csv_timeliness_table, "adopt_csv_timeliness")

##### adopt_csv_timeliness validation

In [None]:
#adopt_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.adopt_csv_timeliness limit 10")
#adopt_csv_timeliness_count

### Import Children Act CSV Timeliness

#### Create the ca_csv_timeliness table

In [None]:
#imports Children Act CSV timeliness data from S3 bucket into a temporary table
ca_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/ca_time_csv.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(ca_csv_timeliness_table, "ca_csv_timeliness")

##### ca_csv_timeliness validation

In [None]:
#ca_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.ca_csv_timeliness limit 10")
#ca_csv_timeliness_count

### Import Divorce CSV Timeliness

#### Create the divorce_csv_timeliness table

In [None]:
#imports Divorce CSV timeliness data from S3 bucket into a temporary table
divorce_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/Nisi_rep_timeliness_csv.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(divorce_csv_timeliness_table, "divorce_csv_timeliness")

##### divorce_csv_timeliness validation

In [None]:
#divorce_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.divorce_csv_timeliness limit 10")
#divorce_csv_timeliness_count

### Import Financial Remedy CSV Timeliness *CHECK*

#### Create the fr_csv_timeliness table

In [None]:
#imports Financial Remedy CSV timeliness data from S3 bucket into a temporary table
fr_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/fr_rep_timeliness_csv.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(fr_csv_timeliness_table, "fr_csv_timeliness")

##### fr_csv_timeliness validation

In [None]:
fr_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.fr_csv_timeliness limit 10")
fr_csv_timeliness_count

### Import Children Act Public Law CSV Timeliness 

#### *Temporary - once Public law amalgamation is complete and Children Act on the AP includes Public law timeliness - this code can be removed*

#### Create the public_csv_timeliness table

In [None]:
#imports Children Act Public Law CSV timeliness data from S3 bucket into a temporary table
public_csv_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/public_csv_timeliness.csv", low_memory=False)

In [None]:
pydb.dataframe_to_temp_table(public_csv_timeliness_table, "public_csv_timeliness")

##### public_csv_timeliness validation

In [None]:
#public_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.public_csv_timeliness limit 10")
#public_csv_timeliness_count

### Final Output CSV Timeliness

In [None]:
pydb.create_temp_table(
f""" 
SELECT * FROM __temp__.adopt_csv_timeliness

UNION

SELECT * FROM __temp__.ca_csv_timeliness

UNION

SELECT * FROM __temp__.public_csv_timeliness

UNION

SELECT * FROM __temp__.divorce_csv_timeliness

UNION

SELECT * FROM __temp__.dv_csv_timeliness

/*UNION*/

/*SELECT * FROM __temp__.fr_csv_timeliness*/
""",

"csv_timeliness_temp")

In [None]:
#csv_timeliness_temp = pydb.read_sql_query("SELECT * FROM __temp__.csv_timeliness_temp;")
#csv_timeliness_temp

In [None]:
#Orders the data by year, country, and county_ua
csv_timeliness = pydb.read_sql_query("""
SELECT *
from __temp__.csv_timeliness_temp
ORDER BY case_type,
region,
representation,
quarter
""")

In [None]:
#Export the final csv
csv_timeliness.to_csv("s3://alpha-family-data/CSVs/Timeliness/CSV Timeliness.csv", index = False)

## Stage 2 - Table 10 Timeliness

### Import Domestic Violence Table 10 Timeliness

#### Create the dv_t10_timeliness table

In [2]:
#imports DV T10 timeliness data from S3 bucket into a temporary table
dv_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/dv_t10_timeliness.csv", low_memory=False)

In [3]:
pydb.dataframe_to_temp_table(dv_t10_timeliness_table, "dv_t10_timeliness")

##### dv_t10_timeliness validation

In [4]:
dv_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.dv_t10_timeliness limit 10")
dv_t10_timeliness_count

Unnamed: 0,lookup,1_bothn,1_both_mean,2_applicant_onlyn,2_applicant_only_mean,3_respondent_onlyn,3_respondent_only_mean,4_neithern,4_neither_mean,5_unknown,5_unknown_mean,alln,all_mean
0,Domestic Violence|2011|Q1,704,1.384943,2706,0.802872,103,2.676838,528,1.130141,,,4041,0.994803
1,Domestic Violence|2011|Q2,722,1.451721,2760,0.661957,78,1.24359,492,1.660569,2.0,0.0,4054,0.934668
2,Domestic Violence|2011|,2737,1.587296,11082,0.642767,343,1.999167,2016,1.182115,2.0,0.0,16180,0.89842
3,Domestic Violence|2011|Q3,686,1.938567,3022,0.559516,88,2.681818,511,0.837573,,,4307,0.855518
4,Domestic Violence|2011|Q4,625,1.586286,2594,0.552319,74,1.040541,485,1.116348,,,3778,0.805339
5,Domestic Violence|2012|,2388,1.407394,11408,0.674527,323,1.077399,2246,0.857143,,,16365,0.814482
6,Domestic Violence|2012|Q1,641,1.29708,2778,0.780726,88,0.805195,555,0.778378,,,4062,0.862418
7,Domestic Violence|2012|Q2,597,1.282603,2821,0.529751,77,0.992579,535,0.842991,,,4030,0.691705
8,Domestic Violence|2012|Q3,602,1.616754,2996,0.736649,79,1.665461,586,0.977816,,,4263,0.911297
9,Domestic Violence|2012|Q4,548,1.442388,2813,0.648672,79,0.875226,570,0.823058,,,4010,0.786391


### Import Adoption Table 10 Timeliness

#### Create the adopt_t10_timeliness table

In [5]:
#imports Adoption T10 timeliness data from S3 bucket into a temporary table
adopt_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/adopt_t10_timeliness.csv", low_memory=False)

In [6]:
pydb.dataframe_to_temp_table(adopt_t10_timeliness_table, "adopt_t10_timeliness")

##### adopt_t10_timeliness validation

In [7]:
adopt_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.adopt_t10_timeliness limit 10")
adopt_t10_timeliness_count

Unnamed: 0,lookup,1_bothn,1_both_mean,2_applicant_onlyn,2_applicant_only_mean,3_respondent_onlyn,3_respondent_only_mean,4_neithern,4_neither_mean,5_unknown,5_unknown_mean,alln,all_mean
0,Adoption|2011|Q3,27,26.396825,61,17.224824,44,19.961039,967,15.82671,58,14.29803,1157,16.227682
1,Adoption|2011|Q4,31,27.064516,74,19.050193,52,18.945055,1037,15.746797,30,17.485714,1224,16.411648
2,Adoption|2012|,72,27.878968,229,23.284467,157,19.105551,4634,14.42518,104,18.918956,5196,15.233421
3,Adoption|2012|Q1,13,27.296703,59,22.72155,39,20.084249,1056,14.813718,24,17.630952,1191,15.571069
4,Adoption|2011|Q1,19,32.278195,63,21.970522,38,21.048872,906,15.861873,46,14.145963,1072,16.622068
5,Adoption|2011|Q2,18,32.619048,51,19.568627,44,16.62013,1049,14.730219,38,15.293233,1200,15.29131
6,Adoption|2011|,95,28.969925,249,19.448078,178,19.070626,3959,15.523292,172,15.033223,4653,16.125449
7,Adoption|2012|Q2,15,30.12381,63,26.263039,39,17.937729,1094,14.486681,35,20.220408,1246,15.53944
8,Adoption|2012|Q3,22,27.909091,52,21.293956,37,21.833977,1195,14.617693,24,19.529762,1330,15.38797
9,Adoption|2012|Q4,22,26.662338,55,22.358442,42,16.877551,1289,13.876205,21,17.52381,1429,14.541338


### Import Children Act Table 10 Timeliness

#### Create the ca_t10_timeliness table

In [8]:
#imports Children Act T10 timeliness data from S3 bucket into a temporary table
ca_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/ca_time_lookup.csv", low_memory=False)

In [9]:
pydb.dataframe_to_temp_table(ca_t10_timeliness_table, "ca_t10_timeliness")

##### ca_t10_timeliness validation

In [10]:
ca_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.ca_t10_timeliness limit 10")
ca_t10_timeliness_count

Unnamed: 0,lookup,both_n,both_mean,applicant_n,applicant_mean,respondent_n,respondent_mean,neither_n,neither_mean,all_n,all_mean,1_bothn,1_bothmean,2_applicant_onlyn,2_applicant_onlymean,3_respondent_onlyn,3_respondent_onlymean,4_neithern,4_neithermean,5_unknownn,5_unknownmean,alln,allmean
0,Private Law|2020|Q4,,,,,,,,,,,5280,23.338,6732,18.103,3512,28.148,9423,22.202,,,24947,22.174
1,Private Law|2016|Q3,,,,,,,,,,,4601,14.211,7312,10.142,2760,16.828,7440,12.251,,,22113,12.533
2,Private Law|2014|Q3,,,,,,,,,,,4542,21.267,7450,13.669,2078,20.233,5905,14.519,,,19975,16.331
3,Private Law|2018|,,,,,,,,,,,17337,16.127,27569,12.187,11071,18.871,33127,15.085,,,89104,14.861
4,Private Law|2018|Q4,,,,,,,,,,,4556,16.937,7073,12.311,2999,19.515,8554,14.765,,,23182,15.058
5,Private Law|2019|Q2,,,,,,,,,,,4680,17.814,7314,12.062,3187,20.053,9548,16.623,,,24729,15.941
6,Private Law|2013|Q3,,,,,,,,,,,7477,21.758,9832,12.042,2221,17.107,5413,10.97,,,24943,15.173
7,Private Law|2022|Q1,,,,,,,,,,,4501,31.847,6352,24.465,3985,36.82,9381,30.708,,,24219,30.288
8,Private Law|2011|,,,,,,,,,,,45459,18.733,28707,11.813,9370,19.266,10935,15.007,,,94471,16.252
9,Private Law|2015|Q3,,,,,,,,,,,4263,15.434,6465,12.092,2291,17.25,6684,12.569,,,19703,13.577


### Import Divorce Table 10 Timeliness

#### Create the divorce_t10_timeliness table

In [27]:
#imports Divorce T10 timeliness data from S3 bucket into a temporary table
divorce_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/DIV_TABLE_10_LOOKUP.csv", low_memory=False)

In [28]:
pydb.dataframe_to_temp_table(divorce_t10_timeliness_table, "div_t10_timeliness")

##### divorce_t10_timeliness validation

In [29]:
div_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.div_t10_timeliness limit 10")
div_t10_timeliness_count

Unnamed: 0,lookup,1_bothn,1_bothmean,2_applicant_onlyn,2_applicant_onlymean,3_respondent_onlyn,3_respondent_onlymean,4_neithern,4_neithermean,5_unknownn,5_unknownmean,alln,allmean
0,Divorce (incl. annulment and FR)|2011|Q1,9525,25.361,12779,26.76,917,28.837,7196,22.991,.,.,30417,25.493
1,Divorce (incl. annulment and FR)|2011|Q2,8739,24.486,12263,27.129,803,29.657,7396,22.024,.,.,29201,25.115
2,Divorce (incl. annulment and FR)|2012|Q2,8557,24.296,12558,27.019,986,29.13,8761,21.215,.,.,30862,24.684
3,Divorce (incl. annulment and FR)|2012|Q3,8488,24.404,12699,26.408,950,26.728,9041,20.58,.,.,31178,24.182
4,Divorce (incl. annulment and FR)|2012|Q4,8066,23.076,11584,26.393,893,28.803,8797,21.087,.,.,29340,23.964
5,Divorce (incl. annulment and FR)|2011|,36398,25.161,51490,26.968,3576,29.481,30997,22.196,.,.,122461,25.297
6,Divorce (incl. annulment and FR)|2011|Q3,9220,25.584,13422,26.981,929,29.956,7948,21.963,.,.,31519,25.395
7,Divorce (incl. annulment and FR)|2011|Q4,8914,25.174,13026,27.006,927,29.489,8457,21.89,.,.,31324,25.177
8,Divorce (incl. annulment and FR)|2012|,34087,24.295,49680,26.9,3791,28.784,35082,21.079,.,.,122640,24.569
9,Divorce (incl. annulment and FR)|2012|Q1,8976,25.286,12839,27.727,962,30.441,8483,21.462,.,.,31260,25.409


### Import Financial Remedy Table 10 Timeliness

#### Create the fr_t10_timeliness table

In [30]:
#imports Financial Remedy T10 timeliness data from S3 bucket into a temporary table
fr_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/FR_TABLE_10_LOOKUP.csv", low_memory=False)

In [31]:
pydb.dataframe_to_temp_table(fr_t10_timeliness_table, "fr_t10_timeliness")

##### fr_t10_timeliness validation

In [32]:
fr_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.fr_t10_timeliness limit 10")
fr_t10_timeliness_count

Unnamed: 0,lookup,both_n,both_mean,applicant_n,applicant_mean,respondent_n,respondent_mean,neither_n,neither_mean,unknown_n,unknown_mean,all_n,all_mean,1_bothn,1_bothmean,2_applicant_onlyn,2_applicant_onlymean,3_respondent_onlyn,3_respondent_onlymean,4_neithern,4_neithermean,5_unknownn,5_unknownmean,alln,allmean
0,Financial Remedy|2011|Q1,,,,,,,,,,,,,4606,27.259,2544,16.832,379,48.66,442,33.088,.,.,7977,25.272
1,Financial Remedy|2011|Q2,,,,,,,,,,,,,3918,27.966,2167,18.23,345,48.188,422,28.91,.,.,6860,26.017
2,Financial Remedy|2011|,,,,,,,,,,,,,16487,27.149,9364,16.537,1428,46.593,1764,33.823,.,.,29069,25.141
3,Financial Remedy|2011|Q3,,,,,,,,,,,,,4128,26.174,2436,14.935,374,49.531,449,34.15,.,.,7394,24.202
4,Financial Remedy|2011|Q4,,,,,,,,,,,,,3835,27.232,2217,16.304,330,39.221,451,38.816,.,.,6838,25.127
5,Financial Remedy|2012|,,,,,,,,,,,,,14720,25.458,9107,16.39,1311,40.085,1803,28.837,.,.,26965,23.353
6,Financial Remedy|2012|Q1,,,,,,,,,,,,,3815,25.639,2350,18.051,324,39.717,481,30.555,.,.,6979,24.134
7,Financial Remedy|2012|Q2,,,,,,,,,,,,,3633,25.04,2108,14.488,316,44.001,405,26.194,.,.,6470,22.615
8,Financial Remedy|2012|Q3,,,,,,,,,,,,,3627,24.326,2317,15.836,330,35.045,459,25.665,.,.,6737,22.018
9,Financial Remedy|2012|Q4,,,,,,,,,,,,,3645,26.81,2332,16.984,341,41.685,458,32.547,.,.,6779,24.578


### Import Children Act Public Law Table 10 Timeliness 

#### *Temporary - once Public law amalgamation is complete and Children Act on the AP includes Public law timeliness - this code can be removed*

#### Create the public_t10_timeliness table

In [33]:
#imports Children Act Public Law T10 timeliness data from S3 bucket into a temporary table
public_t10_timeliness_table = pd.read_csv("s3://alpha-family-data/CSVs/Timeliness/public_t10_timeliness.csv", low_memory=False)

In [34]:
pydb.dataframe_to_temp_table(public_t10_timeliness_table, "public_t10_timeliness")

##### public_t10_timeliness validation

In [35]:
public_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.public_t10_timeliness limit 10")
public_t10_timeliness_count

Unnamed: 0,lookup,1_bothn,1_bothmean,2_applicant_onlyn,2_applicant_onlymean,3_respondent_onlyn,3_respondent_onlymean,4_neithern,4_neithermean,5_unknownn,5_unknownmean,alln,allmean
0,Public Law|2011|Q3,4877,51.32338753,472,24.70371822,25,52.45712,6,21.92833333,1.0,0.0,5381,48.95136852
1,Public Law|2011|Q4,5353,51.86941397,403,22.21272208,17,38.63029412,5,13.3142,,,5778,49.7286234
2,Public Law|2012|,24761,47.36068309,1899,21.29679305,123,48.53544715,21,19.70747619,5.0,30.257,26809,45.49500116
3,Public Law|2012|Q1,5690,51.2700239,424,22.72644104,18,50.59527778,5,21.8,2.0,33.857,6139,49.26696139
4,Public Law|2012|Q2,5829,49.05026454,440,24.04126364,26,70.78569231,3,18.57133333,,,6298,47.37826135
5,Public Law|2012|Q3,6484,46.43093075,497,22.41249095,43,39.68783721,7,9.489857143,,,7031,44.65512288
6,Public Law|2012|Q4,6758,43.50389035,538,16.8948606,36,42.00388889,6,30.45233333,3.0,27.857,7341,41.52937624
7,Public Law|2011|,19434,51.90145024,1714,21.46959568,95,57.75034737,30,32.3428,3.0,0.095333333,21276,49.44108498
8,Public Law|2011|Q1,4596,52.49987772,429,19.37529837,23,58.09308696,12,51.32133333,,,5060,49.71411818
9,Public Law|2011|Q2,4608,51.95360504,410,19.20732195,30,72.7333,7,22.32671429,2.0,0.143,5057,49.36044809


### Final Output Table 10 Timeliness

In [36]:
pydb.create_temp_table(
f""" 
SELECT * FROM __temp__.adopt_t10_timeliness

UNION

SELECT * FROM __temp__.ca_t10_timeliness

UNION

SELECT * FROM __temp__.public_t10_timeliness

UNION

SELECT * FROM __temp__.div_t10_timeliness

UNION

SELECT * FROM __temp__.dv_t10_timeliness

UNION

SELECT * FROM __temp__.fr_t10_timeliness
""",

"t10_timeliness_temp")

QueryFailed: TYPE_MISMATCH: line 9:1: UNION query has different number of fields: 13, 23. You may need to manually clean the data at location 's3://aws-athena-query-results-593291632749-eu-west-1/tables/6ed607ad-14a0-4475-9699-dc9258c7a461' before retrying. Athena will not delete data in your account.

In [None]:
#t10_timeliness_temp = pydb.read_sql_query("SELECT * FROM __temp__.t10_timeliness_temp;")
#t10_timeliness_temp

In [None]:
#Orders the data by year, country, and county_ua
csv_timeliness = pydb.read_sql_query("""
SELECT *
from __temp__.csv_timeliness_temp
ORDER BY case_type,
region,
representation,
quarter
""")

In [None]:
#Export the final csv
csv_timeliness.to_csv("s3://alpha-family-data/CSVs/Timeliness/Table 10 Lookup.csv", index = False)