# Table 10 and CSV Timeliness 

## Contents
#### Setup
1. [import_packages](#import_packages) 

#### Stage 1 - [CSV Timeliness](#CSV_Timeliness)
2. [divorce_csv_timeliness](#divorce_csv_timeliness) - imports Divorce CSV timeliness data from S3 bucket into a temporary table
3. [fr_csv_timeliness](#fr_csv_timeliness) - imports Financial Remedy CSV timeliness data from S3 bucket into a temporary table
4. [public_csv_timeliness](#public_csv_timeliness) - imports Children Act Public Law CSV timeliness data from S3 bucket into a temporary table
5. [csv_timeliness_temp](#csv_timeliness_temp) - combines (unions) the CSV timeliness outputs for every case type (Adoption, Domestic Violence, Divorce, Financial Remedy, Private Law, and Public Law) into a single table

#### Stage 2 - [Table 10 Timeliness](#Table_10_Timeliness)
6. [div_t10_timeliness](#div_t10_timeliness) - imports Divorce Table 10 timeliness data from S3 bucket into a temporary table
7. [fin_rem_t10_timeliness](#fin_rem_t10_timeliness) - imports Financial Remedy Table 10 timeliness data from S3 bucket into a temporary table
8. [public_t10_timeliness](#public_t10_timeliness) - imports Children Act Public Law Table 10 timeliness data from S3 bucket into a temporary table
9. [t10_timeliness_temp](#t10_timeliness_temp) - combines (unions) the Table 10 timeliness outputs for every case type (Adoption, Domestic Violence, Divorce, Financial Remedy, Private Law, and Public Law) and amends the data types of certain columns so that they match across sources

## 1. Import packages and set options 
<a name="import_packages"></a>

In [None]:
import pandas as pd  # a module which provides the data structures and functions to store and manipulate tables in dataframes
import pydbtools as pydb  # A module which allows SQL queries to be run on the Analytical Platform from Python, see https://github.com/moj-analytical-services/pydbtools
import boto3  # allows you to directly create, update, and delete AWS resources from Python scripts
import numpy as np
import re

# Widen pandas' display limits so that wide tables are easier to inspect in cell output
pd.options.display.max_columns = 100
pd.options.display.width = 900
pd.options.display.max_colwidth = 200

## Stage 1 - CSV Timeliness
<a name="CSV_Timeliness"></a>

### Import Divorce CSV Timeliness
<a name="divorce_csv_timeliness"></a>

#### Create the divorce_csv_timeliness table

In [None]:
# Load the Divorce CSV timeliness extract from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
divorce_csv_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/Nisi_rep_timeliness_csv.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.divorce_csv_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    divorce_csv_timeliness_table,
    "divorce_csv_timeliness",
)

##### divorce_csv_timeliness validation

In [None]:
#divorce_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.divorce_csv_timeliness limit 10")
#divorce_csv_timeliness_count

### Import Financial Remedy CSV Timeliness
<a name="fr_csv_timeliness"></a>

#### Create the fr_csv_timeliness table

In [None]:
# Load the Financial Remedy CSV timeliness extract from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
fr_csv_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/fr_rep_timeliness_csv.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.fr_csv_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    fr_csv_timeliness_table,
    "fr_csv_timeliness",
)

##### fr_csv_timeliness validation

In [None]:
#fr_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.fr_csv_timeliness limit 10")
#fr_csv_timeliness_count

### Import Children Act Public Law CSV Timeliness 
<a name="public_csv_timeliness"></a>

#### *Temporary - once Public law amalgamation is complete and Children Act on the AP includes Public law timeliness - this code can be removed*

#### Create the public_csv_timeliness table

In [None]:
# Load the Children Act Public Law CSV timeliness extract from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
public_csv_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/public_csv_timeliness.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.public_csv_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    public_csv_timeliness_table,
    "public_csv_timeliness",
)

##### public_csv_timeliness validation

In [None]:
#public_csv_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.public_csv_timeliness limit 10")
#public_csv_timeliness_count

### Final Output CSV Timeliness
<a name="csv_timeliness_temp"></a>

In [None]:
# Stack the CSV timeliness outputs for every case type into __temp__.csv_timeliness_temp.
# Adoption, Children Act and Domestic Violence come from the fcsq database; Public Law,
# Divorce and Financial Remedy come from the temporary tables created in the cells above.
# UNION (rather than UNION ALL) also removes exact duplicate rows.
# NOTE(review): SELECT * with UNION matches columns by position, so all six sources
# are assumed to share the same column order - confirm when a source changes.
# A plain string is used: the query has no placeholders, so an f-string prefix is unnecessary.
pydb.create_temp_table(
"""
SELECT * FROM fcsq.adopt_csv_timeliness

UNION

SELECT * FROM fcsq.ca_csv_timeliness

UNION

SELECT * FROM __temp__.public_csv_timeliness

UNION

SELECT * FROM __temp__.divorce_csv_timeliness

UNION

SELECT * FROM fcsq.dv_csv_timeliness

UNION

SELECT * FROM __temp__.fr_csv_timeliness
""",

"csv_timeliness_temp")

In [None]:
#csv_timeliness_temp = pydb.read_sql_query("SELECT * FROM __temp__.csv_timeliness_temp;")
#csv_timeliness_temp

In [None]:
# Reads the combined CSV timeliness table into a dataframe,
# ordered by case_type, region, representation and quarter
csv_timeliness = pydb.read_sql_query("""
SELECT *
from __temp__.csv_timeliness_temp
ORDER BY case_type,
region,
representation,
quarter
""")

In [None]:
# Write the ordered CSV timeliness output back to S3, leaving out the dataframe index
csv_timeliness.to_csv(
    "s3://alpha-family-data/CSVs/Timeliness/CSV Timeliness.csv",
    index=False,
)

## Stage 2 - Table 10 Timeliness
<a name="Table_10_Timeliness"></a>

### Import Divorce Table 10 Timeliness
<a name="div_t10_timeliness"></a>

#### Create the div_t10_timeliness table

In [None]:
# Load the Divorce Table 10 timeliness lookup from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
divorce_t10_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/DIV_TABLE_10_LOOKUP.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.div_t10_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    divorce_t10_timeliness_table,
    "div_t10_timeliness",
)

##### div_t10_timeliness validation

In [None]:
#div_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.div_t10_timeliness limit 10")
#div_t10_timeliness_count

### Import Financial Remedy Table 10 Timeliness
<a name="fin_rem_t10_timeliness"></a>

#### Create the fin_rem_t10_timeliness table

In [None]:
# Load the Financial Remedy Table 10 timeliness lookup from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
fin_rem_t10_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/FR_TABLE_10_LOOKUP.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.fin_rem_t10_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    fin_rem_t10_timeliness_table,
    "fin_rem_t10_timeliness",
)

##### fin_rem_t10_timeliness validation

In [None]:
#fin_rem_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.fin_rem_t10_timeliness limit 10")
#fin_rem_t10_timeliness_count

### Import Children Act Public Law Table 10 Timeliness 
<a name="public_t10_timeliness"></a>

#### *Temporary - once Public law amalgamation is complete and Children Act on the AP includes Public law timeliness - this code can be removed*

#### Create the public_t10_timeliness table

In [None]:
# Load the Children Act Public Law Table 10 timeliness data from the S3 bucket into a dataframe.
# low_memory=False makes pandas parse the file in one pass, avoiding mixed-type columns.
public_t10_timeliness_table = pd.read_csv(
    "s3://alpha-family-data/CSVs/Timeliness/public_t10_timeliness.csv",
    low_memory=False,
)

In [None]:
# Publish the dataframe as __temp__.public_t10_timeliness so later SQL cells can query it
pydb.dataframe_to_temp_table(
    public_t10_timeliness_table,
    "public_t10_timeliness",
)

##### public_t10_timeliness validation

In [None]:
#public_t10_timeliness_count = pydb.read_sql_query("SELECT * from __temp__.public_t10_timeliness limit 10")
#public_t10_timeliness_count

### Final Output Table 10 Timeliness
<a name="t10_timeliness_temp"></a>

In [None]:
# Stack the Table 10 timeliness outputs for every case type into
# __temp__.t10_timeliness_temp.
#  - Adoption and Domestic Violence (fcsq) are taken as-is with SELECT *.
#  - Children Act (fcsq) has its columns renamed; NULLs are supplied for the two
#    "unknown" columns.
#  - Financial Remedy, Divorce and Public Law (temporary tables loaded from S3) are
#    renamed and cast to bigint (counts) / double (means) so every branch of the
#    UNION exposes the same column names and types.
# In the Financial Remedy and Divorce lookups a "." in the unknown columns is
# presumably the marker for a missing value. NULLIF(TRIM(col), '.') turns only that
# marker into NULL before the cast. The previous REPLACE(col, '.', NULL) returned
# NULL for EVERY row, because a NULL argument makes the whole REPLACE call NULL,
# so any real values in those columns were silently discarded.
# NOTE(review): UNION matches columns by position, so the SELECT * sources are
# assumed to share this column order - confirm when a source changes.
# A plain string is used: the query has no placeholders, so an f-string prefix is unnecessary.
pydb.create_temp_table(
"""
SELECT *
FROM fcsq.adopt_t10_timeliness

UNION

SELECT lookup,
both_n as "1_bothn",
both_mean as "1_bothmean",
applicant_n as "2_applicantn",
applicant_mean as "2_applicantmean",
respondent_n as "3_respondentn",
respondent_mean as "3_respondentmean",
neither_n as "4_neithern",
neither_mean as "4_neithermean",
NULL as "5_unknownn",
NULL as "5_unknownmean",
all_n as "alln",
all_mean as "allmean"
FROM fcsq.ca_t10_timeliness

UNION

SELECT * FROM fcsq.dv_t10_timeliness

UNION

SELECT
lookup,
CAST(both_n as bigint) as "1_bothn",
CAST(both_mean as double) as "1_bothmean",
CAST(applicant_n as bigint) as "2_applicantn",
CAST(applicant_mean as double) as "2_applicantmean",
CAST(respondent_n as bigint) as "3_respondentn",
CAST(respondent_mean as double) as "3_respondentmean",
CAST(neither_n as bigint) as "4_neithern",
CAST(neither_mean as double) as "4_neithermean",
CAST(NULLIF(TRIM(unknown_n), '.') as bigint) as "5_unknownn",
CAST(NULLIF(TRIM(unknown_mean), '.') as double) as "5_unknownmean",
CAST(all_n as bigint) as "alln",
CAST(all_mean as double) as "allmean"

FROM __temp__.fin_rem_t10_timeliness

UNION

SELECT
lookup,
CAST(both_n as bigint) as "1_bothn",
CAST(both_mean as double) as "1_bothmean",
CAST(applicant_n as bigint) as "2_applicantn",
CAST(applicant_mean as double) as "2_applicantmean",
CAST(respondent_n as bigint) as "3_respondentn",
CAST(respondent_mean as double) as "3_respondentmean",
CAST(neither_n as bigint) as "4_neithern",
CAST(neither_mean as double) as "4_neithermean",
CAST(NULLIF(TRIM(unknown_n), '.') as bigint) as "5_unknownn",
CAST(NULLIF(TRIM(unknown_mean), '.') as double) as "5_unknownmean",
CAST(all_n as bigint) as "alln",
CAST(all_mean as double) as "allmean"

FROM __temp__.div_t10_timeliness

UNION

SELECT
lookup,
CAST("1_bothn" as bigint) as "1_bothn",
CAST("1_bothmean" as double) as "1_bothmean",
CAST("2_applicant_onlyn" as bigint) as "2_applicantn",
CAST("2_applicant_onlymean" as double) as "2_applicantmean",
CAST("3_respondent_onlyn" as bigint) as "3_respondentn",
CAST("3_respondent_onlymean" as double) as "3_respondentmean",
CAST("4_neithern" as bigint) as "4_neithern",
CAST("4_neithermean" as double) as "4_neithermean",
CAST("5_unknownn" as bigint) as "5_unknownn",
CAST("5_unknownmean" as double) as "5_unknownmean",
CAST("alln" as bigint) as "alln",
CAST("allmean" as double) as "allmean"

FROM __temp__.public_t10_timeliness

""",

"t10_timeliness_temp")

In [None]:
# Validation: pull the combined Table 10 table into a dataframe and display it as the cell output
t10_timeliness_temp = pydb.read_sql_query(
    "SELECT * FROM __temp__.t10_timeliness_temp;"
)
t10_timeliness_temp

In [None]:
# Reads the combined Table 10 timeliness table into a dataframe, ordered by lookup
t10_timeliness = pydb.read_sql_query("""
SELECT *
from __temp__.t10_timeliness_temp
ORDER BY lookup
""")

In [None]:
# Write the ordered Table 10 lookup back to S3, leaving out the dataframe index
t10_timeliness.to_csv(
    "s3://alpha-family-data/CSVs/Timeliness/Table_10_lookup.csv",
    index=False,
)