In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Widen pandas' display limits so wide DataFrames are easier to inspect in the notebook
display_options = {
    "display.max_columns": 100,
    "display.width": 900,
    "display.max_colwidth": 200,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [None]:
# Variables intended for use throughout the notebook.
# Database names:
db1 = "familyman_live_v4" # database where Familyman data is stored
db2 = "fcsq" # database where tables created as part of FCSQ processing are stored where required
# Snapshot date and publication period are set in the main run file.
# When running this notebook independently, uncomment the lines below and set them here.
#snapshot_date = "2022-08-04"
#pub_year = 2011
#pub_qtr = 4

In [None]:
# S3 prefix that holds the SDP CSV extracts read by this notebook; the combined
# output CSV (pub_law_csv_sdp.csv) is written back under the same prefix.
folder_link = "s3://alpha-family-data/CSVs/Public_Law_SDP"


In [None]:
# pydb.delete_table_and_data(database="__temp__", table="ca_ords_lookup")
# pydb.delete_table_and_data(database="__temp__", table="ca_apps_breakdown")
# pydb.delete_table_and_data(database="__temp__", table="ca_cases_child")
# pydb.delete_table_and_data(database="__temp__", table="ca_ords_breakdown_orders")
# pydb.delete_table_and_data(database="__temp__", table="ca_apps_lookup")
# pydb.delete_table_and_data(database="__temp__", table="ca_ords_breakdown_withdrawn")

In [None]:
# pydb.delete_table_and_data(database="__temp__", table="ca_ords_join")
# pydb.delete_table_and_data(database="__temp__", table="ca_ords_withdrawn_join")
# pydb.delete_table_and_data(database="__temp__", table="ca_apps_join")
# pydb.delete_table_and_data(database="__temp__", table="pub_child_act_csv")

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="ca_ords_breakdown_withdrawn")
#pydb.delete_table_and_data(database="__temp__", table="ca_ords_breakdown_orders")


In [None]:
# Load the extracts taken from SDP and register each one as a temporary table.
# Breakdown/cases extracts: only '' and 'NULL' are read as missing values and
# the column names are lower-cased. Lookup extracts: read with pandas defaults.

# Applications breakdown, with qtr cast to text.
# NOTE(review): if qtr can be missing, the cast may store the text 'nan' rather than a null - confirm.
ca_apps_breakdown = (
    pd.read_csv(
        f"{folder_link}/ca_apps_breakdown.csv",
        keep_default_na=False,
        na_values=["", "NULL"],
    )
    .rename(columns=str.lower)
    .astype({"qtr": "str"})
)
pydb.dataframe_to_temp_table(ca_apps_breakdown, "ca_apps_breakdown")

# Cases/children extract (the cast of year to int stays disabled)
ca_cases_child = pd.read_csv(
    f"{folder_link}/ca_cases_child.csv",
    keep_default_na=False,
    na_values=["", "NULL"],
).rename(columns=str.lower)
#ca_cases_child = ca_cases_child.astype({'year': 'int'})
pydb.dataframe_to_temp_table(ca_cases_child, "ca_cases_child")

# Lookup used for applications (and, later, for withdrawn orders)
ca_apps_lookup = pd.read_csv(f"{folder_link}/ca_apps_lookup.csv")
pydb.dataframe_to_temp_table(ca_apps_lookup, "ca_apps_lookup")

# Lookup used for orders
ca_ords_lookup = pd.read_csv(f"{folder_link}/ca_ords_lookup.csv")
pydb.dataframe_to_temp_table(ca_ords_lookup, "ca_ords_lookup")



In [None]:
# Read the orders breakdown extract, then separate rows whose disposal_type is
# 'Withdrawn' from all other rows; the two parts are recombined later.
# Only '' and 'NULL' are read as missing values, column names are lower-cased
# and qtr is cast to text.
# NOTE(review): if qtr can be missing, the cast may store the text 'nan' rather than a null - confirm.
ca_ords_breakdown = (
    pd.read_csv(
        f"{folder_link}/ca_ords_breakdown.csv",
        keep_default_na=False,
        na_values=["", "NULL"],
    )
    .rename(columns=str.lower)
    .astype({"qtr": "str"})
)

# One boolean mask drives both halves; rows with a missing disposal_type fall
# into the non-withdrawn half.
withdrawn_mask = ca_ords_breakdown["disposal_type"] == "Withdrawn"
ca_ords_breakdown_withdrawn = ca_ords_breakdown.loc[withdrawn_mask, :]
ca_ords_breakdown_orders = ca_ords_breakdown.loc[~withdrawn_mask, :]
#pydb.dataframe_to_temp_table(ca_ords_breakdown, "ca_ords_breakdown")

In [None]:
# Register the two halves of the orders breakdown as separate temporary tables
# (non-withdrawn first, then withdrawn).
for temp_table_name, breakdown_part in (
    ("ca_ords_breakdown_orders", ca_ords_breakdown_orders),
    ("ca_ords_breakdown_withdrawn", ca_ords_breakdown_withdrawn),
):
    pydb.dataframe_to_temp_table(breakdown_part, temp_table_name)


In [None]:
# Applications: left-join the application breakdown to the application lookup
# on order_type to pick up order_desc (exposed as Order_type) and
# order_type_code, then sum Count for each combination of the other columns.
# The query has no placeholders, so it is held as a plain string.
ca_apps_join_sql = """
SELECT
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc as Order_type,
t2.order_type_code,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator,
SUM(t1.Count) as count


FROM __temp__.ca_apps_breakdown t1
LEFT JOIN __temp__.ca_apps_lookup t2
ON t1.order_type = t2.order_type

GROUP BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
t2.order_type_code,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

ORDER BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
t2.order_type_code,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

"""
pydb.create_temp_table(ca_apps_join_sql, "ca_apps_join")

In [None]:
#pydb.read_sql_query('SELECT * FROM __temp__.ca_apps_join')

In [None]:
# Orders (excluding withdrawn): left-join the orders breakdown to the orders
# lookup on OrderMadeTypeKey = OrderTypeKey and sum Count per combination of
# the other columns. order_type_code is kept only when Disposal_type is
# 'Order made' or 'Interim Order'; otherwise it is NULL.
# The query has no placeholders, so it is held as a plain string.
ca_ords_join_sql = """
SELECT
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc as Order_type,
CASE WHEN t1.Disposal_type IN ('Order made', 'Interim Order') THEN t2.order_type_code
ELSE NULL END AS order_type_code,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator,
SUM(t1.Count) as count


FROM __temp__.ca_ords_breakdown_orders t1
LEFT JOIN __temp__.ca_ords_lookup t2
ON t1.OrderMadeTypeKey = t2.OrderTypeKey

GROUP BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
CASE WHEN t1.Disposal_type IN ('Order made', 'Interim Order') THEN t2.order_type_code
ELSE NULL END,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

ORDER BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
CASE WHEN t1.Disposal_type IN ('Order made', 'Interim Order') THEN t2.order_type_code
ELSE NULL END,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

"""
pydb.create_temp_table(ca_ords_join_sql, "ca_ords_join")

In [None]:
#pydb.read_sql_query('SELECT * FROM __temp__.ca_ords_join')

In [None]:
# Withdrawn orders: these are described via the *application* lookup (joined
# on order_type), not the orders lookup. order_type_code is always a NULL
# typed as INT; Count is summed per combination of the other columns.
# The query has no placeholders, so it is held as a plain string.
ca_ords_withdrawn_join_sql = """
SELECT
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc as Order_type,
CAST(NULL AS INT) as order_type_code,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator,
SUM(t1.Count) as count


FROM __temp__.ca_ords_breakdown_withdrawn t1
LEFT JOIN __temp__.ca_apps_lookup t2
ON t1.order_type = t2.order_type

GROUP BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

ORDER BY
t1.Year,
t1.Qtr,
t1.Type,
t1.count_type,
t1.Public_private,
t1.Disposal_type,
t2.order_desc,
t1.Gender,
t1.age_band,
t1.Applicants_in_case,
t1.Respondents_in_case,
t1.HC_Indicator

"""
pydb.create_temp_table(ca_ords_withdrawn_join_sql, "ca_ords_withdrawn_join")

In [None]:
#pydb.read_sql_query('SELECT * FROM __temp__.ca_ords_withdrawn_join')

In [None]:
# Stack the applications, orders, withdrawn-orders and cases/children tables
# into one temporary table, pub_child_act_csv.
# NOTE(review): UNION ALL with SELECT * lines columns up by position, so
# ca_cases_child presumably has the same 14 columns in the same order as the
# three join tables - confirm against the CSV.
# The query has no placeholders, so it is held as a plain string.
pub_child_act_csv_sql = """
SELECT * FROM __temp__.ca_apps_join
UNION ALL
SELECT * FROM __temp__.ca_ords_join
UNION ALL
SELECT * FROM __temp__.ca_ords_withdrawn_join
UNION ALL
SELECT * FROM __temp__.ca_cases_child


ORDER BY
Year,
Qtr,
Type,
count_type,
Public_private,
Disposal_type,
order_type_code


"""
pydb.create_temp_table(pub_child_act_csv_sql, "pub_child_act_csv")

In [None]:
#pydb.delete_table_and_data(database="__temp__", table="pub_child_act_csv")

In [None]:
# Read the combined table back with an explicit ORDER BY.
# A SELECT without ORDER BY gives no guarantee about row order, and the sort
# applied while the temporary table was built does not carry over to later
# reads, so without this the rows of the output CSV could change position
# between runs.
# The first seven keys repeat the ordering used when pub_child_act_csv was
# built; the remaining dimension columns act as tie-breakers so the order is
# fully determined.
pub_law_csv = pydb.read_sql_query(
    """
    SELECT *
    FROM __temp__.pub_child_act_csv
    ORDER BY
        year,
        qtr,
        type,
        count_type,
        public_private,
        disposal_type,
        order_type_code,
        order_type,
        gender,
        age_band,
        applicants_in_case,
        respondents_in_case,
        hc_indicator
    """
)

# Write the result next to the input extracts on S3, without the DataFrame index
pub_law_csv.to_csv(path_or_buf = f"""{folder_link}/pub_law_csv_sdp.csv""", index = False)

In [None]:
# Show the combined DataFrame (the same data written to pub_law_csv_sdp.csv) as the cell output for a visual check
pub_law_csv