## DFJ file (Children Act)

### This code takes the information produced by the application and disposals processes and summarises it at DFJ level. The 4 areas it summarises are:
1.  Orders applied for
2.    Case starts
3.    Orders made
4.    Cases closed

#### Import packages

In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import os  # for file paths
import awswrangler as wr

In [None]:
# Raise the pandas display limits (number of columns, line width, cell width)
# used when dataframes are printed in this notebook.
for option_name, option_value in (
    ("display.max_columns", 100),
    ("display.width", 900),
    ("display.max_colwidth", 200),
):
    pd.set_option(option_name, option_value)

#### Assigning key variables

In [None]:
# Database where the familyman data is stored on the platform
db1 = "familyman_dev_v3"
# Database used for FCSQ processing
fcsq_db = "fcsq"
# Path within the S3 folder for the Athena FCSQ database, alongside other S3 items
# (plain string: there is nothing to interpolate, so no f-prefix is needed)
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_disps/"

#### Orders applied for

In [None]:
# Count orders applied for, per year / quarter / case type / DFJ / region, for
# years after 2010. Reads the orders-applied-for table produced by the main
# Children Act application processing. Case type 'P' is labelled 'Private Law',
# 'C' is labelled 'Public Law', and anything else is labelled 'check'.
print("aggregating orders applied for by dfj....")
orders_applied_sql = f""" 
SELECT 
  CASE WHEN order_case_type = 'P'
        THEN 'Private Law'
      WHEN order_case_type = 'C'
        THEN 'Public Law'
      ELSE 'check' END
    AS Category,
  'Start' AS stage,  
  year,
  quarter,  
  app_dfj AS dfj,
  app_region AS region,  
  COUNT(*) AS Count
FROM
  {fcsq_db}.ca_apps_order_types
WHERE
  year > 2010
GROUP BY
  year,
  quarter,
  order_case_type,
  app_dfj,
  app_region
  
"""
pydb.create_temp_table(orders_applied_sql, "ca_dfj_orders_applied_for")

#### Case starts

In [None]:
# Count case starts, per year / quarter / case type / DFJ / region, for years
# after 2010. Reads the case-starts table produced by the main Children Act
# application processing. Only case types 'P' (Private Law) and 'C'
# (Public Law) are kept.
print("aggregating case starts by dfj....")
case_starts_sql = f""" 
SELECT 
  CASE WHEN main_case_type = 'P'
        THEN 'Private Law'
      WHEN main_case_type = 'C'
        THEN 'Public Law' END
    AS Category,
  'Start' AS stage,  
  year,
  quarter,  
  dfj,
  region,  
  COUNT(*) AS Cases
FROM
  {fcsq_db}.ca_case_starts
WHERE
  year > 2010  
  AND main_case_type IN ('P','C')
GROUP BY
  year,
  quarter,
  main_case_type,
  dfj,
  region
  
"""
pydb.create_temp_table(case_starts_sql, "ca_dfj_case_starts")

##### Combining order and start counts

In [None]:
# Create the starts dataset - combine the order counts from the apps table with
# the case counts from the starts table.
#
# The FULL JOIN keeps groups that exist in only one of the two tables; for those
# rows the other side's columns are NULL, so each key column is taken from
# whichever side has it (COALESCE prefers the orders side, as before).
#
# NOTE(review): the join compares keys with `=`, which never matches NULL, so a
# group whose dfj is NULL in both tables would come out as two separate rows.
# region is also not part of the join condition. Confirm whether either matters
# for this data.
print("combining orders applied for and case starts....")
pydb.create_temp_table(
    """
SELECT
  COALESCE(a.category, c.category) AS category,
  COALESCE(a.stage, c.stage) AS stage,
  COALESCE(a.year, c.year) AS year,
  COALESCE(a.quarter, c.quarter) AS quarter,
  COALESCE(a.dfj, c.dfj) AS dfj,
  COALESCE(a.region, c.region) AS region,
  a.count,
  c.cases
FROM
  __temp__.ca_dfj_orders_applied_for a
  FULL JOIN __temp__.ca_dfj_case_starts c
    ON a.category = c.category
    AND a.stage = c.stage
    AND a.year = c.year
    AND a.quarter = c.quarter
    AND a.dfj = c.dfj
""",
    "ca_dfj_starts",
)

#### Orders made

In [None]:
# Count orders made, per year / quarter / case type / DFJ / region, for years
# after 2010. Reads the disposals table produced by the main Children Act
# disposals processing, keeping only rows with disp_type_code = 1. Case type
# 'P' is labelled 'Private Law', 'C' is labelled 'Public Law', and anything
# else is labelled 'check'.
print("aggregating orders made by dfj....")
orders_made_sql = f""" 
SELECT 
  CASE WHEN order_case_type = 'P'
        THEN 'Private Law'
      WHEN order_case_type = 'C'
        THEN 'Public Law'
      ELSE 'check' END
    AS Category,
  'End' AS stage,  
  year,
  quarter,  
  disposal_dfj AS dfj,
  disposal_region AS region,  
  COUNT(*) AS count
FROM
  {fcsq_db}.ca_all_disposals
WHERE
  disp_type_code = 1
  AND year > 2010
GROUP BY
  year,
  quarter,
  order_case_type,
  disposal_dfj,
  disposal_region
  
"""
pydb.create_temp_table(orders_made_sql, "ca_dfj_orders_made")

#### Cases closed

In [None]:
# Count closed cases, per year / quarter / case type / DFJ / region, for years
# after 2010. Reads the closed-cases table produced by the main Children Act
# disposals processing. Only case types 'P' (Private Law) and 'C'
# (Public Law) are kept.
print("aggregating cases closed by dfj....")
cases_closed_sql = f""" 
SELECT
  CASE WHEN case_type = 'P'
        THEN 'Private Law'
      WHEN case_type = 'C'
        THEN 'Public Law' END
    AS Category,
  'End' AS stage,
  year,
  quarter,  
  case_closed_dfj AS dfj,
  case_closed_region AS region,  
  COUNT(*) AS cases
FROM
  {fcsq_db}.ca_closed_cases
WHERE
  year > 2010 
  AND case_type IN ('P','C')
GROUP BY
  year,
  quarter,
  case_type,
  case_closed_dfj,
  case_closed_region
  
"""
pydb.create_temp_table(cases_closed_sql, "ca_dfj_case_closed")

##### Combining order and case closed counts

In [None]:
# Create the ends dataset - combine the order counts from the disposals table
# with the case counts from the closed-cases table.
#
# The FULL JOIN keeps groups that exist in only one of the two tables; for those
# rows the other side's columns are NULL, so each key column is taken from
# whichever side has it (COALESCE prefers the orders side, as before).
#
# NOTE(review): the join compares keys with `=`, which never matches NULL, so a
# group whose dfj is NULL in both tables would come out as two separate rows.
# region is also not part of the join condition. Confirm whether either matters
# for this data.
print("combining orders made and cases closed....")
pydb.create_temp_table(
    """
SELECT
  COALESCE(o.category, c.category) AS category,
  COALESCE(o.stage, c.stage) AS stage,
  COALESCE(o.year, c.year) AS year,
  COALESCE(o.quarter, c.quarter) AS quarter,
  COALESCE(o.dfj, c.dfj) AS dfj,
  COALESCE(o.region, c.region) AS region,
  o.count,
  c.cases
FROM
  __temp__.ca_dfj_orders_made o
  FULL JOIN __temp__.ca_dfj_case_closed c
    ON o.category = c.category
    AND o.stage = c.stage
    AND o.year = c.year
    AND o.quarter = c.quarter
    AND o.dfj = c.dfj
""",
    "ca_dfj_ends",
)

#### Combining start and end data

In [None]:
# Append the application/starts rows and the orders made/closed rows into one
# DFJ table.
#
# UNION ALL matches columns by position, not by name, so the columns are listed
# explicitly on both sides rather than relying on SELECT * returning the two
# temp tables' columns in the same order.
print("combining start and end data....")
pydb.create_temp_table(
    """
SELECT
  category,
  stage,
  year,
  quarter,
  dfj,
  region,
  count,
  cases
FROM
  __temp__.ca_dfj_starts
UNION ALL
SELECT
  category,
  stage,
  year,
  quarter,
  dfj,
  region,
  count,
  cases
FROM
  __temp__.ca_dfj_ends
""",
    "ca_dfj",
)

##### Converting to dataframe and exporting to S3

In [None]:
# Read the combined DFJ temp table back into the notebook
combined_dfj_query = "select * from __temp__.ca_dfj"
dfj_data = pydb.read_sql_query(combined_dfj_query)

In [None]:
# Wrap the query result in a pandas DataFrame for export
dfj_df = pd.DataFrame(data=dfj_data)

In [None]:
# Write the combined DFJ data to S3 as a CSV with a header row.
# NOTE(review): `index` is left at its default, so the dataframe index is
# presumably written as an unnamed first column - confirm this is wanted.
print("exporting csv to S3....")
ca_dfj_csv_path = r's3://alpha-family-data/fcsq_processing/children_act_temporary_outputs/ca_dfj.csv'
dfj_df.to_csv(ca_dfj_csv_path, header=True)