## Children Act (Private Law) legal representation (table 11)

In [None]:
import os  # for file paths
import pandas as pd
import awswrangler as wr
import pydbtools as pydb  # see https://github.com/moj-analytical-services/pydbtools

# Widen pandas' display limits so wide query results stay readable in the notebook
pd.set_option(
    "display.max_columns", 100,
    "display.width", 900,
    "display.max_colwidth", 200,
)

#### Defining variables

In [None]:
# Notebook-wide settings that are interpolated into the SQL queries further down
snapshot_date = "2022-09-06"  # To update where necessary
db1, db2 = (
    "familyman_dev_v3",  # database where Familyman data is stored
    "fcsq",  # database where tables created as part of FCSQ processing are stored where required
)

In [None]:
# Path within the S3 folder used for the Athena FCSQ database
# (plain string: there is nothing to interpolate, so no f-prefix is needed)
# NOTE(review): not referenced by any other cell visible in this notebook - confirm it is still needed
fcsq_db_path = "s3://alpha-family-data/fcsq_processing/CA_apps/"

### Extraction of hearing data

In [None]:
# Extracting hearings data for private law - the event models used are those used from the previous SAS process
# Some fields not really required but pulling them in as they can be useful to see what hearing data might be held (eg. hearing type)
#
# Only non-vacated hearings are kept, joined to their (non-error) event at the chosen snapshot.
# The join is written as INNER JOIN because the WHERE filters on e.error and
# e.mojap_snapshot_date already discard any hearing without a matching event, so a
# LEFT JOIN returned exactly the same rows while suggesting unmatched hearings were kept.
# Every column is qualified with its table alias so the query cannot become ambiguous.
private_law_hearings_sql = f"""
SELECT
  h.event,
  h.vacated_flag,
  h.hearing_type,
  h.hearing_date,
  e.receipt_date,
  e.case_number,
  e.event_model
FROM
  {db1}.hearings h
  INNER JOIN {db1}.events e
    ON h.event = e.event
WHERE
  h.vacated_flag IS NULL
  AND e.error = 'N'
  AND SUBSTR(e.case_number,5,1) = 'P'
  AND e.event_model IN ('C6', 'G61')
  AND e.mojap_snapshot_date = DATE'{snapshot_date}'
  AND h.mojap_snapshot_date = DATE'{snapshot_date}'
"""

pydb.create_temp_table(private_law_hearings_sql, "private_law_hearings")

### Adding hearing data to case starts

In [None]:
# Flag each private law case start with hearing = 'Y' when its case number appears in
# the private_law_hearings temp table, and 'N' otherwise.
# The starts table ({db2}.ca_case_starts) is created during the main child applications processing.
case_starts_hearings_sql = f"""
SELECT 
  year,
  quarter,
  case_number,
  main_case_type,
  CASE WHEN case_number IN (SELECT case_number 
                            FROM __temp__.private_law_hearings)
        THEN 'Y' 
        ELSE 'N' END 
    AS hearing
FROM 
  {db2}.ca_case_starts
WHERE
  main_case_type = 'P'

"""

pydb.create_temp_table(case_starts_hearings_sql, "case_starts_hearings")

### Applicant and respondent data for cases with a hearing

In [None]:
# Stack the applicant and respondent tables (labelled via the 'party' column) and keep only
# the parties whose case is flagged hearing = 'Y' in case_starts_hearings.
# The applicant and respondent data sets are created during the main child applications processing.
# Rows here are at applicant/respondent level, not case level.
party_hearings_sql = f"""
WITH case_parties AS (

SELECT 
  'Applicant' AS party,
  year,
  quarter,
  case_number,
  main_case_type, 
  role_id,
  gender_desc AS gender,
  representation     
FROM 
  {db2}.ca_applicants 
  
UNION ALL
SELECT 
  'Respondent' AS party,
  year,
  quarter,
  case_number,
  main_case_type, 
  role_id,
  gender_desc AS gender,
  representation    
FROM 
  {db2}.ca_respondents 
                  
    )

SELECT
  *
FROM
  case_parties
WHERE
  case_number IN (SELECT case_number 
                  FROM __temp__.case_starts_hearings
                  WHERE hearing = 'Y')
  
"""

pydb.create_temp_table(party_hearings_sql, "party_hearings")

### Aggregating data for csv

In [None]:
# Count party_hearings rows by year, quarter, party, gender and representation.
# The constant columns (case_type, Category) give these rows the same column layout
# as the other aggregate tables they are later appended to.
party_counts_sql = f"""
SELECT 
  'Private Law' AS case_type,
  Year,
  Quarter,
  'Party' AS Category,
  party,
  gender,
  representation,
  Count (*) AS Count
FROM 
  __temp__.party_hearings
GROUP BY
  Year,
  Quarter,
  party,
  gender,
  representation;
"""

pydb.create_temp_table(party_counts_sql, "party_counts")

In [None]:
# Count all private law case starts (with or without a hearing) by year and quarter.
# party / Gender / Representation are typed NULL placeholders so the column layout
# lines up with party_counts when the aggregates are appended together.
case_start_counts_sql = f"""
SELECT
  'Private Law' AS case_type,
  Year,
  Quarter,
  'Cases' AS Category,
  CAST (null AS VARCHAR) AS party,
  CAST (null AS VARCHAR) AS Gender,
  CAST (null AS VARCHAR) AS Representation,
  Count (*) AS Count
FROM
  __temp__.case_starts_hearings
GROUP BY
  Year,
  Quarter
  
"""

pydb.create_temp_table(case_start_counts_sql, "case_start_counts")

In [None]:
# Count private law case starts that have a hearing (hearing = 'Y') by year and quarter.
# party / Gender / Representation are typed NULL placeholders so the column layout
# lines up with party_counts when the aggregates are appended together.
case_starts_with_hearing_counts_sql = f"""
SELECT
  'Private Law' AS case_type,
  Year,
  Quarter,
  'Cases with a hearing' AS Category,
  CAST (null AS VARCHAR) AS party,
  CAST (null AS VARCHAR) AS Gender,
  CAST (null AS VARCHAR) AS Representation,
  Count (*) AS Count
FROM
  __temp__.case_starts_hearings
WHERE
  hearing = 'Y'
GROUP BY
  Year,
  Quarter
  
"""

pydb.create_temp_table(
    case_starts_with_hearing_counts_sql,
    "case_starts_with_hearing_counts",
)

In [None]:
# Append the three aggregate tables (party counts, case starts, case starts with a hearing)
# into the data set for the final csv, keeping only years after 2010.
# The SELECT * / UNION ALL relies on the three tables sharing the same column order.
leg_rep_csv_sql = f"""
WITH all_agg_data AS (
SELECT
  *
FROM
  __temp__.party_counts
UNION ALL
SELECT
  *
FROM
  __temp__.case_start_counts
UNION ALL
SELECT
  *
FROM
  __temp__.case_starts_with_hearing_counts
  
  )

SELECT
  *
FROM 
  all_agg_data
WHERE
  year > 2010
  
"""

pydb.create_temp_table(leg_rep_csv_sql, "private_law_leg_rep_csv")

### Lookup data

In [None]:
# Lookup data: one row per year (key 'Private Law|<year>|') followed by one row per
# year and quarter (key 'Private Law|<year>|Q<quarter>').
# Each row pivots the csv counts into columns: cases, cases with a hearing,
# represented / unrepresented applicants and respondents, and total parties.
leg_rep_lookup_sql = f"""
SELECT
  'Private Law|'||CAST(Year AS VARCHAR)||'|' as lookup,
  SUM(CASE WHEN category = 'Cases' THEN count END)
   AS cases,
  SUM(CASE WHEN category = 'Cases with a hearing' THEN count END)
   AS cases_hearing,
  SUM(CASE WHEN party = 'Applicant' and representation = 'Y' THEN count END)
   AS app_rep,
  SUM(CASE WHEN party = 'Applicant' and representation = 'N' THEN count END)
   AS app_unrep,
  SUM(CASE WHEN party = 'Respondent' and representation = 'Y' THEN count END)
   AS res_rep,
  SUM(CASE WHEN party = 'Respondent' and representation = 'N' THEN count END)
   AS res_unrep,
  SUM(CASE WHEN party IN ('Applicant','Respondent') THEN count END)
   AS total_parties
FROM
  __temp__.private_law_leg_rep_csv
GROUP BY
  year

UNION ALL

SELECT
  'Private Law|'||CAST(Year AS VARCHAR)||'|Q'||CAST(Quarter AS VARCHAR) AS lookup,
  SUM(CASE WHEN category = 'Cases' THEN count END)
   AS cases,
  SUM(CASE WHEN category = 'Cases with a hearing' THEN count END)
   AS cases_hearing,
  SUM(CASE WHEN party = 'Applicant' and representation = 'Y' THEN count END)
   AS app_rep,
  SUM(CASE WHEN party = 'Applicant' and representation = 'N' THEN count END)
   AS app_unrep,
  SUM(CASE WHEN party = 'Respondent' and representation = 'Y' THEN count END)
   AS res_rep,
  SUM(CASE WHEN party = 'Respondent' and representation = 'N' THEN count END)
   AS res_unrep,
  SUM(CASE WHEN party IN ('Applicant','Respondent') THEN count END)
   AS total_parties
FROM
  __temp__.private_law_leg_rep_csv
GROUP BY
  year,
  quarter

  
"""

pydb.create_temp_table(leg_rep_lookup_sql, "private_law_leg_rep_lookup")

##### Export csv to S3

In [None]:
# Pull the final csv temp table back from Athena
pri_leg_rep_csv_data = pydb.read_sql_query(
    "select * from __temp__.private_law_leg_rep_csv"
)

In [None]:
# Wrap the query result in a DataFrame ready for export
pri_leg_rep_csv_df = pd.DataFrame(data=pri_leg_rep_csv_data)

In [None]:
# Write the csv data to the temporary outputs folder in S3, with a header row.
# NOTE(review): `index` is left at its default, so the DataFrame's row index is presumably
# written as an unnamed first column - confirm downstream consumers expect that.
pri_leg_rep_csv_df.to_csv(
    r's3://alpha-family-data/fcsq_processing/children_act_temporary_outputs/private_law_leg_rep_csv.csv',
    header=True,
)

##### Export lookup to S3 

In [None]:
# Pull the lookup temp table back from Athena
pri_leg_rep_lookup_data = pydb.read_sql_query(
    "select * from __temp__.private_law_leg_rep_lookup"
)

In [None]:
# Wrap the query result in a DataFrame ready for export
pri_leg_rep_lookup_df = pd.DataFrame(data=pri_leg_rep_lookup_data)

In [None]:
# Write the lookup data to the temporary outputs folder in S3, with a header row.
# NOTE(review): `index` is left at its default, so the DataFrame's row index is presumably
# written as an unnamed first column - confirm downstream consumers expect that.
pri_leg_rep_lookup_df.to_csv(
    r's3://alpha-family-data/fcsq_processing/children_act_temporary_outputs/private_law_leg_rep_lookup.csv',
    header=True,
)