In [None]:
# libraries
import pandas as pd
import pydbtools as pydb
import boto3
from datetime import datetime
#import re
import os  # for file paths
import awswrangler as wr

In [None]:
# Widen pandas' notebook rendering so wide query results are shown without truncation
for _option, _value in {
    "display.max_columns": 100,
    "display.width": 900,
    "display.max_colwidth": 200,
}.items():
    pd.set_option(_option, _value)

##### Assigning key variables

In [None]:
# Settings shared by the cells below.

# Snapshot of the source data to query - update where necessary
snapshot_date = "2022-05-23"

# Database where Familyman data is stored
db1 = "familyman_derived_dev_v2"
# Database holding the case_fields table queried below
db2 = "familyman_dev_v2"
# Database where tables created as part of FCSQ processing are stored where required
db3 = "fcsq"

In [None]:
# Temp table "DOI": one row per case_fields record with field_model 'FM2C_DI' in the
# selected snapshot, with its value cast to a DATE and exposed as case_DOI.
# (case_DOI is presumably the case's date of issue - TODO confirm.)
doi_query = f""" 
SELECT 
  case_number,
  CAST(value AS Date) AS case_DOI
FROM 
  {db2}.case_fields AS f
 
WHERE
  field_model = 'FM2C_DI'
  AND mojap_snapshot_date = DATE'{snapshot_date}'

"""

pydb.create_temp_table(doi_query, "DOI")

In [None]:
# Temp table "child_doi": applications from ca_apps_child_d restricted to the listed
# order codes, left-joined to the DOI temp table on case_number.
# Start_date is the receipt date, except that for event_model 'U22' the case_DOI is
# used when it is earlier than the receipt date. A missing case_DOI also falls back
# to the receipt date.
child_doi_query = """ 
SELECT 
  t1.year,
  t1.quarter,
  t1.case_number,
  t1.receipt_date,
  t1.event,
  t1.event_model,
  t1.order_type,
  t1.order_code,
  t1.order_desc,
  t1.child_role_id,
  t1.order_case_type,
  t2.case_DOI,
  CASE WHEN  t2.case_DOI is null
        THEN t1.receipt_date
       WHEN t1.event_model = 'U22' AND (t2.Case_DOI<t1.Receipt_date)
        THEN t2.Case_DOI
        ELSE t1.Receipt_date END
      AS Start_date
FROM 
  __temp__.ca_apps_child_d AS t1
      LEFT JOIN __temp__.DOI AS t2 
       ON t1.case_number = t2.case_number
WHERE
  t1.order_code IN (1,4,14,25,27,29,30,31,32)

"""

pydb.create_temp_table(child_doi_query, "child_doi")

In [None]:
# Temp table "earliest_app": for each case / child, keep the child_doi rows that share
# the earliest start_date (RANK keeps ties, so more than one row per child can remain).
# NOTE(review): the next cell writes a temp table with the same name, presumably
# replacing this one - confirm which version the downstream cells are meant to use.
earliest_app_by_child_query = """ 
WITH app_rank AS (

SELECT 
  *,
  RANK() OVER(PARTITION BY case_number, child_role_id
                       ORDER BY start_date) 
      AS app_rank
FROM 
  __temp__.child_doi
)

SELECT
  *
FROM
  app_rank
WHERE
  app_rank = 1

"""

pydb.create_temp_table(earliest_app_by_child_query, "earliest_app")

In [None]:
# Copy of the cell above with a tweak to how the rank is calculated: the partition is
# case / child / order code rather than case / child.
# Intent (based on current FCSQ procedure): get rid of rows where the same order is given
# to the same child within the same case, while keeping every distinct order code
# regardless of its start date.
# LA believes this might not be what is intended: the SAS process looks like it is trying
# to keep only the first application within the case (see query 3.3 in the SAS Timeliness
# part 1 child act code).
#
# Fix: the window previously had no ORDER BY, so RANK() treated every row in a partition
# as a tie and returned 1 for all of them - the app_rank = 1 filter removed nothing.
# ROW_NUMBER() ordered by start_date (receipt_date as tie-breaker) keeps exactly one row,
# the earliest, per case / child / order code.
# NOTE(review): this reuses the temp table name "earliest_app" from the previous cell,
# presumably replacing it - confirm that is intended.

pydb.create_temp_table(
"""
WITH app_rank AS (

SELECT
  *,
  ROW_NUMBER() OVER(PARTITION BY case_number, child_role_id, order_code
                       ORDER BY start_date, receipt_date)
      AS app_rank
FROM
  __temp__.child_doi
)

SELECT
  *
FROM
  app_rank
WHERE
  app_rank = 1

""",

"earliest_app")

In [None]:
# Peek at ten rows of the earliest_app temp table
preview_query = "select * from __temp__.earliest_app limit 10"
pydb.read_sql_query(preview_query)

In [None]:
# Temp table "app_disp_match": pairs each earliest_app row with the disposals recorded
# for the same case and child that fall on or after the application's start_date,
# leaving out the listed event models and disp_type_code 2.
# days / weeks measure start_date -> disp_date; disp_rank numbers the disposals by
# disp_date within each case / child / order code (1 = earliest).
# NOTE(review): the WHERE filters reference columns of d, so applications with no
# matching disposal are dropped - the LEFT JOIN behaves like an inner join here.
app_disp_match_query = """ 
SELECT 
  a.*,
  d.disp_date,
  date_diff('day',a.start_date, d.disp_date) AS days,
  date_diff('week',a.start_date, d.disp_date) AS weeks,
  ROW_NUMBER() OVER(PARTITION BY a.case_number, a.child_role_id, a.order_code
                       ORDER BY d.disp_date) 
      AS disp_rank   
FROM
  __temp__.earliest_app a
  LEFT JOIN __temp__.ca_disps_all_children_d d
    ON a.case_number = d.case_number
    AND a.child_role_id = d.child_role_id
WHERE 
  date_diff('day',a.start_date, d.disp_date) >= 0
  AND d.event_model NOT IN ('CPA','C21','C27','C30','C31','C33','C35B','C44A','C44B','C46A','C46B','C47A','C47C','C48A', 
                          'C48B','C48C','C49','D51','D84C','MAGEPO','MAGS37')
  AND d.disp_type_code <> 2

"""

pydb.create_temp_table(app_disp_match_query, "app_disp_match")

In [None]:
 # Temp table "avg_time": yearly summary of app_disp_match, grouped by disposal year.
 # For each year it holds the number of disposals and the average duration in weeks
 # (AVG(days)/7), limited to order_case_type 'P', disposal years after 2014 and the
 # first-ranked disposal (disp_rank = 1).
 # NOTE(review): the first code line of this cell starts with a stray space. IPython
 # accepts that, but a plain Python script would not - presumably unintentional.
 pydb.create_temp_table(
f""" 
 SELECT
   YEAR(disp_date) AS year,
   count(case_number) AS disposals,
   AVG(days)/7 as av_weeks
 FROM
   __temp__.app_disp_match
 WHERE 
   order_case_type = 'P'
   and YEAR(disp_date) > 2014
   and disp_rank = 1
GROUP BY
  YEAR(disp_date)
""",

"avg_time")

In [None]:
 # Temp table "qtr_time": row-level extract of app_disp_match for checking one quarter.
 # Keeps order_case_type 'P' rows with disp_rank = 1 whose disposal date falls in
 # quarter 1 of 2019, and adds disp_year / disp_qtr columns derived from disp_date.
 # NOTE(review): the year and quarter are hard-coded in the SQL; the first code line of
 # this cell also starts with a stray space (accepted by IPython, not by plain Python).
 pydb.create_temp_table( 
f"""
select A.*,  
  YEAR(disp_date) AS disp_year, 
  QUARTER(disp_date) AS disp_qtr 
from 
  __temp__.app_disp_match as A 
where 
  YEAR(disp_date) = 2019 
  and QUARTER(disp_date) = 1
  and order_case_type = 'P'
   and disp_rank = 1
""",
     
"qtr_time")

In [None]:
# Pull the whole qtr_time temp table into the kernel for export
qtr_time_query = "select * from __temp__.qtr_time"
data = pydb.read_sql_query(qtr_time_query)

In [None]:
# Wrap the query result in a DataFrame; the export cells below write out `df`
df = pd.DataFrame(data=data)

In [None]:
# Write the check extract to S3 as CSV, with a header row
df.to_csv(
    "s3://alpha-family-data/fcsq_processing/CA_disps/test_csv/t10_checks.csv",
    header=True,
)

In [None]:
# Also write the check extract to a local CSV, with a header row.
# pandas raises OSError if the target directory is missing (e.g. on a fresh checkout),
# so make sure Exports/ exists first; this is a no-op when it is already there.
os.makedirs("Exports", exist_ok=True)
df.to_csv("Exports/t10_checks.csv", header=True)

In [None]:
# Display the yearly summary held in avg_time, ordered by year
avg_time_query = "select * from __temp__.avg_time order by year"
pydb.read_sql_query(avg_time_query)

In [None]:
# Spot check: app_disp_match rows for case BH18P00002 with disp_rank = 1
pydb.read_sql_query(
    "select * from __temp__.app_disp_match where case_number = 'BH18P00002' and disp_rank = 1"
)

In [None]:
# Spot check: every app_disp_match row for case BH18P00002, whatever its disp_rank
pydb.read_sql_query(
    "select * from __temp__.app_disp_match where case_number = 'BH18P00002'"
)

In [None]:
# Spot check: disposal rows in ca_disps_all_children_d for case BH18P00002
pydb.read_sql_query(
    "select * from __temp__.ca_disps_all_children_d where case_number = 'BH18P00002'"
)

In [None]:
# Spot check: application rows in ca_apps_child_d for case AF18P00095
pydb.read_sql_query(
    "select * from __temp__.ca_apps_child_d where case_number = 'AF18P00095'"
)

In [None]:
# Spot check: every app_disp_match row for case BD19P00174
pydb.read_sql_query(
    "select * from __temp__.app_disp_match where case_number = 'BD19P00174'"
)