In [59]:
##nodejs:  https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/setting-up-node-on-ec2-instance.html

# !pip install "jupyterlab>=3" "ipywidgets>=7.6" --user
# !pip install jupyter-dash --user
# !jupyter lab build --user

# !pip install snowflake --user
# !pip install snowflake-connector-python --user
import os
import sys
path=!pwd
sys.path.append(os.path.join(path[0], '..'))
from utils import *
import snowflake.connector

class SnowflakeConnector(BaseConnector):
    """Open Snowflake connections from login details stored in a JSON secret."""

    def __init__(self, credentials: Credentials):
        """Decode the secret payload returned by ``credentials.get_keys()``.

        The payload is read from the ``'SecretString'`` entry and parsed as JSON;
        when that entry is absent an empty JSON object is used instead.
        """
        secret_payload = credentials.get_keys().get('SecretString', "{}")
        self._secrets = json.loads(secret_payload)

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        """Return a new ``snowflake.connector`` connection.

        Args:
            dbname: database the session is opened against.
            schema: schema the session is opened against (``'DEFAULT'`` if omitted).

        Raises:
            KeyError: if the decoded secret lacks ``login_name``,
                ``login_password``, ``account`` or ``warehouse``.
        """
        secrets = self._secrets
        # Lookups happen in this order, so a missing key surfaces before any
        # network call is attempted.
        connect_kwargs = dict(
            user=secrets['login_name'],
            password=secrets['login_password'],
            account=secrets['account'],
            warehouse=secrets['warehouse'],
            database=dbname,
            schema=schema,
        )
        return snowflake.connector.connect(**connect_kwargs)
    
## Credentials
# Identifier handed to SSMPSCredentials; presumably the name of the stored secret
# that holds the Snowflake login — TODO confirm against utils.
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection
# `ctx` is the live connection the query cells call `execute_string` on;
# `cur` is a cursor opened on that same connection.
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_PROD","DATASCIENCE_STAGE")
cur = ctx.cursor()

## Get Actuals

In [None]:
# Scratch cell: prints the literal 1. Presumably a kernel liveness check — confirm, or delete.
print(1)

In [None]:
# Multi-statement Snowflake script; statements run in order and only the final
# `select *` returns rows.
#   1. `set val_date` defines the session variable used as the cut-off date.
#   2. `create or replace table max_dev.workspace.psi_past_base_full` rebuilds the
#      base table: `dates` yields one row per title / season / category / tier and
#      request_date from the premiere date to 90 days after it, left-joined to
#      `hv` (hours viewed, heartbeats with at least 120 elapsed play seconds) and
#      `fv` (first views, country_iso_code in US / PR / GU). Missing activity is
#      reported as 0.
#   3. `select *` reads the freshly built table back.
# finished_window_flag uses a 31-day threshold for category 'Popcorn' with an
# effective_start_date before 2022 and 90 days otherwise.
# NOTE(review): val_date is derived via convert_timezone on a bare date string;
# the resulting day may depend on the session timezone — confirm it resolves to
# the intended date.
# NOTE(review): the date spine is capped by the hard-coded '2024-12-31' literal.
querystr = '''
set val_date = to_date(convert_timezone('America/Los_Angeles','2021-10-25'));
--Step 4: Gather past metrics and create the basic heuristic forecast, plus median metrics tables
create or replace table max_dev.workspace.psi_past_base_full as (
with assets as (
select distinct
      a.title_id
    , coalesce(a.season_number,0) as season_number
    , a.viewable_id
    , title_name
    , first_offered_date::date as asset_max_premiere
    , end_utc_max::date as asset_max_end_dt
    , coalesce(raod.season_first_offered_date::date,raod.title_first_offered_date::date) as season_premiere
    , asset_run_time
    , a.content_category
    , episode_number_in_season
    , content_source
    , program_type
    , category
    , tier
    , viewership_start_date as effective_start_date
    , viewership_end_date as effective_end_date
from max_prod.catalog.reporting_asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
on a.viewable_id = raod.viewable_id
and brand = 'HBO MAX'
and territory = 'HBO MAX DOMESTIC'
and channel = 'HBO MAX SUBSCRIPTION'
inner join max_prod.content_analytics.psi_past_title_metadata b
on a.title_id = b.viewership_title_id
and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
where 1 = 1
and asset_type IN ('FEATURE','ELEMENT')
and start_utc_max is not null
and a.content_category in ('movies','series','special')
and coalesce(raod.season_first_offered_date,raod.title_first_offered_date)  >= '2020-05-27 07:01:00.000'
order by season_premiere, title_name-- desc
)
, fv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , count(distinct concat(hbo_uuid, subscription_id)) as first_views
    from MAX_PROD.BI_ANALYTICS.SUBSCRIPTION_FIRST_CONTENT_WATCHED a
    inner join assets b
        on a.viewable_id = b.viewable_id
        --and request_time_gmt::date between season_premiere_date and dateadd('day',90,season_premiere_date)
        --and season_premiere_date >= '2020-05-27 07:00:01'
    where 1 = 1
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    and country_iso_code in ('US','PR','GU')
    group by 1,2,3,4,5,6,7
    --order by 2,4
)
, hv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , coalesce(round(sum(stream_elapsed_play_seconds)/3600,3), 0) as hours_viewed
    from max_prod.viewership.max_user_stream_heartbeat a
    inner join assets b
        on a.viewable_id = b.viewable_id
    where 1 = 1
    and stream_elapsed_play_seconds >= 120
    and request_time_gmt > '2020-05-27 07:00:00'
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    group by 1,2,3,4,5,6,7
)
, dates as (
    select distinct
          rs.title_id
        , rs.title_name
        , rs.season_number
        , rs.content_category
        , rs.content_source
        , rs.program_type
        , rs.category
        , rs.tier
        --, rs.season_premiere
        , rs.effective_start_date
        , request_date
        , case when request_date::date = effective_start_date::date then 1 else 0 end as premiere_ind
        , count(distinct case when request_date::date = asset_max_premiere::date then viewable_id else null end) as asset_premiere_count
        , round(sum(distinct case when request_date::date = asset_max_premiere::date then asset_run_time else 0 end)/3600,3) as premiering_hours_runtime
    from assets rs
    cross join (
        select distinct seq_date as request_date 
        from max_prod.staging.date_range 
        where seq_date < '2024-12-31'::date
    ) rd
    where rd.request_date between 
    coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere) 
    and dateadd('days',90,coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere))
      and rd.request_date between effective_start_date and effective_end_date
    group by 1,2,3,4,5,6,7,8,9,10,11
    order by 2,3,8
)
    select dt.*
        , coalesce(first_views,0) as first_views
        , coalesce(hours_viewed,0) as hours_viewed
        , dt.request_date - effective_start_date as days_since_premiere
        , $val_date - effective_start_date -1 as days_on_platform
        , case when $val_date - effective_start_date - 1 >=
            case when dt.category = 'Popcorn' and year(effective_start_date) < 2022 then 31 else 90 end
        then 1 else 0 end as finished_window_flag
    from dates dt
    left join hv
    on dt.title_id = hv.title_id
    and dt.season_number = hv.season_number
    and dt.request_date = hv.request_date
    and dt.content_category = hv.content_category
    and dt.category = hv.category
    and dt.tier = hv.tier
    left join fv
    on dt.title_id = fv.title_id
    and dt.season_number = fv.season_number
    and dt.request_date = fv.request_date
    and dt.content_category = fv.content_category
    and dt.category = fv.category
    and dt.tier = fv.tier
    where 1 = 1
    --and dt.title_name like 'In Treatment'
    order by title_id, title_name, season_number, category, request_date
);
select
* 
from max_dev.workspace.psi_past_base_full
'''

# execute_string runs each statement in querystr and hands back one cursor per
# statement; the rows of interest belong to the last one.
cursor_list = ctx.execute_string(querystr)
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Normalise the column labels to lower case.
df.columns = df.columns.str.lower()
display(df.head())
# Persist the full result set as CSV on S3.
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/fv_actual_1025.csv')


## Get total prediction

In [16]:
# Multi-statement Snowflake script; statements run in order and only the last
# one (s3) returns rows.
#   s0  sets the session variable val_date used as the cut-off date.
#   s1  rebuilds max_dev.workspace.psi_past_base_temp: one row per title / season /
#       category / tier and request_date from the premiere date to 90 days after
#       it, with first_views and hours_viewed (0 when no activity joined).
#   s2  rebuilds max_dev.workspace.psi_median_decay: per category and
#       days_since_premiere, the median daily share of hours viewed / first views
#       over rows with finished_window_flag = 1, divided by the per-category sum
#       of those medians.
#   s3  bare SELECT (its CREATE TABLE line is commented out): keeps actuals for
#       request_date earlier than val_date - 1 day, otherwise uses
#       median share x predicted title total, where the predicted total is
#       "so far" actuals divided by the cumulative median share so far.
# Both tables are persistent (`create or replace table`), so they remain in
# max_dev.workspace after this cell finishes.
# NOTE(review): in s1 both branches of the Popcorn CASE return 90, so that
# condition currently has no effect on finished_window_flag — confirm intended.
# NOTE(review): in s2 `where 1 = 1 and total_first_views >= 0 or
# base.finished_window_flag = 0` relies on AND binding tighter than OR — confirm
# that grouping is the intended one.
# NOTE(review): s2 and s3 divide with `/` where enriched_base uses div0 — confirm
# the denominators can never be zero.
querystr='''
-- s0
set val_date = to_date(convert_timezone('America/Los_Angeles','2021-01-01'));
-- s1 Step 4: Gather past metrics and create the basic heuristic forecast, plus median metrics tables
create or replace table max_dev.workspace.psi_past_base_temp as (
with assets as (
select distinct
      a.title_id
    , coalesce(a.season_number,0) as season_number
    , a.viewable_id
    , title_name
    , first_offered_date::date as asset_max_premiere
    , end_utc_max::date as asset_max_end_dt
    , coalesce(raod.season_first_offered_date::date,raod.title_first_offered_date::date) as season_premiere
    , asset_run_time
    , a.content_category
    , episode_number_in_season
    , content_source
    , program_type
    , category
    , tier
    , viewership_start_date as effective_start_date
    , viewership_end_date as effective_end_date
from max_prod.catalog.reporting_asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
on a.viewable_id = raod.viewable_id
and brand = 'HBO MAX'
and territory = 'HBO MAX DOMESTIC'
and channel = 'HBO MAX SUBSCRIPTION'
inner join max_prod.content_analytics.psi_past_title_metadata b
on a.title_id = b.viewership_title_id
and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
where 1 = 1
and asset_type IN ('FEATURE','ELEMENT')
and start_utc_max is not null
and a.content_category in ('movies','series','special')
and coalesce(raod.season_first_offered_date,raod.title_first_offered_date)  >= '2020-05-27 07:01:00.000'
order by season_premiere, title_name-- desc
)
, hv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , coalesce(round(sum(stream_elapsed_play_seconds)/3600,3), 0) as hours_viewed
    from max_prod.viewership.max_user_stream_heartbeat a
    inner join assets b
        on a.viewable_id = b.viewable_id
    where 1 = 1
    and stream_elapsed_play_seconds >= 120
    and request_time_gmt > '2020-05-27 07:00:00'
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    group by 1,2,3,4,5,6,7
)
, fv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , count(distinct concat(hbo_uuid, subscription_id)) as first_views
    from MAX_PROD.BI_ANALYTICS.SUBSCRIPTION_FIRST_CONTENT_WATCHED a
    inner join assets b
        on a.viewable_id = b.viewable_id
        --and request_time_gmt::date between season_premiere_date and dateadd('day',90,season_premiere_date)
        --and season_premiere_date >= '2020-05-27 07:00:01'
    where 1 = 1
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    and country_iso_code in ('US','PR','GU')
    group by 1,2,3,4,5,6,7
    --order by 2,4
)
, dates as (
    select distinct
          rs.title_id
        , rs.title_name
        , rs.season_number
        , rs.content_category
        , rs.content_source
        , rs.program_type
        , rs.category
        , rs.tier
        --, rs.season_premiere
        , rs.effective_start_date
        , request_date
        , case when request_date::date = effective_start_date::date then 1 else 0 end as premiere_ind
        , count(distinct case when request_date::date = asset_max_premiere::date then viewable_id else null end) as asset_premiere_count
        , round(sum(distinct case when request_date::date = asset_max_premiere::date then asset_run_time else 0 end)/3600,3) as premiering_hours_runtime
    from assets rs
    cross join (
        select distinct seq_date as request_date 
        from max_prod.staging.date_range 
        where seq_date < '2024-12-31'::date
    ) rd
    where rd.request_date between 
    coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere) 
    and dateadd('days',90,coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere))
      and rd.request_date between effective_start_date and effective_end_date
    group by 1,2,3,4,5,6,7,8,9,10,11
    order by 2,3,8
)
    select dt.*
        , coalesce(first_views,0) as first_views
        , coalesce(hours_viewed,0) as hours_viewed
        , dt.request_date - effective_start_date as days_since_premiere
        , $val_date - effective_start_date -1 as days_on_platform
        , case when $val_date - effective_start_date - 1 >=
            case when dt.category = 'Popcorn' and year(effective_start_date) < 2022 then 90 else 90 end
        then 1 else 0 end as finished_window_flag
    from dates dt
    left join hv
    on dt.title_id = hv.title_id
    and dt.season_number = hv.season_number
    and dt.request_date = hv.request_date
    and dt.content_category = hv.content_category
    and dt.category = hv.category
    and dt.tier = hv.tier
    left join fv
    on dt.title_id = fv.title_id
    and dt.season_number = fv.season_number
    and dt.request_date = fv.request_date
    and dt.content_category = fv.content_category
    and dt.category = fv.category
    and dt.tier = fv.tier
    where 1 = 1
    --and dt.title_name like 'In Treatment'
    order by title_id, title_name, season_number, category, request_date
)
;

--s2
create or replace table max_dev.workspace.psi_median_decay as (
with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , finished_window_flag
    , sum(first_views) as total_first_views
    , sum(hours_viewed) as total_hours_viewed
from max_dev.workspace.psi_past_base_temp
where effective_start_date <= $val_date
group by 1,2,3,4,5,6,7
)
, enriched_base as (
select
    base.*
    , div0(first_views,total_first_views) as first_views_pct
    , div0(hours_viewed,total_hours_viewed) as hours_viewed_pct
from max_dev.workspace.psi_past_base_temp as base
left join title_totals tt
on base.title_id = tt.title_id
and base.season_number = tt.season_number
and base.content_category = tt.content_category
and base.category = tt.category
and base.tier = tt.tier
and tt.finished_window_flag = 1
where 1 = 1
and total_first_views >= 0
or base.finished_window_flag = 0
)
, median_decay_pre as (
select
      category
    , days_since_premiere
    , median(hours_viewed_pct) as med_hours_viewed_pct
    , median(first_views_pct) as med_first_views_pct
from enriched_base
where finished_window_flag = 1
group by 1,2
order by 1,2
)
, median_decay_modifier as (
select
      category
    , sum(med_hours_viewed_pct) as med_hv_mod
    , sum(med_first_views_pct) as med_fv_mod
from median_decay_pre
group by 1
)
--, median_decay as (
select
      a.category
    , days_since_premiere
    , med_hours_viewed_pct/med_hv_mod as med_hours_viewed_pct
    , med_first_views_pct/med_fv_mod as med_first_views_pct
from median_decay_pre a
join median_decay_modifier b
on a.category = b.category
)
;

--s3
--create or replace temporary table max_dev.workspace.psi_past_current_daily_viewership as (
with current_running_assets as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , max(days_since_premiere) days_so_far
    , sum(hours_viewed) as hv_so_far
    , sum(first_views) as fv_so_far
from max_dev.workspace.psi_past_base_temp
where 1 = 1
and request_date < dateadd('days',-1, $val_date::date)
and effective_start_date < dateadd('days',-4,$val_date::date)
and finished_window_flag = 0
group by 1,2,3,4,5,6,7
)
, current_running_assets_enriched as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , a.category
    , a.tier
    , effective_start_date
    , days_so_far
    , hv_so_far
    , sum(med_hours_viewed_pct) hv_pct_so_far
    , fv_so_far
    , sum(med_first_views_pct) fv_pct_so_far
from current_running_assets a
join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and days_since_premiere <= days_so_far
group by 1,2,3,4,5,6,7,8,9,11
)
, current_running_assets_predicted_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , hv_so_far/hv_pct_so_far as predicted_total_hours_viewed
    , fv_so_far/fv_pct_so_far as predicted_total_first_views
from current_running_assets_enriched
)
select
      a.*
    , case when request_date < dateadd('days',-1,$val_date::date) then first_views
    else round(b.med_first_views_pct * c.predicted_total_first_views,0) end as predicted_first_views
    , case when request_date < dateadd('days',-1,$val_date::date) then hours_viewed
    else round(b.med_hours_viewed_pct * c.predicted_total_hours_viewed,3) end as predicted_hours_viewed
from max_dev.workspace.psi_past_base_temp a
left join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and a.days_since_premiere = b.days_since_premiere
left join current_running_assets_predicted_totals c
on a.title_id = c.title_id
and a.season_number = c.season_number
and a.content_category = c.content_category
and a.category = c.category
and a.tier =  c.tier
--where a.title_name like '%Snyder%'
;

'''


# TODO: ask about tier 0 logic.
# execute_string runs each statement in querystr and hands back one cursor per
# statement; the rows of interest belong to the last one.
cursor_list = ctx.execute_string(querystr)
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Normalise the column labels to lower case.
df.columns = df.columns.str.lower()
print(df.head())
# Persist the full result set as CSV on S3.
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/fv_pred_0101.csv')

                title_id     title_name  season_number content_category  \
0  GXxmT_ww39aJrqAEAAAAM  A Hidden Life              0           movies   
1  GXxmT_ww39aJrqAEAAAAM  A Hidden Life              0           movies   
2  GXxmT_ww39aJrqAEAAAAM  A Hidden Life              0           movies   
3  GXxmT_ww39aJrqAEAAAAM  A Hidden Life              0           movies   
4  GXxmT_ww39aJrqAEAAAAM  A Hidden Life              0           movies   

  content_source program_type category tier effective_start_date request_date  \
0            HBO     acquired     Pay1    3           2020-09-01   2020-10-21   
1            HBO     acquired     Pay1    3           2020-09-01   2020-10-22   
2            HBO     acquired     Pay1    3           2020-09-01   2020-10-23   
3            HBO     acquired     Pay1    3           2020-09-01   2020-10-24   
4            HBO     acquired     Pay1    3           2020-09-01   2020-10-25   

   premiere_ind  asset_premiere_count premiering_hours_runtime

In [15]:
# Scratch cell: prints the literal 1. Presumably a kernel liveness check — confirm, or delete.
print(1)

1


## Get median prediction decayed

In [3]:
# Multi-statement Snowflake script; statements run in order and only the final
# SELECT returns rows.
#   s0  sets the session variable val_date used as the cut-off date.
#   1.  rebuilds the TEMPORARY table
#       max_dev.workspace.psi_past_current_daily_viewership: actuals for
#       request_date earlier than val_date - 1 day, otherwise
#       median share x predicted title total.
#   2.  final SELECT: per category and tier, the median of the predicted title
#       totals multiplied by each day's median share from psi_median_decay. A
#       synthetic tier '0' row is unioned in as 7x the tier '1' medians.
# This script creates neither max_dev.workspace.psi_past_base,
# psi_past_base_temp nor psi_median_decay; all three must already exist, so the
# result depends on whichever run last rebuilt them (and on the val_date used
# then).
# NOTE(review): current_running_assets reads psi_past_base while the final select
# of the temporary table reads psi_past_base_temp — confirm the two different
# sources are intended.
# NOTE(review): the predicted totals divide with `/`; confirm hv_pct_so_far and
# fv_pct_so_far can never be zero.
querystr='''
-- s0
set val_date = to_date(convert_timezone('America/Los_Angeles','2021-06-01'));

create or replace temporary table max_dev.workspace.psi_past_current_daily_viewership as (
with current_running_assets as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , max(days_since_premiere) days_so_far
    , sum(hours_viewed) as hv_so_far
    , sum(first_views) as fv_so_far
from max_dev.workspace.psi_past_base
where 1 = 1
and request_date < dateadd('days',-1, $val_date::date)
and effective_start_date < dateadd('days',-4, $val_date::date)
and finished_window_flag = 0
group by 1,2,3,4,5,6,7
)
, current_running_assets_enriched as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , a.category
    , a.tier
    , effective_start_date
    , days_so_far
    , hv_so_far
    , sum(med_hours_viewed_pct) hv_pct_so_far
    , fv_so_far
    , sum(med_first_views_pct) fv_pct_so_far
from current_running_assets a
join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and days_since_premiere <= days_so_far
group by 1,2,3,4,5,6,7,8,9,11
)
, current_running_assets_predicted_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , hv_so_far/hv_pct_so_far as predicted_total_hours_viewed
    , fv_so_far/fv_pct_so_far as predicted_total_first_views
from current_running_assets_enriched
)
--, past_current_daily_predictions as (
select
      a.*
    , case when request_date < dateadd('days',-1, $val_date::date) then first_views
    else round(b.med_first_views_pct * c.predicted_total_first_views,0) end as predicted_first_views
    , case when request_date < dateadd('days',-1, $val_date::date) then hours_viewed
    else round(b.med_hours_viewed_pct * c.predicted_total_hours_viewed,3) end as predicted_hours_viewed
from max_dev.workspace.psi_past_base_temp a
left join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and a.days_since_premiere = b.days_since_premiere
left join current_running_assets_predicted_totals c
on a.title_id = c.title_id
and a.season_number = c.season_number
and a.content_category = c.content_category
and a.category = c.category
and a.tier =  c.tier
--where a.title_name like '%Snyder%'
);


with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , sum(predicted_hours_viewed) as total_hours_viewed
    , sum(predicted_first_views) as total_first_views
from max_dev.workspace.psi_past_current_daily_viewership
--where tier = 1 and category = 'Scripted Drama Series'
group by 1,2,3,4,5,6
)
, tier_x_catg_meds as (
select
      category
    , round(tier,0)::varchar as tier
    , median(total_hours_viewed) as med_hours_viewed
    , median(total_first_views) as med_first_views
from title_totals
group by 1,2
order by 1,2
)
--, median_decay_values as (
select
      a.category
    , tier
    , days_since_premiere
    , round(med_hours_viewed * med_hours_viewed_pct,4) as predicted_hours_viewed
    , round(med_first_views * med_first_views_pct,0) as predicted_first_views
from max_dev.workspace.psi_median_decay a
join (
    select * from tier_x_catg_meds
    union
    select
          category
        , '0' as tier
        , med_hours_viewed*7
        , med_first_views*7
    from tier_x_catg_meds
    where tier = '1'
) b
on a.category = b.category
order by 1,2,3
;
'''



# TODO: ask about tier 0 logic.
# execute_string runs each statement in querystr and hands back one cursor per
# statement; the rows of interest belong to the last one.
cursor_list = ctx.execute_string(querystr)
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Normalise the column labels to lower case.
df.columns = df.columns.str.lower()
print(df.head())
# Persist the full result set as CSV on S3.
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/fv_pred_decay_0601.csv')

      category tier  days_since_premiere predicted_hours_viewed  \
0  Docu-Series    2                    0            112799.5642   
1  Docu-Series    2                    1            169267.1707   
2  Docu-Series    2                    2            197926.3640   
3  Docu-Series    2                    3            106013.4774   
4  Docu-Series    2                    4            119058.1614   

   predicted_first_views  
0                   1602  
1                   2564  
2                   2387  
3                   1459  
4                   1305  


## Metadata for future content

In [8]:
# Catalog metadata for HBO Max domestic subscription assets, full-outer-joined to
# the future programming schedule on title.
# The brand / territory / channel predicates sit in the ON clause of the
# asset-offering join (the same placement the other queries in this notebook
# use) instead of a trailing WHERE. A WHERE on those columns rejects every row
# whose catalog side is NULL, which silently drops schedule-only titles and
# reduces the FULL OUTER JOIN to a left join from the catalog side.
# NOTE(review): the schedule subquery groups by tier / season / category but
# selects only title and premiere_date, so it can emit duplicate
# (title, premiere_date) rows — confirm whether that is intended.
querystr = '''
select * from max_prod.catalog.asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
on a.viewable_id = raod.viewable_id
and brand = 'HBO MAX'
and territory = 'HBO MAX DOMESTIC'
and channel = 'HBO MAX SUBSCRIPTION'
full outer join
(select title, premiere_date from max_prod.content_analytics.daily_future_programming_schedule group by title, tier, season, category, premiere_date) fp
on a.asset_title_short = fp.title
;
'''

# execute_string runs each statement in querystr and hands back one cursor per
# statement; the rows of interest belong to the last one.
cursor_list = ctx.execute_string(querystr)
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Normalise the column labels to lower case.
df.columns = df.columns.str.lower()
display(df.head())
# Persist the full result set as CSV on S3.
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/future_program_metadata.csv')

              asset_id                                              wm_id  \
0  6880571200656194905  urn:warnermedia:wmid:product:714a9d96bfcb15d26...   
1  7549631505866500288  urn:warnermedia:wmid:product:df59373b53ac93d41...   
2  1248129206130563769  urn:warnermedia:wmid:product:9220c96177fc3ee5e...   
3  4042642318876503435  urn:warnermedia:wmid:product:4b7a3c60d37bc6ae5...   
4  3157350528423863620  urn:warnermedia:wmid:product:6aba20d3edc5d79e5...   

             viewable_id source_asset_id              series_id  \
0  GXodE-Q8aYbDCYwEAAAQT        PROD6988                   None   
1  GYUjdLgBiJp5otAEAAAAJ         2239504                   None   
2  GYNtG7w4K2hOXUQEAAAEH      PROD820381                   None   
3  GX5td4Q6-mbrCwgEAAAET          849806  GX5tdIgP5ILrCwgEAAADH   
4  GYBMTGAR7eoYUkAEAAAAa         2242223  GYBMTBAxniqsjwwEAAAAR   

               season_id franchise_id catalog_geo_id     imdb_id  \
0                   None         None           None   tt0087182  

## Full query

In [57]:
# Scratch cell: prints the literal 1. Presumably a kernel liveness check — confirm, or delete.
print(1)

1


In [62]:
# Scratch cell: prints the literal 1. Presumably a kernel liveness check — confirm, or delete.
print(1)

1


In [61]:
querystr='''
-- s0
set val_date = to_date(convert_timezone('America/Los_Angeles','2021-07-01'));
-- s1 Step 4: Gather past metrics and create the basic heuristic forecast, plus median metrics tables
create or replace table max_dev.workspace.psi_past_base_temp as (
with assets as (
select distinct
      a.title_id
    , coalesce(a.season_number,0) as season_number
    , a.viewable_id
    , title_name
    , first_offered_date::date as asset_max_premiere
    , end_utc_max::date as asset_max_end_dt
    , coalesce(raod.season_first_offered_date::date,raod.title_first_offered_date::date) as season_premiere
    , asset_run_time
    , a.content_category
    , episode_number_in_season
    , content_source
    , program_type
    , category
    , tier
    , viewership_start_date as effective_start_date
    , viewership_end_date as effective_end_date
from max_prod.catalog.reporting_asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
on a.viewable_id = raod.viewable_id
and brand = 'HBO MAX'
and territory = 'HBO MAX DOMESTIC'
and channel = 'HBO MAX SUBSCRIPTION'
inner join max_prod.content_analytics.psi_past_title_metadata b
on a.title_id = b.viewership_title_id
and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
where 1 = 1
and asset_type IN ('FEATURE','ELEMENT')
and start_utc_max is not null
and a.content_category in ('movies','series','special')
and coalesce(raod.season_first_offered_date,raod.title_first_offered_date)  >= '2020-05-27 07:01:00.000'
order by season_premiere, title_name-- desc
)
, hv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , coalesce(round(sum(stream_elapsed_play_seconds)/3600,3), 0) as hours_viewed
    from max_prod.viewership.max_user_stream_heartbeat a
    inner join assets b
        on a.viewable_id = b.viewable_id
    where 1 = 1
    and stream_elapsed_play_seconds >= 120
    and request_time_gmt > '2020-05-27 07:00:00'
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    group by 1,2,3,4,5,6,7
)
, fv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , count(distinct concat(hbo_uuid, subscription_id)) as first_views
    from MAX_PROD.BI_ANALYTICS.SUBSCRIPTION_FIRST_CONTENT_WATCHED a
    inner join assets b
        on a.viewable_id = b.viewable_id
        --and request_time_gmt::date between season_premiere_date and dateadd('day',90,season_premiere_date)
        --and season_premiere_date >= '2020-05-27 07:00:01'
    where 1 = 1
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    and country_iso_code in ('US','PR','GU')
    group by 1,2,3,4,5,6,7
    --order by 2,4
)
, dates as (
    select distinct
          rs.title_id
        , rs.title_name
        , rs.season_number
        , rs.content_category
        , rs.content_source
        , rs.program_type
        , rs.category
        , rs.tier
        --, rs.season_premiere
        , rs.effective_start_date
        , request_date
        , case when request_date::date = effective_start_date::date then 1 else 0 end as premiere_ind
        , count(distinct case when request_date::date = asset_max_premiere::date then viewable_id else null end) as asset_premiere_count
        , round(sum(distinct case when request_date::date = asset_max_premiere::date then asset_run_time else 0 end)/3600,3) as premiering_hours_runtime
    from assets rs
    cross join (
        select distinct seq_date as request_date 
        from max_prod.staging.date_range 
        where seq_date < '2024-12-31'::date
    ) rd
    where rd.request_date between 
    coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere) 
    and dateadd('days',90,coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere))
      and rd.request_date between effective_start_date and effective_end_date
    group by 1,2,3,4,5,6,7,8,9,10,11
    order by 2,3,8
)
    select dt.*
        , coalesce(first_views,0) as first_views
        , coalesce(hours_viewed,0) as hours_viewed
        , dt.request_date - effective_start_date as days_since_premiere
        , $val_date - effective_start_date -1 as days_on_platform
        , case when $val_date - effective_start_date - 1 >=
            case when dt.category = 'Popcorn' and year(effective_start_date) < 2022 then 90 else 90 end
        then 1 else 0 end as finished_window_flag
    from dates dt
    left join hv
    on dt.title_id = hv.title_id
    and dt.season_number = hv.season_number
    and dt.request_date = hv.request_date
    and dt.content_category = hv.content_category
    and dt.category = hv.category
    and dt.tier = hv.tier
    left join fv
    on dt.title_id = fv.title_id
    and dt.season_number = fv.season_number
    and dt.request_date = fv.request_date
    and dt.content_category = fv.content_category
    and dt.category = fv.category
    and dt.tier = fv.tier
    where 1 = 1
    --and dt.title_name like 'In Treatment'
    order by title_id, title_name, season_number, category, request_date
)
;

--s2
create or replace table max_dev.workspace.psi_median_decay as (
with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , finished_window_flag
    , sum(first_views) as total_first_views
    , sum(hours_viewed) as total_hours_viewed
from max_dev.workspace.psi_past_base_temp
where effective_start_date <= $val_date
group by 1,2,3,4,5,6,7
)
, enriched_base as (
select
    base.*
    , div0(first_views,total_first_views) as first_views_pct
    , div0(hours_viewed,total_hours_viewed) as hours_viewed_pct
from max_dev.workspace.psi_past_base_temp as base
left join title_totals tt
on base.title_id = tt.title_id
and base.season_number = tt.season_number
and base.content_category = tt.content_category
and base.category = tt.category
and base.tier = tt.tier
and tt.finished_window_flag = 1
where 1 = 1
and total_first_views >= 0
or base.finished_window_flag = 0
)
, median_decay_pre as (
select
      category
    , days_since_premiere
    , median(hours_viewed_pct) as med_hours_viewed_pct
    , median(first_views_pct) as med_first_views_pct
from enriched_base
where finished_window_flag = 1
group by 1,2
order by 1,2
)
, median_decay_modifier as (
select
      category
    , sum(med_hours_viewed_pct) as med_hv_mod
    , sum(med_first_views_pct) as med_fv_mod
from median_decay_pre
group by 1
)
select
      a.category
    , days_since_premiere
    , med_hours_viewed_pct/med_hv_mod as med_hours_viewed_pct
    , med_first_views_pct/med_fv_mod as med_first_views_pct
from median_decay_pre a
join median_decay_modifier b
on a.category = b.category
)
;

--s3
create or replace temporary table max_dev.workspace.psi_past_current_daily_viewership as (
with current_running_assets as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , max(days_since_premiere) days_so_far
    , sum(hours_viewed) as hv_so_far
    , sum(first_views) as fv_so_far
from max_dev.workspace.psi_past_base_temp
where 1 = 1
and request_date < dateadd('days',-1, $val_date::date)
and effective_start_date < dateadd('days',-4,$val_date::date)
and finished_window_flag = 0
group by 1,2,3,4,5,6,7
)
, current_running_assets_enriched as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , a.category
    , a.tier
    , effective_start_date
    , days_so_far
    , hv_so_far
    , sum(med_hours_viewed_pct) hv_pct_so_far
    , fv_so_far
    , sum(med_first_views_pct) fv_pct_so_far
from current_running_assets a
join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and days_since_premiere <= days_so_far
group by 1,2,3,4,5,6,7,8,9,11
)
, current_running_assets_predicted_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , hv_so_far/hv_pct_so_far as predicted_total_hours_viewed
    , fv_so_far/fv_pct_so_far as predicted_total_first_views
from current_running_assets_enriched
)
select
      a.*
    , case when request_date < dateadd('days',-1,$val_date::date) then first_views
    else round(b.med_first_views_pct * c.predicted_total_first_views,0) end as predicted_first_views
    , case when request_date < dateadd('days',-1,$val_date::date) then hours_viewed
    else round(b.med_hours_viewed_pct * c.predicted_total_hours_viewed,3) end as predicted_hours_viewed
from max_dev.workspace.psi_past_base_temp a
left join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and a.days_since_premiere = b.days_since_premiere
left join current_running_assets_predicted_totals c
on a.title_id = c.title_id
and a.season_number = c.season_number
and a.content_category = c.content_category
and a.category = c.category
and a.tier =  c.tier)
;
create or replace table max_dev.workspace.psi_past_current_inferred_decay_values_ as (
with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , sum(predicted_hours_viewed) as total_hours_viewed
    , sum(predicted_first_views) as total_first_views
from max_dev.workspace.psi_past_current_daily_viewership
--where tier = 1 and category = 'Scripted Drama Series'
group by 1,2,3,4,5,6
)
, tier_x_catg_meds as (
select
      category
    , round(tier,0)::varchar as tier
    , median(total_hours_viewed) as med_hours_viewed
    , median(total_first_views) as med_first_views
from title_totals
group by 1,2
order by 1,2
)
select
      a.category
    , tier
    , days_since_premiere
    , round(med_hours_viewed * med_hours_viewed_pct,4) as predicted_hours_viewed
    , round(med_first_views * med_first_views_pct,0) as predicted_first_views
from max_dev.workspace.psi_median_decay a
join (
    select * from tier_x_catg_meds
    union
    select
          category
        , '0' as tier
        , med_hours_viewed*7
        , med_first_views*7
    from tier_x_catg_meds
    where tier = '1'
) b
on a.category = b.category
order by 1,2,3
)
;

--- merge tier-category-days_post_premiere prediction (psi_past_current_inferred_decay_values) 
--- to future programming schedule 
select
      null as title_id
    , a.title as title_name
    , a.season as season_number
    , null as content_category
    , source as content_source
    , null as program_type
    , initcap(a.category) as category
    , a.tier
    --, a.season_premiere
    , a.premiere_date
    , a.seq_date
    , num_premiering_titles
    , num_episodes_released
    , num_hours_released
    , 0 as first_views
    , 0 as hours_viewed
    , seq_date::date - a.premiere_date as says_since_premiere
    , $val_date::date - 1 - a.premiere_date as days_on_platform
    , 0 as finished_window_flag
    , b.predicted_first_views
    , b.predicted_hours_viewed
    , schedule_label
from max_prod.content_analytics.daily_future_programming_schedule a
left join max_dev.workspace.psi_past_current_inferred_decay_values_ b
on case when initcap(a.category) = 'Popcorn' and year(a.premiere_date) >= 2022 then 'Scripted Features'
    else initcap(a. category) end = initcap(b.category)
and seq_date - a.premiere_date = b.days_since_premiere
and a.tier::varchar = b.tier::varchar
where 1 = 1
and first_window_flag = 1
and finished_window_flag = 0
and premiere_date >= dateadd('days',-3,$val_date::date)
and premiere_date < dateadd('days',31,$val_date::date)
and concat(title,season) not in (
    select distinct concat(psi_title,a.season_number)
    from max_dev.workspace.psi_past_current_daily_viewership a
    join max_prod.content_analytics.psi_past_title_metadata b
    on initcap(a.title_name) = initcap(b.viewership_title)
    and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
    where premiere_ind = 1
    and a.effective_start_date < dateadd('days',-3, $val_date::date)
    )
;
'''


## TODO: ask about the tier 0 logic used in the query
# Run every statement held in querystr; the cursors come back as a sequence,
# and only the last one (the final select) is read below.
cursor_list = ctx.execute_string(querystr)

# Build the frame from the last cursor: rows from fetchall(), header names from
# the first field of each entry in the cursor description.
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Lower-case the column names before printing / exporting.
df.columns = df.columns.str.lower()
print(df.head())
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/fv_pred_decay_0701.csv')

  title_id                   title_name  season_number content_category  \
0     None             Through Our Eyes              1             None   
1     None                       Freaky              0             None   
2     None  Entre Nos: Frankie Quinones              0             None   
3     None                  Gossip Girl              1             None   
4     None               No Sudden Move              0             None   

  content_source program_type               category tier premiere_date  \
0           None         None            Docu-Series    3    2021-07-22   
1           None         None                   Pay1    3    2021-07-15   
2           None         None          International    3    2021-07-09   
3           None         None  Scripted Drama Series    1    2021-07-08   
4           None         None      Scripted Features    2    2021-07-15   

     seq_date  num_premiering_titles  num_episodes_released  \
0  2021-09-24                      

In [60]:
# Multi-statement Snowflake script for the 2021-08-01 validation date. Statements, in order:
#   s0  set the $val_date session variable
#   s1  rebuild max_dev.workspace.psi_past_base_temp: one row per title / season / category /
#       tier / request_date (up to 90 days after effective_start_date) with the actual
#       first views and hours viewed
#   s2  rebuild psi_median_decay: per-category median share of viewing by days_since_premiere,
#       rescaled by the per-category sum of those medians
#   s3  rebuild the temporary psi_past_current_daily_viewership: actuals for request dates
#       before val_date - 1, decay-curve projections otherwise
#   s4  rebuild psi_past_current_inferred_decay_values_: median title totals per category x tier
#       spread over the decay curve, plus a synthetic tier 0 at 7x the tier 1 medians
#   s5  final select: attach those predictions to the future programming schedule
# Changes versus the previous revision of this script: the four plain divisions in s2/s3 use
# div0 (like enriched_base already does) so a zero denominator no longer aborts the run, and
# the NOT IN sub-select in s5 drops NULL keys so one NULL cannot blank out the whole result.
querystr='''
-- s0
set val_date = to_date(convert_timezone('America/Los_Angeles','2021-08-01'));
-- s1 Step 4: Gather past metrics and create the basic heuristic forecast, plus median metrics tables
create or replace table max_dev.workspace.psi_past_base_temp as (
with assets as (
select distinct
      a.title_id
    , coalesce(a.season_number,0) as season_number
    , a.viewable_id
    , title_name
    , first_offered_date::date as asset_max_premiere
    , end_utc_max::date as asset_max_end_dt
    , coalesce(raod.season_first_offered_date::date,raod.title_first_offered_date::date) as season_premiere
    , asset_run_time
    , a.content_category
    , episode_number_in_season
    , content_source
    , program_type
    , category
    , tier
    , viewership_start_date as effective_start_date
    , viewership_end_date as effective_end_date
from max_prod.catalog.reporting_asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
on a.viewable_id = raod.viewable_id
and brand = 'HBO MAX'
and territory = 'HBO MAX DOMESTIC'
and channel = 'HBO MAX SUBSCRIPTION'
inner join max_prod.content_analytics.psi_past_title_metadata b
on a.title_id = b.viewership_title_id
and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
where 1 = 1
and asset_type IN ('FEATURE','ELEMENT')
and start_utc_max is not null
and a.content_category in ('movies','series','special')
and coalesce(raod.season_first_offered_date,raod.title_first_offered_date)  >= '2020-05-27 07:01:00.000'
order by season_premiere, title_name-- desc
)
, hv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , coalesce(round(sum(stream_elapsed_play_seconds)/3600,3), 0) as hours_viewed
    from max_prod.viewership.max_user_stream_heartbeat a
    inner join assets b
        on a.viewable_id = b.viewable_id
    where 1 = 1
    and stream_elapsed_play_seconds >= 120
    and request_time_gmt > '2020-05-27 07:00:00'
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    group by 1,2,3,4,5,6,7
)
, fv as (
    select
          b.title_id
        , b.title_name
        , b.season_number
        , b.content_category
        , b.category
        , tier
        , request_time_gmt::date as request_date
        , count(distinct concat(hbo_uuid, subscription_id)) as first_views
    from MAX_PROD.BI_ANALYTICS.SUBSCRIPTION_FIRST_CONTENT_WATCHED a
    inner join assets b
        on a.viewable_id = b.viewable_id
        --and request_time_gmt::date between season_premiere_date and dateadd('day',90,season_premiere_date)
        --and season_premiere_date >= '2020-05-27 07:00:01'
    where 1 = 1
    and request_time_gmt::date between asset_max_premiere and asset_max_end_dt
    and request_time_gmt::date between effective_start_date and effective_end_date
    and request_time_gmt::date < dateadd('days',-1,$val_date)
    and country_iso_code in ('US','PR','GU')
    group by 1,2,3,4,5,6,7
    --order by 2,4
)
, dates as (
    select distinct
          rs.title_id
        , rs.title_name
        , rs.season_number
        , rs.content_category
        , rs.content_source
        , rs.program_type
        , rs.category
        , rs.tier
        --, rs.season_premiere
        , rs.effective_start_date
        , request_date
        , case when request_date::date = effective_start_date::date then 1 else 0 end as premiere_ind
        , count(distinct case when request_date::date = asset_max_premiere::date then viewable_id else null end) as asset_premiere_count
        , round(sum(distinct case when request_date::date = asset_max_premiere::date then asset_run_time else 0 end)/3600,3) as premiering_hours_runtime
    from assets rs
    cross join (
        select distinct seq_date as request_date
        from max_prod.staging.date_range
        where seq_date < '2024-12-31'::date
    ) rd
    where rd.request_date between
    coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere)
    and dateadd('days',90,coalesce(rs.effective_start_date,rs.season_premiere,rs.asset_max_premiere))
      and rd.request_date between effective_start_date and effective_end_date
    group by 1,2,3,4,5,6,7,8,9,10,11
    order by 2,3,8
)
    select dt.*
        , coalesce(first_views,0) as first_views
        , coalesce(hours_viewed,0) as hours_viewed
        , dt.request_date - effective_start_date as days_since_premiere
        , $val_date - effective_start_date -1 as days_on_platform
        -- NOTE(review): both branches of the inner case yield 90, so the window is 90 days for every category
        , case when $val_date - effective_start_date - 1 >=
            case when dt.category = 'Popcorn' and year(effective_start_date) < 2022 then 90 else 90 end
        then 1 else 0 end as finished_window_flag
    from dates dt
    left join hv
    on dt.title_id = hv.title_id
    and dt.season_number = hv.season_number
    and dt.request_date = hv.request_date
    and dt.content_category = hv.content_category
    and dt.category = hv.category
    and dt.tier = hv.tier
    left join fv
    on dt.title_id = fv.title_id
    and dt.season_number = fv.season_number
    and dt.request_date = fv.request_date
    and dt.content_category = fv.content_category
    and dt.category = fv.category
    and dt.tier = fv.tier
    where 1 = 1
    --and dt.title_name like 'In Treatment'
    order by title_id, title_name, season_number, category, request_date
)
;

--s2
create or replace table max_dev.workspace.psi_median_decay as (
with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , finished_window_flag
    , sum(first_views) as total_first_views
    , sum(hours_viewed) as total_hours_viewed
from max_dev.workspace.psi_past_base_temp
where effective_start_date <= $val_date
group by 1,2,3,4,5,6,7
)
, enriched_base as (
select
    base.*
    , div0(first_views,total_first_views) as first_views_pct
    , div0(hours_viewed,total_hours_viewed) as hours_viewed_pct
from max_dev.workspace.psi_past_base_temp as base
left join title_totals tt
on base.title_id = tt.title_id
and base.season_number = tt.season_number
and base.content_category = tt.content_category
and base.category = tt.category
and base.tier = tt.tier
and tt.finished_window_flag = 1
where 1 = 1
-- parenthesised to spell out how the and / or pair groups (result unchanged)
and (total_first_views >= 0
    or base.finished_window_flag = 0)
)
, median_decay_pre as (
select
      category
    , days_since_premiere
    , median(hours_viewed_pct) as med_hours_viewed_pct
    , median(first_views_pct) as med_first_views_pct
from enriched_base
where finished_window_flag = 1
group by 1,2
order by 1,2
)
, median_decay_modifier as (
select
      category
    , sum(med_hours_viewed_pct) as med_hv_mod
    , sum(med_first_views_pct) as med_fv_mod
from median_decay_pre
group by 1
)
select
      a.category
    , days_since_premiere
    -- div0 (as in enriched_base) so a category whose medians sum to 0 yields 0 instead of a division error
    , div0(med_hours_viewed_pct,med_hv_mod) as med_hours_viewed_pct
    , div0(med_first_views_pct,med_fv_mod) as med_first_views_pct
from median_decay_pre a
join median_decay_modifier b
on a.category = b.category
)
;

--s3
create or replace temporary table max_dev.workspace.psi_past_current_daily_viewership as (
with current_running_assets as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    , max(days_since_premiere) days_so_far
    , sum(hours_viewed) as hv_so_far
    , sum(first_views) as fv_so_far
from max_dev.workspace.psi_past_base_temp
where 1 = 1
and request_date < dateadd('days',-1, $val_date::date)
and effective_start_date < dateadd('days',-4,$val_date::date)
and finished_window_flag = 0
group by 1,2,3,4,5,6,7
)
, current_running_assets_enriched as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , a.category
    , a.tier
    , effective_start_date
    , days_so_far
    , hv_so_far
    , sum(med_hours_viewed_pct) hv_pct_so_far
    , fv_so_far
    , sum(med_first_views_pct) fv_pct_so_far
from current_running_assets a
join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and days_since_premiere <= days_so_far
group by 1,2,3,4,5,6,7,8,9,11
)
, current_running_assets_predicted_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , effective_start_date
    -- div0 so a zero cumulative decay share gives a predicted total of 0 instead of a division error
    , div0(hv_so_far,hv_pct_so_far) as predicted_total_hours_viewed
    , div0(fv_so_far,fv_pct_so_far) as predicted_total_first_views
from current_running_assets_enriched
)
select
      a.*
    , case when request_date < dateadd('days',-1,$val_date::date) then first_views
    else round(b.med_first_views_pct * c.predicted_total_first_views,0) end as predicted_first_views
    , case when request_date < dateadd('days',-1,$val_date::date) then hours_viewed
    else round(b.med_hours_viewed_pct * c.predicted_total_hours_viewed,3) end as predicted_hours_viewed
from max_dev.workspace.psi_past_base_temp a
left join max_dev.workspace.psi_median_decay b
on case when a.category = 'Popcorn' and year(effective_start_date) >= 2022 then 'Scripted Features'
    else a. category end = b.category
and a.days_since_premiere = b.days_since_premiere
left join current_running_assets_predicted_totals c
on a.title_id = c.title_id
and a.season_number = c.season_number
and a.content_category = c.content_category
and a.category = c.category
and a.tier =  c.tier)
;
create or replace table max_dev.workspace.psi_past_current_inferred_decay_values_ as (
with title_totals as (
select
      title_id
    , title_name
    , season_number
    , content_category
    , category
    , tier
    , sum(predicted_hours_viewed) as total_hours_viewed
    , sum(predicted_first_views) as total_first_views
from max_dev.workspace.psi_past_current_daily_viewership
--where tier = 1 and category = 'Scripted Drama Series'
group by 1,2,3,4,5,6
)
, tier_x_catg_meds as (
select
      category
    , round(tier,0)::varchar as tier
    , median(total_hours_viewed) as med_hours_viewed
    , median(total_first_views) as med_first_views
from title_totals
group by 1,2
order by 1,2
)
select
      a.category
    , tier
    , days_since_premiere
    , round(med_hours_viewed * med_hours_viewed_pct,4) as predicted_hours_viewed
    , round(med_first_views * med_first_views_pct,0) as predicted_first_views
from max_dev.workspace.psi_median_decay a
join (
    select * from tier_x_catg_meds
    union
    -- synthetic tier 0 rows: the tier 1 medians multiplied by 7
    select
          category
        , '0' as tier
        , med_hours_viewed*7
        , med_first_views*7
    from tier_x_catg_meds
    where tier = '1'
) b
on a.category = b.category
order by 1,2,3
)
;

--- merge tier-category-days_post_premiere prediction (psi_past_current_inferred_decay_values)
--- to future programming schedule
select
      null as title_id
    , a.title as title_name
    , a.season as season_number
    , null as content_category
    , source as content_source
    , null as program_type
    , initcap(a.category) as category
    , a.tier
    --, a.season_premiere
    , a.premiere_date
    , a.seq_date
    , num_premiering_titles
    , num_episodes_released
    , num_hours_released
    , 0 as first_views
    , 0 as hours_viewed
    -- NOTE(review): the alias below reads says_since_premiere, presumably meant days_since_premiere. Left as is so the exported column name does not change
    , seq_date::date - a.premiere_date as says_since_premiere
    , $val_date::date - 1 - a.premiere_date as days_on_platform
    , 0 as finished_window_flag
    , b.predicted_first_views
    , b.predicted_hours_viewed
    , schedule_label
from max_prod.content_analytics.daily_future_programming_schedule a
left join max_dev.workspace.psi_past_current_inferred_decay_values_ b
on case when initcap(a.category) = 'Popcorn' and year(a.premiere_date) >= 2022 then 'Scripted Features'
    else initcap(a. category) end = initcap(b.category)
and seq_date - a.premiere_date = b.days_since_premiere
and a.tier::varchar = b.tier::varchar
where 1 = 1
and first_window_flag = 1
and finished_window_flag = 0
and premiere_date >= dateadd('days',-3,$val_date::date)
and premiere_date < dateadd('days',31,$val_date::date)
and concat(title,season) not in (
    select distinct concat(psi_title,a.season_number)
    from max_dev.workspace.psi_past_current_daily_viewership a
    join max_prod.content_analytics.psi_past_title_metadata b
    on initcap(a.title_name) = initcap(b.viewership_title)
    and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
    where premiere_ind = 1
    and a.effective_start_date < dateadd('days',-3, $val_date::date)
    -- skip NULL keys: a single NULL inside a NOT IN list filters out every schedule row
    and concat(psi_title,a.season_number) is not null
    )
;
'''


## TODO: ask about the tier 0 logic used in the query
# Run every statement held in querystr; the cursors come back as a sequence,
# and only the last one (the final select) is read below.
cursor_list = ctx.execute_string(querystr)

# Build the frame from the last cursor: rows from fetchall(), header names from
# the first field of each entry in the cursor description.
df = pd.DataFrame.from_records(
    cursor_list[-1].fetchall(),
    columns=[col_meta[0] for col_meta in cursor_list[-1].description],
)
# Lower-case the column names before printing / exporting.
df.columns = df.columns.str.lower()
print(df.head())
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/fv_pred_decay_0801.csv')

  title_id     title_name  season_number content_category content_source  \
0     None       Laetitia              0             None           None   
1     None  The Other Two              2             None           None   
2     None  The Other Two              2             None           None   
3     None       Laetitia              0             None           None   
4     None  The Other Two              2             None           None   

  program_type                category tier premiere_date    seq_date  \
0         None           International    3    2021-08-30  2021-11-21   
1         None  Scripted Comedy Series    2    2021-08-26  2021-11-21   
2         None  Scripted Comedy Series    2    2021-08-26  2021-11-22   
3         None           International    3    2021-08-30  2021-11-22   
4         None  Scripted Comedy Series    2    2021-08-26  2021-11-23   

   num_premiering_titles  num_episodes_released  num_hours_released  \
0                      0         