In [3]:
##nodejs:  https://docs.aws.amazon.com/sdk-for-javascript/v2/developer-guide/setting-up-node-on-ec2-instance.html

# !pip install "jupyterlab>=3" "ipywidgets>=7.6"
# !pip install jupyter-dash
# !jupyter lab build


# !pip install snowflake --user
# !pip install snowflake-connector-python --user
# !pip install category_encoders
# !pip install xgboost
# !pip install lightgbm --user
import os
import sys
path=!pwd
sys.path.append(os.path.join(path[0], '..'))
from utils import *
import snowflake.connector
from datetime import timedelta

from category_encoders import OneHotEncoder
import xgboost as xgb
import lightgbm as lgbm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE
from sklearn.metrics import mean_absolute_percentage_error as MAPE
from sklearn.metrics import mean_absolute_error as MAE
from sklearn.metrics import r2_score as r2_score
import sklearn.model_selection

class SnowflakeConnector(BaseConnector):
    def __init__(self, credentials: Credentials):
        keys = credentials.get_keys()
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        ctx = snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema
        )

        return ctx
    
## Credentials
SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

## Snowflake connection 
conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
ctx=conn.connect("MAX_PROD","DATASCIENCE_STAGE")
cur = ctx.cursor()

## Manually entered imdb id

In [87]:
class SnowflakeConnector(BaseConnector):
    def __init__(self, credentials: Credentials):
        keys = credentials.get_keys()
        self._secrets = json.loads(keys.get('SecretString', "{}"))

    def connect(self, dbname: str, schema: str = 'DEFAULT'):
        ctx = snowflake.connector.connect(
            user=self._secrets['login_name'],
            password=self._secrets['login_password'],
            account=self._secrets['account'],
            warehouse=self._secrets['warehouse'],
            database=dbname,
            schema=schema
        )

        return ctx


def run_query(query, dbname, schema):
    SF_CREDS = 'datascience-max-dev-sagemaker-notebooks'

    conn=SnowflakeConnector(SSMPSCredentials(SF_CREDS))
    ctx=conn.connect(dbname,schema)
    cursor = ctx.cursor()
    cursor.execute(query)
    df = pd.DataFrame(cursor.fetchall(), columns = [desc[0] for desc in cursor.description])
    df.columns= df.columns.str.lower()
    return df


def cvdf_to_snowflake(df, table_name):
    stage = '@HBO_OUTBOUND_DATASCIENCE_CONTENT_DEV'
    output_bucket = "hbo-outbound-datascience-content-dev"
    filename ='psi/' + table_name + '.csv'
    dbname, schema = 'MAX_DEV', 'WORKSPACE'
    
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index = False)
    content = csv_buffer.getvalue()
    client = boto3.client('s3')
    client.put_object(Bucket=output_bucket, Key=filename, Body=content)

    print ('Create Table: ' + table_name)
    run_query('''
    create or replace table {table_name}(
    title_name varchar,
    tier int,
    season_number int, 
    category varchar,
    effective_start_date varchar,
    imdb_title_name varchar,
    imdb_title_id varchar,
    content_category varchar
    )
    '''.format(table_name = table_name), dbname, schema)

    print ('Begin Uploading')
    run_query('''
    insert into max_dev.workspace.{table_name}

    select 
          $1, $2, $3, $4, $5, $6, $7, $8
    from {stage}/psi/{file_name}

     (FILE_FORMAT => csv_v2)

    '''.format(stage = stage, table_name = table_name,
              file_name = table_name+'.csv')
            , dbname, schema)

    print ('Finish Uploading')    



In [93]:
## Upload manually entered data to snowflake 
import io

df_fp = pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/future_program_imdb_id_full.csv')
df_fp = df_fp.rename(columns={'premiere_date':'effective_start_date',
                             'title_name_imdb':'imdb_title_name',
                             'imdb_id':'imdb_title_id',
                             'program_type':'content_category'})
df_fp.loc[df_fp.content_category=='movie','content_category'] = 'movies'
df_fp = df_fp[['title_name', 'tier', 'season_number', 'category',
       'effective_start_date', 'imdb_title_name','imdb_title_id', 'content_category']]

cvdf_to_snowflake(df_fp, 'future_title_imdb_map')

for i in [1,2,3]:
    df_grp = df_fp[df_fp.tier==i].groupby('effective_start_date').apply(my_agg)
    df_grp['imdb_availability'] = df_grp['imdb_id_count']/df_grp['count']
    print(i)
    display(df_grp)

Create Table: future_title_imdb_map
Begin Uploading
Finish Uploading


KeyError: 'premiere_month'

In [95]:
querystr='''
select distinct
    ft.title_name
    , ft.imdb_title_id as imdb_imdb_series_id
    , ft.season_number
    , ft.tier
    , ft.category
    , ft.content_category
    , ft.effective_start_date
    , it.original_title as imdb_title_name
    , imc.reference_type
    , itr.original_title as reference_title
    , itr.title_id as reference_title_id
    , itr.title_type as reference_title_type
    , imcr.reference_type as reference_reference_type
    , itrr.title_id as reference_reference_title_id
from max_dev.workspace.future_title_imdb_map ft
left join enterprise_data.catalog.imdb_title it 
    on ft.imdb_title_id = it.title_id
left join enterprise_data.catalog.imdb_movie_connection imc 
    on it.title_id = imc.title_id
left join enterprise_data.catalog.imdb_title itr 
    on itr.title_id = imc.reference_title_id
left join enterprise_data.catalog.imdb_movie_connection imcr
    on itr.title_id = imcr.title_id
    and imcr.reference_type in ('featured_in')
left join enterprise_data.catalog.imdb_title itrr 
    on itrr.title_id = imcr.reference_title_id
where 1 = 1
  and imc.reference_type in ('follows','spin_off_from','remake_of','version_of','featured_in')
order by effective_start_date, title_name
;
'''

cursor_list = ctx.execute_string(
    querystr
    )
df_future = pd.DataFrame.from_records(cursor_list[-1].fetchall(), columns=[x[0] for x in cursor_list[-1].description])
df_future.columns= df_future.columns.str.lower()
df_future

In [None]:
def my_agg(x):
    agg = {
        'count': x['title_name'].count(),
        'imdb_id_count': x[x['imdb_id']!=0]['imdb_id'].count()}
    return pd.Series(agg)

display(df_fp.head(2))
print(df_fp.shape)
print(df_fp.isnull().sum())
df_fp = df_fp.fillna(0)

## Data availability for future titles 
df_grp = df_fp.groupby('premiere_month').apply(my_agg)
df_grp['imdb_availability'] = df_grp['imdb_id_count']/df_grp['count']
display(df_grp)


## Future titles IMDB features

In [80]:
querystr='''
select 
it.original_title,
it.title_id,
it.title_type,
it.genres,
imc.reference_type,
itr.original_title as reference_title,
itr.title_id as reference_title_id,
itr.title_type as reference_title_type,
imcr.reference_type as reference_referece_type
from enterprise_data.catalog.imdb_title it 
left join enterprise_data.catalog.imdb_movie_connection imc 
    on it.title_id = imc.title_id
left join enterprise_data.catalog.imdb_title itr 
    on itr.title_id = imc.reference_title_id
left join enterprise_data.catalog.imdb_movie_connection imcr
    on itr.title_id = imcr.title_id
    and imcr.reference_type in ('featured_in', 'spoofed_in')
where imc.reference_type in ('follows','spin_off_from','remake_of', 'version_of', 'featured_in')
and it.title_id in ('tt0475784',
 'tt1160419',
 'tt11198330',
 'tt0350448',
 'tt11048090',
 'tt5460226',
 'tt13406094',
 'tt7671070',
 'tt11212198',
 'tt14695788',
 'tt8783930',
 'tt12879632',
 'tt10801534',
 'tt8633478',
 'tt12585076',
 'tt7211618',
 'tt11000902',
 'tt6718412',
 'tt8633478',
 'tt11212276',
 'tt11471892',
 'tt12564744',
 'tt0264235',
 'tt7721046',
 'tt1361336',
 'tt13819960',
 'tt10801368',
 'tt10838180',
 'tt10802170',
 'tt9170108',
 'tt13649314',
 'tt13345606',
 'tt11285856',
 'tt13075042',
 'tt11815682',
 'tt9272514',
 'tt3215824',
 'tt12759100',
 'tt14558054',
 'tt8005118',
 'tt5607976',
 'tt8634332',
 'tt9620288',
 'tt9698520',
 'tt8110232',
 'tt7278862',
 'tt8416494',
 'tt10574236',
 'tt7569576',
 'tt13819960',
 'tt0293429',
 'tt0063951',
 'tt5024912',
 'tt12762460',
 'tt10222764',
 'tt12564744',
 'tt11468254',
 'tt1924245',
 'tt14675328',
 'tt10234362',
 'tt8425532',
 'tt14128670',
 'tt1321510',
 'tt13263106',
 'tt14825858',
 'tt11057226',
 'tt10244600',
 'tt12286260',
 'tt14586350',
 'tt12682218',
 'tt12286260',
 'tt0063951',
 'tt7660850',
 'tt7808566',
 'tt10653784',
 'tt11847410',
 'tt3811906',
 'tt8697870',
 'tt8772296',
 'tt14126234',
 'tt13146488',
 'tt14192504',
 'tt0063951',
 'tt4406178',
 'tt14825858',
 'tt8416494',
 'tt2887954',
 'tt8310612',
 'tt3554046',
 'tt0063951',
 'tt10380768',
 'tt11212276',
 'tt11947418',
 'tt6334354',
 'tt0063951',
 'tt11540284',
 'tt5348176',
 'tt14406000',
 'tt1043813')
;
'''

cursor_list = ctx.execute_string(
    querystr
    )
df_future = pd.DataFrame.from_records(cursor_list[-1].fetchall(), columns=[x[0] for x in cursor_list[-1].description])
df_future.columns= df_future.columns.str.lower()
df_future.to_csv('s3://datascience-hbo-users/users/tjung/psi/future_program_imdb_id_features.csv')

In [92]:
df_future[df_future.original_title=='Succession']

Unnamed: 0,original_title,title_id,title_type,genres,reference_type,reference_title,reference_title_id,reference_title_type,reference_referece_type
12,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 2020 Primetime Creative Arts Emmy Awards,tt13135398,tvSpecial,
21,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,Top 10 Best TV Shows of 2019,tt11346024,tvEpisode,
776,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
781,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
786,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
791,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
796,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
801,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,The 72nd Primetime Emmy Awards,tt11640058,tvSpecial,featured_in
812,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,Take 5 With Mireille Enos,tt10347732,tvEpisode,
931,Succession,tt7660850,tvSeries,"[""Drama""]",featured_in,Top 10 Summer TV Shows Not on Your Radar,tt9573182,tvEpisode,


In [34]:
### get imdb data for future titles.  
### features:  imdb_pg available; 
### identify if there're prequels:  reference_type = 'follows', 'remake_of','spin_off_from'
### identify popularity of prequels: 'featured_in'

df_future = pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/future_program_imdb_id_features.csv')
df_future = df_future.fillna(0)
df_future = df_future.rename(columns={'original_title':'title_name_imdb','title_id':'imdb_id'})

grpby=['title_name_imdb','imdb_id','genres','title_type','reference_type']
df_ref = df_future.groupby(by=grpby).agg({'reference_title_id':'nunique'}).reset_index()
df_ref = df_ref.pivot(index=grpby[:-1], columns='reference_type', values='reference_title_id')\
            .reset_index()
df_ref = df_ref.rename(columns={'follows':'ref_follows',
                                'spin_off_from':'ref_spin_off_from','remake_of':'ref_remake_of',
                               'version_of':'ref_version_of', 'featured_in':'ref_featured_in'})


grpby=['title_name_imdb','imdb_id','genres','title_type','reference_referece_type']
df_future['imdb_title_id'] = df_future['imdb_id']

df_ref_ref = df_future.groupby(by=grpby).agg({'imdb_title_id':'count'}).reset_index()
df_ref_ref = df_ref_ref.pivot(index=grpby[:-1], columns='reference_referece_type', values='imdb_title_id')\
            .reset_index()
df_ref_ref = df_ref_ref.rename(columns={'featured_in':'ref_ref_featured_in','spoofed_in':'ref_ref_spoofed_in'})

col_ref = ['imdb_id', 'genres','ref_follows','ref_spin_off_from','ref_remake_of', 'ref_version_of', 'ref_featured_in']
col_ref_ref = ['imdb_id','ref_ref_featured_in','ref_ref_spoofed_in']

df_tot = df_fp.merge(df_ref[col_ref], how='left', on='imdb_id')\
                    .merge(df_ref_ref[col_ref_ref], how='left', on= 'imdb_id')
df_tot = df_tot.rename(columns={'imdb_id_x':'imdb_id', 'program_type':'content_category'})
# print(df_tot.shape)
# display(df_tot.isnull().sum())
# display(df_tot.describe())
# display(df_tot.head(2))
df_tot = df_tot.fillna(0)
df_tot_future = df_tot.copy()
# df_tot.to_csv('s3://datascience-hbo-users/users/tjung/psi/imdb_features_engineered_future_program.csv')
# df_tot[df_tot.title_name=='Succession']

## Future data 
df_imdb_future = df_tot_future.copy()
df_imdb_future['ref_tot'] = df_imdb_future[['ref_follows','ref_spin_off_from','ref_remake_of']].sum(axis=1)

display(df_imdb_future.groupby(by='content_category')[['season_number','ref_tot','ref_ref_featured_in']].describe(percentiles=percents))
display(df_imdb_future[df_imdb_future.category=='Popcorn'].groupby(by='content_category')[['season_number','ref_tot','ref_ref_featured_in']].describe(percentiles=percents))


Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in
1,1,Westworld,1,4,Scripted Drama Series,2022-06-26,Westworld S4,2022-06-01,tt0475784,series,"[""Drama"",""Mystery"",""Sci-Fi""]",0.0,0.0,1.0,0.0,44.0,25.0,10.0
2,2,Tig Notaro: Drawn,3,0,Specials,2021-07-24,0,2021-07-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,Odo,3,3,Kids & Family,2022-04-07,0,2022-04-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,The First Year,3,0,Documentary Features,2022-07-05,0,2022-07-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,5,Odo,3,2,Kids & Family,2022-02-10,0,2022-02-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904,904,Co-Pro Series #1,3,1,Kids & Family,2024-10-17,0,2024-10-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
905,905,The Sex Lives of College Girls,1,4,Scripted Comedy Series,2024-11-14,Sex Lives of College Girls,2024-11-01,tt11212276,series,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
906,906,Issa Rae's Binky,3,1,Kids & Family,2024-11-14,0,2024-11-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
907,907,Noel's Christmas (Movie),3,0,Kids & Family,2024-12-05,0,2024-12-01,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2
0,694.0,1.108069,2.781846,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0,16.07,30.0,30.0,694.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,694.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
movie,44.0,0.204545,0.631703,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,1.85,2.57,3.0,3.0,44.0,0.5,1.389328,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,6.28,8.0,8.0,44.0,41.25,88.838009,0.0,0.0,0.0,0.0,0.0,0.0,31.5,121.5,177.0,381.45,461.0,461.0
series,171.0,3.994152,9.096539,0.0,0.0,0.0,0.0,1.0,2.0,3.0,4.0,15.0,53.3,55.0,55.0,171.0,0.245614,0.582511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,3.0,171.0,39.964912,122.074129,0.0,0.0,0.0,0.0,0.0,0.0,8.0,102.0,368.0,546.0,546.0,546.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2
0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
movie,16.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,0.8125,2.007278,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.5,3.5,7.1,8.0,8.0,16.0,66.0625,119.255171,0.0,0.0,0.0,0.0,0.0,9.5,84.0,151.5,248.0,418.4,461.0,461.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2
0,663.0,1.159879,2.835642,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,4.0,16.38,30.0,30.0,663.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,663.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
movie,28.0,0.321429,0.772374,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.3,2.0,2.73,3.0,3.0,28.0,0.321429,0.862965,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.65,3.46,4.0,4.0,28.0,27.071429,63.99012,0.0,0.0,0.0,0.0,0.0,0.0,13.75,85.8,153.9,249.27,276.0,276.0
series,171.0,3.994152,9.096539,0.0,0.0,0.0,0.0,1.0,2.0,3.0,4.0,15.0,53.3,55.0,55.0,171.0,0.245614,0.582511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,3.0,3.0,171.0,39.964912,122.074129,0.0,0.0,0.0,0.0,0.0,0.0,8.0,102.0,368.0,546.0,546.0,546.0


Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
238,238,The Matrix Resurrections,1,0,Popcorn,2021-12-22,The Matrix Resurrections,2021-12-01,tt10838180,movie,"[""Action"",""Sci-Fi""]",8.0,0.0,0.0,0.0,0.0,461.0,308.0,8.0


Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
238,238,The Matrix Resurrections,1,0,Popcorn,2021-12-22,The Matrix Resurrections,2021-12-01,tt10838180,movie,"[""Action"",""Sci-Fi""]",8.0,0.0,0.0,0.0,0.0,461.0,308.0,8.0
382,382,Many Saints of Newark,2,0,Popcorn,2021-10-15,The Many Saints of Newark,2021-10-01,tt8110232,movie,"[""Crime"",""Drama""]",1.0,0.0,0.0,0.0,0.0,126.0,33.0,1.0
494,494,The Suicide Squad,1,0,Popcorn,2021-08-06,The Suicide Squad,2021-08-01,tt6334354,movie,"[""Action"",""Adventure"",""Comedy"",""Sci-Fi""]",1.0,0.0,0.0,0.0,12.0,111.0,12.0,1.0
495,495,Mortal Kombat,1,0,Popcorn,2021-04-23,Mortal Kombat,2021-04-01,tt0293429,movie,"[""Action"",""Adventure"",""Fantasy"",""Sci-Fi"",""Thri...",0.0,1.0,1.0,0.0,10.0,177.0,62.0,2.0


Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
278,278,Evil Dead Rise,2,0,Scripted Features,2022-08-18,Evil Dead Rise,2022-08-01,tt13345606,movie,"[""Fantasy"",""Horror""]",4.0,0.0,0.0,0.0,0.0,276.0,81.0,4.0


Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
278,278,Evil Dead Rise,2,0,Scripted Features,2022-08-18,Evil Dead Rise,2022-08-01,tt13345606,movie,"[""Fantasy"",""Horror""]",4.0,0.0,0.0,0.0,0.0,276.0,81.0,4.0
437,437,Mortal Kombat,1,0,Pay1,2021-09-15,Mortal Kombat,2021-09-01,tt0293429,movie,"[""Action"",""Adventure"",""Fantasy"",""Sci-Fi"",""Thri...",0.0,1.0,1.0,0.0,10.0,177.0,62.0,2.0
876,876,The Suicide Squad,1,0,Pay1,2021-12-15,The Suicide Squad,2021-12-01,tt6334354,movie,"[""Action"",""Adventure"",""Comedy"",""Sci-Fi""]",1.0,0.0,0.0,0.0,12.0,111.0,12.0,1.0


In [37]:
df_tot_future[df_tot_future.title_name.str.contains('Succession')]

Unnamed: 0.1,Unnamed: 0,title_name,tier,season_number,category,premiere_date,title_name_imdb,premiere_month,imdb_id,content_category,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in
652,652,Succession,1,3,Scripted Drama Series,2021-10-17,Succession S3,2021-10-01,tt7660850,series,"[""Drama""]",0.0,0.0,0.0,0.0,11.0,13.0,0.0
699,699,Succession,1,4,Scripted Drama Series,2023-03-26,Succession S3,2023-03-01,tt7660850,series,"[""Drama""]",0.0,0.0,0.0,0.0,11.0,13.0,0.0


## Past titles IMDB

In [None]:
## series imdb ids are missing- get it from select * from max_prod.editorial.imdb_viewable_map where contains(title, 'Wolves')

For series past titles, ref_tot=0, ref_ref_tot = ref_featured_in * (season_number-1)/season_number (to account for before-premiere)
For non-series past titles, ref_tot= follows,version_of, etc., ref_ref_tot = sum of ref_ref_featured_in 

## winsorize season to 10
## winsorize ref_tot to 5 
## winsorize ref_ref tot to 20 

## Penalize kids & family 

In [30]:
querystr='''
select distinct
      a.title_id
    , coalesce(a.season_number,0) as season_number
    , a.viewable_id
    , title_name
    , a.content_category
    , program_type
    , category
    , tier
    , viewership_start_date as effective_start_date
    , viewership_end_date as effective_end_date
    , coalesce(ivm.imdb_id, ivm.imdb_series_id) as imdb_imdb_series_id
    , imc.reference_type
    , itr.original_title as reference_title
    , itr.title_id as reference_title_id
    , itr.title_type as reference_title_type
    , imcr.reference_type as reference_reference_type
    , itrr.title_id as reference_reference_title_id
from max_prod.catalog.reporting_asset_dim a
join max_prod.catalog.reporting_asset_offering_dim raod
  on a.viewable_id = raod.viewable_id
  and brand = 'HBO MAX'
  and territory = 'HBO MAX DOMESTIC'
  and channel = 'HBO MAX SUBSCRIPTION'
inner join max_prod.content_analytics.psi_past_title_metadata b
    on a.title_id = b.viewership_title_id
    and coalesce(a.season_number,0) = coalesce(b.viewership_season_number,0)
left join max_prod.editorial.imdb_viewable_map ivm
    on b.viewership_title_id = coalesce(ivm.viewable_id, ivm.viewable_series_id) 
left join enterprise_data.catalog.imdb_title it 
    on coalesce(ivm.imdb_id, ivm.imdb_series_id) = it.title_id
left join enterprise_data.catalog.imdb_movie_connection imc 
    on it.title_id = imc.title_id
left join enterprise_data.catalog.imdb_title itr 
    on itr.title_id = imc.reference_title_id
left join enterprise_data.catalog.imdb_movie_connection imcr
    on itr.title_id = imcr.title_id
    and imcr.reference_type in ('featured_in')
left join enterprise_data.catalog.imdb_title itrr 
    on itrr.title_id = imcr.reference_title_id
where 1 = 1
  and asset_type IN ('FEATURE','ELEMENT')
  and start_utc_max is not null
  and a.content_category in ('movies','series','special')
  and coalesce(raod.season_first_offered_date,raod.title_first_offered_date)  >= '2020-05-27 07:01:00.000'
  and coalesce(episode_number_in_season, 0) <=1
  and imc.reference_type in ('follows','spin_off_from','remake_of','version_of','featured_in')
order by effective_start_date, title_name
;
'''

cursor_list = ctx.execute_string(
    querystr
    )
df = pd.DataFrame.from_records(cursor_list[-1].fetchall(), columns=[x[0] for x in cursor_list[-1].description])
df.columns= df.columns.str.lower()
df = df.fillna(0)
df.to_csv('s3://datascience-hbo-users/users/tjung/psi/imdb_features.csv')
df_actual=pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/fv_actual_1025.csv')

In [59]:
## Series 
def munge_imdb_features(df, df_actual, historical=True):

    grpby = ['title_id','season_number','title_name','content_category','category','tier','effective_start_date']

    df_s = df[(df.content_category=='series') & df.reference_type.isin(['featured_in'])] 
    df_s_tot = df_s[df_s.reference_type=='featured_in'].groupby(by=grpby).agg({'reference_title_id':'nunique'}).reset_index()
    df_s_tot = df_s_tot.rename(columns={'reference_title_id':'ref_ref_featured_in'})
    ### Adjustment for training data 
    df_s_tot[['ref_follows','ref_spin_off_from','ref_remake_of']] = 0
    df_s_tot[['ref_tot']] = 0
    df_s_tot['ref_ref_tot'] = 0
    if historical==True:
        df_s_tot['ref_ref_featured_in'] = (df_s_tot['ref_ref_featured_in'] * (df_s_tot['season_number']-1))/df_s_tot['season_number']
    df_s_tot.loc[df_s_tot.season_number>1, 'ref_ref_tot'] = (df_s_tot['ref_ref_featured_in'])/(df_s_tot['season_number']-1)
        
    ## Non-series 
    df_md = df[(df.content_category!='series') & (df.reference_type.isin(['follows','spin_off_from','remake_of','version_of']))] 
    grpby = ['title_id','title_name','season_number','imdb_imdb_series_id','tier','content_category','category', 'effective_start_date']

    df_ref = df_md.groupby(by=grpby+['reference_type']).agg({'reference_title_id':'nunique'}).reset_index()
    df_ref = df_ref.pivot(index=grpby[:-1], columns='reference_type', values='reference_title_id')\
                .reset_index()
    df_ref = df_ref.rename(columns={'follows':'ref_follows',
                                    'spin_off_from':'ref_spin_off_from',
                                    'remake_of':'ref_remake_of',
                                   'version_of':'ref_version_of'})

    df_ref_ref = df_md.groupby(by=grpby+['reference_reference_type']).agg({'reference_reference_title_id':'nunique'}).reset_index()
    df_ref_ref = df_ref_ref.pivot(index=grpby[:-1], columns='reference_reference_type', values='reference_reference_title_id')\
                .reset_index()
    df_ref_ref = df_ref_ref.rename(columns={'featured_in':'ref_ref_featured_in'})


    grpby_title = ['title_id','title_name','season_number','tier','content_category','category', 'effective_start_date']
    df_actual = df_actual[grpby_title+['first_views']].groupby(by=grpby_title).sum().reset_index()
    
    col_ref = ['title_id', 'season_number','category','imdb_imdb_series_id','ref_follows','ref_spin_off_from','ref_remake_of', 'ref_version_of']
    col_ref_ref = ['title_id','season_number','category','ref_ref_featured_in']
    df_tot = df_actual.merge(df_ref[col_ref], how='inner', on=['title_id','season_number','category'])\
                        .merge(df_ref_ref[col_ref_ref], how='inner', on=['title_id','season_number','category'])
    df_tot['ref_tot'] = df_tot[['ref_follows','ref_spin_off_from','ref_remake_of','ref_version_of']].sum(axis=1)
    df_tot['ref_ref_tot'] = df_tot['ref_ref_featured_in']/df_tot['ref_tot']


    ## Stack series & non series 
    df_imdb = pd.concat([df_tot, df_s_tot]) 
    df_imdb = df_imdb.fillna(0)

    ## Winsorize 

    return df_imdb 

df_actual=pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/fv_actual_1025.csv')
munge_imdb_features(df, df_actual, True)
# df_imdb_past.to_csv('s3://datascience-hbo-users/users/tjung/psi/imdb_features_engineered.csv')


Unnamed: 0,title_id,title_name,season_number,tier,content_category,category,effective_start_date,first_views,imdb_imdb_series_id,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_ref_featured_in,ref_tot,ref_ref_tot
0,GX1oycwoNEsMslAEAAAAI,Dolittle,0,2,movies,Pay1,2020-11-15,37655.0,tt6673612,0.0,0.0,2.0,6.0,29.0,8.0,3.625
1,GX1ozeARCl6vDcwEAAAF9,The Call of the Wild,0,2,movies,Pay1,2020-11-29,26190.0,tt7504726,0.0,0.0,1.0,0.0,4.0,1.0,4.0
2,GX4YOrQsIwGNViQEAAAGH,A West Wing Special to Benefit When We All Vote,0,2,special,Specials,2020-10-15,126073.0,tt13180026,0.0,0.0,1.0,0.0,1.0,1.0,1.0
3,GX5A8JQnSPQ2QFgEAAAAC,Roald Dahl’s The Witches,0,1,movies,Scripted Features,2020-10-22,524524.0,tt0805647,0.0,0.0,1.0,0.0,22.0,1.0,22.0
4,GX7QU5AGnhiLDwwEAAAAj,The Fresh Prince of Bel-Air Reunion,0,1,special,Specials,2020-11-19,383903.0,tt13315308,1.0,0.0,0.0,0.0,95.0,1.0,95.0
5,GX9KHPw1OIMPCJgEAAAAD,Wonder Woman 1984,0,1,movies,Pay1,2021-05-13,70736.0,tt7126948,1.0,0.0,0.0,0.0,83.0,1.0,83.0
6,GX9KHPw1OIMPCJgEAAAAD,Wonder Woman 1984,0,1,movies,Popcorn,2020-12-25,2727097.0,tt7126948,1.0,0.0,0.0,0.0,83.0,1.0,83.0
7,GXtf0UwTqw8JHjQEAAAbT,Scoob!,0,2,movies,Scripted Features,2020-06-26,136110.0,tt3152592,37.0,0.0,0.0,2.0,141.0,39.0,3.615385
8,GXtq0PQyT7MPCwgEAAAaq,Birds of Prey: Harley Quinn,0,2,movies,Pay1,2020-08-16,81001.0,tt7713068,0.0,1.0,0.0,1.0,92.0,2.0,46.0
9,GXw3dswg2J5piwwEAAAbW,The Invisible Man,0,2,movies,Pay1,2020-09-20,49996.0,tt1051906,0.0,0.0,0.0,9.0,52.0,9.0,5.777778


In [11]:
grpby=['title_name','viewable_id','imdb_imdb_series_id','content_category','effective_start_date','reference_type']
df_ref = df.groupby(by=grpby).agg({'reference_title_id':'nunique'}).reset_index()
df_ref = df_ref.pivot(index=grpby[:-1], columns='reference_type', values='reference_title_id')\
            .reset_index()
df_ref = df_ref.rename(columns={'follows':'ref_follows',
                                'spin_off_from':'ref_spin_off_from','remake_of':'ref_remake_of',
                               'version_of':'ref_version_of', 'featured_in':'ref_featured_in'})

df_ref

reference_type,title_name,viewable_id,imdb_imdb_series_id,content_category,effective_start_date,ref_featured_in,ref_follows
0,Friends: The Reunion,GYJxCDACcN8PDewEAAAAG,tt11337862,special,2021-05-27,6,3


In [63]:
# ### get imdb data for future titles.  
# ### features:  imdb_pg available; 
# ### identify if there're prequels:  reference_type = 'follows', 'remake_of','spin_off_from'
# ### identify popularity of prequels: 'featured_in'
# df = df.fillna(0)

# grpby=['asset_title_long','viewable_id','imdb_id','genres','title_type','reference_type']
# df_ref = df.groupby(by=grpby).agg({'reference_title_id':'nunique'}).reset_index()
# df_ref = df_ref.pivot(index=grpby[:-1], columns='reference_type', values='reference_title_id')\
#             .reset_index()
# df_ref = df_ref.rename(columns={'viewable_id':'title_id','follows':'ref_follows',
#                                 'spin_off_from':'ref_spin_off_from','remake_of':'ref_remake_of',
#                                'version_of':'ref_version_of', 'featured_in':'ref_featured_in'})

# display(df_ref[df_ref.imdb_id=='tt11337862'])

# grpby=['asset_title_long','viewable_id','imdb_id','genres','title_type','reference_referece_type']
# df_ref_ref = df.groupby(by=grpby).agg({'title_id':'count'}).reset_index()
# df_ref_ref = df_ref_ref.pivot(index=grpby[:-1], columns='reference_referece_type', values='title_id')\
#             .reset_index()
# df_ref_ref = df_ref_ref.rename(columns={'viewable_id':'title_id','featured_in':'ref_ref_featured_in','spoofed_in':'ref_ref_spoofed_in'})

# grpby_title= ['tier','content_category','category','title_name', 'title_id','season_number',
#         'program_type']

# df_actuals=pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/fv_actual_1025.csv')
# df_actuals = df_actuals[['first_views'] + grpby_title].groupby(by=grpby_title).sum().reset_index()
# col_ref = ['title_id', 'title_type', 'imdb_id', 'asset_title_long', 'genres','ref_follows','ref_spin_off_from','ref_remake_of', 'ref_version_of','ref_featured_in']
# col_ref_ref = ['title_id','ref_ref_featured_in','ref_ref_spoofed_in']

# df_tot = df_actuals.merge(df_ref[col_ref], how='left', on='title_id')\
#                     .merge(df_ref_ref[col_ref_ref], how='left', on='title_id')
# print(df_tot.shape)
# display(df_tot.isnull().sum())
# display(df_tot.describe())

# display(df_tot[df_tot.imdb_id=='tt11337862']) ## friends 
# display(df_tot.head(2))
# df_tot = df_tot.fillna(0)


# df_tot.loc[(df_tot['ref_follows']>5), 'ref_follows'] = 1
# 

## Past data 
df_tot = pd.read_csv('s3://datascience-hbo-users/users/tjung/psi/imdb_features_engineered.csv')
display(df_tot.head())

df_imdb = df_tot.copy()
df_imdb['ref_tot'] = df_imdb[['ref_follows','ref_spin_off_from','ref_remake_of']].sum(axis=1)

display(df_imdb.groupby(by='content_category')[['season_number','ref_tot','ref_featured_in','ref_ref_featured_in']].describe(percentiles=percents))
display(df_imdb[df_imdb.category=='Popcorn'].groupby(by='content_category')[['season_number','ref_tot','ref_featured_in','ref_ref_featured_in']].describe(percentiles=percents))
display(df_imdb[df_imdb.category!='Popcorn'].groupby(by='content_category')[['season_number','ref_tot','ref_featured_in','ref_ref_featured_in']].describe(percentiles=percents))

display(df_imdb[(df_imdb.category!='Popcorn') & (df_imdb.content_category=='movies') & (df_imdb.ref_tot>3)])
display(df_imdb[(df_imdb.category!='Popcorn') & (df_imdb.content_category=='movies') & (df_imdb.ref_ref_featured_in>50)])

Unnamed: 0.1,Unnamed: 0,tier,content_category,category,title_name,title_id,season_number,program_type,first_views,title_type,imdb_id,asset_title_long,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in
0,0,1,movies,Pay1,Godzilla vs. Kong,GYFEzmwNES16GkQEAAAAC,0,acquired,68095,movie,tt5034838,Godzilla vs. Kong,"[""Action"",""Sci-Fi"",""Thriller""]",3.0,0.0,2.0,0.0,16.0,145.0,18.0
1,1,1,movies,Pay1,Mortal Kombat,GYGYKfQwaKLheqwEAAAEC,0,acquired,57288,movie,tt0293429,Mortal Kombat,"[""Action"",""Adventure"",""Fantasy"",""Sci-Fi"",""Thri...",0.0,1.0,1.0,0.0,10.0,176.0,62.0
2,2,1,movies,Pay1,Tenet,GYEq4eApHYpA4cwEAAAAC,0,acquired,43056,movie,tt6723592,Tenet,"[""Action"",""Sci-Fi"",""Thriller""]",0.0,0.0,0.0,0.0,18.0,4.0,0.0
3,3,1,movies,Pay1,Wonder Woman 1984,GX9KHPw1OIMPCJgEAAAAD,0,acquired,70736,movie,tt7126948,Wonder Woman 1984,"[""Action"",""Adventure"",""Fantasy""]",1.0,0.0,0.0,0.0,28.0,90.0,6.0
4,4,1,movies,Popcorn,Dune,GYUjdLgBiJp5otAEAAAAJ,0,acquired,56801,movie,tt1160419,Dune,"[""Action"",""Adventure"",""Drama"",""Sci-Fi""]",0.0,0.0,0.0,2.0,14.0,69.0,22.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2
movies,140.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,140.0,0.278571,0.759286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,3.83,5.0,5.0,140.0,3.942857,6.922772,0.0,0.0,0.0,0.0,0.0,1.0,4.0,13.0,18.1,28.0,42.0,42.0,140.0,30.328571,145.636803,0.0,0.0,0.0,0.0,0.0,0.0,6.25,75.9,126.95,273.49,1650.0,1650.0
series,179.0,5.452514,12.082662,1.0,1.0,1.0,1.0,1.0,1.0,2.0,11.4,43.1,50.22,52.0,52.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
special,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,0.185185,0.622466,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,1.0,2.48,3.0,3.0,27.0,0.333333,1.208941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,1.7,4.96,6.0,6.0,27.0,12.814815,50.916249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,66.1,210.18,251.0,251.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2
movies,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,0.764706,1.300452,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.6,4.52,5.0,5.0,17.0,7.823529,8.071828,0.0,0.0,0.0,0.6,2.0,4.0,12.0,18.0,22.4,26.88,28.0,28.0,17.0,55.058824,58.848822,0.0,0.0,0.0,0.0,0.0,22.0,90.0,133.6,151.2,171.04,176.0,176.0


Unnamed: 0_level_0,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,season_number,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_tot,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in,ref_ref_featured_in
Unnamed: 0_level_1,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max,count,mean,std,min,1%,5%,10%,25%,50%,75%,90%,95%,99%,100%,max
content_category,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2
movies,123.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123.0,0.211382,0.630535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,5.0,5.0,123.0,3.406504,6.608288,0.0,0.0,0.0,0.0,0.0,0.0,4.0,11.0,16.0,27.78,42.0,42.0,123.0,26.910569,153.669755,0.0,0.0,0.0,0.0,0.0,0.0,2.0,45.8,89.4,275.02,1650.0,1650.0
series,179.0,5.452514,12.082662,1.0,1.0,1.0,1.0,1.0,1.0,2.0,11.4,43.1,50.22,52.0,52.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
special,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27.0,0.185185,0.622466,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,1.0,2.48,3.0,3.0,27.0,0.333333,1.208941,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,1.7,4.96,6.0,6.0,27.0,12.814815,50.916249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4,66.1,210.18,251.0,251.0


Unnamed: 0.1,Unnamed: 0,tier,content_category,category,title_name,title_id,season_number,program_type,first_views,title_type,imdb_id,asset_title_long,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
0,0,1,movies,Pay1,Godzilla vs. Kong,GYFEzmwNES16GkQEAAAAC,0,acquired,68095,movie,tt5034838,Godzilla vs. Kong,"[""Action"",""Sci-Fi"",""Thriller""]",3.0,0.0,2.0,0.0,16.0,145.0,18.0,5.0


Unnamed: 0.1,Unnamed: 0,tier,content_category,category,title_name,title_id,season_number,program_type,first_views,title_type,imdb_id,asset_title_long,genres,ref_follows,ref_spin_off_from,ref_remake_of,ref_version_of,ref_featured_in,ref_ref_featured_in,ref_ref_spoofed_in,ref_tot
0,0,1,movies,Pay1,Godzilla vs. Kong,GYFEzmwNES16GkQEAAAAC,0,acquired,68095,movie,tt5034838,Godzilla vs. Kong,"[""Action"",""Sci-Fi"",""Thriller""]",3.0,0.0,2.0,0.0,16.0,145.0,18.0,5.0
1,1,1,movies,Pay1,Mortal Kombat,GYGYKfQwaKLheqwEAAAEC,0,acquired,57288,movie,tt0293429,Mortal Kombat,"[""Action"",""Adventure"",""Fantasy"",""Sci-Fi"",""Thri...",0.0,1.0,1.0,0.0,10.0,176.0,62.0,2.0
3,3,1,movies,Pay1,Wonder Woman 1984,GX9KHPw1OIMPCJgEAAAAD,0,acquired,70736,movie,tt7126948,Wonder Woman 1984,"[""Action"",""Adventure"",""Fantasy""]",1.0,0.0,0.0,0.0,28.0,90.0,6.0,1.0
11,11,1,movies,Scripted Features,Zack Snyder's Justice League,GYDAnZgCFQ8IJpQEAAAAN,0,original,1154969,movie,tt12361974,Zack Snyder's Justice League,"[""Action"",""Adventure"",""Fantasy"",""Sci-Fi""]",0.0,0.0,0.0,1.0,20.0,71.0,6.0,0.0
33,33,2,movies,Pay1,Birds of Prey: Harley Quinn,GXtq0PQyT7MPCwgEAAAaq,0,acquired,81001,movie,tt7713068,Birds of Prey: Harley Quinn,"[""Action"",""Adventure"",""Comedy"",""Crime""]",0.0,1.0,0.0,1.0,27.0,93.0,13.0,1.0
34,34,2,movies,Pay1,Dolittle,GX1oycwoNEsMslAEAAAAI,0,acquired,37655,movie,tt6673612,Dolittle,"[""Adventure"",""Comedy"",""Family"",""Fantasy""]",0.0,0.0,2.0,6.0,11.0,58.0,2.0,2.0
42,42,2,movies,Pay1,The Conjuring: The Devil Made Me Do It,GYOYX9gphVFOLNgEAAAQR,0,acquired,1784,movie,tt7069210,The Conjuring: The Devil Made Me Do It,"[""Horror"",""Mystery"",""Thriller""]",2.0,0.0,0.0,0.0,4.0,84.0,5.0,2.0
43,43,2,movies,Pay1,The Invisible Man,GXw3dswg2J5piwwEAAAbW,0,acquired,49996,movie,tt1051906,The Invisible Man,"[""Drama"",""Horror"",""Mystery"",""Sci-Fi"",""Thriller""]",0.0,0.0,0.0,9.0,23.0,51.0,16.0,0.0
62,62,2,movies,Scripted Features,Scoob!,GXtf0UwTqw8JHjQEAAAbT,0,acquired,136110,movie,tt3152592,Scoob!,"[""Animation"",""Adventure"",""Comedy"",""Crime"",""Fam...",1.0,0.0,0.0,2.0,6.0,277.0,234.0,1.0
172,172,3,movies,Pay1,Black Christmas,GXzFh-gkF3ALCOAEAAAdi,0,acquired,7053,movie,tt10481868,Black Christmas,"[""Horror"",""Mystery"",""Thriller""]",0.0,0.0,2.0,0.0,2.0,58.0,14.0,2.0


## Ref feature EDA