In [None]:
import pandas as pd
import numpy as np

### Get TAC of smartphone devices

In [None]:
%%bigquery device_df
-- Load the TAC device reference table into the pandas frame `device_df`.
-- There is no WHERE clause: every row of the table is returned. De-duplication per
-- tac_id and the smartphone classification are done afterwards in pandas.
SELECT 
    A.tac_id,
    A.type_txt Dvc_Name,  -- implicit alias: the column arrives as `Dvc_Name`
    A.proj_id,
    A.proj_char_id,       -- used later as the tie-breaker when keeping one row per tac_id
    A.techno_txt,         -- free text; later parsed for grey-market device type / VoLTE support
    A.market_typ,
    A.feature_txt,
    A.years_since_launch, 
    A.ta_dt, 
    A.platform_txt,
    A.gsm_gprs_edge_bands_supported_txt,
    A.umts_bands_supported_txt,
    A.dual_carr_bands_supported_txt, 
    A.lte_bands_supported_txt, 
    A.volte_dvc_ind,
    A.volte_ind,
    A.dvc_typ

FROM  
    `cto-wln-sa-data-pr-bb5283.ref_table.bq_exttable_hive_tac_device_type` A

In [None]:
device_df.head(1)

In [None]:
device_df.dvc_typ.value_counts(dropna=False)

In [None]:
# Keep exactly one row per tac_id: after sorting by (tac_id, proj_char_id) the
# row that sorts last within each tac_id is the one retained.
dvc = (
    device_df
    .copy()
    .sort_values(by=['tac_id', 'proj_char_id'])
    .drop_duplicates(subset='tac_id', keep='last')
)

In [None]:
# Classify every TAC's VoLTE capability. The rules are applied in order, so a
# later rule overrides an earlier one for the rows it matches.
dvc['VoLTE_Capable'] = 'Non-VoLTE'

# Grey Market: capability is only available as free text inside techno_txt.
# na=False makes a missing techno_txt an explicit non-match instead of leaving
# NaN in the boolean mask.
is_grey_market = dvc['market_typ'] == 'Grey Market'
advertises_volte = dvc['techno_txt'].str.contains("VOLTE: Supported", case=False, na=False)
dvc.loc[is_grey_market & advertises_volte, 'VoLTE_Capable'] = 'VoLTE'

# TELUS: driven by the explicit volte_ind indicator.
dvc.loc[dvc['volte_ind'] == 'Supported', 'VoLTE_Capable'] = 'VoLTE'
dvc.loc[dvc['volte_ind'] == 'Supported in Future MR', 'VoLTE_Capable'] = 'Non-VoLTE For TELUS'

# Number of distinct TACs classified as VoLTE.
dvc.loc[dvc['VoLTE_Capable'] == 'VoLTE', 'tac_id'].nunique()

In [None]:
dvc.volte_ind.value_counts(dropna=False)

In [None]:
dvc.VoLTE_Capable.value_counts(dropna=False)

In [None]:
ids = dvc["tac_id"]
dvc[ids.isin(ids[ids.duplicated()])].sort_values("tac_id")

In [None]:
# Grey-market device type = first '+++'-separated token of techno_txt.
# Raw string: '\+' inside a plain string literal is an invalid escape sequence.
dvc['Device_TypeG'] = dvc['techno_txt'].str.split(r'\+\+\+').str[0]
# Use dvc_typ everywhere except on Grey Market rows, which take Device_TypeG instead.
dvc['Device_Type'] = dvc['dvc_typ'].where(dvc['market_typ'] != 'Grey Market', dvc['Device_TypeG'])

# Smarthub TACs
## more than this ... but aren't using smarthubs for now
# contact Shahzeb if needed for TACs
# NOTE(review): these are strings, so isin() below only matches if dvc.tac_id is
# string-typed as well — confirm.
tac_s = ['86605301', '86633903', '86242503', '86432902', '86459603', '99000896', '86886302']

# Create Device_Group: raw device type -> reporting group.
# Device types missing from this mapping end up with Device_Group = NaN.
dvc_grp = {
    'Handset': 'Smartphone',
    'Portable(include PDA)': 'Connected Devices',
    'Modem': 'Dongle',
    'Module': 'Dongle',
    'Tablet': 'Connected Devices',
    'Connected Computer': 'Connected Devices',
    'Smartphone': 'Smartphone',
    'Notebook': 'Connected Devices',
    'SmartHub': 'Smarthub',
    'Smarthub': 'Smarthub',
    'Specialty': 'Dongle',
    'Handheld': 'Connected Devices',
    'WLAN Router': 'Connected Devices',
    'Dongle': 'Dongle',
    'Smartwatch': 'Connected Devices',
    'WHP': 'Connected Devices',
    'IoT Device': 'Connected Devices - IoT',
    'Mobile PhoneFeature phone': 'Smartphone',
    'Vehicle': 'Connected Devices - Vehicle',
}

# The hand-maintained Smarthub TAC list overrides whatever type the table reports.
dvc['Device_Type'] = np.where(dvc.tac_id.isin(tac_s), 'Smarthub', dvc.Device_Type)
dvc['Device_Group'] = dvc['Device_Type'].map(dvc_grp)

In [None]:
dvc['Device_Group'].value_counts(dropna=False)

In [None]:
smartphone_tac_ids = dvc.loc[dvc.Device_Group=='Smartphone'][['tac_id']].reset_index(drop=True)

In [None]:
dvc.loc[dvc.Device_Group=='Smartphone']

### Get usage data for WHSIA customers

In [None]:
%%bigquery df

-- Streaming-app usage for WHSIA customers: one row per IMSI with the average of its
-- monthly download / upload totals over the listed streaming apps.
-- NOTE(review): this was described as "past 3 months", but no event_dt filter is
-- applied, so every month present in the event table is included — confirm the
-- intended window.

WITH whsia_usage AS (
  SELECT 
    imsi_num,
    event_dt,
    EXTRACT(MONTH FROM event_dt) as MONTH,
    EXTRACT(YEAR FROM event_dt) as YEAR,
    -- volume divided by 1e6; presumably bytes -> MB, confirm the source unit
    dl_volume_qty/1000000.0 as dl_mb,
    ul_volume_qty/1000000.0 as ul_mb,
  FROM `cio-datahub-enterprise-pr-183a.ent_usage_unrated_ott.bq_ott_app_event`
  WHERE 
    -- Keep only IMSIs whose primary price plan is one of the WHSIA SOC codes
    imsi_num IN (SELECT CAST(IMSI as STRING) as IMSI 
                FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_product_instance_wls` 
                WHERE PRIM_PRICE_PLAN_CD IN (SELECT whsia_soc FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_whsia_soc_codes`)
              )
    AND
    -- Streaming apps counted as "streaming usage"
    app_nm in ('apple adaptive http video', 'discovery','netflix video', 'amazon video', 'disney plus', 'crave' ,'paramount'  )
    
), 

-- Total monthly usage per IMSI (rounded to whole MB)
total_monthly_usage AS (
  SELECT
    imsi_num,
    YEAR,
    MONTH,
    ROUND(SUM(dl_mb)) AS total_dl_mb_monthly,
    ROUND(SUM(ul_mb)) AS total_ul_mb_monthly
  FROM
    whsia_usage
  GROUP BY 
    imsi_num, YEAR, MONTH

)

-- Average monthly usage per IMSI. Only months in which the IMSI has at least one
-- matching event produce a row above, so months with no usage are not averaged in.
SELECT 
  imsi_num,
  AVG(total_dl_mb_monthly) as avg_monthly_dl_mb,
  AVG(total_ul_mb_monthly) as avg_monthly_ul_mb
FROM total_monthly_usage
GROUP BY imsi_num

In [None]:
# (rows, columns) of the per-IMSI streaming averages; one row per IMSI.
df.shape

In [None]:
%%bigquery whsia_total_usage_df

-- Total app data usage for WHSIA customers (all apps, no app_nm filter): one row per
-- IMSI with the average of its monthly download / upload totals.
-- NOTE(review): this was described as "past 3 months", but no event_dt filter is
-- applied, so every month present in the event table is included — confirm the
-- intended window.

WITH whsia_usage AS (
  SELECT 
    imsi_num,
    event_dt,
    EXTRACT(MONTH FROM event_dt) as MONTH,
    EXTRACT(YEAR FROM event_dt) as YEAR,
    -- volume divided by 1e6; presumably bytes -> MB, confirm the source unit
    dl_volume_qty/1000000.0 as dl_mb,
    ul_volume_qty/1000000.0 as ul_mb,
  FROM `cio-datahub-enterprise-pr-183a.ent_usage_unrated_ott.bq_ott_app_event`
  WHERE 
    -- Keep only IMSIs whose primary price plan is one of the WHSIA SOC codes
    imsi_num IN (SELECT CAST(IMSI as STRING) as IMSI 
                FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_product_instance_wls` 
                WHERE PRIM_PRICE_PLAN_CD IN (SELECT whsia_soc FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_whsia_soc_codes`)
              )

    
), 

-- Total monthly usage per IMSI (rounded to whole MB)
total_monthly_usage AS (
  SELECT
    imsi_num,
    YEAR,
    MONTH,
    ROUND(SUM(dl_mb)) AS total_dl_mb_monthly,
    ROUND(SUM(ul_mb)) AS total_ul_mb_monthly
  FROM
    whsia_usage
  GROUP BY 
    imsi_num, YEAR, MONTH

)

-- Average monthly usage per IMSI. Only months in which the IMSI has at least one
-- event produce a row above, so months with no usage are not averaged in.
SELECT 
  imsi_num,
  AVG(total_dl_mb_monthly) as avg_monthly_dl_mb,
  AVG(total_ul_mb_monthly) as avg_monthly_ul_mb
FROM total_monthly_usage
GROUP BY imsi_num

### whsia customers

In [None]:
%%bigquery whsia_cust_df
-- WHSIA customers: every wireless product instance that sits on a billing account
-- (BAN) holding at least one WHSIA price plan.
WITH WHSIA_BAN AS (

  -- DISTINCT: a BAN with several WHSIA lines must appear only once here, otherwise
  -- the join below repeats each product instance once per WHSIA line on that BAN.
  SELECT DISTINCT BILLING_ACCOUNT_NUMBER
  FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_product_instance_wls`
  WHERE PRIM_PRICE_PLAN_CD IN (SELECT whsia_soc FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_whsia_soc_codes`)

)

SELECT
  A.BILLING_ACCOUNT_NUMBER,
  A.MSISDN,
  CAST(A.IMSI AS STRING) AS IMSI,      -- string, to join against imsi_num in the usage frames
  A.PRIM_PRICE_PLAN_TXT,
  A.DEVICE_PROD_EQUIP_TXT,
  CAST(A.TAC_ID AS STRING) AS TAC_ID   -- string, to compare against the device table's tac_id
FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_product_instance_wls` A
INNER JOIN WHSIA_BAN B
ON A.BILLING_ACCOUNT_NUMBER = B.BILLING_ACCOUNT_NUMBER
ORDER BY A.BILLING_ACCOUNT_NUMBER

In [None]:
whsia_cust_df.BILLING_ACCOUNT_NUMBER.nunique()

In [None]:
whsia_cust_df.loc[whsia_cust_df.BILLING_ACCOUNT_NUMBER==218]

In [None]:
whsia_cust_df['has_telus_cell_phone_plan']=0

In [None]:
whsia_cust_df.loc[whsia_cust_df.TAC_ID.isin(smartphone_tac_ids.tac_id), 'has_telus_cell_phone_plan'] = 1

In [None]:
# Number of smartphone product instances per BAN.
# Select the flag column *before* aggregating: a bare groupby().sum() also tries to
# aggregate MSISDN / IMSI / plan-text columns, which is wasteful and, depending on
# the pandas version, concatenates or drops them or raises.
cust_with_phone_plan = (
    whsia_cust_df
    .groupby('BILLING_ACCOUNT_NUMBER', as_index=False)['has_telus_cell_phone_plan']
    .sum()
)

Customers with a TELUS phone plan (BANs with at least one product instance on a smartphone TAC)

In [None]:
whsia_ban_with_cell_plan = cust_with_phone_plan.loc[cust_with_phone_plan.has_telus_cell_phone_plan!=0].reset_index(drop=True)
whsia_ban_with_cell_plan['has_telus_cell_phone_plan'] = 1

In [None]:
whsia_ban_with_cell_plan

In [None]:
%%bigquery whsia_ffh_cust_df

-- WHSIA customers that sit on an FFH BAN, each tagged with whether a mobility BAN is
-- mapped to it.
-- NOTE(review): whsia_ffh_cust_df is not referenced by any later cell in this
-- notebook — confirm whether it is still needed.
WITH FFH_CUST AS (
SELECT 
  prod_instnc_ts, 
  bus_prod_instnc_id, 
  pi_prod_instnc_resrc_str,
  bus_prod_instnc_src_id, 
  BACCT_BUS_BACCT_NUM AS FFH_BAN,  
  pp_bus_pp_catlg_itm_cd , 
  pp_catlg_itm_nm
FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` 
WHERE 
  prod_instnc_ts = (SELECT MAX(prod_instnc_ts) FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht` ) AND -- only the most recent snapshot in the table
  pp_bus_pp_catlg_itm_cd IN (SELECT whsia_soc FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_whsia_soc_codes`) AND -- only WHSIA SOC codes
  bus_prod_instnc_src_id=1001 -- source id used here to select FFH BANs


)

-- LEFT JOIN keeps every FFH customer row: MOB_BAN is NULL when no mapping row exists
-- (HAS_MOB_BAN = 'N'); otherwise it carries the mapped mobility BAN (HAS_MOB_BAN = 'Y').
-- An FFH_BAN with several mapping rows yields one output row per mapping.
SELECT
  A.*,
  B.MOB_BAN,
  CASE WHEN B.MOB_BAN IS NULL THEN 'N' ELSE 'Y' END AS HAS_MOB_BAN
FROM FFH_CUST A 
LEFT JOIN  `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_mnh_ban_mapping` B
ON A.FFH_BAN = B.FFH_BAN 
ORDER BY HAS_MOB_BAN 

### Merging Data

In [None]:
# Preview only: rename() returns a new frame and the result is not assigned, so
# `df` keeps its original column names after this cell.
df.rename(columns={'avg_monthly_dl_mb' : 'streaming_avg_monthly_dl_mb' , 'avg_monthly_ul_mb' : 'streaming__avg_monthly_ul_mb'})

In [None]:
# Preview only: the renamed frame is displayed but not stored.
whsia_total_usage_df.rename(columns={'avg_monthly_dl_mb' : 'total_avg_monthly_dl_mb' , 'avg_monthly_ul_mb' : 'total_avg_monthly_ul_mb'})

In [None]:
# Put streaming and total usage side by side, one row per IMSI. The outer join keeps
# IMSIs that appear in only one of the two frames.
streaming_usage = df.rename(columns={
    'avg_monthly_dl_mb': 'streaming_avg_monthly_dl_mb',
    # NOTE(review): double underscore kept as-is; it is the column name written to the CSV
    'avg_monthly_ul_mb': 'streaming__avg_monthly_ul_mb',
})
total_usage = whsia_total_usage_df.rename(columns={
    'avg_monthly_dl_mb': 'total_avg_monthly_dl_mb',
    'avg_monthly_ul_mb': 'total_avg_monthly_ul_mb',
})
df_merged = streaming_usage.merge(total_usage, how='outer', on='imsi_num')

In [None]:
df_merged['%_dl_streaming']=df_merged.streaming_avg_monthly_dl_mb/df_merged.total_avg_monthly_dl_mb
df_merged=df_merged.fillna(0)

In [None]:
df_merged

In [None]:
df_merged2 = pd.merge(df_merged, whsia_cust_df[['BILLING_ACCOUNT_NUMBER','MSISDN', 'IMSI']].drop_duplicates(['IMSI', 'MSISDN', 'BILLING_ACCOUNT_NUMBER'])
                      , how='left', left_on='imsi_num', right_on='IMSI')

In [None]:
df_merged2 = pd.merge(df_merged2, whsia_ban_with_cell_plan, how='left', on='BILLING_ACCOUNT_NUMBER')

In [None]:
df_merged2=df_merged2.fillna(0)

In [None]:
# Checkpoint the merged summary as CSV (relative path: current working directory).
df_merged2.to_csv('whsia_streaming_data_summary.csv', index=False)

In [None]:
# Reload the checkpoint. Column dtypes are re-inferred by read_csv, so they may differ
# from the in-memory frame that was written (e.g. identifier strings that look numeric).
df_merged2= pd.read_csv('whsia_streaming_data_summary.csv')

In [None]:
# First rows of the reloaded frame.
df_merged2.head()

In [None]:
# (rows, columns) of the reloaded frame.
df_merged2.shape

In [None]:
df_merged2['use_streaming']=0
df_merged2['high_streaming']=0

In [None]:
#Number of customers who use streaming services
df_merged2.loc[df_merged2.streaming_avg_monthly_dl_mb > 0, 'use_streaming'] = int(1)
df_merged2.loc[df_merged2.streaming_avg_monthly_dl_mb > 0].shape

In [None]:
df_merged2.loc[df_merged2.streaming_avg_monthly_dl_mb > 0].streaming_avg_monthly_dl_mb.mean()

In [None]:
df_merged2.loc[df_merged2.streaming_avg_monthly_dl_mb > 0].has_telus_cell_phone_plan.sum()

Number of customers whose streaming share of download data is at least 30%

In [None]:
df_merged2.loc[df_merged2['%_dl_streaming'] >= 0.3, 'high_streaming']=int(1)

In [None]:
df_merged2.loc[df_merged2['%_dl_streaming'] >= 0.3].shape

In [None]:
df_merged2.loc[df_merged2['%_dl_streaming'] >= 0.3].has_telus_cell_phone_plan.sum()

In [None]:
df_merged2.has_telus_cell_phone_plan = df_merged2.has_telus_cell_phone_plan.astype(int)

In [None]:
df_merged2[['MSISDN', 'has_telus_cell_phone_plan', 'use_streaming', 'high_streaming']]

In [None]:
df_merged2[['MSISDN', 'has_telus_cell_phone_plan', 'use_streaming', 'high_streaming']].to_csv('whsia_msisdn_usage_summary.csv', index=False)

In [None]:
df_merged2.loc[df_merged2.high_streaming==1].has_telus_cell_phone_plan.sum()

In [None]:
df_merged2.loc[df_merged2.use_streaming==1].has_telus_cell_phone_plan.sum()

### Scrap

In [None]:
# Displays the per-IMSI streaming averages frame.
df

In [None]:
# NOTE(review): the `%%bigquery df` query returns only imsi_num, avg_monthly_dl_mb and
# avg_monthly_ul_mb, so MONTH / dl_mb do not exist on `df` and this cell raises
# AttributeError on a fresh run. Presumably written against an earlier per-event
# version of the query — confirm, then remove or repoint.
df.loc[(df.imsi_num=='302220320348350') & (df.MONTH==4)].dl_mb.sum()

In [None]:
# NOTE(review): same stale column references (MONTH, dl_mb) as the previous cell.
df.loc[(df.imsi_num=='302220320348350') & (df.MONTH==3)].dl_mb.sum()

In [None]:
# NOTE(review): same stale column references (MONTH, dl_mb) as the previous cells.
df.loc[(df.imsi_num=='302220545119884') & (df.MONTH==3)].dl_mb.sum()

Total usage per IMSI per month

In [None]:
# Per-IMSI, per-month totals of dl_mb / ul_mb.
# NOTE(review): YEAR, MONTH, dl_mb and ul_mb are not columns of the frame returned by
# the `%%bigquery df` query (imsi_num, avg_monthly_dl_mb, avg_monthly_ul_mb), so this
# fails on a fresh run — presumably needs the raw per-event rows; confirm.
df.groupby(['imsi_num', 'YEAR', 'MONTH']).sum().reset_index()[['imsi_num' , 'MONTH', 'YEAR', 'dl_mb','ul_mb']]

Average monthly usage since March 2022

In [None]:
# Monthly totals per IMSI, then the mean of those monthly totals per IMSI.
# NOTE(review): YEAR, MONTH, dl_mb and ul_mb are not columns of the frame returned by
# the `%%bigquery df` query, so this fails on a fresh run — presumably needs the raw
# per-event rows; confirm.
df.groupby(['imsi_num', 'YEAR', 'MONTH']).sum().reset_index().groupby('imsi_num').mean().reset_index()[['imsi_num' ,  'dl_mb','ul_mb']]

### Number of Netflix, Apple TV, Discovery Users for WHSIA

In [None]:
%%bigquery  whsia_imsi_df

-- IMSIs on a WHSIA price plan, used further down to restrict the per-app usage frames.
-- NOTE(review): this reads PROD_INSTNC_ALIAS_STR from the `_profl` table, whereas the
-- usage queries above read IMSI from the `_wls` table — confirm both yield the same
-- IMSI population.
SELECT CAST(PROD_INSTNC_ALIAS_STR as STRING) as IMSI 
FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_hpbi_product_instance_profl` 
WHERE PRIM_PRICE_PLAN_CD IN (SELECT whsia_soc FROM `cto-wln-sa-data-pr-bb5283.ref_table.bq_whsia_soc_codes`)

### App Usage

In [None]:
%%bigquery apple_tv_df

-- Apple TV usage via the shared app-usage stored procedure. The procedure body is
-- not part of this notebook; the groupings below simply follow the variable names.

-- Usage event source
DECLARE v_app_proj_name STRING DEFAULT 'cio-datahub-enterprise-pr-183a';
DECLARE v_app_dataset_name STRING DEFAULT 'ent_usage_unrated_ott';
DECLARE v_app_table_name STRING DEFAULT 'bq_ott_app_event';
DECLARE v_app_column_name STRING DEFAULT 'app_nm';
DECLARE v_dl_vol_column_name STRING DEFAULT 'dl_volume_qty';
DECLARE v_ul_vol_column_name STRING DEFAULT 'ul_volume_qty';
DECLARE v_date_column_name STRING DEFAULT 'event_dt';
DECLARE v_id_app_column_name STRING DEFAULT 'imsi_num';

-- Customer mapping
DECLARE v_cust_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_cust_table_name STRING DEFAULT 'check_cust_mapping';
DECLARE v_id_column_name STRING DEFAULT 'IMSI';

-- App category mapping
DECLARE v_app_mapping_dataset STRING DEFAULT 'app_cat_map';
DECLARE v_app_mapping_table STRING DEFAULT 'bq_app_cat_mapping_latest_view';
DECLARE v_app_tier1_column_name STRING DEFAULT 'tier_1';
DECLARE v_app_tier2_column_name STRING DEFAULT 'tier_2';

-- Application filter
DECLARE v_application_filter_type STRING DEFAULT 'by_app_name';
DECLARE v_application_wishlist ARRAY<STRING> DEFAULT ['apple adaptive http video'];
DECLARE v_application_blacklist ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier1 ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier2 ARRAY<STRING> DEFAULT [];

-- Date filter: v_today_date defaults to the run date, so results depend on when
-- the cell is executed
DECLARE v_date_filter_type STRING DEFAULT 'window';
DECLARE v_today_date DATE DEFAULT current_date();
DECLARE v_date_len INT64 DEFAULT 3;
DECLARE v_date_step STRING DEFAULT 'MONTH'; -- Define duration
DECLARE v_start_date DATE DEFAULT NULL;
DECLARE v_end_date DATE DEFAULT NULL;

-- Output
DECLARE v_segment_name STRING DEFAULT 'appletv';
DECLARE v_output_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_output_table_name STRING DEFAULT 'appletv_usage_per_imsi';


CALL `cto-wln-sa-data-pr-bb5283.customer_personas_features.app_usage_without_cust_mapping`(
  v_app_proj_name, v_app_dataset_name, v_app_table_name,
  v_app_column_name, v_dl_vol_column_name, v_ul_vol_column_name,
  v_date_column_name, v_id_app_column_name,
  v_cust_dataset_name, v_cust_table_name, v_id_column_name,
  v_app_mapping_dataset, v_app_mapping_table,
  v_app_tier1_column_name, v_app_tier2_column_name,
  v_application_filter_type, v_application_wishlist, v_application_blacklist,
  v_app_cat_tier1, v_app_cat_tier2,
  v_date_filter_type, v_today_date, v_date_len, v_date_step,
  v_start_date, v_end_date,
  v_segment_name, v_output_dataset_name, v_output_table_name
);


In [None]:
%%bigquery discovery_df

-- Discovery usage via the shared app-usage stored procedure. The procedure body is
-- not part of this notebook; the groupings below simply follow the variable names.

-- Usage event source
DECLARE v_app_proj_name STRING DEFAULT 'cio-datahub-enterprise-pr-183a';
DECLARE v_app_dataset_name STRING DEFAULT 'ent_usage_unrated_ott';
DECLARE v_app_table_name STRING DEFAULT 'bq_ott_app_event';
DECLARE v_app_column_name STRING DEFAULT 'app_nm';
DECLARE v_dl_vol_column_name STRING DEFAULT 'dl_volume_qty';
DECLARE v_ul_vol_column_name STRING DEFAULT 'ul_volume_qty';
DECLARE v_date_column_name STRING DEFAULT 'event_dt';
DECLARE v_id_app_column_name STRING DEFAULT 'imsi_num';

-- Customer mapping
DECLARE v_cust_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_cust_table_name STRING DEFAULT 'check_cust_mapping';
DECLARE v_id_column_name STRING DEFAULT 'IMSI';

-- App category mapping
DECLARE v_app_mapping_dataset STRING DEFAULT 'app_cat_map';
DECLARE v_app_mapping_table STRING DEFAULT 'bq_app_cat_mapping_latest_view';
DECLARE v_app_tier1_column_name STRING DEFAULT 'tier_1';
DECLARE v_app_tier2_column_name STRING DEFAULT 'tier_2';

-- Application filter
DECLARE v_application_filter_type STRING DEFAULT 'by_app_name';
DECLARE v_application_wishlist ARRAY<STRING> DEFAULT ['discovery'];
DECLARE v_application_blacklist ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier1 ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier2 ARRAY<STRING> DEFAULT [];

-- Date filter: v_today_date defaults to the run date, so results depend on when
-- the cell is executed
DECLARE v_date_filter_type STRING DEFAULT 'window';
DECLARE v_today_date DATE DEFAULT current_date();
DECLARE v_date_len INT64 DEFAULT 3;
DECLARE v_date_step STRING DEFAULT 'MONTH'; -- Define duration
DECLARE v_start_date DATE DEFAULT NULL;
DECLARE v_end_date DATE DEFAULT NULL;

-- Output
DECLARE v_segment_name STRING DEFAULT 'discovery';
DECLARE v_output_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_output_table_name STRING DEFAULT 'discovery_usage_per_imsi';


CALL `cto-wln-sa-data-pr-bb5283.customer_personas_features.app_usage_without_cust_mapping`(
  v_app_proj_name, v_app_dataset_name, v_app_table_name,
  v_app_column_name, v_dl_vol_column_name, v_ul_vol_column_name,
  v_date_column_name, v_id_app_column_name,
  v_cust_dataset_name, v_cust_table_name, v_id_column_name,
  v_app_mapping_dataset, v_app_mapping_table,
  v_app_tier1_column_name, v_app_tier2_column_name,
  v_application_filter_type, v_application_wishlist, v_application_blacklist,
  v_app_cat_tier1, v_app_cat_tier2,
  v_date_filter_type, v_today_date, v_date_len, v_date_step,
  v_start_date, v_end_date,
  v_segment_name, v_output_dataset_name, v_output_table_name
);


In [None]:
%%bigquery netflix_df

-- Netflix usage via the shared app-usage stored procedure. The procedure body is
-- not part of this notebook; the groupings below simply follow the variable names.

-- Usage event source
DECLARE v_app_proj_name STRING DEFAULT 'cio-datahub-enterprise-pr-183a';
DECLARE v_app_dataset_name STRING DEFAULT 'ent_usage_unrated_ott';
DECLARE v_app_table_name STRING DEFAULT 'bq_ott_app_event';
DECLARE v_app_column_name STRING DEFAULT 'app_nm';
DECLARE v_dl_vol_column_name STRING DEFAULT 'dl_volume_qty';
DECLARE v_ul_vol_column_name STRING DEFAULT 'ul_volume_qty';
DECLARE v_date_column_name STRING DEFAULT 'event_dt';
DECLARE v_id_app_column_name STRING DEFAULT 'imsi_num';

-- Customer mapping
DECLARE v_cust_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_cust_table_name STRING DEFAULT 'check_cust_mapping';
DECLARE v_id_column_name STRING DEFAULT 'IMSI';

-- App category mapping
DECLARE v_app_mapping_dataset STRING DEFAULT 'app_cat_map';
DECLARE v_app_mapping_table STRING DEFAULT 'bq_app_cat_mapping_latest_view';
DECLARE v_app_tier1_column_name STRING DEFAULT 'tier_1';
DECLARE v_app_tier2_column_name STRING DEFAULT 'tier_2';

-- Application filter
DECLARE v_application_filter_type STRING DEFAULT 'by_app_name';
DECLARE v_application_wishlist ARRAY<STRING> DEFAULT ['netflix video'];
DECLARE v_application_blacklist ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier1 ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier2 ARRAY<STRING> DEFAULT [];

-- Date filter: v_today_date defaults to the run date, so results depend on when
-- the cell is executed
DECLARE v_date_filter_type STRING DEFAULT 'window';
DECLARE v_today_date DATE DEFAULT current_date();
DECLARE v_date_len INT64 DEFAULT 3;
DECLARE v_date_step STRING DEFAULT 'MONTH'; -- Define duration
DECLARE v_start_date DATE DEFAULT NULL;
DECLARE v_end_date DATE DEFAULT NULL;

-- Output
DECLARE v_segment_name STRING DEFAULT 'netflix';
DECLARE v_output_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_output_table_name STRING DEFAULT 'netflix_usage_per_imsi';


CALL `cto-wln-sa-data-pr-bb5283.customer_personas_features.app_usage_without_cust_mapping`(
  v_app_proj_name, v_app_dataset_name, v_app_table_name,
  v_app_column_name, v_dl_vol_column_name, v_ul_vol_column_name,
  v_date_column_name, v_id_app_column_name,
  v_cust_dataset_name, v_cust_table_name, v_id_column_name,
  v_app_mapping_dataset, v_app_mapping_table,
  v_app_tier1_column_name, v_app_tier2_column_name,
  v_application_filter_type, v_application_wishlist, v_application_blacklist,
  v_app_cat_tier1, v_app_cat_tier2,
  v_date_filter_type, v_today_date, v_date_len, v_date_step,
  v_start_date, v_end_date,
  v_segment_name, v_output_dataset_name, v_output_table_name
);


In [None]:
%%bigquery competitor_streaming_df

-- Competitor streaming usage (Amazon Video, Disney Plus, Crave, Paramount) via the
-- shared app-usage stored procedure. The procedure body is not part of this
-- notebook; the groupings below simply follow the variable names.

-- Usage event source
DECLARE v_app_proj_name STRING DEFAULT 'cio-datahub-enterprise-pr-183a';
DECLARE v_app_dataset_name STRING DEFAULT 'ent_usage_unrated_ott';
DECLARE v_app_table_name STRING DEFAULT 'bq_ott_app_event';
DECLARE v_app_column_name STRING DEFAULT 'app_nm';
DECLARE v_dl_vol_column_name STRING DEFAULT 'dl_volume_qty';
DECLARE v_ul_vol_column_name STRING DEFAULT 'ul_volume_qty';
DECLARE v_date_column_name STRING DEFAULT 'event_dt';
DECLARE v_id_app_column_name STRING DEFAULT 'imsi_num';

-- Customer mapping
DECLARE v_cust_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_cust_table_name STRING DEFAULT 'check_cust_mapping';
DECLARE v_id_column_name STRING DEFAULT 'IMSI';

-- App category mapping
DECLARE v_app_mapping_dataset STRING DEFAULT 'app_cat_map';
DECLARE v_app_mapping_table STRING DEFAULT 'bq_app_cat_mapping_latest_view';
DECLARE v_app_tier1_column_name STRING DEFAULT 'tier_1';
DECLARE v_app_tier2_column_name STRING DEFAULT 'tier_2';

-- Application filter
DECLARE v_application_filter_type STRING DEFAULT 'by_app_name';
DECLARE v_application_wishlist ARRAY<STRING> DEFAULT ['amazon video', 'disney plus', 'crave' ,'paramount' ];
DECLARE v_application_blacklist ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier1 ARRAY<STRING> DEFAULT [];
DECLARE v_app_cat_tier2 ARRAY<STRING> DEFAULT [];

-- Date filter: v_today_date defaults to the run date, so results depend on when
-- the cell is executed
DECLARE v_date_filter_type STRING DEFAULT 'window';
DECLARE v_today_date DATE DEFAULT current_date();
DECLARE v_date_len INT64 DEFAULT 3;
DECLARE v_date_step STRING DEFAULT 'MONTH'; -- Define duration
DECLARE v_start_date DATE DEFAULT NULL;
DECLARE v_end_date DATE DEFAULT NULL;

-- Output
DECLARE v_segment_name STRING DEFAULT 'competitor_streaming';
DECLARE v_output_dataset_name STRING DEFAULT 'temp_workspace';
DECLARE v_output_table_name STRING DEFAULT 'competitor_streaming_usage_per_imsi';


CALL `cto-wln-sa-data-pr-bb5283.customer_personas_features.app_usage_without_cust_mapping`(
  v_app_proj_name, v_app_dataset_name, v_app_table_name,
  v_app_column_name, v_dl_vol_column_name, v_ul_vol_column_name,
  v_date_column_name, v_id_app_column_name,
  v_cust_dataset_name, v_cust_table_name, v_id_column_name,
  v_app_mapping_dataset, v_app_mapping_table,
  v_app_tier1_column_name, v_app_tier2_column_name,
  v_application_filter_type, v_application_wishlist, v_application_blacklist,
  v_app_cat_tier1, v_app_cat_tier2,
  v_date_filter_type, v_today_date, v_date_len, v_date_step,
  v_start_date, v_end_date,
  v_segment_name, v_output_dataset_name, v_output_table_name
);


In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.merge(apple_tv_df, netflix_df, how='outer', on='IMSI')
df = pd.merge(df, discovery_df, how='outer', on='IMSI')
df = pd.merge(df, competitor_streaming_df, how='outer', on='IMSI')
# df = pd.merge(df, general_streaming_df, how='outer', on='IMSI')
df = df.fillna(0)

In [None]:
processed_df = df.loc[df.IMSI.isin(whsia_imsi_df.IMSI)].reset_index(drop=True) #whsia

In [None]:
# One row per WHSIA IMSI with 0/1 flags describing which streaming services it uses.
# Explicit copy: processed_df[['IMSI']] is a slice of processed_df, and adding columns
# to such a slice triggers SettingWithCopyWarning.
app_usage_summary_df2 = processed_df[['IMSI']].copy()

# Per-service masks. "on" = used on at least one day, "off" = exactly zero days.
apple_on = processed_df['appletv_days_frequency'] >= 1
apple_off = processed_df['appletv_days_frequency'] == 0
netflix_on = processed_df['netflix_days_frequency'] >= 1
netflix_off = processed_df['netflix_days_frequency'] == 0
discovery_on = processed_df['discovery_days_frequency'] >= 1
discovery_off = processed_df['discovery_days_frequency'] == 0
competitor_on = processed_df['competitor_streaming_days_frequency'] >= 1

# Mutually exclusive combinations of the three tracked services.
only_apple = apple_on & netflix_off & discovery_off
only_netflix = apple_off & netflix_on & discovery_off
only_discovery = apple_off & netflix_off & discovery_on
apple_and_netflix = apple_on & netflix_on & discovery_off
apple_and_discovery = apple_on & netflix_off & discovery_on
discovery_and_netflix = apple_off & netflix_on & discovery_on
exactly_two = apple_and_netflix | apple_and_discovery | discovery_and_netflix
all_three = apple_on & netflix_on & discovery_on
# Union of every combination above, i.e. uses at least one tracked service.
any_tracked = only_apple | only_netflix | only_discovery | exactly_two | all_three

app_usage_summary_df2['use_appletv'] = np.where(only_apple, 1, 0)
app_usage_summary_df2['use_netflix'] = np.where(only_netflix, 1, 0)
app_usage_summary_df2['use_discovery'] = np.where(only_discovery, 1, 0)

app_usage_summary_df2['appletv_netflix'] = np.where(apple_and_netflix, 1, 0)
app_usage_summary_df2['appletv_discovery'] = np.where(apple_and_discovery, 1, 0)
app_usage_summary_df2['discovery_netflix'] = np.where(discovery_and_netflix, 1, 0)

app_usage_summary_df2['use_any_2_services'] = np.where(exactly_two, 1, 0)
app_usage_summary_df2['use_all_3_services'] = np.where(all_three, 1, 0)

# Competitor streaming only (none of the three tracked services) ...
app_usage_summary_df2['use_competitor_streaming'] = np.where(
    competitor_on & apple_off & netflix_off & discovery_off, 1, 0
)
# ... versus competitor streaming alongside at least one tracked service.
app_usage_summary_df2['competitor_firefly'] = np.where(competitor_on & any_tracked, 1, 0)

#app_usage_summary_df2['new_apple_device'] = processed_df['new_apple_device']

In [None]:
# Customers per usage flag, as a single row labelled 'num_customers'.
# IMSI is dropped first because it is an identifier, not a quantity to add up.
# Nothing is written back to app_usage_summary_df2, so the cell can be re-run safely;
# errors='ignore' tolerates a leftover helper 'index' column from earlier runs.
flag_totals = (
    app_usage_summary_df2
    .drop(columns=['IMSI', 'index'], errors='ignore')
    .sum()
    .to_frame('num_customers')
    .T
)
flag_totals