In [2]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root is the part of the working directory before the first
# 'notebooks' occurrence (the whole cwd when that text is absent).
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core.*` imports below resolve.
sys.path.insert(0, str(pth_project))
# Read the config inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left to the garbage collector.
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Build the client that later cells use to run queries, for the GCP project
# named in the local project config.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Let pandas display up to 100 rows before truncating.
pd.set_option('display.max_rows', 100)

In [3]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through `bq_client` and return its result as a DataFrame.

    Parameters
    ----------
    bq_client : object
        Client exposing ``query(sql)``; the object it returns must expose
        ``to_dataframe()``.
    sql : str, optional
        Query text. Takes precedence over `pth_query` when both are given.
    pth_query : pathlib.Path or str, optional
        Path of a file containing the query text. Only read when `sql` is None.

    Returns
    -------
    The value of ``bq_client.query(sql).to_dataframe()``.

    Raises
    ------
    ValueError
        If neither `sql` nor `pth_query` is provided.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Path() accepts both str and Path, so plain string paths work too.
        sql = Path(pth_query).read_text()
    return bq_client.query(sql).to_dataframe()

In [53]:
# BigQuery script held as a Python string; a later cell passes it to
# extract_bq_data as `sql`.
#
# Outline of the script:
#   * DECLAREs three DATE variables. snpsht_start_mnth is declared but not
#     referenced anywhere else in the script.
#   * order_data / order_data_1: all order columns plus Created_date and
#     Activation_date parsed with format %d%h%Y from the SUBSTR(..., 0,9)
#     prefix of CREATED_DT / ACTIVATION_DT, CUST_ID cast to STRING as
#     Telus_Cust_ID, and a Day_gap label that splits on a created-to-activated
#     gap of more than 30 days.
#   * Vol_churn / InVol_churn: distinct (BAN, flag = 1, parsed churn date) rows.
#     NOTE(review): a BAN with more than one distinct date would repeat order
#     rows in the final LEFT JOINs — confirm there is one date per BAN.
#   * ADC_customer_details: for dealer 'TELUS Communications Inc.', the latest
#     row per customer_id (by last_updt_ts) with last_updt_ts inside the
#     start/end window and join_date on or after the start date.
#   * ADC_login_YTD: per customer_id, summed login_count and the number of
#     distinct login dates inside the window (plus the overall distinct-date
#     count, which ADC_final_data does not carry forward).
#   * ADC_merge_data / ADC_final_data: average number_of_login_days_YTD per
#     Telus_customer_ID.
#   * Final SELECT: order_data_1 LEFT JOINed to both churn sets on the billing
#     account number and to ADC_final_data on the customer id, with
#     Days_potential_login = days from Activation_date to end_dt_YTD_snpsht,
#     ordered by Created_date.
Query='''






DECLARE
  start_dt_YTD_snpsht DATE DEFAULT '2023-01-01';
DECLARE
  end_dt_YTD_snpsht DATE DEFAULT '2023-03-31';
DECLARE
  snpsht_start_mnth DATE DEFAULT '2023-03-01';



with order_data as (select *,PARSE_DATE('%d%h%Y',SUBSTR(CREATED_DT, 0,9)) as Created_date,  PARSE_DATE('%d%h%Y',SUBSTR(ACTIVATION_DT, 0,9)) as Activation_date
from `divgpras-pr-579355.SHS.Order_details_Jan2023_Mar2023`
)


, order_data_1 as (

select * , cast (CUST_ID as STRING) as Telus_Cust_ID,

CASE
      WHEN DATE_DIFF(DATE(Activation_date),Created_date, DAY) > 30 THEN 'Activated_30days+'
    ELSE
    'Activated_within_30days'
  END
    AS Day_gap

from order_data

)

,Vol_churn as (

select distinct BILLING_ACCOUNT_NUM as BAN,1 as Vol_churn ,PARSE_DATE('%d%h%Y',SUBSTR(ACTIVITY_DATE, 0,9)) as Vol_churn_date from `divgpras-pr-579355.SHS.VolChurn_Jan2023_Mar2023`

)

,InVol_churn as (

select distinct BILLING_ACCOUNT_NUM as BAN, 1 as InVol_churn,PARSE_DATE('%d%h%Y',SUBSTR(DLY_PROD_INSTNC_ACTVY_TS, 0,9)) as InVol_churn_date from `divgpras-pr-579355.SHS.InVolChurn_Jan2023_Mar2023`

)

,ADC_customer_details as (

select customer_id as adc_customer_id,dealer_customer_id  as Telus_customer_ID, date(join_date) as join_date

from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
where dealer_name='TELUS Communications Inc.' 
and date(last_updt_ts)>=start_dt_YTD_snpsht and date(last_updt_ts)<= end_dt_YTD_snpsht
and date(join_date) >= start_dt_YTD_snpsht
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY last_updt_ts DESC) = 1

)


,ADC_login_YTD as (

with date_sql as (
select  count(distinct date(login_dt_utc)) as count_of_dates from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)<= end_dt_YTD_snpsht and date(login_dt_utc)>= start_dt_YTD_snpsht

)

select customer_id,sum(login_count) as sum_login_count,count (distinct date(login_dt_utc)) as number_of_login_days_YTD, (select count_of_dates from date_sql ) as count_of_dates_login_YTD
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_daily_logins`
where date(login_dt_utc)<= end_dt_YTD_snpsht and date(login_dt_utc)>= start_dt_YTD_snpsht
group by customer_id



)

,ADC_merge_data as (

select *
from ADC_customer_details a
left join ADC_login_YTD b
on a.adc_customer_id=b.customer_id


)

,ADC_final_data as (

select Telus_customer_ID, avg(number_of_login_days_YTD) as Avg_no_login_days_YTD
from ADC_merge_data 
group by Telus_customer_ID

)


select a.*,b.Vol_churn, b.Vol_churn_date,c.InVol_churn,c.InVol_churn_date,d.Telus_customer_ID,d.Avg_no_login_days_YTD,DATE_DIFF(end_dt_YTD_snpsht,a.Activation_date, DAY)  as Days_potential_login
-- round(d.number_of_login_days_YTD)*100/(DATE_DIFF(end_dt_YTD_snpsht,a.Activation_date, DAY)) as login_consistency_YTD



from order_data_1 a
left join Vol_churn b
on a.BILL_ACCOUNT_NUMBER=b.BAN
left join InVol_churn c
on a.BILL_ACCOUNT_NUMBER=c.BAN
left join ADC_final_data d
on a.Telus_Cust_ID=d.Telus_customer_ID
order by a.Created_date




'''

In [54]:
# Execute the SQL script and keep the full result set in memory.
DF = extract_bq_data(bq_client=bq_client, sql=Query)

In [55]:
# Column names, non-null counts and dtypes of the query result.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44819 entries, 0 to 44818
Data columns (total 26 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   SALES_AGENT_ID         44766 non-null  object 
 1   CUST_ID                44818 non-null  float64
 2   CHANNEL_GROUP          44819 non-null  object 
 3   CHANNEL                44811 non-null  object 
 4   PROD_INSTNC_ID         44819 non-null  Int64  
 5   CREATED_DT             44819 non-null  object 
 6   ORDER_STATUS           44819 non-null  object 
 7   PRODUCT_NAME           44819 non-null  object 
 8   BILL_ACCOUNT_NUMBER    44709 non-null  float64
 9   IS_DIY                 44819 non-null  Int64  
 10  IS_EXISTING_CUSTOMER   44819 non-null  Int64  
 11  SELF_INSTALL           44819 non-null  Int64  
 12  CURRENT_ORDER_STATUS   44819 non-null  object 
 13  IS_TEST_ACCOUNT        44819 non-null  Int64  
 14  ACTIVATION_DT          44819 non-null  object 
 15  Cr

In [56]:
# First rows of the result; the query orders by Created_date, so these are the earliest orders.
DF.head()

Unnamed: 0,SALES_AGENT_ID,CUST_ID,CHANNEL_GROUP,CHANNEL,PROD_INSTNC_ID,CREATED_DT,ORDER_STATUS,PRODUCT_NAME,BILL_ACCOUNT_NUMBER,IS_DIY,...,Activation_date,Telus_Cust_ID,Day_gap,Vol_churn,Vol_churn_date,InVol_churn,InVol_churn_date,Telus_customer_ID,Avg_no_login_days_YTD,Days_potential_login
0,224079,80755649.0,CSD,CSD: EXT,1134334672,01JAN2023:00:00:00,Installed,Control Plus Video,605614261.0,0,...,2023-01-08,80755649,Activated_within_30days,,NaT,,NaT,80755649,82.0,82
1,SYSADM,105339362.0,WEB,T.COM: AUTO,1134331714,01JAN2023:00:00:00,Installed,Control Plus Video,605620291.0,0,...,2023-01-06,105339362,Activated_within_30days,,NaT,,NaT,105339362,28.0,84
2,SYSADM,105338461.0,WEB,T.COM: AUTO,1134330380,01JAN2023:00:00:00,Installed,Smart Automation Plus,605620264.0,0,...,2023-01-05,105338461,Activated_within_30days,,NaT,,NaT,105338461,84.0,85
3,SYSADM,98533273.0,WEB,T.COM: AUTO,1134339619,01JAN2023:00:00:00,Installed,Control Plus Video,605620374.0,0,...,2023-01-08,98533273,Activated_within_30days,,NaT,,NaT,98533273,82.0,82
4,T894539,100172532.0,CLIENT CARE,CSE: LNR,1134332786,01JAN2023:00:00:00,Installed,Smart Automation Plus,604121849.0,0,...,2023-01-11,100172532,Activated_within_30days,,NaT,,NaT,100172532,77.0,79


In [57]:
# Last rows of the result; the query orders by Created_date, so these are the latest orders.
DF.tail()

Unnamed: 0,SALES_AGENT_ID,CUST_ID,CHANNEL_GROUP,CHANNEL,PROD_INSTNC_ID,CREATED_DT,ORDER_STATUS,PRODUCT_NAME,BILL_ACCOUNT_NUMBER,IS_DIY,...,Activation_date,Telus_Cust_ID,Day_gap,Vol_churn,Vol_churn_date,InVol_churn,InVol_churn_date,Telus_customer_ID,Avg_no_login_days_YTD,Days_potential_login
44814,CSSTIPOBX254042,36811807.0,CSS,CSS,1138212224,31MAR2023:00:00:00,Installed,Smart Automation Plus,600684932.0,0,...,2023-03-31,36811807,Activated_within_30days,,NaT,,NaT,,,0
44815,CSSTIPOBX258415,79438115.0,CSS,CSS,1138212736,31MAR2023:00:00:00,Installed,Smart Automation Plus,601390147.0,1,...,2023-03-31,79438115,Activated_within_30days,,NaT,,NaT,,,0
44816,CSSTIPOB-X252509,41280906.0,CSS,CSS,1138209297,31MAR2023:00:00:00,Installed,Smart Automation Plus,601491906.0,0,...,2023-03-31,41280906,Activated_within_30days,,NaT,,NaT,,,0
44817,CSSTIPOB-X252509,99395770.0,CSS,CSS,1138213918,31MAR2023:00:00:00,Installed,Smart Automation Plus,603936871.0,1,...,2023-03-31,99395770,Activated_within_30days,,NaT,,NaT,,,0
44818,X238552VENUSBENECIO,99829644.0,CSS,CSS,1138202501,31MAR2023:00:00:00,Installed,Smart Automation Plus,604040530.0,1,...,2023-03-31,99829644,Activated_within_30days,,NaT,,NaT,,,0


In [58]:
# Disabled: the query's final SELECT does not return adc_customer_id, so DF has no such column.
# DF['adc_customer_id'].isna().sum()

In [59]:
# Number of rows where Avg_no_login_days_YTD is missing.
DF['Avg_no_login_days_YTD'].isna().sum()

12557

In [60]:
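# Disabled: DF has neither adc_customer_id nor number_of_login_days_YTD (the query returns Avg_no_login_days_YTD instead).
# DF['number_of_login_days_YTD']=np.where(DF['adc_customer_id'].isna(),0,DF['number_of_login_days_YTD'])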

In [61]:
# Write the result to a CSV in the current working directory, without the index column.
DF.to_csv(path_or_buf='Telus_Sales_Quality_report.csv', index=False)