In [1]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
sys.path.insert(0, str(pth_project))
d_project_config = safe_load(pth_creds.open())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [2]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    if sql is not None:
        df = bq_client.query(sql).to_dataframe()
    elif pth_query is not None:
        sql = pth_query.read_text()
        df = bq_client.query(sql).to_dataframe()
    else:
        raise ValueError('`sql` or `pth_query` should be set')  
    return df

In [109]:
Query='''


with ADC_data_Sep_2022 as 

(

SELECT distinct
    customer_id as cust_Sep,
    BAN,
    Login_consistency as Login_consistency_sep ,
    Arming_Consistency as Arming_consistency_sep,
    1 as Sept_data_flag

   FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot='2022-09-01'
    AND dealer_name='TELUS Communications Inc.' 
    and Login_consistency=0 and Arming_Consistency=0
    and account_type_name='Security System'
    
)

, ADC_data_Oct_2022 as 

(

SELECT distinct
    customer_id as cust_Oct,
    Login_consistency as Login_consistency_Oct,
    Arming_Consistency as Arming_consistency_Oct,
    1 as Oct_data_flag

   FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot='2022-10-01'
    AND dealer_name='TELUS Communications Inc.' 
    --and Login_consistency=0 and Arming_Consistency=0
    and account_type_name='Security System'
    
)

, ADC_data_Nov_2022 as 

(

SELECT distinct
    customer_id as cust_Nov,
    Login_consistency as Login_consistency_Nov ,
    Arming_Consistency as Arming_consistency_Nov,
    1 as Nov_data_flag

   FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot='2022-11-01'
    AND dealer_name='TELUS Communications Inc.' 
    --and Login_consistency=0 and Arming_Consistency=0
    and account_type_name='Security System'
    
)

, ADC_data_Dec_2022 as 

(

SELECT distinct
    customer_id as cust_Dec,
    Login_consistency as Login_consistency_Dec ,
    Arming_Consistency as Arming_consistency_Dec,
    1 as Nov_data_flag

   FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot='2022-12-01'
    AND dealer_name='TELUS Communications Inc.' 
    --and Login_consistency=0 and Arming_Consistency=0
    and account_type_name='Security System'
    
)

  
,Telus_Deacts_Oct as (


select distinct BILLING_ACCOUNT_NUM as BAN ,1 as Telus_Churn_Flag_Oct  
from `divgpras-pr-579355.SHS.SHS_DEACTS_OCT2022`

)

,Telus_Deacts_Nov as (


select distinct BILLING_ACCOUNT_NUM as BAN ,1 as Telus_Churn_Flag_Nov  
from `divgpras-pr-579355.SHS.SHS_DEACTS_NOV2022`

)

,Telus_Deacts_Dec as (


select distinct BILLING_ACCOUNT_NUM as BAN ,1 as Telus_Churn_Flag_Dec 
from `divgpras-pr-579355.SHS.SHS_DEACTS_DEC2022`

)




select * from ADC_data_Sep_2022 a
left join ADC_data_Oct_2022 b
on a.cust_Sep=b.cust_Oct
left join ADC_data_Nov_2022 c
on a.cust_Sep=c.cust_Nov
left join Telus_Deacts_Oct d
on a.BAN=d.BAN
left join Telus_Deacts_Nov e
on a.BAN=e.BAN
left join Telus_Deacts_Dec f
on a.BAN=f.BAN






'''

In [110]:
DF=extract_bq_data(bq_client, sql=Query)

In [111]:
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24518 entries, 0 to 24517
Data columns (total 19 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   cust_Sep                24518 non-null  Int64  
 1   BAN                     24518 non-null  Int64  
 2   Login_consistency_sep   24518 non-null  float64
 3   Arming_consistency_sep  24518 non-null  float64
 4   Sept_data_flag          24518 non-null  Int64  
 5   cust_Oct                21882 non-null  Int64  
 6   Login_consistency_Oct   21882 non-null  float64
 7   Arming_consistency_Oct  21882 non-null  float64
 8   Oct_data_flag           21882 non-null  Int64  
 9   cust_Nov                20585 non-null  Int64  
 10  Login_consistency_Dec   20585 non-null  float64
 11  Arming_consistency_Dec  20585 non-null  float64
 12  Nov_data_flag           20585 non-null  Int64  
 13  BAN_1                   912 non-null    Int64  
 14  Telus_Churn_Flag_Oct    912 non-null  

In [112]:
# DF.fillna(0,inplace=True)

In [113]:
# DF['Oct_data_flag'].fillna(0,inplace=True)

In [114]:
# DF['Nov_data_flag'].fillna(0,inplace=True)

In [115]:
DF['Oct_data_flag'].value_counts(normalize=True)*100

1    100.0
Name: Oct_data_flag, dtype: Float64

In [116]:
DF['Nov_data_flag'].value_counts(normalize=True)*100

1    100.0
Name: Nov_data_flag, dtype: Float64

In [117]:
# pd.DataFrame(DF_monitored.groupby(['Segment','Best_Practice_All']).agg(
    
#     Customer_count= ('customer_id','count'),
#     Customer_count_1= ('customer_id','nunique'),
#     Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
#     Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)

In [118]:
def Arming_category_making(var):
    
    if var==0:
        return 'Arming_0%'
    elif var>0 and var<=20 :
        return 'Arming_0_20%%'
    elif var>20 and var<=50 :
        return 'Arming_20_50%%'
    elif var>50:
        return 'Arming_50_more%'
    else:
        return "None_of_Above"

In [119]:
def login_category_making(var):
    
    if var==0:
        return 'Login_0%'
    elif var>0 and var<=20 :
        return 'Login_0_20%%'
    elif var>20 and var<=50 :
        return 'Login_20_50%%'
    elif var>50:
        return 'Login_50_more%'
    else:
        return "None_of_Above"

In [120]:
# def login_amring_segment_making(row):
    
#     if row['Arming_category_Oct']=='Arming_0%' and row['Login_category_Oct']=='Login_0%':
#         return 
        

In [121]:
DF['Arming_category_Sep']=DF['Arming_consistency_sep'].apply(Arming_category_making)
DF['Arming_category_Oct']=DF['Arming_consistency_Oct'].apply(Arming_category_making)
DF['Arming_category_Nov']=DF['Arming_consistency_Dec'].apply(Arming_category_making)


DF['Login_category_Sep']=DF['Login_consistency_sep'].apply(login_category_making)
DF['Login_category_Oct']=DF['Login_consistency_Oct'].apply(login_category_making)
DF['Login_category_Nov']=DF['Login_consistency_Dec'].apply(login_category_making)

# DF['Arming_category_Sep']=DF.apply(Arming_category_making,axis=1)

In [122]:
DF.head()

Unnamed: 0,cust_Sep,BAN,Login_consistency_sep,Arming_consistency_sep,Sept_data_flag,cust_Oct,Login_consistency_Oct,Arming_consistency_Oct,Oct_data_flag,cust_Nov,...,BAN_2,Telus_Churn_Flag_Nov,BAN_3,Telus_Churn_Flag_Dec,Arming_category_Sep,Arming_category_Oct,Arming_category_Nov,Login_category_Sep,Login_category_Oct,Login_category_Nov
0,5693738,0,0.0,0.0,1,5693738,0.0,0.0,1,5693738,...,,,,,Arming_0%,Arming_0%,Arming_0%,Login_0%,Login_0%,Login_0%
1,5556714,0,0.0,0.0,1,5556714,0.0,0.0,1,5556714,...,,,,,Arming_0%,Arming_0%,Arming_0%,Login_0%,Login_0%,Login_0%
2,5699070,0,0.0,0.0,1,5699070,0.0,0.0,1,5699070,...,,,,,Arming_0%,Arming_0%,Arming_0%,Login_0%,Login_0%,Login_0%
3,8628290,0,0.0,0.0,1,8628290,0.0,0.0,1,8628290,...,,,,,Arming_0%,Arming_0%,Arming_0%,Login_0%,Login_0%,Login_0%
4,5321938,0,0.0,0.0,1,5321938,0.0,6.666667,1,5321938,...,,,,,Arming_0%,Arming_0_20%%,Arming_0%,Login_0%,Login_0%,Login_0%


In [123]:
DF['Arming_category_Sep'].value_counts(),DF['Login_category_Sep'].value_counts()

(Arming_0%    24518
 Name: Arming_category_Sep, dtype: int64,
 Login_0%    24518
 Name: Login_category_Sep, dtype: int64)

In [124]:
DF.to_csv('Telus_Arming_login_MoM_analysis.csv',index=False)

In [125]:
pd.DataFrame(DF.groupby(['Login_category_Oct','Login_category_Nov','Arming_category_Oct','Arming_category_Nov']).agg(
    
    Customer_count= ('cust_Sep','count'),
    Customer_count_1= ('cust_Sep','nunique'),
    # # Customer_Share= ('customer_id',lambda x:x.count()*100/Merge_DF_4.shape[0])
    # Churn_total=('Telus_Churn_Flag',lambda x: x.sum()),
    # Churn_rate=('Telus_Churn_Flag',lambda x: x.mean()*100)
).reset_index())

Unnamed: 0,Login_category_Oct,Login_category_Nov,Arming_category_Oct,Arming_category_Nov,Customer_count,Customer_count_1
0,Login_0%,Login_0%,Arming_0%,Arming_0%,14521,14474
1,Login_0%,Login_0%,Arming_0%,Arming_0_20%%,695,691
2,Login_0%,Login_0%,Arming_0%,Arming_20_50%%,16,15
3,Login_0%,Login_0%,Arming_0%,Arming_50_more%,13,13
4,Login_0%,Login_0%,Arming_0_20%%,Arming_0%,777,772
...,...,...,...,...,...,...
134,Login_50_more%,Login_50_more%,Arming_0%,Arming_0_20%%,2,2
135,Login_50_more%,Login_50_more%,Arming_0_20%%,Arming_0%,3,3
136,Login_50_more%,Login_50_more%,Arming_50_more%,Arming_50_more%,2,2
137,Login_50_more%,None_of_Above,Arming_0_20%%,None_of_Above,1,1


In [4]:
rep_query='''

select distinct customer_id,rep_id
from `cio-datahub-enterprise-pr-183a.src_adc.bq_customer_account_details`
where date(last_updt_ts)='2022-10-31'

'''

In [5]:
rep_DF=extract_bq_data(bq_client, sql=rep_query)

In [6]:
rep_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627068 entries, 0 to 627067
Data columns (total 2 columns):
 #   Column       Non-Null Count   Dtype
---  ------       --------------   -----
 0   customer_id  627068 non-null  Int64
 1   rep_id       627068 non-null  Int64
dtypes: Int64(2)
memory usage: 10.8 MB


In [7]:
rep_DF.to_csv('ADC_rep_data.csv',index=False)

In [8]:
Prizm_demo_query='''

select distinct ban,prizm_name,lifestage_nm,med_income
from `bi-srv-features-pr-ef5a93.ban_demogr.bq_ban_demogr_prizm`
where date(part_dt)='2023-02-01'

'''

In [9]:
prizm_DF=extract_bq_data(bq_client, sql=Prizm_demo_query)

In [10]:
prizm_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7773776 entries, 0 to 7773775
Data columns (total 4 columns):
 #   Column        Dtype  
---  ------        -----  
 0   ban           Int64  
 1   prizm_name    object 
 2   lifestage_nm  object 
 3   med_income    float64
dtypes: Int64(1), float64(1), object(2)
memory usage: 244.7+ MB


In [12]:
Telus_Oct_cust_list=pd.read_csv('Telus_Oct_cust_list.csv')

In [13]:
Telus_Oct_cust_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 326768 entries, 0 to 326767
Data columns (total 3 columns):
 #   Column              Non-Null Count   Dtype
---  ------              --------------   -----
 0   customer_id         326768 non-null  int64
 1   dealer_customer_id  326768 non-null  int64
 2   BAN                 326768 non-null  int64
dtypes: int64(3)
memory usage: 7.5 MB


In [14]:
Telus_Oct_cust_list_prizm=Telus_Oct_cust_list.merge(prizm_DF,left_on='BAN',right_on='ban',how='left')

In [15]:
Telus_Oct_cust_list_prizm.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 326768 entries, 0 to 326767
Data columns (total 7 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   customer_id         326768 non-null  int64  
 1   dealer_customer_id  326768 non-null  int64  
 2   BAN                 326768 non-null  int64  
 3   ban                 315193 non-null  Int64  
 4   prizm_name          315193 non-null  object 
 5   lifestage_nm        315193 non-null  object 
 6   med_income          308811 non-null  float64
dtypes: Int64(1), float64(1), int64(3), object(2)
memory usage: 20.3+ MB


In [16]:
Telus_Oct_cust_list_prizm.to_csv('Prizm_data.csv',index=False)