In [64]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first occurrence of 'notebooks' in
# the working directory; if 'notebooks' does not occur, the full cwd is used.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Read the config inside a context manager so the file handle is closed
# deterministically instead of being left open for the kernel's lifetime.
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path.insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [65]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    Args:
        bq_client: Object exposing ``query(sql)`` whose result provides
            ``to_dataframe()``.
        sql: Query text. When given, it takes precedence over ``pth_query``.
        pth_query: Object exposing ``read_text()`` that returns the query text;
            only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        # Fall back to the query text stored at the given path.
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [31]:
# Load the never-pay customer list from a CSV file.
# NOTE(review): the path is relative, so it resolves against the kernel's
# working directory rather than `pth_data` — confirm this is the intended location.
Customer_DF=pd.read_csv('Never_pay_customer_list.csv')

In [32]:
# Show column dtypes and non-null counts for the loaded customer list.
Customer_DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19685 entries, 0 to 19684
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   BAN          19685 non-null  int64
 1   CUSTOMER_ID  19685 non-null  int64
dtypes: int64(2)
memory usage: 307.7 KB


In [33]:
# Preview the first rows of the customer list.
# NOTE(review): the preview shows identical rows at index 2 and 3, so the list
# appears to contain duplicates — confirm whether they should be dropped, since
# they carry through to the joined result and its percentages.
Customer_DF.head()

Unnamed: 0,BAN,CUSTOMER_ID
0,123646491,20254266
1,123658190,18753534
2,123737261,19255324
3,123737261,19255324
4,123743990,19504514


In [34]:
# Rename the customer id column to the name the BigQuery query selects.
Customer_DF = Customer_DF.rename(columns={'CUSTOMER_ID': 'Telus_customer_ID'})

In [35]:
# Preview again to confirm the column rename took effect.
Customer_DF.head()

Unnamed: 0,BAN,Telus_customer_ID
0,123646491,20254266
1,123658190,18753534
2,123737261,19255324
3,123737261,19255324
4,123743990,19504514


In [36]:
# Upload the customer list to BigQuery, replacing any existing table contents.
config = bigquery.job.LoadJobConfig()

# config._properties['timePartitioning'] = {'field': 'Month_Year'}
config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

# Dataset-qualified table id; no project is given here.
# NOTE(review): presumably resolved against the client's default project — confirm.
Table_BQ = 'SHS.Never_pay_customers'

# load_table_from_dataframe only *starts* the load and returns a job handle.
bq_table_instance = bq_client.load_table_from_dataframe(Customer_DF, Table_BQ, job_config=config)

# Block until the load finishes so later cells do not query a stale or
# half-written table, and so any load error is raised here rather than ignored.
bq_table_instance.result()

In [68]:
# Monthly ADC usage for the never-pay customer list.
#
# Each entry below produces one CTE (average usage metrics per
# dealer_customer_id for a single Month_Snapshot), one group of output columns
# and one LEFT JOIN onto the customer list.
# Fields: (CTE name, join alias, snapshot date, metric-column suffix,
#          data-flag column prefix).
# The irregular casing ('sep' vs 'Sept', 'oct' in the CTE name) is kept on
# purpose: later cells reference the resulting column names verbatim.
ADC_MONTHS = [
    ('ADC_data_Sep_2022', 'b', '2022-09-01', 'sep', 'Sept'),
    ('ADC_data_oct_2022', 'c', '2022-10-01', 'Oct', 'Oct'),
    ('ADC_data_Nov_2022', 'd', '2022-11-01', 'Nov', 'Nov'),
    ('ADC_data_Dec_2022', 'e', '2022-12-01', 'Dec', 'Dec'),
    ('ADC_data_Jan_2023', 'f', '2023-01-01', 'Jan', 'Jan'),
]

# One CTE per month. GROUP BY already yields a single row per
# dealer_customer_id, so no DISTINCT is needed on top of it.
ADC_MONTH_CTE = '''
, {cte} as (
  SELECT
    dealer_customer_id as Telus_ID,
    avg(Login_consistency) as Login_consistency_{suffix},
    avg(number_of_login_days) as Login_days_{suffix},
    avg(Arming_Consistency) as Arming_consistency_{suffix},
    1 as {flag}_data_flag
  FROM
    `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
  WHERE
    Month_Snapshot='{snapshot}'
  group by dealer_customer_id
)
'''

# Output columns contributed by one month in the final SELECT.
ADC_MONTH_COLUMNS = (
    ',{alias}.Login_consistency_{suffix},{alias}.Login_days_{suffix}'
    ',{alias}.Arming_consistency_{suffix},{alias}.{flag}_data_flag\n'
)

# LEFT JOIN so customers without ADC data for a month are kept (NULL metrics).
ADC_MONTH_JOIN = 'left join {cte} {alias}\non a.Telus_customer_ID={alias}.Telus_ID\n'


def _render_adc_months(template):
    """Fill `template` once per entry of ADC_MONTHS and concatenate the results."""
    return ''.join(
        template.format(cte=cte, alias=alias, snapshot=snapshot, suffix=suffix, flag=flag)
        for cte, alias, snapshot, suffix, flag in ADC_MONTHS
    )


# NOTE(review): base_data is not de-duplicated, so a customer listed twice in
# the uploaded table yields two identical result rows — confirm this is intended.
Query = (
    '''
with base_data as (
select BAN, cast(Telus_customer_ID as STRING) as Telus_customer_ID  from `divgpras-pr-579355.SHS.Never_pay_customers`
)
'''
    + _render_adc_months(ADC_MONTH_CTE)
    + '''
select a.BAN,a.Telus_customer_ID
'''
    + _render_adc_months(ADC_MONTH_COLUMNS)
    + 'from base_data a\n'
    + _render_adc_months(ADC_MONTH_JOIN)
    + 'order by BAN\n'
)

In [69]:
# Run the usage query and collect the result via extract_bq_data.
DF=extract_bq_data(bq_client, sql=Query)

In [70]:
# Show dtypes and non-null counts for every column of the query result.
DF.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19685 entries, 0 to 19684
Data columns (total 22 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   BAN                     19685 non-null  Int64  
 1   Telus_customer_ID       19685 non-null  object 
 2   Login_consistency_sep   11620 non-null  float64
 3   Login_days_sep          11620 non-null  float64
 4   Arming_consistency_sep  11620 non-null  float64
 5   Sept_data_flag          11620 non-null  Int64  
 6   Login_consistency_Oct   9520 non-null   float64
 7   Login_days_Oct          9520 non-null   float64
 8   Arming_consistency_Oct  9520 non-null   float64
 9   Oct_data_flag           9520 non-null   Int64  
 10  Login_consistency_Nov   8200 non-null   float64
 11  Login_days_Nov          8200 non-null   float64
 12  Arming_consistency_Nov  8200 non-null   float64
 13  Nov_data_flag           8200 non-null   Int64  
 14  Login_consistency_Dec   7147 non-null 

In [71]:
# Preview the first 10 rows of the query result.
DF.head(10)

Unnamed: 0,BAN,Telus_customer_ID,Login_consistency_sep,Login_days_sep,Arming_consistency_sep,Sept_data_flag,Login_consistency_Oct,Login_days_Oct,Arming_consistency_Oct,Oct_data_flag,...,Arming_consistency_Nov,Nov_data_flag,Login_consistency_Dec,Login_days_Dec,Arming_consistency_Dec,Dec_data_flag,Login_consistency_Jan,Login_days_Jan,Arming_consistency_Jan,Jan_data_flag
0,123646491,20254266,0.0,0.0,4.347826,1.0,,,,,...,,,,,,,,,,
1,123658190,18753534,,,,,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
2,123737261,19255324,6.666667,2.0,0.0,1.0,32.258065,10.0,0.0,1.0,...,0.0,1.0,53.333333,16.0,0.0,1.0,53.333333,16.0,0.0,1.0
3,123737261,19255324,6.666667,2.0,0.0,1.0,32.258065,10.0,0.0,1.0,...,0.0,1.0,53.333333,16.0,0.0,1.0,53.333333,16.0,0.0,1.0
4,123743990,19504514,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,,,,,,,,,,
5,123947310,19258740,,,,,,,,,...,,,,,,,,,,
6,124009108,19760151,,,,,,,,,...,,,,,,,,,,
7,124083769,19260803,,,,,,,,,...,,,,,,,,,,
8,124109671,29007707,,,,,,,,,...,,,,,,,,,,
9,124201615,18509351,0.0,0.0,0.0,1.0,,,,,...,,,,,,,,,,


In [72]:

# Derive one binary usage flag per month: 1 when the customer's average login
# consistency for that month is strictly positive, otherwise 0.
# (flag column, source column) pairs; a list keeps the column order explicit.
usage_flag_sources = [
    ('Sep_Usage_Flag', 'Login_consistency_sep'),
    ('Oct_Usage_Flag', 'Login_consistency_Oct'),
    ('Nov_Usage_Flag', 'Login_consistency_Nov'),
    ('Dec_Usage_Flag', 'Login_consistency_Dec'),
    ('Jan_Usage_Flag', 'Login_consistency_Jan'),
]
for flag_col, source_col in usage_flag_sources:
    # Vectorised comparison: NaN > 0 is False, so months with no ADC row get 0,
    # matching the previous row-wise `1 if x>0 else 0` rule.
    DF[flag_col] = DF[source_col].gt(0).astype('int64')



In [73]:
# Percentage of rows per September usage-flag value (0 also covers rows with no September data).
DF['Sep_Usage_Flag'].value_counts(normalize=True)*100

0    69.164338
1    30.835662
Name: Sep_Usage_Flag, dtype: float64

In [74]:
# Percentage of rows per October usage-flag value (0 also covers rows with no October data).
DF['Oct_Usage_Flag'].value_counts(normalize=True)*100

0    68.249936
1    31.750064
Name: Oct_Usage_Flag, dtype: float64

In [75]:
# Percentage of rows per January usage-flag value (0 also covers rows with no January data).
DF['Jan_Usage_Flag'].value_counts(normalize=True)*100

0    74.538989
1    25.461011
Name: Jan_Usage_Flag, dtype: float64

In [76]:
# Preview the first 10 rows again, now including the derived usage-flag columns.
DF.head(10)

Unnamed: 0,BAN,Telus_customer_ID,Login_consistency_sep,Login_days_sep,Arming_consistency_sep,Sept_data_flag,Login_consistency_Oct,Login_days_Oct,Arming_consistency_Oct,Oct_data_flag,...,Dec_data_flag,Login_consistency_Jan,Login_days_Jan,Arming_consistency_Jan,Jan_data_flag,Sep_Usage_Flag,Oct_Usage_Flag,Nov_Usage_Flag,Dec_Usage_Flag,Jan_Usage_Flag
0,123646491,20254266,0.0,0.0,4.347826,1.0,,,,,...,,,,,,0,0,0,0,0
1,123658190,18753534,,,,,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,1.0,0,0,1,0,0
2,123737261,19255324,6.666667,2.0,0.0,1.0,32.258065,10.0,0.0,1.0,...,1.0,53.333333,16.0,0.0,1.0,1,1,1,1,1
3,123737261,19255324,6.666667,2.0,0.0,1.0,32.258065,10.0,0.0,1.0,...,1.0,53.333333,16.0,0.0,1.0,1,1,1,1,1
4,123743990,19504514,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,,,,,,0,0,0,0,0
5,123947310,19258740,,,,,,,,,...,,,,,,0,0,0,0,0
6,124009108,19760151,,,,,,,,,...,,,,,,0,0,0,0,0
7,124083769,19260803,,,,,,,,,...,,,,,,0,0,0,0,0
8,124109671,29007707,,,,,,,,,...,,,,,,0,0,0,0,0
9,124201615,18509351,0.0,0.0,0.0,1.0,,,,,...,,,,,,0,0,0,0,0


In [77]:
# Write the result with usage flags to CSV, omitting the index column.
# The relative path resolves against the kernel's working directory.
DF.to_csv('Never_pay_customer_list_Usage_updated.csv',index=False)