In [2]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# Project root = the part of the working-directory path that precedes the first
# occurrence of the substring 'notebooks'. If the cwd does not contain
# 'notebooks', the split returns the cwd unchanged.
# NOTE(review): this is a substring match, so a parent folder whose name merely
# contains 'notebooks' would truncate the path early — confirm the layout.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Put the project root first on sys.path so the `core` package below is importable.
sys.path.insert(0, str(pth_project))
# read_text() opens, reads and closes the file; the previous
# `safe_load(pth_creds.open())` left the file handle open.
d_project_config = safe_load(pth_creds.read_text())
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules (must come after the sys.path.insert above)
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
# Show up to 100 rows when a DataFrame/Series is displayed.
pd.options.display.max_rows = 100

In [3]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result of ``to_dataframe()``.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method (presumably a BigQuery client — confirm
            against ``connect_bq_services``).
        sql: Query text. When given, it is used and ``pth_query`` is ignored.
        pth_query: Object with a ``read_text()`` method (e.g. a ``pathlib.Path``)
            holding the query text. Only consulted when ``sql`` is None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        # No inline SQL: fall back to the query file, or fail if neither is set.
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [31]:
# BigQuery SQL: one row per customer_id with a derived Login_Segment label.
#   - Source table: ADC_Feature_Datastore.ADC_Master_Data.
#   - Login_Segment rules, evaluated in order on Login_consistency:
#       = 0   -> 'NO_User'
#       >= 30 -> 'Heavy_User'
#       < 30  -> 'Low_User'   (also catches any negative values)
#       else  -> 'Un_assigned' (reached only when Login_consistency is NULL)
#     NOTE(review): the unit/meaning of Login_consistency and of the 30 threshold
#     is not visible here — confirm against the feature-store definition.
#   - WHERE keeps rows for dealer 'TELUS Communications Inc.' with a non-empty
#     dealer_customer_id.
#   - QUALIFY keeps, per customer_id, the row with the latest Month_Snapshot.
#     It is applied after WHERE, so "latest" means latest among the rows that
#     passed the dealer filter, not latest overall.
Query='''

select customer_id, dealer_customer_id,
    CASE
      WHEN Login_consistency = 0 THEN  "NO_User"
      WHEN Login_consistency>=30 THEN 'Heavy_User'
      WHEN Login_consistency<30 THEN "Low_User"
    ELSE
    'Un_assigned'
  END
    AS Login_Segment

from `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
WHERE dealer_name='TELUS Communications Inc.'
and dealer_customer_id!=''
QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY Month_Snapshot DESC) = 1


'''

In [32]:
# Execute the login-segment query and hold the full result in memory as DF.
DF=extract_bq_data(bq_client, sql=Query)

In [33]:
# Sanity-check the extract: row count, column dtypes and non-null counts.
DF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 417076 entries, 0 to 417075
Data columns (total 3 columns):
 #   Column              Non-Null Count   Dtype 
---  ------              --------------   ----- 
 0   customer_id         417076 non-null  Int64 
 1   dealer_customer_id  417076 non-null  object
 2   Login_Segment       417076 non-null  object
dtypes: Int64(1), object(2)
memory usage: 9.9+ MB


In [34]:
# Preview the first rows of the extract.
# NOTE(review): the rendered output shows customer identifiers — consider
# clearing this cell's output before sharing or committing the notebook.
DF.head()

Unnamed: 0,customer_id,dealer_customer_id,Login_Segment
0,1261094,09054513,NO_User
1,1429415,DV102746,NO_User
2,1741007,09056736,NO_User
3,1882205,APE16845,NO_User
4,2292475,60003392,NO_User


In [35]:
# Share of customers in each login segment, expressed in percent.
segment_share = DF['Login_Segment'].value_counts(normalize=True)
segment_share * 100

Heavy_User    50.884731
NO_User       29.109083
Low_User      20.006186
Name: Login_Segment, dtype: float64

In [36]:
# Persist the customer -> Login_Segment mapping as CSV, without the index column.
# The path is relative, so the file is written to the current working directory.
# NOTE(review): pth_data is defined in the setup cell but not used here —
# confirm whether the file is meant to live under the project's data folder.
DF.to_csv('SMHM_Customer_Login_Segment_mapping.csv',index=False)