In [21]:
#### import global modules
import os
import sys
import pandas as pd
import numpy as np
from pathlib import Path
from yaml import safe_load
import google.oauth2.credentials
from google.cloud import bigquery
import gc

# Set global vars
# The project root is whatever precedes the first 'notebooks' substring of the
# current working directory. If the cwd contains no 'notebooks' component,
# str.split returns the whole cwd unchanged and the root silently becomes the cwd.
pth_project = Path(os.getcwd().split('notebooks')[0])
pth_data = pth_project / 'data'
pth_queries = pth_project / 'core' / 'queries'
pth_creds = pth_project / 'conf' / 'local' / 'project_config.yaml'
# Make the project root importable so the `core` package below resolves.
sys.path.insert(0, str(pth_project))
# Load the YAML config inside a context manager so the file handle is closed
# deterministically instead of being left open for the kernel's lifetime.
with pth_creds.open() as f_config:
    d_project_config = safe_load(f_config)
# d_params = safe_load((pth_project / 'core' / 'parameters' / 'common.yaml').open())['data_extract']

# import local modules
from core.utils.gcp import connect_bq_services
# from core.etl.extract import extract_bq_data, extract_pr_codes, format_conv_df, filter_convs

# Connect to google services
# The client is built from the 'gcp-project-name' entry of the project config.
bq_client = connect_bq_services(d_project_config['gcp-project-name'])
pd.options.display.max_rows = 100

In [22]:
def extract_bq_data(bq_client, sql=None, pth_query=None):
    """Run a query through ``bq_client`` and return the result as a DataFrame.

    The SQL text comes from ``sql`` when it is given; otherwise it is read
    from ``pth_query``. When both are supplied, ``sql`` wins and ``pth_query``
    is never read.

    Args:
        bq_client: Object exposing ``query(sql)`` whose return value has a
            ``to_dataframe()`` method.
        sql: SQL text to execute, or None.
        pth_query: Object exposing ``read_text()`` (e.g. a ``pathlib.Path``)
            that yields the SQL text, or None.

    Returns:
        Whatever ``bq_client.query(sql).to_dataframe()`` returns.

    Raises:
        ValueError: If both ``sql`` and ``pth_query`` are None.
    """
    if sql is None:
        # No inline SQL: fall back to the query file, or fail if there is none.
        if pth_query is None:
            raise ValueError('`sql` or `pth_query` should be set')
        sql = pth_query.read_text()
    return bq_client.query(sql).to_dataframe()

In [29]:
# BigQuery SQL joining two hard-coded snapshots:
#   * ADC_data        - ADC_Master_Data rows for Month_Snapshot 2022-12-01,
#                       dealer 'TELUS Communications Inc.', Segment 'Heavy_User'.
#   * Telus_customers - product-instance snapshot rows dated 2022-12-31, filtered
#                       on product type, source id, status and customer type.
# The two are inner-joined on dealer_customer_id = cust_bus_cust_id.
# The CTE is left unordered: an ordering inside a CTE cannot affect the joined
# result and only adds sort cost.
# NOTE(review): if a customer can hold more than one matching product instance,
# the inner join will repeat that customer's ADC row - confirm against the data.
Query = '''
WITH ADC_data AS (
    SELECT *
    FROM `divgpras-pr-579355.ADC_Feature_Datastore.ADC_Master_Data`
    WHERE Month_Snapshot = '2022-12-01'
      AND dealer_name = 'TELUS Communications Inc.'
      AND Segment = 'Heavy_User'
),

Telus_customers AS (
    SELECT
        cust_bus_cust_id,
        pi_cntrct_start_ts AS contract_start_date,
        pi_cntrct_end_ts AS contract_end_date
    FROM `cio-datahub-enterprise-pr-183a.ent_cust_cust.bq_prod_instnc_snpsht`
    WHERE DATE(prod_instnc_ts) = '2022-12-31'
      AND pi_prod_instnc_typ_cd = 'SMHM'   -- Service type
      AND bus_prod_instnc_src_id = 1001    -- BANs that are for home services
      AND pi_prod_instnc_stat_cd IN ('A')
      AND consldt_cust_typ_cd = 'R'
)

SELECT *
FROM ADC_data a
INNER JOIN Telus_customers b
    ON a.dealer_customer_id = b.cust_bus_cust_id
'''

In [30]:
# Execute the SQL held in `Query` and keep the full result set as a DataFrame.
DF = extract_bq_data(bq_client=bq_client, sql=Query)

In [31]:
# Row count per distinct account_type_name value.
DF.account_type_name.value_counts()

Security System             28222
Awareness and Automation        9
Name: account_type_name, dtype: int64

In [9]:
# Print the full per-column listing (name and dtype) of the joined frame;
# verbose=True forces every column to be listed instead of a truncated summary.
DF.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28231 entries, 0 to 28230
Data columns (total 123 columns):
 #    Column                                                        Dtype  
---   ------                                                        -----  
 0    Month_Snapshot                                                dbdate 
 1    customer_id                                                   Int64  
 2    dealer_customer_id                                            object 
 3    primary_login_id                                              Int64  
 4    dealer_name                                                   object 
 5    join_date                                                     object 
 6    account_type_name                                             object 
 7    customer_type_name                                            object 
 8    primary_email                                                 object 
 9    primary_phone                                   

In [10]:
# Collect every column whose name contains 'TC_'. The derived
# 'TC_columns_total' column also matches that substring, so it is excluded
# explicitly: otherwise re-running this cell after the total has been added to
# DF would fold the total into itself on the next summation.
TC_columns = [
    col for col in DF.columns
    if 'TC_' in col and col != 'TC_columns_total'
]

In [15]:
# Row-wise total across the selected TC_ columns (missing values are skipped,
# as with Series.sum). DataFrame.sum(axis=1) is the vectorized equivalent of
# calling .sum() on every row through apply, without the per-row Python call.
DF['TC_columns_total'] = DF[TC_columns].sum(axis=1)

In [16]:
# Distribution of the per-row TC_ totals (how many rows share each total).
DF.TC_columns_total.value_counts()

0.0      17503
1.0       4702
2.0       1902
3.0        886
4.0        588
         ...  
390.0        1
123.0        1
515.0        1
65.0         1
369.0        1
Name: TC_columns_total, Length: 119, dtype: int64

In [17]:
# Keep only the rows whose TC_ total is exactly zero; the original row labels
# are preserved (no index reset).
DF_1 = DF.loc[DF['TC_columns_total'] == 0]

In [18]:
# Summary of the zero-total subset: row count, column count, dtypes and memory use.
DF_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 17503 entries, 0 to 28229
Columns: 124 entries, Month_Snapshot to TC_columns_total
dtypes: Int64(10), dbdate(1), float64(100), object(13)
memory usage: 16.9+ MB


In [19]:
# Export the zero-total subset to a CSV in the current working directory,
# without the index column.
# NOTE(review): DF_1 keeps every column of DF, including primary_email and
# primary_phone, so this file carries customer contact details - handle it
# accordingly.
DF_1.to_csv(path_or_buf='Customer_review_sizing.csv', index=False)