In [56]:
import os, sys, inspect
# sys.path.append('../..')

import pandas as pd
from pathlib import Path
import glob as glob
import os
import numpy as np
import re
import json
from unidecode import unidecode
from datetime import datetime, timedelta, date
from connection import SCORE_DB_CONN, ANALYTICS_DB_CONN
from avay_bq import AvayBQServiceAcc
from da_utils.client.bigquery_client import BigQueryClient
from da_utils.client.google_sheets_client import GoogleSheetsClient
from da_utils.repository.google_sheets.google_sheets_repository import GoogleSheetsRepository
from da_utils.repository.bigquery.bigquery_repository import BigqueryRepository



# Show up to 100 rows when a DataFrame is displayed. The full option name is
# used on purpose: abbreviated keys are resolved by pattern matching and can
# become ambiguous if pandas ever adds another option with a similar name.
pd.set_option("display.max_rows", 100)

In [57]:
# Location of the Application Default Credentials file handed to the BigQuery
# client. It can be overridden with the ADC_FILE_PATH environment variable;
# otherwise it falls back to the file in the current user's home directory,
# so the notebook no longer depends on one specific user account.
adc_path = os.environ.get(
    "ADC_FILE_PATH",
    str(Path.home() / "application_default_credentials.json"),
)
avay_bq_acc = AvayBQServiceAcc()
# Queries issued through bq_repo are billed to this project.
bq_client = BigQueryClient(billing_project_id='prj-ts-p-analytic-8057', adc_file_path_str=adc_path)
bq_repo = BigqueryRepository(bq_client)



In [58]:

# Qualified leads that were sent to Lotte between 2022-09-01 (inclusive) and
# 2023-01-01 (exclusive), one row per phone_infos record.
#   * channel    - 'SMS' when the sender is sms/click or the source contains
#                  "sms", otherwise 'AVAY'
#   * offer_code - alias of bound_code
#   * month      - sent_at formatted as YYYY-MM
# The doubled percent signs in the LIKE pattern are presumably an escape for
# the DB driver's parameter style - verify before changing them.
# NOTE(review): PostgreSQL parses 'utc+7' as a POSIX zone spec, where a
# positive offset means WEST of Greenwich (i.e. UTC-7). Whether this still
# yields Vietnam local time depends on the column type of sent_at - confirm.
qualified_leads_sql = """
select id as lead_id, 
case when (other->>'sender' in ('sms','click') or other->>'source' like '%%sms%%') then 'SMS' else 'AVAY' end as channel,
bound_code as offer_code,
score_range,
score,
to_char(sent_at at time zone 'utc+7', 'YYYY-MM') as month,
other->>'province' as province,
telco_code
from phone_infos
where 1=1
and bank_code in ('lotte')
and sent_at at time zone 'utc+7' >= '2022-09-01'
and sent_at at time zone 'utc+7' < '2023-01-01'
and pre_scoring_data->>'is_qualified'='true'
and was_sent = true
"""

lead_phone_infos = pd.read_sql(sql=qualified_leads_sql, con=SCORE_DB_CONN)

In [59]:
# Lead-source lookup from BigQuery: every lead in avay_compound whose loan was
# accepted with a loan_date between 2022-09-01 and 2022-12-31, with the raw
# lead_source bucketed into google_ads / accesstrade / VTP / Others.
# The query is a plain string because it has no interpolated fields.
# NOTE(review): there is no bank filter in this query, so it returns accepted
# leads for every bank; only the later left join narrows it to Lotte leads.
lotte_source_sql = """

with rangedate as (
    select
date('2022-09-01') as sdate,
date('2022-12-31') as edate)
,qualified as(
    select 
        lead_id
        ,case  
        when lower(lead_source) like 'google%' then 'google_ads'
        when lower(lead_source) in ('accesstrade', 'accesstrade cps','vaycucde') then 'accesstrade'
        when lower(lead_source) like 'viettel%' then 'VTP'
        else 'Others' end as lead_source
    from `avay-a9925.datamart.avay_compound`, rangedate
    where loan_date between sdate and edate and loan_status='accepted'
    order by 1,2
)
select * from qualified
        """

Lotte_source = bq_repo.get_data_from_query_into_pandas(lotte_source_sql)
Lotte_source

Unnamed: 0,lead_id,lead_source
0,3683080,Others
1,12652749,Others
2,12652755,google_ads
3,12652758,google_ads
4,12652761,google_ads
...,...,...
224382,13489443,VTP
224383,13489445,accesstrade
224384,13489447,accesstrade
224385,13489595,accesstrade


In [60]:
# Google Sheets handles; `repo` and `workbook` are reused further down when
# the aggregated result is written back.
client = GoogleSheetsClient()
repo = GoogleSheetsRepository(client)
workbook = repo.open_spreadsheet_by_key("1Z5H4XE6wf9JD1k-kRh_rMVnYHtbWPTCmLLGFt7gmOjQ")

# Read the range B1..B71 of the "data_source" tab, cast lead_id to int so it
# can be joined against the database ids, and tag every row with
# lead_lotte = 1 so matches can be counted after a left join.
data_source_sheet = workbook.worksheet("data_source")
Lead = repo.read_data_from_sheet_to_pandas(
    data_source_sheet,
    starting_cell_str='B1',
    end_cell_col_str='B71',
)
Lead = Lead.assign(
    lead_id=Lead['lead_id'].astype(int),
    lead_lotte=1,
)

In [76]:
# Enrich every qualified lead with its BigQuery lead source and the
# sheet-provided lead_lotte flag. The first merge names no key, so pandas
# joins on the columns the two frames have in common.
# Afterwards:
#   * lead_source falls back to telco_code where no source was matched
#   * a missing score becomes 0
mapping = (
    lead_phone_infos
    .merge(Lotte_source, how='left')
    .merge(Lead, how='left', on='lead_id')
    .assign(
        lead_source=lambda frame: np.where(
            frame['lead_source'].isnull(),
            frame['telco_code'],
            frame['lead_source'],
        ),
        score=lambda frame: frame['score'].fillna(0),
    )
)
mapping


Unnamed: 0,lead_id,channel,offer_code,score_range,score,month,province,telco_code,lead_source,lead_lotte
0,13330794,SMS,LOTTE02,660-669,0,2022-11,Hà Nội,viettel,viettel,
1,13289040,AVAY,LOTTE02,720-724,724,2022-11,Long An,vinaphone,google_ads,
2,13289835,SMS,LOTTE03,705-709,709,2022-11,Đồng Nai,mobifone,mobifone,
3,12975757,SMS,LOTTE02,660-669,664,2022-10,Hậu Giang,mobifone,mobifone,
4,12976151,SMS,LOTTE02,660-669,662,2022-10,Quảng Ngãi,mobifone,mobifone,
...,...,...,...,...,...,...,...,...,...,...
42675,13488344,AVAY,LOTTE02,670-679,678,2022-12,Kiên Giang,viettel,accesstrade,
42676,13488444,AVAY,LOTTE02,670-679,675,2022-12,Quảng Ngãi,viettel,VTP,
42677,13488478,AVAY,LOTTE02,610-619,615,2022-12,Hà Nội,viettel,VTP,
42678,13483651,AVAY,LOTTE02,710-719,719,2022-12,Bình Dương,viettel,VTP,


In [78]:
# Dimensions of the report written to the sheet.
group_keys = ["channel", "offer_code", "score_range", "month", "province", "lead_source", "telco_code"]

# Count leads (lead_id) and sheet-matched leads (non-null lead_lotte) per
# combination of the dimensions above.
# groupby() drops every row that has a missing value in any key column, which
# would silently remove leads (e.g. those without a province) from the
# totals. Missing keys are therefore labelled "unknown" first, so the summed
# lead_id count equals the number of rows in `mapping`. `mapping` itself is
# left untouched because fillna returns a copy.
background2 = (
    mapping
    .fillna({key: "unknown" for key in group_keys})
    .groupby(group_keys)
    .agg({"lead_id": "count", "lead_lotte": "count"})
    .reset_index()
)

background2

Unnamed: 0,channel,offer_code,score_range,month,province,lead_source,telco_code,lead_id,lead_lotte
0,AVAY,LOTTE02,600-609,2022-09,An Giang,accesstrade,viettel,1,0
1,AVAY,LOTTE02,600-609,2022-09,An Giang,google_ads,viettel,1,0
2,AVAY,LOTTE02,600-609,2022-09,Bình Dương,accesstrade,viettel,5,0
3,AVAY,LOTTE02,600-609,2022-09,Bình Dương,google_ads,viettel,2,0
4,AVAY,LOTTE02,600-609,2022-09,Bình Định,accesstrade,viettel,1,0
...,...,...,...,...,...,...,...,...,...
9623,SMS,LOTTE03,850-850,2022-12,Quảng Nam,mobifone,mobifone,1,0
9624,SMS,LOTTE03,850-850,2022-12,Quảng Ninh,mobifone,mobifone,1,0
9625,SMS,LOTTE03,850-850,2022-12,TP HCM,mobifone,mobifone,9,0
9626,SMS,LOTTE03,850-850,2022-12,Vĩnh Long,mobifone,mobifone,2,0


In [79]:
# Publish the aggregated table to the "raw" tab of the workbook.
raw_sheet = workbook.worksheet("raw")
repo.write_df_to_sheet(raw_sheet, background2)