# * Adhoc-BB MKS by Province
    77 provinces only

## Parameter

In [145]:
import configparser
import datetime as dt
import os
import re

import numpy as np
import oracledb
import pandas as pd
# import xlrd

# Load the TDMDBPR connection settings from the INI file.
CONFIG_PATH = '../../my_config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() does not raise on a missing/unreadable file: it returns the
# list of files it managed to parse. An empty list therefore means nothing was
# loaded, which would otherwise only show up below as KeyError: 'TDMDBPR'.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Config file not found or unreadable: {CONFIG_PATH}')

TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# Run date, used as the YYYYMMDD suffix of the output file name.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')

In [146]:
# Input parameters: output location and the month range bound into the query.

op_dir = 'output'
op_file = f'BB-MKS by Province_{str_curr_dt}'
v_param = {'mth_start': 202301, 'mth_end': 202405}

# Echo the effective parameters so the run is self-describing.
print(
    "\nParameter input...\n\n"
    f"   -> op_dir: {op_dir}\n"
    f"   -> op_file: {op_file}\n"
    f"   -> mth_start: {v_param['mth_start']}\n"
    f"   -> mth_end: {v_param['mth_end']}"
)


Parameter input...

   -> op_dir: output
   -> op_file: BB-MKS by Province_20240622
   -> mth_start: 202301
   -> mth_end: 202405


## ETL Process...

### Query from DB sources to Output file
    DB -> Output file
    
    Source : CORPNSBOX.FCT_BB_SHARE_SUBS_CCAATT
             CDSAPPO.DIM_MOOC_AREA
    
    Target : Output file

In [147]:
# Connect : TDMDBPR
# Credentials are passed as keyword arguments instead of being embedded in a
# "user/password@host" string, so a password containing '@', '/' or ':' cannot
# corrupt the connect string. `dsn` now holds only host:port/service.
dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=dsn)
print(f'\n{TDMDBPR_db} : Connected\n\nProcessing...')


# Share of subscribers (SUBS) per ISP group, per month and province.
# :mth_start / :mth_end are bind variables supplied from v_param.
# Each percentage divides by NULLIF(total, 0): a province-month whose total is 0
# yields NULL instead of aborting the whole query with ORA-01476.
query = """
    /*** BB MKS by Province ***/

    WITH W_VAR (V_MONTH_START, V_MONTH_END) AS
    (
        SELECT :mth_start AS V_MONTH_START, :mth_end AS V_MONTH_END
        FROM DUAL
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_ORG AS 
    ( --7,436 row
        SELECT DISTINCT ZONE_TYPE, ORGID_G, TDS_SGMD, ORGID_R, TDS_RGM_CODE, ORGID_H, HOP_HINT, TDS_PROVINCE
            , SUBSTR(CCAATT,1,2) AS PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH, ORGID_HH, D_CLUSTER, CCAATT
        FROM CDSAPPO.DIM_MOOC_AREA NOLOCK
        WHERE REMARK <> 'Dummy'
    )
    -----------------------------------------------------------------------------------------------------------------------

    , W_BB_MKS_RAWDATA AS 
    (
        SELECT TM_KEY_MTH, ISP
            , ZONE_TYPE, ORGID_G, TDS_SGMD, ORGID_R, TDS_RGM_CODE, ORGID_H, HOP_HINT, TDS_PROVINCE, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH, ORGID_HH, D_CLUSTER, EAST_FLAG
            , SUM(BASE_SUBS) BASE_SUBS, SUM(SUBS_CHG) SUBS_CHG, SUM(SUBS) SUBS
        FROM (
            SELECT A.TM_KEY_MTH, A.ISP, A.BASE_SUBS, A.SUBS_CHG, A.SUBS
                , B.ZONE_TYPE, B.ORGID_G, B.TDS_SGMD, B.ORGID_R, B.TDS_RGM_CODE, B.ORGID_H, B.HOP_HINT, B.TDS_PROVINCE, B.PROVINCE_CD, B.PROVINCE_ENG, B.PROVINCE_TH, B.ORGID_HH, B.D_CLUSTER
                , CASE WHEN (B.ORGID_G = 'GX3' AND B.HOP_HINT NOT LIKE 'SMP%') THEN 'Y' END EAST_FLAG
            FROM CORPNSBOX.FCT_BB_SHARE_SUBS_CCAATT A
            INNER JOIN W_ORG B
                ON A.CCAATT = B.CCAATT
            WHERE A.TM_KEY_MTH BETWEEN (SELECT V_MONTH_START FROM W_VAR) AND (SELECT V_MONTH_END FROM W_VAR)
            AND ISP IN ('3BB','AIS','CAT','TOL','TOT')
        ) RAWDATA
        GROUP BY TM_KEY_MTH, ISP, ZONE_TYPE, ORGID_G, TDS_SGMD, ORGID_R, TDS_RGM_CODE, ORGID_H, HOP_HINT, TDS_PROVINCE, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH, ORGID_HH, D_CLUSTER, EAST_FLAG
    ) -->> W_BB_MKS_RAWDATA
    -----------------------------------------------------------------------------------------------------------------------

    , W_BB_MKS_AGG_1 AS 
    (
        SELECT TM_KEY_MTH, ZONE_TYPE, ORGID_G, TDS_SGMD, ORGID_R, TDS_RGM_CODE, ORGID_H, HOP_HINT, TDS_PROVINCE, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH, ORGID_HH, D_CLUSTER, EAST_FLAG
            , SUM(SUBS) AS SUBS_TOTAL
            , SUM(CASE WHEN ISP = 'TOL' THEN SUBS ELSE 0 END) AS SUBS_TOL
            , SUM(CASE WHEN ISP IN ('AIS', '3BB') THEN SUBS ELSE 0 END) AS SUBS_AIS_3BB
            , SUM(CASE WHEN ISP = '3BB' THEN SUBS ELSE 0 END) AS SUBS_3BB
            , SUM(CASE WHEN ISP = 'AIS' THEN SUBS ELSE 0 END) AS SUBS_AIS
            , SUM(CASE WHEN ISP IN ('CAT', 'TOT') THEN SUBS ELSE 0 END) AS SUBS_NT
            , SUM(CASE WHEN ISP = 'CAT' THEN SUBS ELSE 0 END) AS SUBS_CAT
            , SUM(CASE WHEN ISP = 'TOT' THEN SUBS ELSE 0 END) AS SUBS_TOT
        FROM W_BB_MKS_RAWDATA
        GROUP BY TM_KEY_MTH, ZONE_TYPE, ORGID_G, TDS_SGMD, ORGID_R, TDS_RGM_CODE, ORGID_H, HOP_HINT, TDS_PROVINCE, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH, ORGID_HH, D_CLUSTER, EAST_FLAG
    ) -->> W_BB_MKS_AGG_1
    -----------------------------------------------------------------------------------------------------------------------

    -->> Results 77 Province
    SELECT SUBSTR(TM_KEY_MTH,1,4) AS TM_KEY_YR, TM_KEY_MTH, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH
        , (SUM(SUBS_TOL) / NULLIF(SUM(SUBS_TOTAL), 0)) * 100 AS MKS_TOL
        , (SUM(SUBS_AIS_3BB) / NULLIF(SUM(SUBS_TOTAL), 0)) * 100 AS MKS_AIS_3BB
        , (SUM(SUBS_3BB) / NULLIF(SUM(SUBS_TOTAL), 0)) * 100 AS MKS_3BB
        , (SUM(SUBS_AIS) / NULLIF(SUM(SUBS_TOTAL), 0)) * 100 AS MKS_AIS
        , (SUM(SUBS_NT) / NULLIF(SUM(SUBS_TOTAL), 0)) * 100 AS MKS_NT
        , SUM(SUBS_TOL) AS SUBS_TOL, SUM(SUBS_AIS_3BB) AS SUBS_AIS_3BB, SUM(SUBS_3BB) AS SUBS_3BB, SUM(SUBS_AIS) AS SUBS_AIS, SUM(SUBS_NT) AS SUBS_NT
        , SUM(SUBS_TOTAL) AS SUBS_TOTAL
    FROM W_BB_MKS_AGG_1
    GROUP BY TM_KEY_MTH, PROVINCE_CD, PROVINCE_ENG, PROVINCE_TH
    ORDER BY TM_KEY_MTH, PROVINCE_CD
"""


try:
    # Create Dataframe
    # The cursor is opened inside the try block (and closed by its context
    # manager) so a failure while creating it can no longer leak the connection.
    with conn.cursor() as cur:
        execute_datetime = dt.datetime.now().strftime('%Y-%m-%d, %H:%M:%S')
        print(f'\n   -> Execute query... {execute_datetime}')
        # Bind by name so each value is tied to its placeholder explicitly
        # rather than to the order in which placeholders appear in the SQL.
        cur.execute(query, {'mth_start': v_param['mth_start'], 'mth_end': v_param['mth_end']})
        rows = cur.fetchall()
        df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\n   -> DataFrame : {df.shape[0]} rows, {df.shape[1]} columns')

    # # Generate CSV file
    # df.to_csv(f'{op_dir}/{op_file}.csv', index=False, encoding='TIS-620')
    # print(f'\n   -> Generate "{op_file}.csv" successfully')

    # Generate Excel file
    # Create the output directory on first use; to_excel does not create it.
    os.makedirs(op_dir, exist_ok=True)
    df.to_excel(os.path.join(op_dir, f'{op_file}.xlsx'), sheet_name='Data', index=False)
    print(f'\n   -> Generate "{op_file}.xlsx" successfully')


except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise so the notebook stops here: otherwise the following cells would
    # either fail with NameError or silently reuse a stale `df` from an earlier run.
    raise


finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')



TDMDBPR : Connected

Processing...

   -> Execute query... 2024-06-22, 17:04:57

   -> DataFrame : 1309 rows, 16 columns

   -> Generate "BB-MKS by Province_20240622.xlsx" successfully

TDMDBPR : Disconnected


## Check Results

In [151]:
# DataFrame transaction: inspect the per-province rows of a single month.

# Filter on the last month of the requested range instead of a hard-coded
# 202405, so this check keeps returning rows when v_param is changed.
rec_df = df.loc[df['TM_KEY_MTH'] == v_param['mth_end']]

rec_df#.tail(3)

Unnamed: 0,TM_KEY_YR,TM_KEY_MTH,PROVINCE_CD,PROVINCE_ENG,PROVINCE_TH,MKS_TOL,MKS_AIS_3BB,MKS_3BB,MKS_AIS,MKS_NT,SUBS_TOL,SUBS_AIS_3BB,SUBS_3BB,SUBS_AIS,SUBS_NT,SUBS_TOTAL
1232,2024,202405,10,Bangkok,กรุงเทพมหานคร,50.311076,34.531007,16.552161,17.978846,15.157917,875271,600742.2491,287960.9812,312781.2679,263705.0589,1.739718e+06
1233,2024,202405,11,Samut Prakan,สมุทรปราการ,47.465524,40.075227,16.704912,23.370315,12.459249,172538,145674.1431,60722.6438,84951.4993,45289.5835,3.635017e+05
1234,2024,202405,12,Nonthaburi,นนทบุรี,46.302225,42.383271,15.041871,27.341399,11.314505,174960,160151.6359,56837.9993,103313.6366,42753.5783,3.778652e+05
1235,2024,202405,13,Pathum Thani,ปทุมธานี,45.353359,43.620765,15.222684,28.398080,11.025876,149432,143723.3799,50156.2881,93567.0918,36328.4825,3.294839e+05
1236,2024,202405,14,Phra Nakhon Si Ayutthaya,พระนครศรีอยุธยา,29.085086,55.497694,30.745173,24.752521,15.417220,36389,69434.4030,38465.9714,30968.4316,19288.8279,1.251122e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,2024,202405,92,Trang,ตรัง,28.147596,47.739414,30.201218,17.538197,24.112990,10704,18154.3992,11484.9537,6669.4455,9169.7155,3.802811e+04
1305,2024,202405,93,Phatthalung,พัทลุง,29.075183,47.300788,29.985395,17.315393,23.624028,13564,22066.5126,13988.6272,8077.8854,11020.9563,4.665147e+04
1306,2024,202405,94,Pattani,ปัตตานี,22.645512,56.225115,29.004663,27.220451,21.129373,8258,20503.2674,10576.9525,9926.3149,7705.1188,3.646639e+04
1307,2024,202405,95,Yala,ยะลา,22.881968,55.588622,26.354158,29.234464,21.529410,7524,18278.5323,8665.7182,9612.8141,7079.2549,3.288179e+04


In [149]:
# DataFrame summary: one row per month with the number of distinct provinces,
# the summed subscriber counts and the shares recomputed from those sums.

tmp_df = (
    df.groupby(['TM_KEY_MTH'])
    .agg(
        CNT_PROVINCE=('PROVINCE_CD', 'nunique'),
        SUBS_TOL=('SUBS_TOL', 'sum'),
        SUBS_AIS_3BB=('SUBS_AIS_3BB', 'sum'),
        SUBS_3BB=('SUBS_3BB', 'sum'),
        SUBS_AIS=('SUBS_AIS', 'sum'),
        SUBS_NT=('SUBS_NT', 'sum'),
        SUBS_TOTAL=('SUBS_TOTAL', 'sum'),
    )
    .reset_index()
)

# Share column -> numerator column; every share is a percentage of SUBS_TOTAL.
share_sources = {
    '%MKS_TOL': 'SUBS_TOL',
    '%MKS_AIS_3BB': 'SUBS_AIS_3BB',
    '%MKS_3BB': 'SUBS_3BB',
    '%MKS_AIS': 'SUBS_AIS',
    '%MKS_NT': 'SUBS_NT',
}
for share_col, subs_col in share_sources.items():
    tmp_df[share_col] = tmp_df[subs_col] / tmp_df['SUBS_TOTAL'] * 100

# TOL + AIS_3BB + NT are summed as a control total (3BB and AIS are already
# contained in AIS_3BB, so they are not added again).
tmp_df['CHECK_SUM %'] = tmp_df[['%MKS_TOL', '%MKS_AIS_3BB', '%MKS_NT']].sum(axis=1)

# Everything after TM_KEY_MTH and CNT_PROVINCE is a candidate for display formatting.
mod_col_list = tmp_df.columns[2:].tolist()

# Render counts with thousands separators and shares with two decimals.
# This turns those columns into strings; 'CHECK_SUM %' matches neither prefix
# and stays numeric.
for name in mod_col_list:
    if name.startswith('SUBS_'):
        tmp_df[name] = tmp_df[name].map('{:,.0f}'.format)
    elif name.startswith('%MKS_'):
        tmp_df[name] = tmp_df[name].map('{:,.2f}'.format)

tmp_df

Unnamed: 0,TM_KEY_MTH,CNT_PROVINCE,SUBS_TOL,SUBS_AIS_3BB,SUBS_3BB,SUBS_AIS,SUBS_NT,SUBS_TOTAL,%MKS_TOL,%MKS_AIS_3BB,%MKS_3BB,%MKS_AIS,%MKS_NT,CHECK_SUM %
0,202301,77,3156430,3878505,2365715,1512790,1409600,8444535,37.38,45.93,28.01,17.91,16.69,100.0
1,202302,77,3145220,3874882,2345964,1528918,1409515,8429617,37.31,45.97,27.83,18.14,16.72,100.0
2,202303,77,3099695,3870039,2325512,1544528,1410045,8379779,36.99,46.18,27.75,18.43,16.83,100.0
3,202304,77,3039333,3864993,2305321,1559671,1410279,8314605,36.55,46.48,27.73,18.76,16.96,100.0
4,202305,77,3057444,3873752,2295508,1578244,1411386,8342582,36.65,46.43,27.52,18.92,16.92,100.0
5,202306,77,3054270,3883755,2285609,1598146,1412430,8350456,36.58,46.51,27.37,19.14,16.91,100.0
6,202307,77,3048074,3896532,2275704,1620827,1413472,8358078,36.47,46.62,27.23,19.39,16.91,100.0
7,202308,77,3048406,3900542,2260877,1639665,1414550,8363498,36.45,46.64,27.03,19.61,16.91,100.0
8,202309,77,3046115,3902444,2246082,1656362,1415587,8364146,36.42,46.66,26.85,19.8,16.92,100.0
9,202310,77,3045734,3904385,2236231,1668154,1416622,8366741,36.4,46.67,26.73,19.94,16.93,100.0
