# * Mobile Market Share

## Parameter

In [1]:
import os
import glob
import configparser
import oracledb
import pandas as pd
import datetime as dt

# Path of the INI file holding the database credentials (relative to the notebook's working dir).
CONFIG_PATH = '../../my_config.ini'

config = configparser.ConfigParser()
# ConfigParser.read() does not raise for a missing/unreadable file: it just returns the list of
# files it managed to parse. Check it so the failure is reported here, with the real cause,
# instead of as a KeyError on the first section lookup below.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(f'Config file not found or unreadable: {CONFIG_PATH}')

# Connection settings for the TDMDBPR database (all values are kept as strings, as read).
TDMDBPR_user = config['TDMDBPR']['username']
TDMDBPR_pwd = config['TDMDBPR']['password']
TDMDBPR_db = config['TDMDBPR']['db']
TDMDBPR_host = config['TDMDBPR']['host']
TDMDBPR_port = config['TDMDBPR']['port']

# AKPIPRD settings are disabled; none of the visible cells connect to that database.
# AKPIPRD_user = config['AKPIPRD']['username']
# AKPIPRD_pwd = config['AKPIPRD']['password']
# AKPIPRD_db = config['AKPIPRD']['db']
# AKPIPRD_host = config['AKPIPRD']['host']
# AKPIPRD_port = config['AKPIPRD']['port']

# Run date, both as a date object and as a compact YYYYMMDD string.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
curr_dt

datetime.date(2024, 7, 11)

## Import data

### TDMDBPR : Source >>>

In [3]:
# TDMDBPR : Source
# Builds `src_df`: one summary row per (COMP_CD, METRIC_CD, METRIC_NAME) for the 'VIN%' metrics
# in GEOSPCAPPO.FCT_KPI_NEWCO_DIAMOND_ACTUAL, restricted to TM_KEY_DAY values starting with '2024'.

# Pass the credentials as separate arguments instead of embedding them in the connect string:
# a password containing '/' or '@' would otherwise be mis-parsed as part of the DSN.
dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=dsn)
print(f'{TDMDBPR_db} : Connected')

# Only used for the progress message below; the query itself has no bind variables.
v_date = (curr_dt, )

query = """
    -->> Check list KPIs(MKS)
    SELECT COMP_CD, METRIC_CD, METRIC_NAME
        , COUNT(DISTINCT TM_KEY_DAY) CNT_DAY
        , COUNT(DISTINCT AREA_TYPE) CNT_AREA_TYPE
        , COUNT(DISTINCT AREA_CD) CNT_AREA_CD
        , COUNT(DISTINCT AREA_DESC) CNT_AREA_DESC
        , MIN(TM_KEY_DAY) FIRST_DAY, MAX(TM_KEY_DAY) LAST_DAY, MAX(LOAD_DATE) LAST_LOAD_DATE
    FROM GEOSPCAPPO.FCT_KPI_NEWCO_DIAMOND_ACTUAL
    WHERE METRIC_CD LIKE 'VIN%'
    AND TM_KEY_DAY LIKE '2024%'
    GROUP BY COMP_CD, METRIC_CD, METRIC_NAME
    ORDER BY METRIC_CD
"""

try:
    # The cursor is opened inside the try so the connection is still closed if cursor() fails;
    # the context manager closes the cursor on exit.
    with conn.cursor() as cur:
        print(f'\nExecute query as of {v_date[0]}...')
        cur.execute(query)
        rows = cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        src_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\ndf : {src_df.shape[0]} rows, {src_df.shape[1]} columns\nDone !!!')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise: otherwise `src_df` is left undefined (or stale from an earlier run) and the
    # following cells would silently work on the wrong data.
    raise

finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')

TDMDBPR : Connected

Execute query as of 2024-07-11...

df : 20 rows, 10 columns
Done !!!

TDMDBPR : Disconnected


In [4]:
# Preview the last five rows of the source summary.
src_df.tail(5)

Unnamed: 0,COMP_CD,METRIC_CD,METRIC_NAME,CNT_DAY,CNT_AREA_TYPE,CNT_AREA_CD,CNT_AREA_DESC,FIRST_DAY,LAST_DAY,LAST_LOAD_DATE
15,DTAC,VIN00062,Prepaid Subs Share : DTAC (Subs),180,3,74,74,20240101,20240708,2024-07-10 20:23:09
16,ALL,VIN00063,Postpaid Subs Share : TMH & DTAC (Subs),180,3,74,74,20240101,20240708,2024-07-10 20:23:09
17,ALL,VIN00064,Postpaid Subs Share : AIS (Subs),180,3,74,74,20240101,20240708,2024-07-10 20:23:09
18,TRUE,VIN00065,Postpaid Subs Share : TMH (Subs),180,3,74,74,20240101,20240708,2024-07-10 20:23:09
19,DTAC,VIN00066,Postpaid Subs Share : DTAC (Subs),180,3,74,74,20240101,20240708,2024-07-10 20:23:09


### TDMDBPR : Staging >>>

In [5]:
# TDMDBPR : Staging
# Builds `stg_df`: one summary row per (COMP_CD, METRIC_CD, METRIC_NAME) in
# GEOSPCAPPO.FCT_KPI_NEWCO_MKS_ACTUAL, restricted to TM_KEY_DAY values starting with '2024'.

# Pass the credentials as separate arguments instead of embedding them in the connect string:
# a password containing '/' or '@' would otherwise be mis-parsed as part of the DSN.
dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=dsn)
print(f'{TDMDBPR_db} : Connected')

# Only used for the progress message below; the query itself has no bind variables.
v_date = (curr_dt, )

query = """
    -->> Check list KPIs
    SELECT COMP_CD, METRIC_CD, METRIC_NAME
        , COUNT(DISTINCT TM_KEY_DAY) CNT_DAY
        , COUNT(DISTINCT AREA_TYPE) CNT_AREA_TYPE
        , COUNT(DISTINCT AREA_CD) CNT_AREA_CD
        , MIN(TM_KEY_DAY) FIRST_DAY
        , MAX(TM_KEY_DAY) LAST_DAY
        , MAX(LOAD_DATE) LAST_LOAD_DATE
    FROM GEOSPCAPPO.FCT_KPI_NEWCO_MKS_ACTUAL
    WHERE TM_KEY_DAY LIKE '2024%'
    GROUP BY COMP_CD, METRIC_CD, METRIC_NAME
    ORDER BY METRIC_CD
"""

try:
    # The cursor is opened inside the try so the connection is still closed if cursor() fails;
    # the context manager closes the cursor on exit.
    with conn.cursor() as cur:
        print(f'\nExecute query as of {v_date[0]}...')
        cur.execute(query)
        rows = cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        stg_df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\ndf : {stg_df.shape[0]} rows, {stg_df.shape[1]} columns\nDone !!!')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise: otherwise `stg_df` is left undefined (or stale from an earlier run) and the
    # following cells would silently work on the wrong data.
    raise

finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')

TDMDBPR : Connected

Execute query as of 2024-07-11...

df : 24 rows, 9 columns
Done !!!

TDMDBPR : Disconnected


In [6]:
# Preview the last five rows of the staging summary.
stg_df.tail(5)

Unnamed: 0,COMP_CD,METRIC_CD,METRIC_NAME,CNT_DAY,CNT_AREA_TYPE,CNT_AREA_CD,FIRST_DAY,LAST_DAY,LAST_LOAD_DATE
19,DTAC,VIN00062,Prepaid Subs Share : DTAC (Subs),180,1,3,20240101,20240708,2024-07-10 20:23:29
20,ALL,VIN00063,Postpaid Subs Share : TMH & DTAC (Subs),180,1,3,20240101,20240708,2024-07-10 20:23:29
21,ALL,VIN00064,Postpaid Subs Share : AIS (Subs),180,1,3,20240101,20240708,2024-07-10 20:23:29
22,TRUE,VIN00065,Postpaid Subs Share : TMH (Subs),180,1,3,20240101,20240708,2024-07-10 20:23:29
23,DTAC,VIN00066,Postpaid Subs Share : DTAC (Subs),180,1,3,20240101,20240708,2024-07-10 20:23:29


### VINSIGHT >>>

In [2]:
# VINSIGHT
# Builds `df`: rows from GEOSPCAPPO.AGG_PERF_NEWCO for the listed market-share metrics, with
# TM_KEY_MTH >= 202401, TM_KEY_DAY ending in '01' and area types P / G / Z, plus a derived
# AREA_NO column that is used for ordering the areas.

# Pass the credentials as separate arguments instead of embedding them in the connect string:
# a password containing '/' or '@' would otherwise be mis-parsed as part of the DSN.
dsn = f'{TDMDBPR_host}:{TDMDBPR_port}/{TDMDBPR_db}'
conn = oracledb.connect(user=TDMDBPR_user, password=TDMDBPR_pwd, dsn=dsn)
print(f'{TDMDBPR_db} : Connected')

# Only used for the progress message below; the query itself has no bind variables.
v_date = (curr_dt, )

# NOTE(review): NOLOCK is a SQL Server table hint; Oracle appears to parse it here as a plain
# table alias, so it has no locking effect - consider removing it from the FROM clause.
query = """
    SELECT *
    FROM (
        SELECT TM_KEY_DAY, METRIC_CD, METRIC_NAME 
            , CASE 	WHEN AREA_TYPE = 'P' THEN 1
                    WHEN AREA_TYPE = 'Z' THEN (CASE WHEN AREA_CD IN ('BMA', 'UPC') THEN 2 WHEN AREA_CD = 'EAST' THEN 3 ELSE 4 END)
                    WHEN AREA_TYPE = 'G' THEN 5 
                    WHEN AREA_TYPE = 'H' THEN 6 
                    WHEN AREA_TYPE = 'HH' THEN 7
                    ELSE 0 END AREA_NO
            , AREA_TYPE, AREA_CD, AREA_NAME, ACTUAL_SNAP, ACTUAL_AGG, PPN_TM  
        FROM GEOSPCAPPO.AGG_PERF_NEWCO NOLOCK
        WHERE METRIC_CD IN ('VIN00019', 'VIN00020', 'VIN00021', 'VIN00022', 'VIN00023', 'VIN00024', 'VIN00011', 'VIN00012', 'VIN00013', 'VIN00014', 'VIN00048', 'VIN00049', 'VIN00050', 'VIN00051', 'VIN00037', 'VIN00038', 'VIN00039', 'VIN00040', 'VIN00063', 'VIN00064', 'VIN00065', 'VIN00066', 'VIN00029', 'VIN00030', 'VIN00031', 'VIN00032', 'VIN00059', 'VIN00060', 'VIN00061', 'VIN00062')
        AND METRIC_NAME NOT LIKE 'Broadband%'
        AND TM_KEY_MTH >= 202401
        AND TM_KEY_DAY LIKE '%01'
        AND AREA_TYPE IN ('P', 'G', 'Z')
    )TMP
    ORDER BY TM_KEY_DAY, METRIC_CD, AREA_NO, AREA_CD 
"""

try:
    # The cursor is opened inside the try so the connection is still closed if cursor() fails;
    # the context manager closes the cursor on exit.
    with conn.cursor() as cur:
        print(f'\nExecute query as of {v_date[0]}...')
        cur.execute(query)
        rows = cur.fetchall()
        # Column names come from the cursor metadata (first item of each description entry).
        df = pd.DataFrame.from_records(rows, columns=[x[0] for x in cur.description])
    print(f'\ndf : {df.shape[0]} rows, {df.shape[1]} columns\nDone !!!')

except oracledb.DatabaseError as e:
    print(f'Error with Oracle : {e}')
    # Re-raise: otherwise `df` is left undefined (or stale from an earlier run) and the
    # following cells would silently work on the wrong data.
    raise

finally:
    conn.close()
    print(f'\n{TDMDBPR_db} : Disconnected')

TDMDBPR : Connected

Execute query as of 2024-07-11...


In [12]:
# Preview the last five rows of the VINSIGHT extract.
df.tail(5)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
2299,20240601,VIN00066,Postpaid Subs Share (Subs) : DTAC,5,G,GX4,Regional Management 4 (North),462998.6755,462998.6755,2024-06-06 10:34:07
2300,20240601,VIN00066,Postpaid Subs Share (Subs) : DTAC,5,G,GX5,Regional Management 5 (Northeast 1),382065.5079,382065.5079,2024-06-06 10:34:07
2301,20240601,VIN00066,Postpaid Subs Share (Subs) : DTAC,5,G,GX6,Regional Management 6 (Northeast 2),361287.6506,361287.6506,2024-06-06 10:34:07
2302,20240601,VIN00066,Postpaid Subs Share (Subs) : DTAC,5,G,GX7,"Regional Management 7 (Central, West)",529949.1215,529949.1215,2024-06-06 10:34:07
2303,20240601,VIN00066,Postpaid Subs Share (Subs) : DTAC,5,G,GX8,Regional Management 8 (South),513827.3014,513827.3014,2024-06-06 10:34:07


## Reconcile

In [15]:
# TDMDBPR : Source
# Display the per-metric summary loaded from the source table.

# NOTE(review): the two disabled filters below reference TM_KEY_DAY / AREA_NO / AREA_CD, which are
# not columns of the aggregated `src_df` built by the Source import query (it only has COMP_CD,
# METRIC_CD, METRIC_NAME, the CNT_* counts, FIRST_DAY, LAST_DAY and LAST_LOAD_DATE). They would
# need a row-level extract to work.
# src_df[src_df['TM_KEY_DAY']==20240601].reset_index(drop=True)
# src_df[(src_df['TM_KEY_DAY']==20240501) & (src_df['METRIC_CD']=='VIN00020')].sort_values(['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD']).reset_index(drop=True)

src_df

Unnamed: 0,COMP_CD,METRIC_CD,METRIC_NAME,CNT_DAY,CNT_AREA_TYPE,CNT_AREA_CD,CNT_AREA_DESC,FIRST_DAY,LAST_DAY,LAST_LOAD_DATE
0,ALL,VIN00011,Mobile Subs Share : TMH & DTAC,153,3,74,74,20240101,20240603,2024-06-05 20:08:22
1,ALL,VIN00012,Mobile Subs Share : AIS,153,3,74,74,20240101,20240603,2024-06-05 20:08:22
2,TRUE,VIN00013,Mobile Subs Share : TMH,153,3,74,74,20240101,20240603,2024-06-05 20:08:22
3,DTAC,VIN00014,Mobile Subs Share : DTAC,153,3,74,74,20240101,20240603,2024-06-05 20:08:22
4,ALL,VIN00029,Prepaid Subs Share : TMH & DTAC,140,3,74,74,20240101,20240603,2024-06-05 20:08:22
5,ALL,VIN00030,Prepaid Subs Share : AIS,140,3,74,74,20240101,20240603,2024-06-05 20:08:22
6,TRUE,VIN00031,Prepaid Subs Share : TMH,141,3,74,74,20240101,20240603,2024-06-05 20:08:22
7,DTAC,VIN00032,Prepaid Subs Share : DTAC,140,3,72,72,20240101,20240603,2024-06-05 20:08:22
8,ALL,VIN00037,Postpaid Subs Share : TMH & DTAC,140,3,74,74,20240101,20240603,2024-06-05 20:08:22
9,ALL,VIN00038,Postpaid Subs Share : AIS,140,3,74,74,20240101,20240603,2024-06-05 20:08:22


In [29]:
# TDMDBPR : Staging
# Display the per-metric summary loaded from the staging table.

# NOTE(review): the first two disabled filters below reference TM_KEY_DAY / AREA_NO / AREA_CD,
# which are not columns of the aggregated `stg_df` built by the Staging import query (it only has
# COMP_CD, METRIC_CD, METRIC_NAME, the CNT_* counts, FIRST_DAY, LAST_DAY and LAST_LOAD_DATE).
# stg_df[stg_df['TM_KEY_DAY']==20240601].reset_index(drop=True)
# stg_df[(stg_df['TM_KEY_DAY']==20240501) & (stg_df['METRIC_CD']=='VIN00020')].sort_values(['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD']).reset_index(drop=True)

# Single-metric view (METRIC_CD is a column of `stg_df`, so this one is valid if re-enabled).
# stg_df[stg_df['METRIC_CD']=='VIN00011'].reset_index(drop=True)
stg_df

Unnamed: 0,COMP_CD,METRIC_CD,METRIC_NAME,CNT_DAY,CNT_AREA_TYPE,CNT_AREA_CD,FIRST_DAY,LAST_DAY,LAST_LOAD_DATE
0,ALL,VIN00011,Mobile Subs Share : TMH & DTAC,145,1,7,20240101,20240603,2024-06-05 20:10:10
1,ALL,VIN00012,Mobile Subs Share : AIS,145,1,3,20240101,20240603,2024-06-05 20:10:10
2,TRUE,VIN00013,Mobile Subs Share : TMH,145,1,3,20240101,20240603,2024-06-05 20:10:10
3,DTAC,VIN00014,Mobile Subs Share : DTAC,145,1,3,20240101,20240603,2024-06-05 20:10:10
4,ALL,VIN00029,Prepaid Subs Share : TMH & DTAC,145,1,3,20240101,20240603,2024-06-05 20:10:10
5,ALL,VIN00030,Prepaid Subs Share : AIS,145,1,3,20240101,20240603,2024-06-05 20:10:10
6,TRUE,VIN00031,Prepaid Subs Share : TMH,145,1,3,20240101,20240603,2024-06-05 20:10:10
7,DTAC,VIN00032,Prepaid Subs Share : DTAC,145,1,3,20240101,20240603,2024-06-05 20:10:10
8,ALL,VIN00037,Postpaid Subs Share : TMH & DTAC,145,1,3,20240101,20240603,2024-06-05 20:10:10
9,ALL,VIN00038,Postpaid Subs Share : AIS,145,1,3,20240101,20240603,2024-06-05 20:10:10


In [26]:
# VINSIGHT
# All areas for metric VIN00011 on the 20240501 snapshot, ordered by area rank then area code.

# Alternative views kept for ad-hoc checks:
# df[['METRIC_CD', 'METRIC_NAME']].drop_duplicates().reset_index(drop=True).sort_values('METRIC_CD')
# df[df['TM_KEY_DAY']==20240501].reset_index(drop=True)
(
    df.loc[lambda d: d['TM_KEY_DAY'].eq(20240501) & d['METRIC_CD'].eq('VIN00011')]
    .sort_values(['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD'])
    .reset_index(drop=True)
)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
0,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,53.98,53.98,2024-06-06 10:34:07
1,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,2,Z,BMA,BMA,61.996517,61.996517,2024-06-06 10:34:07
2,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,2,Z,UPC,UPC,49.801704,49.801704,2024-06-06 10:34:07
3,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,3,Z,EAST,EAST,52.632787,52.632787,2024-06-06 10:34:07
4,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,4,Z,10,Bangkok,59.743679,59.743679,2024-06-06 10:34:07
5,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,4,Z,11,Samut Prakan,56.138719,56.138719,2024-06-06 10:34:07
6,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,4,Z,12,Nonthaburi,58.963031,58.963031,2024-06-06 10:34:07
7,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,4,Z,13,Pathum Thani,59.018713,59.018713,2024-06-06 10:34:07
8,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,5,G,GX1,Regional Management 1 (BMA : West),63.4,63.4,2024-06-06 10:34:07
9,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,5,G,GX2,Regional Management 2 (BMA : East),62.22,62.22,2024-06-06 10:34:07


In [30]:
# Time series of metric VIN00011 for area code 'P' (one row per snapshot day).
(
    df.loc[lambda d: d['METRIC_CD'].eq('VIN00011') & d['AREA_CD'].eq('P')]
    .sort_values(['TM_KEY_DAY', 'METRIC_CD', 'AREA_NO', 'AREA_CD'])
    .reset_index(drop=True)
)

Unnamed: 0,TM_KEY_DAY,METRIC_CD,METRIC_NAME,AREA_NO,AREA_TYPE,AREA_CD,AREA_NAME,ACTUAL_SNAP,ACTUAL_AGG,PPN_TM
0,20240101,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,54.22,54.22,2024-06-06 10:34:07
1,20240201,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,54.06,54.06,2024-06-06 10:34:07
2,20240301,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,54.08,54.08,2024-06-06 10:34:07
3,20240401,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,54.03,54.03,2024-06-06 10:34:07
4,20240501,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,53.98,53.98,2024-06-06 10:34:07
5,20240601,VIN00011,Mobile Subs Share : TMH & DTAC,1,P,P,Nationwide,54.07,54.07,2024-06-06 10:34:07


## Generate Output file

In [8]:
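# NOTE: export is disabled. `op_raw_file` is not defined in any visible cell; set the output
# path before re-enabling the lines below.
# df.to_csv(f'{op_raw_file}', index=False, encoding='utf-8')

# print(f'\n"{op_raw_file}" is generated')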