## Reconcile

In [307]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import configparser
from matplotlib import pyplot as plt
from matplotlib.gridspec import GridSpec
from functools import reduce
import datetime as dt
import re

# Plot defaults: turn off the top and right axes spines for every figure.
mpl.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
})

In [294]:
# --- Config ---

# Project settings read from the ini file next to the notebook folder.
config = configparser.ConfigParser()
config.read('../my_config.ini')

# Today's date (YYYYMMDD) stamps the output file name.
curr_dt = dt.datetime.now().date()
str_curr_dt = curr_dt.strftime('%Y%m%d')
op_file_name = f'Agg_Perf_Nationwide-{str_curr_dt}'

# The input is pinned to one fixed extract rather than to today's date;
# change SNAPSHOT_DT to point the notebook at a different raw file.
SNAPSHOT_DT = '20240605'
data_src = f"../data/raw/Agg_Perf_Nationwide-{SNAPSHOT_DT}.csv"

In [355]:
# --- Raw data ---

# data_src = f"../data/raw/{op_file_name}.csv"

# Columns kept from the raw extract, in the order they should appear.
cols = [
    'TM_KEY_DAY', 'TM_KEY_WK', 'TM_KEY_MTH', 'TM_KEY_QTR', 'TM_KEY_YR',
    'CENTER', 'METRIC_GRP', 'PRODUCT_GRP', 'COMP_CD', 'METRIC_CD', 'METRIC_NAME',
    'ACTUAL_AS_OF', 'AGG_TYPE', 'RR_IND', 'GRY_IND', 'UOM', 'PERIOD',
    'ACTUAL_SNAP', 'ACTUAL_AGG', 'TARGET_SNAP', 'TARGET_AGG', 'BASELINE_SNAP', 'BASELINE_AGG',
    'PPN_TM',
]

df = pd.read_csv(data_src, low_memory=False)[cols]

# --- Filters ---
df = df.loc[df['TM_KEY_YR'] == 2024] # YEAR filter
# df = df[df['TM_KEY_MTH'] >= 202401] # MONTH filter

# Last expression of the cell: preview the final rows.
df.tail()

# ''' Fix Metric CD list '''

# ''' Fix Agg Column Name(Total Revenue) '''
# bg_col_list = ['POST_DTAC', 'POST_TMH', 'PRE_DTAC', 'PRE_TMH', 'TOL', 'TVS']

Unnamed: 0,TM_KEY_DAY,TM_KEY_WK,TM_KEY_MTH,TM_KEY_QTR,TM_KEY_YR,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,...,GRY_IND,UOM,PERIOD,ACTUAL_SNAP,ACTUAL_AGG,TARGET_SNAP,TARGET_AGG,BASELINE_SNAP,BASELINE_AGG,PPN_TM
554120,20240604,2024023,202406,20242,2024,Revenue,Sales,TOL,TRUE,TB3R000601AE,...,GYR,baht,M,,,,,,,2024-06-05 10:18:16
554121,20240604,2024023,202406,20242,2024,Revenue,Sales,Prepaid,ALL,B1R000900AF,...,GYR,baht,M,,34253.5896,,,,,2024-06-05 10:18:16
554122,20240604,2024023,202406,20242,2024,Revenue,Sales,TOL,TRUE,TB3R000601AF,...,GYR,baht,M,,,,,,,2024-06-05 10:18:16
554123,20240604,2024023,202406,20242,2024,Revenue,Sales,TOL,TRUE,TB3R000601AG,...,GYR,baht,M,,,,,,,2024-06-05 10:18:16
554124,20240604,2024023,202406,20242,2024,Revenue,Sales,TOL,TRUE,TB3R000601AH,...,GYR,baht,M,,,,,,,2024-06-05 10:18:16


In [356]:
# Report the size of the filtered frame.
print('\ndf : {} rows, {} columns'.format(*df.shape))


df : 215872 rows, 24 columns


In [359]:
# df_tmp = df[['METRIC_GRP', 'METRIC_CD', 'METRIC_NAME']].drop_duplicates().sort_values(['METRIC_GRP', 'METRIC_CD'])
# df_tmp

# Distinct metric groups present in the data, sorted by name.
df.loc[:, ['METRIC_GRP']].drop_duplicates().sort_values(by=['METRIC_GRP'])

Unnamed: 0,METRIC_GRP
338366,Accounts
339202,Digital Growth Champion
338274,Market Share
338282,Retention & Churn
338266,Revenue
338253,Sales
338259,Subs
338263,


In [370]:
''' Function '''

def view_group(grp, cd, name):
    """Return the view-group label for one metric.

    Rules are evaluated in a fixed order and the first match wins:

    1. Suffix patterns on the metric code ``cd``.
    2. Substring rules on ``name`` that apply only to the given ``grp``.
    3. Group-independent substring rules on ``name``.
    4. ``'Unknown'`` when nothing matched.

    Args:
        grp: Metric group, compared against literal group names.
        cd: Metric code, matched against regex suffix patterns.
        name: Metric name, searched for literal substrings.

    Returns:
        The label string of the first matching rule.
    """
    # 1) Code-suffix rules outrank every name-based rule.
    #    Codes ending in A followed by A..K are excluded from CORP & MCOM.
    if re.search('C$|H$|MCOM$', cd) and not re.search('A[A-K]$', cd):
        return 'CORP & MCOM'
    if re.search('CUS$', cd):
        return 'Cust Location'

    # 2) Per-group rules: (substrings to look for, label). List order is the
    #    precedence within a group, e.g. '%NAD' must be tested before 'NAD'.
    per_group = {
        'Revenue': (
            (('New Revenue', 'Existing Revenue'), 'New/Existing'),
            (('Paid Amount', 'On Due', 'Overdue'), 'Paid Amount'),
            (('Revenue',), 'Revenue'),
        ),
        'Sales': (
            (('Inflow M1',), 'Inflow M1'),
            (('Inflow M2',), 'Inflow M2'),
            (('Gross Add',), 'Gross Adds'),
            (('%AP',), '%AP'),
            (('AP 1D',), 'AP 1D'),
            (('AP In Month',), 'AP MTH'),
            (('Activation Subs',), 'Activation'),
            (('Conversion',), '%Conversion'),
            (('GA ARPU', 'GA RC'), 'GA ARPU/RC'),
        ),
        'Subs': (
            (('Net Add',), 'Net Adds'),
            (('%NAD',), '%NAD'),
            (('%M4',), '%M4'),
            (('Reported Sub',), 'Reported Subs'),
            (('Usage Subs', 'Active Caller', 'Active Subs'), 'Active Subs'),
            (('NAD',), 'NAD'),
            (('Revenue Subs',), 'Rev Subs'),
        ),
        'Retention & Churn': (
            (('Churn Subs',), 'Churn Subs'),
            (('Churn Rate',), '%Churn Rate'),
        ),
    }
    for needles, label in per_group.get(grp, ()):
        for needle in needles:
            if needle in name:
                return label

    # Market Share always resolves to one of three labels.
    if grp == 'Market Share':
        if 'Broadband' in name:
            return '%BB MKS'
        if '(Subs)' in name:
            return 'MB MKS(Subs)'
        return '%MB MKS'

    # 3) Group-independent rules; here the label equals the matched substring.
    for keyword in ('ARPU', 'SubBase', 'New Subs', 'Silent', '60DPD', 'Quality'):
        if keyword in name:
            return keyword

    # 4) Nothing matched.
    return 'Unknown'

In [371]:
# Distinct (METRIC_GRP, VIEW_GRP) pairs.
# The labels are derived here from the unique metric keys instead of being read
# from df['VIEW_GRP'], so this cell does not depend on a column that only a cell
# further down the notebook creates. df itself is not modified.
metric_keys = df[['METRIC_GRP', 'METRIC_CD', 'METRIC_NAME']].drop_duplicates()
metric_keys = metric_keys.assign(
    VIEW_GRP=[
        view_group(str(grp), str(cd), str(name))
        for grp, cd, name in zip(
            metric_keys['METRIC_GRP'], metric_keys['METRIC_CD'], metric_keys['METRIC_NAME']
        )
    ]
)
metric_keys[['METRIC_GRP', 'VIEW_GRP']].drop_duplicates().sort_values(['METRIC_GRP', 'VIEW_GRP'])

Unnamed: 0,METRIC_GRP,VIEW_GRP
338366,Accounts,60DPD
338550,Accounts,SubBase
339202,Digital Growth Champion,Unknown
338340,Market Share,%BB MKS
338274,Market Share,%MB MKS
338283,Market Share,MB MKS(Subs)
338282,Retention & Churn,%Churn Rate
338285,Retention & Churn,Churn Subs
338291,Retention & Churn,Unknown
338413,Revenue,ARPU


In [374]:
# df_tmp['FLAG'] = df_tmp['METRIC_GRP'].apply(lambda x: group_flag(str(x)))

# Label every row with its view group.
# A comprehension over the three key columns replaces the row-wise
# df.apply(..., axis=1): it avoids materialising a Series per row, and it still
# produces a (zero-length) column when df is empty, where apply(axis=1) can hand
# back a DataFrame that cannot be assigned to a single column.
# Values are passed through str() so missing values reach view_group as 'nan'.
df['VIEW_GRP'] = [
    view_group(str(grp), str(cd), str(name))
    for grp, cd, name in zip(df['METRIC_GRP'], df['METRIC_CD'], df['METRIC_NAME'])
]

# Alternative views kept for ad-hoc inspection:
# df.sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])

# df[df['METRIC_GRP']=='Revenue'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])
# df[df['METRIC_GRP']=='Market Share'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])
# df[df['METRIC_GRP']=='Retention & Churn'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])

# df[df['VIEW_GRP']=='Revenue'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])
# df[df['VIEW_GRP']=='Unknown'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])
# df[df['VIEW_GRP']!='CORP & MCOM'].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])

# Displayed result: every row except the CORP & MCOM view group.
df[~df['VIEW_GRP'].isin(['CORP & MCOM'])].sort_values(['VIEW_GRP', 'METRIC_GRP', 'METRIC_CD'])


Unnamed: 0,TM_KEY_DAY,TM_KEY_WK,TM_KEY_MTH,TM_KEY_QTR,TM_KEY_YR,CENTER,METRIC_GRP,PRODUCT_GRP,COMP_CD,METRIC_CD,...,PERIOD,ACTUAL_SNAP,ACTUAL_AGG,TARGET_SNAP,TARGET_AGG,BASELINE_SNAP,BASELINE_AGG,PPN_TM,FLAG,VIEW_GRP
338760,20240101,2024001,202401,20241,2024,Revenue,Sales,Prepaid,ALL,B1S001000,...,M,,,,,,,2024-06-05 10:18:16,%AP,%AP
340127,20240102,2024001,202401,20241,2024,Revenue,Sales,Prepaid,ALL,B1S001000,...,M,,,,,,,2024-06-05 10:18:16,%AP,%AP
341494,20240103,2024001,202401,20241,2024,Revenue,Sales,Prepaid,ALL,B1S001000,...,M,,,,,,,2024-06-05 10:18:16,%AP,%AP
342861,20240104,2024001,202401,20241,2024,Revenue,Sales,Prepaid,ALL,B1S001000,...,M,,,,,,,2024-06-05 10:18:16,%AP,%AP
344229,20240105,2024001,202401,20241,2024,Revenue,Sales,Prepaid,ALL,B1S001000,...,M,,,,,,,2024-06-05 10:18:16,%AP,%AP
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548427,20240531,2024022,202405,20242,2024,Revenue,,,TRUE,VIN00042,...,M,,3.888672,,4.0,,,2024-06-05 10:31:25,Unknown,Unknown
549813,20240601,2024022,202406,20242,2024,Revenue,,,TRUE,VIN00042,...,M,,,,4.0,,,2024-06-05 10:31:25,Unknown,Unknown
551200,20240602,2024022,202406,20242,2024,Revenue,,,TRUE,VIN00042,...,M,,,,4.0,,,2024-06-05 10:31:25,Unknown,Unknown
552585,20240603,2024023,202406,20242,2024,Revenue,,,TRUE,VIN00042,...,M,,,,4.0,,,2024-06-05 10:31:25,Unknown,Unknown
