# * Mobile Market Share from Meta

In [2]:
import os
import sys
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import re

curr_dt = dt.datetime.now().date()
next_week_dt = curr_dt + dt.timedelta(days=7)
str_curr_dt = curr_dt.strftime('%Y%m%d')
str_next_week_dt = next_week_dt.strftime('%Y%m%d')
yyyymm_curr = curr_dt.strftime('%Y%m')
yyyymm_next_week = next_week_dt.strftime('%Y%m')

### Sample data source

In [11]:
src_file_gender = 'data/All OS/MB MKS(All)_2025-12-28.csv'
df_gender = pd.read_csv(src_file_gender, skiprows=2)
df_gender.head(3)

Unnamed: 0,Country,TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,Thailand,31.55,46.91,21.53,0.01,66600000,66600000,66600000,66600000,21800000,32400000,14900000,4100


In [12]:
src_file_gender = 'data/All OS/MB MKS(Gender)_2025-12-28.csv'
df_gender = pd.read_csv(src_file_gender, skiprows=2)
df_gender.head(3)

Unnamed: 0,Country & Gender,TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,"Thailand, Female",31.29,47.5,21.21,0.01,33600000,33600000,33600000,33600000,10900000,16600000,7420000,2000
1,"Thailand, Male",31.82,46.3,21.87,0.01,32700000,32700000,32700000,32700000,10800000,15700000,7410000,2000


### Step 1 : Import Data Source

In [7]:
# ''' Source Files '''

# # 1. Specify the path to your CSV files
# src_path = './data' 

# # 2. Use glob to match all csv files in that folder
# all_files = glob.glob(os.path.join(src_path, "*.csv"))
# all_files

# # 3. Use a list comprehension to read each file and concat them
# # df = pd.concat((pd.read_csv(f, skiprows=2) for f in all_files), ignore_index=True)

# # 3. Use a list comprehension to read each file and concat them
# df = []

# for f in all_files:

#     # Read the file starting at row 3 (index 2)
#     df_temp = pd.read_csv(f, skiprows=2)

#     # Rename by referencing the first column index (0)
#     df_temp = df_temp.rename(columns={df_temp.columns[0]: 'Areas & Category'})

#     # Optional: add a column to track which file the data came from
#     df_temp['Source'] = os.path.basename(f)
    
#     # Append to our list
#     df.append(df_temp)

# # 4. Combine everything into one big DataFrame
# all_src_df = pd.concat(df, ignore_index=True)

# print(f"Successfully imported {len(all_files)} files.")

In [42]:
''' Source Files '''

# The recursive=True flag allows ** to match all sub-folders
src_path = './data/**/*.csv'
all_files = glob.glob(src_path, recursive=True)

# Use a list comprehension to read each file and concat them
df = []

for f in all_files:

    # Read the file starting at row 3 (index 2)
    df_temp = pd.read_csv(f, skiprows=2)

    # Rename by referencing the first column index (0)
    # df_temp = df_temp.rename(columns={df_temp.columns[0]: 'Areas & Category'})
    df_temp.columns = ['Areas & Category'
                       , 'TMH-MB MKS', 'AIS-MB MKS', 'DTAC-MB MKS', 'TOT-MB MKS'
                       , 'TMH-MB Market Size', 'AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'
                       , 'TMH-MB Total Customer', 'AIS-MB Total Customer', 'DTAC-MB Total Customer', 'TOT-MB Total Customer']
                       #, 'Source']
    df_temp.drop(['AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'], axis=1, inplace=True)
    df_temp.rename(columns={'TMH-MB Market Size': 'Total Market Size'}, inplace=True)

    # Goal	os.path method	pathlib method
    # Get Directory	os.path.dirname(p)	p.parent
    # Get Filename	os.path.basename(p)	p.name
    # Get Extension	os.path.splitext(p)[1]	p.suffix
    # Join Paths	os.path.join(dir, file)	dir / file
    
    # Optional: add a column to track which file the data came from
    df_temp['System'] = os.path.basename(os.path.dirname(f))
    df_temp['Source'] = os.path.basename(f)
    
    # Append to our list
    df.append(df_temp)

# Combine everything into one big DataFrame
all_src_df = pd.concat(df, ignore_index=True)

print(f"Successfully imported {len(all_files)} files.")

Successfully imported 45 files.


In [45]:
all_src_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,System,Source
0,"Amphoe Nong Wua So, Changwat Udon Thani, 13-17",25.0,63.57,11.43,--,1300,310,900,200,--,All OS,MB MKS by district(Age)_2025-12-28.csv
1,"Amphoe Nong Wua So, Changwat Udon Thani, 18-24",22.22,67.86,9.92,--,2200,510,1600,240,--,All OS,MB MKS by district(Age)_2025-12-28.csv
2,"Amphoe Nong Wua So, Changwat Udon Thani, 25-34",23.05,65.73,11.22,--,5000,1200,3300,660,--,All OS,MB MKS by district(Age)_2025-12-28.csv


In [46]:
''' Convert Data '''

txn_df = all_src_df.copy()
# txn_df = txn_df.loc[txn_df['Source']=='MB MKS(All)_2025-12-28.csv'].copy()
# txn_df = txn_df.loc[txn_df['Source'].isin(['MB MKS(All)_2025-12-28.csv', 'MB MKS(Gender)_2025-12-28.csv', 'MB MKS by province(All)_2025-12-28.csv', 'MB MKS by province(Gender)_2025-12-28.csv'])].copy()

# txn_df.columns = ['Areas & Category'
#     , 'TMH-MB MKS', 'AIS-MB MKS', 'DTAC-MB MKS', 'TOT-MB MKS'
#     , 'TMH-MB Market Size', 'AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'
#     , 'TMH-MB Total Customer', 'AIS-MB Total Customer', 'DTAC-MB Total Customer', 'TOT-MB Total Customer'
#     , 'Source']
# txn_df.drop(['AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'], axis=1, inplace=True)
# txn_df.rename(columns={'TMH-MB Market Size': 'Total Market Size'}, inplace=True)

txn_df['Areas & Category'] = np.where(txn_df['Source'].str.contains('All'), txn_df['Areas & Category'] + ', All', txn_df['Areas & Category'])

# # Use expand=True to turn the result into a DataFrame
# new_cols = txn_df['Areas & Category'].str.split(', ', expand=True)

# Splits only at the last comma
txn_df[['AREA_META', 'Category']] = txn_df['Areas & Category'].str.rsplit(', ', n=1, expand=True)
# txn_df['AREA_META'] = txn_df['AREA_META'].str.replace('Changwat ', '')

def metric_group(x):
    x_input = x
    metric = ''
    if re.search('All', x_input): metric = 'All'
    elif re.search('Age', x_input): metric = 'Age'
    elif re.search('Connection', x_input): metric = 'Connection'
    elif re.search('Device', x_input): metric = 'Device'
    elif re.search('Gender', x_input): metric = 'Gender'
    return metric

def area_type(x):
    x_input = x
    area_type = ''
    if re.search('province', x_input): area_type = 'Province'
    elif re.search('district', x_input): area_type = 'District'
    else: area_type = 'Nationwide'
    return area_type

# Pass the function name (without parentheses) to .apply()
txn_df['Metric'] = txn_df['Source'].apply(metric_group)
txn_df['Area_type'] = txn_df['Source'].apply(area_type)

txn_df#.head(3)
# txn_df[txn_df['Source'].str.contains('All')].head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,System,Source,AREA_META,Category,Metric,Area_type
0,"Amphoe Nong Wua So, Changwat Udon Thani, 13-17",25,63.57,11.43,--,1300,310,900,200,--,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",13-17,Age,District
1,"Amphoe Nong Wua So, Changwat Udon Thani, 18-24",22.22,67.86,9.92,--,2200,510,1600,240,--,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",18-24,Age,District
2,"Amphoe Nong Wua So, Changwat Udon Thani, 25-34",23.05,65.73,11.22,--,5000,1200,3300,660,--,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",25-34,Age,District
3,"Amphoe Nong Wua So, Changwat Udon Thani, 35-44",24.50,60.81,14.70,--,3300,950,2000,350,--,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",35-44,Age,District
4,"Amphoe Nong Wua So, Changwat Udon Thani, 45-54",24.10,65.47,10.42,--,3100,790,2100,320,--,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",45-54,Age,District
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71413,"Thailand, $300 - $399",39.95,40.66,19.39,--,6820000,2190000,3170000,1590000,520,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$300 - $399,Device,Nationwide
71414,"Thailand, $400 - $599",42.86,37.85,19.29,--,7780000,2710000,3600000,1650000,790,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$400 - $599,Device,Nationwide
71415,"Thailand, $600 - $999",35.87,46.31,17.82,0,9330000,3430000,4450000,1790000,330,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$600 - $999,Device,Nationwide
71416,"Thailand, Female",35.26,46.9,17.84,0,33600000,10900000,16600000,7420000,2000,iOS,MB MKS(Gender)_2025-12-28_iOS.csv,Thailand,Female,Gender,Nationwide


In [None]:
# ''' Convert Data '''

# all_src_df.columns = ['Areas & Category'
#     , 'TMH-MB MKS', 'AIS-MB MKS', 'DTAC-MB MKS', 'TOT-MB MKS'
#     , 'TMH-MB Market Size', 'AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'
#     , 'TMH-MB Total Customer', 'AIS-MB Total Customer', 'DTAC-MB Total Customer', 'TOT-MB Total Customer'
#     , 'Source']

# all_src_df.drop(['AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'], axis=1, inplace=True)
# all_src_df.rename(columns={'TMH-MB Market Size': 'Total Market Size'}, inplace=True)

# all_src_df['Areas & Category'] = np.where(all_src_df['Source'].str.contains('All'), all_src_df['Areas & Category'] + ', All', all_src_df['Areas & Category'])

# # # If 'ID' starts with "A", add "_Local", otherwise keep it as is
# # all_src_df['Areas & Category'] = np.where(
# #     all_src_df['Source'].str.contains('All', regex=True, na=False), 
# #     all_src_df['Areas & Category'] + ', All', 
# #     all_src_df['Areas & Category']
# #     )

# # Use expand=True to turn the result into a DataFrame
# new_cols = all_src_df['Areas & Category'].str.split(', ', expand=True)

# # Assign those back to your main DataFrame
# all_src_df[['AREA_META', 'Category']] = new_cols
# all_src_df['AREA_META'] = all_src_df['AREA_META'].str.replace('Changwat ', '')

# def metric_group(x):
#     x_input = x
#     metric = ''
#     if re.search('Age', x_input): metric = 'Age'
#     elif re.search('Connection', x_input): metric = 'Connection'
#     elif re.search('Device', x_input): metric = 'Device'
#     elif re.search('Gender', x_input): metric = 'Gender'
#     return metric

# def area_type(x):
#     x_input = x
#     area_type = ''
#     if re.search('province', x_input): area_type = 'Provinces'
#     elif re.search('district', x_input): area_type = 'District'
#     else: area_type = 'Nationwide'
#     return area_type

# # Pass the function name (without parentheses) to .apply()
# all_src_df['Metric'] = all_src_df['Source'].apply(metric_group)
# all_src_df['Area_type'] = all_src_df['Source'].apply(area_type)

# all_src_df.head(3)

In [48]:
''' Aggregrate '''

# Replace values in multuple columns
txn_df.iloc[:, 1:10] = txn_df.iloc[:, 1:10].replace('--', '0')
# txn_df.iloc[:, 5:10] = txn_df.iloc[:, 5:10].replace(',', '')
txn_df['Total Market Size'] = txn_df['Total Market Size'].str.replace(',', '')
txn_df['TMH-MB Total Customer'] = txn_df['TMH-MB Total Customer'].str.replace(',', '')
txn_df['AIS-MB Total Customer'] = txn_df['AIS-MB Total Customer'].str.replace(',', '')
txn_df['DTAC-MB Total Customer'] = txn_df['DTAC-MB Total Customer'].str.replace(',', '')
txn_df['TOT-MB Total Customer'] = txn_df['TOT-MB Total Customer'].str.replace(',', '')

# txn_df.iloc[:, 1:4] = txn_df.iloc[:, 1:4].astype(float)
# txn_df.iloc[:, 1:5] = txn_df.iloc[:, 1:5].apply(pd.to_numeric, errors='coerce')

# # Create new measure
# txn_df['TMH & DTAC-MB MKS'] = txn_df['TMH-MB MKS'] + txn_df['DTAC-MB MKS']
# txn_df['TMH & DTAC-MB Total Customer'] = txn_df['TMH-MB Total Customer'] + txn_df['DTAC-MB Total Customer']

txn_df#.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,System,Source,AREA_META,Category,Metric,Area_type
0,"Amphoe Nong Wua So, Changwat Udon Thani, 13-17",25,63.57,11.43,0,1300,310,900,200,0,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",13-17,Age,District
1,"Amphoe Nong Wua So, Changwat Udon Thani, 18-24",22.22,67.86,9.92,0,2200,510,1600,240,0,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",18-24,Age,District
2,"Amphoe Nong Wua So, Changwat Udon Thani, 25-34",23.05,65.73,11.22,0,5000,1200,3300,660,0,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",25-34,Age,District
3,"Amphoe Nong Wua So, Changwat Udon Thani, 35-44",24.50,60.81,14.70,0,3300,950,2000,350,0,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",35-44,Age,District
4,"Amphoe Nong Wua So, Changwat Udon Thani, 45-54",24.10,65.47,10.42,0,3100,790,2100,320,0,All OS,MB MKS by district(Age)_2025-12-28.csv,"Amphoe Nong Wua So, Changwat Udon Thani",45-54,Age,District
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71413,"Thailand, $300 - $399",39.95,40.66,19.39,0,6820000,2190000,3170000,1590000,,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$300 - $399,Device,Nationwide
71414,"Thailand, $400 - $599",42.86,37.85,19.29,0,7780000,2710000,3600000,1650000,,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$400 - $599,Device,Nationwide
71415,"Thailand, $600 - $999",35.87,46.31,17.82,0,9330000,3430000,4450000,1790000,,iOS,MB MKS(Device)_2025-12-28_iOS.csv,Thailand,$600 - $999,Device,Nationwide
71416,"Thailand, Female",35.26,46.9,17.84,0,33600000,10900000,16600000,7420000,2000,iOS,MB MKS(Gender)_2025-12-28_iOS.csv,Thailand,Female,Gender,Nationwide


In [49]:
txn_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71418 entries, 0 to 71417
Data columns (total 16 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Areas & Category        71418 non-null  object
 1   TMH-MB MKS              71418 non-null  object
 2   AIS-MB MKS              71418 non-null  object
 3   DTAC-MB MKS             71418 non-null  object
 4   TOT-MB MKS              71418 non-null  object
 5   Total Market Size       71418 non-null  object
 6   TMH-MB Total Customer   71418 non-null  object
 7   AIS-MB Total Customer   71418 non-null  object
 8   DTAC-MB Total Customer  71418 non-null  object
 9   TOT-MB Total Customer   71388 non-null  object
 10  System                  71418 non-null  object
 11  Source                  71418 non-null  object
 12  AREA_META               71418 non-null  object
 13  Category                71418 non-null  object
 14  Metric                  71418 non-null  object
 15  Ar

### Step 2 : Areas Mapping

In [303]:
# ''' DIM_MOOC_AREA '''
# mooc_file = '../../CFW/data/dim_mooc_area.csv'
# mooc_cols = ['ZONE_TYPE', 'TEAM_CODE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'TDS_PROVINCE', 'PROVINCE_ENG', 'PROVINCE_TH', 'ORGID_HH', 'D_CLUSTER', 'DISTRICT_EN', 'CCAATT', 'REMARK']
# mooc_df = pd.read_csv(mooc_file, usecols=mooc_cols)
# mooc_df = mooc_df.loc[(mooc_df['REMARK']!='Dummy') & (mooc_df['TEAM_CODE']!='ไม่ระบุ') & (mooc_df['HOP_HINT']!='True Corp')]

# ''' Provinces Org '''
# mooc_df['ORGID_HH'] = mooc_df['ORGID_HH'].astype(str)
# mooc_df.drop(['ORGID_HH', 'D_CLUSTER', 'DISTRICT_EN', 'CCAATT', 'TEAM_CODE', 'REMARK'], axis=1, inplace=True)
# mooc_df = mooc_df.drop_duplicates()

# # ''' Districts Org '''
# # mooc_df['CCAA'] = mooc_df['CCAATT'].astype(str).str[:4]
# # mooc_df['ORGID_HH'] = mooc_df['ORGID_HH'].astype(str)
# # mooc_df.drop(['CCAATT', 'TEAM_CODE', 'REMARK'], axis=1, inplace=True)
# # mooc_df = mooc_df.drop_duplicates()

# mooc_df.loc[mooc_df['ORGID_H']=='003']
# # mooc_df.loc[mooc_df['ORGID_H'].isin(['003', '004'])]
# # mooc_df#.head(3)

# # mooc_df.info()

In [50]:
''' Nationwide '''

nationwide_df = txn_df.loc[txn_df['Area_type']=='Nationwide'].copy()

# Initialize them all with NaN (Not a Number)
# nationwide_df[['ZONE_TYPE', 'EEC_FLAG', 'ORGID_G', 'TDS_SGMD', 'PROVINCE_CD', 'PROVINCE_ENG']] = None
nationwide_new_cols = ['ZONE_TYPE', 'EEC_FLAG', 'ORGID_G', 'TDS_SGMD', 'PROVINCE_CD', 'PROVINCE_ENG']
nationwide_df[nationwide_new_cols] = np.nan

nationwide_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,AREA_META,Category,Metric,Area_type,ZONE_TYPE,EEC_FLAG,ORGID_G,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG
23782,"Thailand, 13-17",34.23,46.21,19.57,0,2870000,1060000,1440000,611000,0,...,Thailand,13-17,Age,Nationwide,,,,,,
23783,"Thailand, 18-24",33.3,42.59,24.11,0,10500000,3710000,4740000,2680000,160,...,Thailand,18-24,Age,Nationwide,,,,,,
23784,"Thailand, 25-34",32.72,44.57,22.71,0,21700000,7360000,10000000,5110000,770,...,Thailand,25-34,Age,Nationwide,,,,,,


In [51]:
''' Provinces '''

province_org_file = '../../CFW/data/Meta Area Mapping.xlsx'
province_org_sheet = 'Provinces'
province_org_df = pd.read_excel(province_org_file, sheet_name=province_org_sheet)

province_df = pd.merge(txn_df.loc[txn_df['Area_type']=='Province'], province_org_df, how='left', on='AREA_META')
province_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,AREA_META,Category,Metric,Area_type,ZONE_TYPE,EEC_FLAG,ORGID_G,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG
0,"Changwat Chai Nat, 13-17",37.19,50.2,12.61,0,11000,4700,6100,1600,0,...,Changwat Chai Nat,13-17,Age,Province,UPC,N,GX7,Central-West,18,Chai Nat
1,"Changwat Chai Nat, 18-24",38.36,46.81,14.83,0,31000,13000,16000,5100,0,...,Changwat Chai Nat,18-24,Age,Province,UPC,N,GX7,Central-West,18,Chai Nat
2,"Changwat Chai Nat, 25-34",37.59,49.03,13.38,0,67000,26000,34000,9300,0,...,Changwat Chai Nat,25-34,Age,Province,UPC,N,GX7,Central-West,18,Chai Nat


In [52]:
''' Districts '''

district_org_file = '../../CFW/data/Meta Area Mapping.xlsx'
district_org_sheet = 'Districts'
district_org_df = pd.read_excel(district_org_file, sheet_name=district_org_sheet)

district_df = pd.merge(txn_df.loc[txn_df['Area_type']=='District'], district_org_df, how='left', on='AREA_META')
district_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,TDS_SGMD,ORGID_H,HOP_HINT,PROVINCE_CD,PROVINCE_ENG,ORGID_HH,D_CLUSTER,CCAA,DISTRICT_EN,DISTRICT_UNIQUE
0,"Amphoe Nong Wua So, Changwat Udon Thani, 13-17",25.0,63.57,11.43,0,1300,310,900,200,0,...,Northeast1,39X,"UDON THANI, NONG BUA LAM PHU",41,Udon Thani,909088,UDON THANI,4103,Nong Wua So,Nong Wua So
1,"Amphoe Nong Wua So, Changwat Udon Thani, 18-24",22.22,67.86,9.92,0,2200,510,1600,240,0,...,Northeast1,39X,"UDON THANI, NONG BUA LAM PHU",41,Udon Thani,909088,UDON THANI,4103,Nong Wua So,Nong Wua So
2,"Amphoe Nong Wua So, Changwat Udon Thani, 25-34",23.05,65.73,11.22,0,5000,1200,3300,660,0,...,Northeast1,39X,"UDON THANI, NONG BUA LAM PHU",41,Udon Thani,909088,UDON THANI,4103,Nong Wua So,Nong Wua So


In [53]:
''' Final Data '''

final_df = pd.concat([nationwide_df, province_df, district_df], ignore_index=True)

# Convert 'PROVINCE_CD' and 'ORGID_HH' and 'CCAA' to strings
final_df = final_df.astype({'PROVINCE_CD' : str, 'ORGID_HH': str, 'CCAA': str})

final_df

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG,ORGID_H,HOP_HINT,ORGID_HH,D_CLUSTER,CCAA,DISTRICT_EN,DISTRICT_UNIQUE
0,"Thailand, 13-17",34.23,46.21,19.57,0,2870000,1060000,1440000,611000,0,...,,,,,,,,,,
1,"Thailand, 18-24",33.3,42.59,24.11,0,10500000,3710000,4740000,2680000,160,...,,,,,,,,,,
2,"Thailand, 25-34",32.72,44.57,22.71,0,21700000,7360000,10000000,5110000,770,...,,,,,,,,,,
3,"Thailand, 35-44",31.94,47.5,20.55,0,13300000,4370000,6500000,2820000,620,...,,,,,,,,,,
4,"Thailand, 45-54",29.35,51.18,19.46,0.01,8860000,2670000,4650000,1770000,840,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71413,"Amphoe Chatturat, Changwat Chaiyaphum, Male",43.54,48.10,8.35,0,21000,7500,13000,1400,0,...,Northeast1,36.0,Chaiyaphum,36X,"CHAIYAPHUM, LOEI",909079.0,CHAIYAPHUM,3606.0,Chatturat,Chatturat
71414,"Amphoe Sida, Changwat Nakhon Ratchasima, Female",40.17,59.83,0,0,4300,1400,2600,440,0,...,Northeast2,30.0,Nakhon Ratchasima,30X,NAKHON RATCHASIMA,909083.0,NAKHON RATCHASIMA,3031.0,Sida,Sida
71415,"Amphoe Sida, Changwat Nakhon Ratchasima, Male",36.92,63.08,0,0,3700,1000,2500,390,0,...,Northeast2,30.0,Nakhon Ratchasima,30X,NAKHON RATCHASIMA,909083.0,NAKHON RATCHASIMA,3031.0,Sida,Sida
71416,"Amphoe Ban Thi, Changwat Lamphun, Female",38.32,45.33,16.36,0,6800,2500,2700,1700,0,...,North,51.0,Lamphun,52X,"LAMPANG, LAMPHUN",904067.0,LAMPHUN,5107.0,Ban Thi,Ban Thi


In [54]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 71418 entries, 0 to 71417
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Areas & Category        71418 non-null  object
 1   TMH-MB MKS              71418 non-null  object
 2   AIS-MB MKS              71418 non-null  object
 3   DTAC-MB MKS             71418 non-null  object
 4   TOT-MB MKS              71418 non-null  object
 5   Total Market Size       71418 non-null  object
 6   TMH-MB Total Customer   71418 non-null  object
 7   AIS-MB Total Customer   71418 non-null  object
 8   DTAC-MB Total Customer  71418 non-null  object
 9   TOT-MB Total Customer   71388 non-null  object
 10  System                  71418 non-null  object
 11  Source                  71418 non-null  object
 12  AREA_META               71418 non-null  object
 13  Category                71418 non-null  object
 14  Metric                  71418 non-null  object
 15  Ar

### Step 3 : Export Result

In [55]:
''' Generate CSV file '''

op_dir = 'output'
op_file = f'mobile_mks_{str_curr_dt}'

final_df.to_csv(f'{op_dir}/{op_file}.csv', index=False, encoding='utf-8')
print(f'\nGenerate "{op_file}.csv" successfully')


Generate "mobile_mks_20260202.csv" successfully
