# * Mobile Market Share from Meta

In [294]:
import os
import sys
import glob
import configparser
import datetime as dt
import pandas as pd
import numpy as np
import re

curr_dt = dt.datetime.now().date()
next_week_dt = curr_dt + dt.timedelta(days=7)
str_curr_dt = curr_dt.strftime('%Y%m%d')
str_next_week_dt = next_week_dt.strftime('%Y%m%d')
yyyymm_curr = curr_dt.strftime('%Y%m')
yyyymm_next_week = next_week_dt.strftime('%Y%m')

### Sample data source

In [295]:
src_file_age = 'data/MB MKS by province(Age)_2025-12-28_All.csv'
df_age = pd.read_csv(src_file_age, skiprows=2)
df_age.head(3)

Unnamed: 0,Provinces & Age,TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,"Changwat Chai Nat, 13-17",37.19,50.2,12.61,--,11000,11000,11000,11000,4700,6100,1600,--
1,"Changwat Chai Nat, 18-24",38.36,46.81,14.83,--,31000,31000,31000,31000,13000,16000,5100,--
2,"Changwat Chai Nat, 25-34",37.59,49.03,13.38,--,67000,67000,67000,67000,26000,34000,9300,--


In [296]:
src_file_connect = 'data/MB MKS by province(Connection)_2025-12-28_All.csv'
df_connect = pd.read_csv(src_file_connect, skiprows=2)
df_connect.head(3)

Unnamed: 0,Provinces & Connection type,TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,"Changwat Chai Nat, 2G",--,--,--,--,--,--,--,--,--,--,--,--
1,"Changwat Chai Nat, 3G",27.51,60.14,12.35,--,4000,4000,4000,4000,1100,2600,460,--
2,"Changwat Chai Nat, 4G",30.28,56.15,13.57,--,125000,125000,125000,125000,39000,71000,18000,--


In [297]:
src_file_device = 'data/MB MKS by province(Device)_2025-12-28_All.csv'
df_device = pd.read_csv(src_file_device, skiprows=2)
df_device.head(3)

Unnamed: 0,Provinces & Device price (USD),TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,"Changwat Chai Nat, Price unknown",38.93,48.83,12.24,--,7200,7200,7200,7200,2900,3600,1000,--
1,"Changwat Chai Nat, $1,000 or more",40.1,46.67,13.23,--,36000,36000,36000,36000,15000,18000,5100,--
2,"Changwat Chai Nat, Less than $100",29.66,58.08,12.26,--,11000,11000,11000,11000,3200,6400,1400,--


In [298]:
src_file_gender = 'data/MB MKS by province(Gender)_2025-12-28_All.csv'
df_gender = pd.read_csv(src_file_gender, skiprows=2)
df_gender.head(3)

Unnamed: 0,Provinces & Gender,TrueMove H-Mobile Market share (normalized),AIS-Mobile Market share (normalized),dtac-Mobile Market share (normalized),TOT3G-Mobile Market share (normalized),TrueMove H-Mobile Market size,AIS-Mobile Market size,dtac-Mobile Market size,TOT3G-Mobile Market size,TrueMove H-Mobile Total customers,AIS-Mobile Total customers,dtac-Mobile Total customers,TOT3G-Mobile Total customers
0,"Changwat Chai Nat, Female",34.65,51.59,13.76,--,112000,112000,112000,112000,40000,60000,16000,--
1,"Changwat Chai Nat, Male",33.26,53.48,13.27,--,108000,108000,108000,108000,37000,60000,15000,--
2,"Changwat Surin, Female",28.66,60.78,10.53,0.03,396000,396000,396000,396000,119000,252000,45000,--


### Step 1 : Import Data Source

In [299]:
''' Source Files '''

# 1. Specify the path to your CSV files
src_path = './data' 

# 2. Use glob to match all csv files in that folder
all_files = glob.glob(os.path.join(src_path, "*.csv"))
all_files

# 3. Use a list comprehension to read each file and concat them
# df = pd.concat((pd.read_csv(f, skiprows=2) for f in all_files), ignore_index=True)

# 3. Use a list comprehension to read each file and concat them
df = []

for f in all_files:

    # Read the file starting at row 3 (index 2)
    df_temp = pd.read_csv(f, skiprows=2)

    # Rename by referencing the first column index (0)
    df_temp = df_temp.rename(columns={df_temp.columns[0]: 'Areas & Category'})

    # Optional: add a column to track which file the data came from
    df_temp['Source'] = os.path.basename(f)
    
    # Append to our list
    df.append(df_temp)

# 4. Combine everything into one big DataFrame
all_src_df = pd.concat(df, ignore_index=True)

print(f"Successfully imported {len(all_files)} files.")

Successfully imported 8 files.


In [300]:
''' Convert Data '''

all_src_df.columns = ['Areas & Category'
    , 'TMH-MB MKS', 'AIS-MB MKS', 'DTAC-MB MKS', 'TOT-MB MKS'
    , 'TMH-MB Market Size', 'AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'
    , 'TMH-MB Total Customer', 'AIS-MB Total Customer', 'DTAC-MB Total Customer', 'TOT-MB Total Customer'
    , 'Source']

all_src_df.drop(['AIS-MB Market Size', 'DTAC-MB Market Size', 'TOT-MB Market Size'], axis=1, inplace=True)
all_src_df.rename(columns={'TMH-MB Market Size': 'Total Market Size'}, inplace=True)

# Use expand=True to turn the result into a DataFrame
new_cols = all_src_df['Areas & Category'].str.split(', ', expand=True)

# Assign those back to your main DataFrame
all_src_df[['PROVINCE_META', 'Category']] = new_cols
all_src_df['PROVINCE_META'] = all_src_df['PROVINCE_META'].str.replace('Changwat ', '')

def metric_group(x):
    x_input = x
    metric = ''
    if re.search('Age', x_input): metric = 'Age'
    elif re.search('Connection', x_input): metric = 'Connection'
    elif re.search('Device', x_input): metric = 'Device'
    elif re.search('Gender', x_input): metric = 'Gender'
    return metric

def area_type(x):
    x_input = x
    area_type = ''
    if re.search('province', x_input): area_type = 'Provinces'
    elif re.search('district', x_input): area_type = 'District'
    else: area_type = 'Nationwide'
    return area_type

# Pass the function name (without parentheses) to .apply()
all_src_df['Metric'] = all_src_df['Source'].apply(metric_group)
all_src_df['Area_type'] = all_src_df['Source'].apply(area_type)

all_src_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,Source,PROVINCE_META,Category,Metric,Area_type
0,"Changwat Chai Nat, 13-17",37.19,50.2,12.61,--,11000,4700,6100,1600,--,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,13-17,Age,Provinces
1,"Changwat Chai Nat, 18-24",38.36,46.81,14.83,--,31000,13000,16000,5100,--,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,18-24,Age,Provinces
2,"Changwat Chai Nat, 25-34",37.59,49.03,13.38,--,67000,26000,34000,9300,--,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,25-34,Age,Provinces


In [301]:
''' Aggregrate '''

# Replace values in multuple columns
all_src_df.iloc[:, 1:10] = all_src_df.iloc[:, 1:10].replace('--', '0')
# all_src_df.iloc[:, 5:10] = all_src_df.iloc[:, 5:10].replace(',', '')
all_src_df['Total Market Size'] = all_src_df['Total Market Size'].str.replace(',', '')
all_src_df['TMH-MB Total Customer'] = all_src_df['TMH-MB Total Customer'].str.replace(',', '')
all_src_df['AIS-MB Total Customer'] = all_src_df['AIS-MB Total Customer'].str.replace(',', '')
all_src_df['DTAC-MB Total Customer'] = all_src_df['DTAC-MB Total Customer'].str.replace(',', '')
all_src_df['TOT-MB Total Customer'] = all_src_df['TOT-MB Total Customer'].str.replace(',', '')

# all_src_df.iloc[:, 1:4] = all_src_df.iloc[:, 1:4].astype(float)
# all_src_df.iloc[:, 1:5] = all_src_df.iloc[:, 1:5].apply(pd.to_numeric, errors='coerce')

# # Create new measure
# all_src_df['TMH & DTAC-MB MKS'] = all_src_df['TMH-MB MKS'] + all_src_df['DTAC-MB MKS']
# all_src_df['TMH & DTAC-MB Total Customer'] = all_src_df['TMH-MB Total Customer'] + all_src_df['DTAC-MB Total Customer']

all_src_df#.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,Source,PROVINCE_META,Category,Metric,Area_type
0,"Changwat Chai Nat, 13-17",37.19,50.2,12.61,0,11000,4700,6100,1600,0,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,13-17,Age,Provinces
1,"Changwat Chai Nat, 18-24",38.36,46.81,14.83,0,31000,13000,16000,5100,0,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,18-24,Age,Provinces
2,"Changwat Chai Nat, 25-34",37.59,49.03,13.38,0,67000,26000,34000,9300,0,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,25-34,Age,Provinces
3,"Changwat Chai Nat, 35-44",35.03,52.19,12.78,0,43000,15000,22000,5500,0,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,35-44,Age,Provinces
4,"Changwat Chai Nat, 45-54",30.87,55.07,14.06,0,30000,9600,17000,4200,0,MB MKS by province(Age)_2025-12-28_All.csv,Chai Nat,45-54,Age,Provinces
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1789,"Thailand, $300 - $399",31.55,45.53,22.91,0.01,6820000,2190000,3170000,1590000,,MB MKS(Device)_2025-12-28.csv,Thailand,$300 - $399,Device,Nationwide
1790,"Thailand, $400 - $599",34.08,45.1,20.81,0.01,7780000,2710000,3590000,1660000,,MB MKS(Device)_2025-12-28.csv,Thailand,$400 - $599,Device,Nationwide
1791,"Thailand, $600 - $999",35.42,46.06,18.52,0,9330000,3430000,4450000,1790000,,MB MKS(Device)_2025-12-28.csv,Thailand,$600 - $999,Device,Nationwide
1792,"Thailand, Female",31.29,47.5,21.21,0.01,33600000,10900000,16600000,7420000,2000,MB MKS(Gender)_2025-12-28.csv,Thailand,Female,Gender,Nationwide


In [302]:
all_src_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1794 entries, 0 to 1793
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Areas & Category        1794 non-null   object
 1   TMH-MB MKS              1794 non-null   object
 2   AIS-MB MKS              1794 non-null   object
 3   DTAC-MB MKS             1794 non-null   object
 4   TOT-MB MKS              1794 non-null   object
 5   Total Market Size       1794 non-null   object
 6   TMH-MB Total Customer   1794 non-null   object
 7   AIS-MB Total Customer   1794 non-null   object
 8   DTAC-MB Total Customer  1794 non-null   object
 9   TOT-MB Total Customer   1784 non-null   object
 10  Source                  1794 non-null   object
 11  PROVINCE_META           1794 non-null   object
 12  Category                1794 non-null   object
 13  Metric                  1794 non-null   object
 14  Area_type               1794 non-null   object
dtypes: o

### Step 2 : Areas Mapping

In [303]:
# ''' DIM_MOOC_AREA '''
# mooc_file = '../../CFW/data/dim_mooc_area.csv'
# mooc_cols = ['ZONE_TYPE', 'TEAM_CODE', 'ORGID_G', 'TDS_SGMD', 'ORGID_H', 'HOP_HINT', 'TDS_PROVINCE', 'PROVINCE_ENG', 'PROVINCE_TH', 'ORGID_HH', 'D_CLUSTER', 'DISTRICT_EN', 'CCAATT', 'REMARK']
# mooc_df = pd.read_csv(mooc_file, usecols=mooc_cols)
# mooc_df = mooc_df.loc[(mooc_df['REMARK']!='Dummy') & (mooc_df['TEAM_CODE']!='ไม่ระบุ') & (mooc_df['HOP_HINT']!='True Corp')]

# ''' Provinces Org '''
# mooc_df['ORGID_HH'] = mooc_df['ORGID_HH'].astype(str)
# mooc_df.drop(['ORGID_HH', 'D_CLUSTER', 'DISTRICT_EN', 'CCAATT', 'TEAM_CODE', 'REMARK'], axis=1, inplace=True)
# mooc_df = mooc_df.drop_duplicates()

# # ''' Districts Org '''
# # mooc_df['CCAA'] = mooc_df['CCAATT'].astype(str).str[:4]
# # mooc_df['ORGID_HH'] = mooc_df['ORGID_HH'].astype(str)
# # mooc_df.drop(['CCAATT', 'TEAM_CODE', 'REMARK'], axis=1, inplace=True)
# # mooc_df = mooc_df.drop_duplicates()

# mooc_df.loc[mooc_df['ORGID_H']=='003']
# # mooc_df.loc[mooc_df['ORGID_H'].isin(['003', '004'])]
# # mooc_df#.head(3)

# # mooc_df.info()

In [304]:
''' Nationwide '''

nationwide_df = all_src_df.loc[all_src_df['Area_type']=='Nationwide']

# Initialize them all with NaN (Not a Number)
# nationwide_df[['ZONE_TYPE', 'EEC_FLAG', 'ORGID_G', 'TDS_SGMD', 'PROVINCE_CD', 'PROVINCE_ENG']] = None
nationwide_new_cols = ['ZONE_TYPE', 'EEC_FLAG', 'ORGID_G', 'TDS_SGMD', 'PROVINCE_CD', 'PROVINCE_ENG']
nationwide_df[nationwide_new_cols] = np.nan

# nationwide_df.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nationwide_df[nationwide_new_cols] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nationwide_df[nationwide_new_cols] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nationwide_df[nationwide_new_cols] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try us

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,PROVINCE_META,Category,Metric,Area_type,ZONE_TYPE,EEC_FLAG,ORGID_G,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG
1771,"Thailand, 13-17",34.23,46.21,19.57,0,2870000,1060000,1440000,611000,0,...,Thailand,13-17,Age,Nationwide,,,,,,
1772,"Thailand, 18-24",33.3,42.59,24.11,0,10500000,3710000,4740000,2680000,160,...,Thailand,18-24,Age,Nationwide,,,,,,
1773,"Thailand, 25-34",32.72,44.57,22.71,0,21700000,7360000,10000000,5110000,770,...,Thailand,25-34,Age,Nationwide,,,,,,


In [305]:
''' Provinces '''

province_org_file = '../../CFW/data/Meta Provinces Mapping.xlsx'
province_org_sheet = 'Data'
province_org_df = pd.read_excel(province_org_file, sheet_name=province_org_sheet)
province_org_df['PROVINCE_CD'] = province_org_df['PROVINCE_CD'].astype(str)
# province_org_df.info()
# province_org_df.head(3)

province_df = pd.merge(all_src_df.loc[all_src_df['Area_type']=='Provinces'], province_org_df, how='left', on='PROVINCE_META')
province_df.head(3)

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,PROVINCE_META,Category,Metric,Area_type,ZONE_TYPE,EEC_FLAG,ORGID_G,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG
0,"Changwat Chai Nat, 13-17",37.19,50.2,12.61,0,11000,4700,6100,1600,0,...,Chai Nat,13-17,Age,Provinces,UPC,N,GX7,Central-West,18,Chai Nat
1,"Changwat Chai Nat, 18-24",38.36,46.81,14.83,0,31000,13000,16000,5100,0,...,Chai Nat,18-24,Age,Provinces,UPC,N,GX7,Central-West,18,Chai Nat
2,"Changwat Chai Nat, 25-34",37.59,49.03,13.38,0,67000,26000,34000,9300,0,...,Chai Nat,25-34,Age,Provinces,UPC,N,GX7,Central-West,18,Chai Nat


In [306]:
''' Districts '''

# district_df = pd.merge(all_src_df.loc[all_src_df['Area_type']=='Districts'], district_org_df, how='left', on='DISTRICT_META')
# district_df.head(3)

' Districts '

In [307]:
''' Final Data '''

final_df = pd.concat([nationwide_df, province_df], ignore_index=True)
final_df

Unnamed: 0,Areas & Category,TMH-MB MKS,AIS-MB MKS,DTAC-MB MKS,TOT-MB MKS,Total Market Size,TMH-MB Total Customer,AIS-MB Total Customer,DTAC-MB Total Customer,TOT-MB Total Customer,...,PROVINCE_META,Category,Metric,Area_type,ZONE_TYPE,EEC_FLAG,ORGID_G,TDS_SGMD,PROVINCE_CD,PROVINCE_ENG
0,"Thailand, 13-17",34.23,46.21,19.57,0,2870000,1060000,1440000,611000,0,...,Thailand,13-17,Age,Nationwide,,,,,,
1,"Thailand, 18-24",33.3,42.59,24.11,0,10500000,3710000,4740000,2680000,160,...,Thailand,18-24,Age,Nationwide,,,,,,
2,"Thailand, 25-34",32.72,44.57,22.71,0,21700000,7360000,10000000,5110000,770,...,Thailand,25-34,Age,Nationwide,,,,,,
3,"Thailand, 35-44",31.94,47.5,20.55,0,13300000,4370000,6500000,2820000,620,...,Thailand,35-44,Age,Nationwide,,,,,,
4,"Thailand, 45-54",29.35,51.18,19.46,0.01,8860000,2670000,4650000,1770000,840,...,Thailand,45-54,Age,Nationwide,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1789,"Changwat Phatthalung, Male",23.3,64.83,11.81,0.06,163000,41000,110000,20000,0,...,Phatthalung,Male,Gender,Provinces,UPC,N,GX8,South,93,Phatthalung
1790,"Changwat Nakhon Sawan, Female",28.47,53.96,17.57,0,355000,107000,201000,66000,0,...,Nakhon Sawan,Female,Gender,Provinces,UPC,N,GX7,Central-West,60,Nakhon Sawan
1791,"Changwat Nakhon Sawan, Male",29.23,54.82,15.95,0,332000,102000,191000,55000,0,...,Nakhon Sawan,Male,Gender,Provinces,UPC,N,GX7,Central-West,60,Nakhon Sawan
1792,"Changwat Mae Hong Son, Female",38.93,55.18,5.9,0,79000,33000,48000,5100,0,...,Mae Hong Son,Female,Gender,Provinces,UPC,N,GX4,North,58,Mae Hong Son


In [308]:
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1794 entries, 0 to 1793
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Areas & Category        1794 non-null   object
 1   TMH-MB MKS              1794 non-null   object
 2   AIS-MB MKS              1794 non-null   object
 3   DTAC-MB MKS             1794 non-null   object
 4   TOT-MB MKS              1794 non-null   object
 5   Total Market Size       1794 non-null   object
 6   TMH-MB Total Customer   1794 non-null   object
 7   AIS-MB Total Customer   1794 non-null   object
 8   DTAC-MB Total Customer  1794 non-null   object
 9   TOT-MB Total Customer   1784 non-null   object
 10  Source                  1794 non-null   object
 11  PROVINCE_META           1794 non-null   object
 12  Category                1794 non-null   object
 13  Metric                  1794 non-null   object
 14  Area_type               1794 non-null   object
 15  ZONE

### Step 3 : Export Result

In [309]:
''' Generate CSV file '''

op_dir = 'output'
op_file = f'mobile_mks_province_{str_curr_dt}'

final_df.to_csv(f'{op_dir}/{op_file}.csv', index=False, encoding='utf-8')
print(f'\nGenerate "{op_file}.csv" successfully')


Generate "mobile_mks_province_20260129.csv" successfully
