In [1]:
import glob
import os
import time

import pandas as pd

In [2]:
#Script settings

#Set input file directory (scanned for *.csv files)
bgs_input_directory = "data/BGS/"

#Set output file names
# The output directory and the date stamp are each defined once, so a new run
# needs a single edit and the five file names cannot drift apart.
output_directory = "output/"
output_date_stamp = "20220213"

unifi_output = f"{output_directory}bgs_unifi_{output_date_stamp}.csv"
celcom_output = f"{output_directory}bgs_celcom_{output_date_stamp}.csv"
maxis_output = f"{output_directory}bgs_maxis_{output_date_stamp}.csv"
digi_output = f"{output_directory}bgs_digi_{output_date_stamp}.csv"
umobile_output = f"{output_directory}bgs_umobile_{output_date_stamp}.csv"

In [4]:
# Columns loaded from every BGS CSV file (passed to pd.read_csv as `usecols`);
# any column not listed here is not read. The order below is the original one.
cols = [
    "connection_type", "is_network_roaming",
    "sim_operator_name", "sim_operator_mcc_code",
    "network_operator_name", "network_operator_mcc_code",
    "network_operator_mnc_code",
    "client_latitude", "client_longitude",
    "tac", "pci", "cell_identifier", "lte_enodeb", "rnc_id", "cell_id",
    "arfcn", "uarfcn", "earfcn",
    "rsrp", "rsrq", "rssi", "rssnr", "cqi", "cell_bandwidth",
    # nr_-prefixed columns
    "nr_ss_rsrp", "nr_ss_rsrq", "nr_ss_sinr",
    "nr_csi_rsrp", "nr_csi_rsrq", "nr_csi_sinr",
    "nr_level", "nr_asu", "nr_arfcn", "nr_nci", "nr_pci", "nr_tac",
    "nr_mcc", "nr_mnc", "nr_state", "nr_frequency_range",
    "cell_bandwidths",
    "gsm_additional_plmns", "wcdma_additional_plmns",
    "lte_additional_plmns", "lte_bands",
    "nr_additional_plmns", "nr_bands",
    "gsm_rssi", "wcdma_ecno",
]

In [None]:
#Function to extract BGS data
def extract_bgs_data(bgs_filename:str):
    """Read one BGS CSV file and return only the rows kept for analysis.

    Only the columns named in the module-level ``cols`` list are loaded.
    A row is kept when its ``network_operator_mcc_code`` equals 502 and both
    ``network_operator_name`` and ``rsrp`` are present (not NaN).

    Parameters
    ----------
    bgs_filename : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The rows that pass all three conditions, in file order and with the
        index labels assigned by ``pd.read_csv``.
    """
    raw = pd.read_csv(bgs_filename, usecols=cols)
    # One combined mask instead of three successive filtered copies.
    keep = (
        (raw['network_operator_mcc_code'] == 502)
        & raw['network_operator_name'].notna()
        & raw['rsrp'].notna()
    )
    return raw[keep]

#Define the column names the script reads; all other columns are excluded (faster processing time)
# cols = ['network_operator_mcc_code', 'network_operator_name', 'rsrp']



#Get list of input BGS files from given directory
# os.path.join builds the pattern whether or not the directory setting ends
# with a slash. sorted() is used because glob makes no ordering guarantee;
# sorting keeps the row order of the combined result reproducible.
bgs_input_files = sorted(glob.glob(os.path.join(bgs_input_directory, "*.csv")))
if not bgs_input_files:
    # Fail early with a clear message: pd.concat on an empty list would raise
    # the less helpful "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {bgs_input_directory!r}")

##Script execution

# starting time
start = time.time()
print(start)

# Collect the filtered frame of every file, then concatenate once after the
# loop (cheaper than growing a DataFrame inside the loop).
frames = []
for file in bgs_input_files:
    frames.append(extract_bgs_data(file))
    print(file + ' completed')

# Index labels are kept as returned per file, so they can repeat across files.
result = pd.concat(frames)
# end time
end = time.time()
print(f"Runtime of the program is {end - start}")

1644924199.011156


In [9]:
#split the processed data into one DataFrame per operator
# Rows are selected by exact match on the network_operator_name value.
operator_name = result['network_operator_name']

unifi = result.loc[operator_name == 'Unifi']
celcom = result.loc[operator_name == 'Celcom']
maxis = result.loc[operator_name == 'Maxis']
digi = result.loc[operator_name == 'Digi']
umobile = result.loc[operator_name == 'U Mobile']

In [10]:
#save output file in csv format
# Each operator frame is paired with its configured output path. The parent
# directory is created first because DataFrame.to_csv does not create missing
# directories and would otherwise fail on a fresh checkout.
for operator_frame, output_path in (
    (unifi, unifi_output),
    (celcom, celcom_output),
    (maxis, maxis_output),
    (digi, digi_output),
    (umobile, umobile_output),
):
    # `or "."` covers an output path that has no directory component.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    # The DataFrame index is written as the first column (to_csv default).
    operator_frame.to_csv(output_path)