In [17]:
import pandas as pd
import glob
import time

In [18]:
#Script settings

#Directory that holds the input Speedtest CSV files
speedtest_input_directory = "data/Speedtest/"

#Output file names, one per operator; they differ only in the operator tag
_output_name_pattern = "output/speedtest_{}_20220213.csv"
unifi_output = _output_name_pattern.format("unifi")
celcom_output = _output_name_pattern.format("celcom")
maxis_output = _output_name_pattern.format("maxis")
digi_output = _output_name_pattern.format("digi")
umobile_output = _output_name_pattern.format("umobile")

In [19]:
#Function to extract Speedtest data
def extract_speedtest_data(speedtest_filename: str, usecols=None):
    """Load one Speedtest CSV file and keep only the rows of interest.

    A row is kept when ALL of the following hold:

    - ``mcc`` equals 502 (presumably the Malaysian mobile country code - confirm),
    - ``network_operator_name`` is not missing,
    - ``rsrp_a`` is not missing,
    - ``pre_connection_type`` is 15 or 21,
    - ``post_connection_type`` is 15 or 21.

    Parameters
    ----------
    speedtest_filename : str
        Path of the CSV file to read (passed straight to ``pd.read_csv``).
    usecols : list of str, optional
        Columns to load. Must include the five columns used by the filters
        above. When omitted, the module-level ``cols`` list is used.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, keeping the row index assigned by ``pd.read_csv``.
    """
    if usecols is None:
        # Fall back to the notebook-level column list (looked up at call time).
        usecols = cols

    # Connection-type codes accepted both before and after the test.
    wanted_connection_types = [15, 21]

    df = pd.read_csv(speedtest_filename, usecols=usecols, low_memory=False)

    keep = (
        (df['mcc'] == 502)
        & df['network_operator_name'].notna()
        & df['rsrp_a'].notna()
        # The pre filter must test the *pre* column for both codes; it previously
        # compared post_connection_type to 21, which let wrong rows through.
        & df['pre_connection_type'].isin(wanted_connection_types)
        & df['post_connection_type'].isin(wanted_connection_types)
    )
    return df[keep]

#Columns the script needs; reading only these and excluding the others gives faster processing
cols = ['mcc', 'network_operator_name', 'rsrp_a', 'pre_connection_type', 'post_connection_type']

#Get list of input Speedtest files from the given directory.
#Sorted because glob returns matches in arbitrary order, which would make the
#row order of the combined result differ between runs.
speedtest_input_files = sorted(glob.glob(speedtest_input_directory+"*.csv"))

#Fail early with a clear message; pd.concat on an empty list raises an opaque ValueError
if not speedtest_input_files:
    raise FileNotFoundError(f"No CSV files found in '{speedtest_input_directory}'")

##Script execution

# starting time
start = time.time()
print(start)
frames = []
for file in speedtest_input_files:
    frames.append(extract_speedtest_data(file))
    print(file + ' completed')

#Combine all per-file frames in one go (each frame keeps its own per-file row index)
result = pd.concat(frames)
# end time
end = time.time()
print(f"Runtime of the program is {end - start}")

1644736161.626739
data/Speedtest/android_2022-01-02.csv completed
data/Speedtest/android_2022-01-03.csv completed
data/Speedtest/android_2022-01-01.csv completed
Runtime of the program is 2.5388379096984863


In [20]:
#Split the combined result into one frame per operator (exact match on the operator name)
operator_names = result['network_operator_name']
unifi = result.loc[operator_names.eq('Unifi')]
celcom = result.loc[operator_names.eq('Celcom')]
maxis = result.loc[operator_names.eq('Maxis')]
digi = result.loc[operator_names.eq('Digi')]
umobile = result.loc[operator_names.eq('U Mobile')]

In [21]:
#Write each operator's frame to its configured CSV path
for operator_frame, output_path in (
    (unifi, unifi_output),
    (celcom, celcom_output),
    (maxis, maxis_output),
    (digi, digi_output),
    (umobile, umobile_output),
):
    operator_frame.to_csv(output_path)