## Get statistics from data sequence logs

In [1]:
import pandas as pd
import os
import numpy as np
import scipy.stats as st

def confidence_intervals(level, data):
    """Return a Student-t confidence interval for the mean of ``data``.

    Parameters
    ----------
    level : float
        Confidence level passed to ``scipy.stats.t.interval`` (e.g. 0.95).
    data : sequence of numbers
        Sample values; the interval is centred on their mean and scaled by
        their standard error, with ``len(data) - 1`` degrees of freedom.

    Returns
    -------
    tuple of (float, float)
        Lower and upper bound of the interval as plain Python floats.
    """
    degrees_of_freedom = len(data) - 1
    sample_mean = np.mean(data)
    standard_error = st.sem(data)

    bounds = st.t.interval(
        confidence=level,
        df=degrees_of_freedom,
        loc=sample_mean,
        scale=standard_error,
    )
    # Convert the NumPy scalars to built-in floats before returning them.
    return (float(bounds[0]), float(bounds[1]))
    
def interesting_stats(file_path, log_dir="../logfiles/datasequence_logs/"):
    """Load a data-sequence log and print summary throughput statistics.

    The log is read as a tab-separated file with the Python parser engine;
    malformed lines are skipped (``on_bad_lines='skip'``). The function prints
    the median and mean of the ``TESTDOWNLINK`` and ``TESTUPLINK`` columns
    (each divided by 1000 and rounded to two decimals), followed by the
    percentage of rows whose ``NetworkTech`` column equals ``"5G"``.

    Parameters
    ----------
    file_path : str
        Name of the log file, resolved relative to ``log_dir``.
    log_dir : str, optional
        Directory that holds the log files. Defaults to
        ``"../logfiles/datasequence_logs/"``.

    Returns
    -------
    pandas.DataFrame
        The parsed log.

    Raises
    ------
    FileNotFoundError
        If the resolved path does not exist.
    KeyError
        If ``TESTDOWNLINK``, ``TESTUPLINK`` or ``NetworkTech`` is missing.
    """
    full_path = os.path.join(log_dir, file_path)
    df = pd.read_csv(full_path, sep="\t", engine="python", on_bad_lines='skip')

    #Throughput values
    testdownlink = df["TESTDOWNLINK"].values.tolist()
    testuplink = df["TESTUPLINK"].values.tolist()

    # Share of rows measured on 5G. The comparison produces a boolean Series
    # with one entry per row, so taking its len() would always equal len(df)
    # and report 100 %. The mean of the mask is the fraction of True values.
    is_5g = df["NetworkTech"] == "5G"
    percentage_5g = float(is_5g.mean() * 100)

    print("Median DL:", (np.median(testdownlink)/1000).round(2),"Mbps \nMedian UP:", (np.median(testuplink)/1000).round(2), "Mbps.")
    print("Mean DL:", (np.mean(testdownlink)/1000).round(2),"Mbps \nMean UP:", (np.mean(testuplink)/1000).round(2), "Mbps.")
    print("Percentage of test performed with 5G connection:", percentage_5g)
    #print("CI DL:", confidence_intervals(0.95,testdownlink))
    #print("CI UL:", confidence_intervals(0.95,testuplink))

    return df

In [2]:
# Read the log file, print its summary statistics, and keep the parsed
# DataFrame in `df` for the cells below.
df = interesting_stats(file_path="telia_12.02_campustest_datatest.txt")

Median DL: 841.75 Mbps 
Median UP: 170.58 Mbps.
Mean DL: 829.39 Mbps 
Mean UP: 154.64 Mbps.
Percentage of test performed with 5G connection: 100.0


In [3]:
# Optional pandas display settings, currently disabled. Uncomment to set the
# row display limit to 20 and the column display limit to 100.
#pd.set_option('display.max_rows', 20)
#pd.set_option('display.max_columns', 100)

In [4]:
# Show the DataFrame returned by interesting_stats as this cell's output.
df

Unnamed: 0,Timestamp,Longitude,Latitude,Operatorname,Node,CellID,LAC,NetworkTech,Level,Qual,...,LTERSSI,PINGAVG,PINGMIN,PINGMAX,PINGSTDEV,PINGLOSS,TESTDOWNLINK,TESTUPLINK,TESTDOWNLINKMAX,TESTUPLINKMAX
0,2025.02.12_14.58.39,10.40541,63.417958,Telia N,121687,11,2305,5G,-90,-11,...,-,64,40,237,58,0,828502,128967,944765,145641
1,2025.02.12_14.59.19,10.40446,63.417948,Telia N,121687,11,2305,5G,-90,-11,...,-,42,30,54,6,0,951730,176662,1026560,179481
2,2025.02.12_14.59.59,10.403899,63.418335,Telia N,121687,11,2305,5G,-90,-11,...,-,51,37,61,8,0,676004,184649,871765,186761
3,2025.02.12_15.00.39,10.403314,63.418724,Telia N,121687,31,2305,5G,-90,-11,...,-,42,31,59,7,0,780381,178076,873153,183798
4,2025.02.12_15.01.19,10.403173,63.419167,Telia N,241619,31,2305,5G,-90,-11,...,-,44,40,50,4,0,912362,170583,1019968,176394
5,2025.02.12_15.02.00,10.403056,63.419608,Telia N,241619,31,2305,5G,-90,-11,...,-,45,36,55,6,0,630091,116307,708512,128212
6,2025.02.12_15.02.40,10.402164,63.419603,Telia N,126484,31,2305,5G,-90,-11,...,-,51,41,62,6,0,774668,138417,877451,143608
7,2025.02.12_15.03.20,10.401358,63.419449,Telia N,126484,31,2305,5G,-90,-11,...,-,57,39,94,15,0,620366,118089,711558,120934
8,2025.02.12_15.04.00,10.401193,63.419076,Telia N,241621,11,2305,5G,-90,-11,...,-,50,35,99,17,0,549003,68611,675519,83704
9,2025.02.12_15.04.40,10.402061,63.418828,Telia N,241619,31,2305,5G,-90,-11,...,-,49,30,62,8,0,963436,138386,1020175,162047
