In [1]:
import numpy as np
import pandas as pd

# The Parser module is used to convert the raw data to a standard format
from mobvis.preprocessing.parser import Parser as par

# The Locations module is used to find the Geo-locations of the trace, used by almost all metrics
from mobvis.metrics.utils.Locations import Locations as loc
# The HomeLocations module is used to find the Home-locations of the trace, used by some metrics
from mobvis.metrics.utils.HomeLocations import HomeLocations as hloc
# The Contacts module is used to detect the Contacts between the nodes, used by Social metrics
from mobvis.metrics.utils.Contacts import Contacts as cnt

# The MetricBuilder module can be used to instantiate all the metrics
from mobvis.metrics.utils.MetricBuilder import MetricBuilder as mb

# The metric and spatial plotters contains all the visualizations of MobVis
from mobvis.plots.metric_plotter import *
from mobvis.plots.spatial_plotter import *

Script to interpolate the missing position samples of low-mobility devices onto a regular 30-second timestamp grid

In [3]:
""" ADD THE TRACE'S FOLDER NAME HERE"""
tracename = "aSIOTmm_30-07-2024_18.44.34.013/"

# Folder of the selected trace; the CSV files used below are read from and
# written to this folder.
prepath = "./traces/" + tracename

# Load the fixed info.csv. The first line of the file is skipped and the
# column names are supplied here instead.
infoFixed = pd.read_csv(
    prepath + "infoFixed.csv",
    sep=",",
    skiprows=1,
    names=["id", "owner", "type", "old_id"],
)


In [4]:
# Mobility classes: the device types that belong to each class
highMobility = ["smartphone", "wristband"]
midMobility = ["tablet", "laptop"]
lowMobility = ["tv", "console"]

def categorize_mobility(device_type):
    """Return the name of the mobility class that lists `device_type`.

    The classes are checked in the order high, mid, low, and the first one
    containing the value wins.

    Parameters:
        device_type: value to look up in the mobility class lists.

    Returns:
        'highMobility', 'midMobility' or 'lowMobility', or 'other' when the
        value is in none of the lists.
    """
    mobility_classes = (
        ('highMobility', highMobility),
        ('midMobility', midMobility),
        ('lowMobility', lowMobility),
    )
    for label, members in mobility_classes:
        if device_type in members:
            return label
    # it should never be other when human nodes are removed
    return 'other'

def getLowMobilityIDs():
    """Return the 'id' values of all devices whose mobility class is 'lowMobility'.

    Reads the module-level `infoFixed` frame, which must already carry the
    'mobility' column when this function is called.

    Returns:
        A plain Python list of ids, in the row order of `infoFixed`.
    """
    is_low_mobility = infoFixed['mobility'] == 'lowMobility'
    return infoFixed.loc[is_low_mobility, 'id'].tolist()

# Tag every device with its mobility class, derived from its 'type' value
infoFixed['mobility'] = infoFixed['type'].map(categorize_mobility)


In [5]:
# def interpolateTrace(df):
#     maxTimestamp = int(df['timestamp'].max())
#     allTimestamps = list(range(0, maxTimestamp + 30, 30))
    
#     for id in df['id'].unique():

# interpolates low mobility devices
def interpolateLowMobility(df, low_mobility_ids=None, step=30):
    """Resample the low-mobility devices of a trace onto a regular time grid.

    For every requested device id, the device's rows are left-joined onto a
    grid of timestamps 0, step, 2*step, ... that covers the largest timestamp
    found in `df`, and the missing x / y values are filled by linear
    interpolation between the samples that fall on that grid.

    Notes:
        * Samples whose timestamp is not on the grid are not part of the
          result (the join keeps grid timestamps only).
        * Interpolation uses pandas' default forward direction: grid points
          before a device's first on-grid sample stay NaN, grid points after
          its last on-grid sample repeat the last known position.

    Parameters:
        df: trace DataFrame with at least the columns 'id', 'timestamp',
            'x' and 'y'.
        low_mobility_ids: iterable of device ids to interpolate. When None
            (the default) the ids are taken from getLowMobilityIDs().
        step: spacing of the timestamp grid, in the unit of the 'timestamp'
            column. Defaults to 30.

    Returns:
        DataFrame with the columns ['id', 'timestamp', 'x', 'y'], sorted by
        id and timestamp. It is empty (but has these columns) when `df` is
        empty or there are no ids to interpolate.

    Raises:
        ValueError: if `step` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive number")

    columns = ['id', 'timestamp', 'x', 'y']

    if low_mobility_ids is None:
        low_mobility_ids = getLowMobilityIDs()
    low_mobility_ids = list(low_mobility_ids)

    # Nothing to interpolate: return an empty frame with the expected columns
    # instead of failing on int(NaN) or on the column selection below.
    if df.empty or not low_mobility_ids:
        return pd.DataFrame(columns=columns)

    maxTimestamp = int(df['timestamp'].max())

    # Regular grid of timestamps shared by all devices
    all_times = pd.DataFrame({
        'timestamp': np.arange(0, maxTimestamp + step, step)
    })

    # Collect the per-device frames and concatenate once at the end, which
    # avoids repeatedly copying a growing DataFrame inside the loop.
    per_id_frames = []
    for trace_id in low_mobility_ids:
        # current id dataframe
        id_df = df[df['id'] == trace_id]

        # Merge with all_times df to include missing timestamps
        id_df_full = pd.merge(all_times, id_df, on='timestamp', how='left')

        # Fill missing IDs (the left join leaves NaN where no sample existed)
        id_df_full['id'] = trace_id

        # Interpolation
        id_df_full['x'] = id_df_full['x'].interpolate(method='linear')
        id_df_full['y'] = id_df_full['y'].interpolate(method='linear')

        per_id_frames.append(id_df_full)

    # Reorder columns
    interpolated_df = pd.concat(per_id_frames)[columns]

    # Sort to get it like the parsed version
    return interpolated_df.sort_values(by=['id', 'timestamp'])


In [None]:
# Load the fixed position trace. The first line of the file is skipped and the
# column names are supplied here instead.
posTraceCSV = pd.read_csv(
    prepath + "pos_traceAddedZerosAndFixed.csv",
    sep=",",
    skiprows=1,
    names=['id', 'timestamp', 'x', 'y'],
)

# Largest timestamp found in the trace, printed as a quick check
maxTime = int(posTraceCSV['timestamp'].max())
print(maxTime)

# Interpolated trace of low mobility devices, also saved in the trace folder
interpolatedLowMobility = interpolateLowMobility(posTraceCSV)
interpolatedLowMobility.to_csv(
    prepath + "lowMobilityInterpolated.csv",
    header=True,
    index=False,
)



In [None]:
# Add interpolated trace to the original fixed trace.
# The interpolated frame already contains every observed low-mobility sample
# that lies on the interpolation grid, so a plain concat would list those rows
# twice. Exact duplicate rows are therefore dropped before parsing.
posTraceWithLowMobility = (
    pd.concat([posTraceCSV, interpolatedLowMobility])
    .drop_duplicates()
    .sort_values(by=['id', 'timestamp'])
)
# Let MobVis parse the trace once again to its format
posTraceWithLowMobilityParsed = par.parse(posTraceWithLowMobility, is_ordered=False)
posTraceWithLowMobilityParsed.to_csv(prepath + "pos_traceAddedZerosAndFixedAndLowsInterpolated.csv", index=False, header=True)


In [None]:
# Count the rows of the parsed trace whose x coordinate is missing
asd = posTraceWithLowMobilityParsed.loc[posTraceWithLowMobilityParsed['x'].isna()]
print(len(asd))
# remove data points with missing x or y
posTraceWithLowMobilityParsed1 = posTraceWithLowMobilityParsed.dropna(
    subset=['x', 'y'],
)
posTraceWithLowMobilityParsed1

In [9]:
# Save the interpolated trace after the rows with a missing x or y were removed
posTraceWithLowMobilityParsed1.to_csv(
    prepath + "pos_traceAddedZerosAndFixedAndLowsInterpolatedNaNRemoved.csv",
    header=True,
    index=False,
)