In [1]:
import pandas as pd

# The Parser module is used to convert the raw data to a standard format
from mobvis.preprocessing.parser import Parser as par

# The Locations module is used to find the Geo-locations of the trace, used by almost all metrics
from mobvis.metrics.utils.Locations import Locations as loc
# The HomeLocations module is used to find the Home-locations of the trace, used by some metrics
from mobvis.metrics.utils.HomeLocations import HomeLocations as hloc
# The Contacts module is used to detect the Contacts between the nodes, used by Social metrics
from mobvis.metrics.utils.Contacts import Contacts as cnt

# The MetricBuilder module can be used to instantiate all the metrics 
from mobvis.metrics.utils.MetricBuilder import MetricBuilder as mb

# The metric and spatial plotters contains all the visualizations of MobVis
from mobvis.plots.metric_plotter import *
from mobvis.plots.spatial_plotter import *

Script to "fix" (standardize) the trace.
Given a generated trace that contains human nodes, this produces a trace without the human nodes and adds an initial position (the home location at timestamp 0) for every device that never moved during the whole trace.


In [None]:

import sys


def fixInfoCSV(filename):
    """
    Build the cleaned device table from an info CSV.

    The file is read as three columns (id, owner, type) with its first row
    skipped. Rows whose type is 'HumanNode' (ignoring case and surrounding
    whitespace) are removed, the remaining rows get consecutive ids starting
    from 0, and owner ids are renumbered 1, 2, 3, ... in order of first
    appearance.

    Parameters
    ----------
    filename : str or path-like
        Path of the info CSV.

    Returns
    -------
    pandas.DataFrame
        Columns 'id' (new id), 'owner' (new owner id), 'type' (stripped and
        lowercased) and 'old_id' (the id the row had in the file). The row
        index of the input is kept, so it may contain gaps.
    """
    df = pd.read_csv(filename, names=['id', 'owner', 'type'], sep=",", skiprows=1)

    # Normalize the type column so the comparison below ignores case and padding
    df['type'] = df['type'].str.strip().str.lower()

    # Drop the human nodes. The explicit copy makes the result an independent
    # frame, so the column assignments below do not write to a filtered slice
    # (which triggers pandas' chained-assignment warning).
    df = df[df['type'] != 'humannode'].copy()

    # Remember the id from the file, then renumber the remaining rows from 0
    df['old_id'] = df['id']
    df['id'] = range(len(df))

    # Renumber owners from 1 in order of first appearance. The default passed to
    # setdefault is only stored when the owner has not been seen before.
    unique_owners = {}
    df['owner'] = df['owner'].apply(
        lambda old_owner_id: unique_owners.setdefault(old_owner_id, len(unique_owners) + 1)
    )

    return df

def fixPosTraceCSV(filename, id_mapping):
    """
    Replaces the old IDs in pos_trace with the new ones.

    The file is read as four columns (id, x, y, time) with its first row
    skipped. Rows whose id has no entry in ``id_mapping`` are dropped and the
    id column is returned as integers, matching what fixHomeLocationsCSV does.
    Without this, a single unmapped id would leave NaN in the column and turn
    every id into a float.

    Parameters
    ----------
    filename : str or path-like
        Path of the position trace CSV.
    id_mapping : dict
        Maps old ids (as found in the file) to new ids.

    Returns
    -------
    pandas.DataFrame
        Columns 'id', 'x', 'y', 'time', with 'id' holding the new integer ids.
        The row index of the input is kept for the surviving rows.
    """
    df = pd.read_csv(filename, names=['id', 'x', 'y', 'time'], sep=",", skiprows=1)

    # Translate the ids; ids missing from the mapping become NaN here
    df['id'] = df['id'].map(id_mapping)

    # Discard the rows that could not be translated (copy: see assignment below)
    df = df.dropna(subset=['id']).copy()

    # With the NaN rows gone the ids can be stored as integers again
    df['id'] = df['id'].astype(int)

    return df

def fixHomeLocationsCSV(filename, id_mapping):
    """
    Replaces the old IDs in the home locations file with the new ones.

    The file is read as three columns (id, x, y) with its first row skipped.
    Rows whose id is not a key of ``id_mapping`` are dropped; the remaining
    ids are returned as integers.
    """
    homes = pd.read_csv(filename, names=['id', 'x', 'y'], sep=",", skiprows=1)

    # Translate the ids; anything absent from the mapping turns into NaN
    remapped = homes.assign(id=homes['id'].map(id_mapping))

    # Keep only the rows that received a new id
    known = remapped[remapped['id'].notna()]

    # The NaN rows are gone, so the id column can be cast to int
    return known.astype({'id': int})

def generateInitTimestamps(fixedHomeLocationFilename):
    '''
    Read a home location file (id, x, y; first row skipped) and return it with
    an extra 'timestamp' column that is 0.0 for every row.
    The result can be appended manually to the trace file.
    '''
    homes = pd.read_csv(
        fixedHomeLocationFilename,
        names=['id', 'x', 'y'],
        sep=",",
        skiprows=1,
    )
    # Every home location is treated as the position at time 0
    return homes.assign(timestamp=0.0)

def getHumanNodes(filename="info.csv"):
    '''
    Returns the IDs of the human nodes.

    The file is read as three columns (id, owner, type) with its first row
    skipped. A row counts as a human node when its type equals 'HumanNode'
    after stripping surrounding whitespace and lowercasing, which is the same
    rule fixInfoCSV uses to drop them (the raw file stores the value with a
    leading space, ' HumanNode').

    Parameters
    ----------
    filename : str or path-like, optional
        Path of the info CSV. Defaults to "info.csv".

    Returns
    -------
    pandas.Series
        The 'id' values of the human node rows.
    '''
    df = pd.read_csv(filename, names=['id', 'owner', 'type'], sep=",", skiprows=1)
    # Compare on a normalized copy of the column; the frame itself is not modified
    normalized_type = df['type'].str.strip().str.lower()
    return df.loc[normalized_type == 'humannode', 'id']

def removeHumanNodesFromPosTrace(humanNodes:list, posTraceFilename):
    '''
    Read the pos_trace file (id, x, y, time; first row skipped) and return
    only the rows whose id is not one of the given human node ids.
    '''
    trace = pd.read_csv(
        posTraceFilename,
        names=['id', 'x', 'y', 'time'],
        sep=",",
        skiprows=1,
    )
    # True for every row that belongs to a human node
    is_human = trace['id'].isin(humanNodes)
    return trace.loc[~is_human]

def removeHumanNodesFromHomeLocation(humanNodes:list, homelocationFilename):
    '''
    Read the home location file (id, x, y; first row skipped) and return only
    the rows whose id is not one of the given human node ids.
    '''
    homes = pd.read_csv(
        homelocationFilename,
        names=['id', 'x', 'y'],
        sep=",",
        skiprows=1,
    )
    # True for every row that belongs to a human node
    is_human = homes['id'].isin(humanNodes)
    return homes.loc[~is_human]

# Add a lot of redundant data/files, in case of errors:
# every intermediate result below is written to its own CSV file.

# Save a copy of pos_trace.csv as it is right now, because the file is overwritten a few lines below.
# NOTE(review): on a second run pos_trace.csv already holds the filtered trace, so this
# backup is then replaced by the filtered version as well — confirm this is acceptable.
originalPosTrace = pd.read_csv("pos_trace.csv", names=['id', 'x', 'y', 'time'], sep=",", skiprows=1)
originalPosTrace.to_csv("pos_traceOriginal.csv", index=False, header=True)

# Remove Human Nodes from the trace (getHumanNodes() takes their ids from info.csv)
removedHumanNodes = removeHumanNodesFromPosTrace(getHumanNodes(), "pos_trace.csv")
# Overwrite pos_trace with the one where no Human Nodes are present
removedHumanNodes.to_csv("pos_trace.csv", index=False, header=True)

# Transform the first dataset: drop human nodes, renumber ids from 0, renumber owners from 1
infoCSV = fixInfoCSV("info.csv")
# Save transformed infocsv
infoCSV.to_csv("infoFixed.csv", index=False, header=True)

# Create a dictionary to map old IDs to new IDs
id_mapping = dict(zip(infoCSV['old_id'], infoCSV['id']))

# Replace the old IDs in the second dataset (the overwritten pos_trace.csv) with the new IDs
posTraceCSV = fixPosTraceCSV("pos_trace.csv", id_mapping)
posTraceCSV.to_csv("pos_traceFixed.csv", index=False, header=True)



# Remap the ids in home_locations.csv; rows whose id is not in id_mapping
# (the removed human nodes) are dropped by fixHomeLocationsCSV
fixedHomesCSV = fixHomeLocationsCSV("home_locations.csv", id_mapping)
fixedHomesCSV.to_csv("home_locationsFixed.csv", index=False, header=True)


# Initialize/Add devices: Homelocation to be their (x,y) for timestep 0
# (reads back the file written just above and adds a timestamp column of 0.0)
initTimeStamps = generateInitTimestamps("home_locationsFixed.csv")
initTimeStamps.to_csv("init_timestamps.csv", index=False, header=True)

# Parse the trace once
# NOTE(review): the output columns of par.parse are not visible here; later code
# concatenates this frame with initTimeStamps (id, x, y, timestamp) — confirm they match.
parsed_trace = par.parse(posTraceCSV, is_ordered=False)
parsed_trace.to_csv('pos_traceParsed.csv', index=False, header=True)
# From here on parsed_trace is the frame as re-read from the CSV written on the previous line
parsed_trace = pd.read_csv('pos_traceParsed.csv')

# # Crop the trace here to only include the rows that have a timestamp < x and slightly adjust the code
# x = 86400*8 # x days
# croppedTrace = parsed_trace[parsed_trace['timestamp'] < x]
# croppedTrace.to_csv('pos_traceFixed5Days.csv', index=False, header=True)

# Find the ids that have no rows in posTraceCSV.
# The candidates are all new ids listed in infoCSV. Using range(0, maxID) instead
# would only cover ids below the largest id present in the trace, so devices with
# a higher id that never moved would be missed.
maxID = posTraceCSV['id'].max()  # kept for reference; no longer bounds the search
allDeviceIds = pd.Index(infoCSV['id'], name='id')
# Number of trace rows per device id, 0 for ids that do not occur in the trace
traceZeros = posTraceCSV.groupby(['id']).size().reindex(allDeviceIds, fill_value=0)
zeroRows = traceZeros[traceZeros == 0]
# for i in zeroRows.index:
#     print(i)
zeroRowsDF = pd.DataFrame(zeroRows.index, columns=['id'])
zeroRowsDF.to_csv("idsWithZeroRows.txt", index=False, header=True)

# # Get ids from the croppedTrace that have zero rows
# croppedTraceZeros = croppedTrace.groupby(['id']).size().reindex(range(0, 534), fill_value=0)
# zeroRows = croppedTraceZeros[croppedTraceZeros == 0]

# Get the ids back from idsWithZeroRows.txt (the file written above; its header row is skipped)
idsWithZeroRows = pd.read_csv("idsWithZeroRows.txt", names=['id'], sep=",", skiprows=1)
idsWithZeroRows['id'] = idsWithZeroRows['id'].astype(int)

# Get initTimeStamps for ids in idsWithZeroRows, i.e. the time-0 home position of
# every device that has no rows in the trace
initTimeStampsZeroRows = initTimeStamps[initTimeStamps['id'].isin(idsWithZeroRows['id'])]
initTimeStampsZeroRows.to_csv("init_timestampsZeroRows.csv", index=False, header=True)


In [None]:
# This cell relies on zeroRows, initTimeStamps and parsed_trace from the previous cell.

# Get ids from the croppedTrace that have zero rows
# croppedTraceZeros = croppedTrace.groupby(['id']).size().reindex(range(0, 534), fill_value=0)
# zeroRows = croppedTraceZeros[croppedTraceZeros == 0]
# print(zeroRows)

# for every id in zeroRows, get the corresponding init location from initTimeStamps
# (zeroRows is indexed by device id, so its index holds the ids without trace rows)
initTimeStampsZeroRows = initTimeStamps[initTimeStamps['id'].isin(zeroRows.index)]
print(initTimeStampsZeroRows)

# Append initTimeStampsZeroRows to trace
# NOTE(review): the appended rows have the columns id, x, y, timestamp; this assumes
# parsed_trace uses the same column names — confirm against the Parser output.
concat = pd.concat([parsed_trace, initTimeStampsZeroRows])

# Parse the trace to MobVis' format and save it
final = par.parse(concat, is_ordered=False)
final.to_csv('pos_traceAddedZerosAndFixed.csv', index=False, header=True)
