In [1]:
import time
import pandas as pd
import os.path
import numpy as np
from datetime import date

In [2]:
DAY_MS = 24*60*60*1000 # a day length in ms

def tf_hour_mapper(ts_init, ts_end, ts_occurr_str, days_offset, base_date=None):
    """Rescale a timestamp from the interval [ts_init, ts_end] onto one 24-hour day.

    The position of the occurrence inside the interval is mapped linearly onto
    a full day: ``ts_init`` becomes 00:00:00 and ``ts_end`` becomes
    23:59:59.999999. The resulting day is ``base_date`` shifted back by
    ``days_offset`` days.

    Parameters
    ----------
    ts_init : pandas.Timestamp
        Start of the source interval (mapped to midnight).
    ts_end : pandas.Timestamp
        End of the source interval (mapped to the last microsecond of the day).
    ts_occurr_str : str or pandas.Timestamp
        The timestamp to convert; anything accepted by ``pd.to_datetime``.
    days_offset : int
        Number of days subtracted from the reference day.
    base_date : date-like, optional
        Reference day. Defaults to ``date.today()``, evaluated on each call.
        Pass an explicit value to keep a long conversion on a single date even
        if it runs across midnight.

    Returns
    -------
    pandas.Timestamp
        The simulated timestamp, with microsecond resolution.

    Raises
    ------
    ValueError
        If ``ts_occurr_str`` parses to NaT.
    ZeroDivisionError
        If ``ts_init == ts_end`` while the occurrence differs from both.
    """
    ts_occurr = pd.to_datetime(ts_occurr_str)
    if pd.isna(ts_occurr):
        raise ValueError(f"Cannot map a missing timestamp: {ts_occurr_str!r}")

    if base_date is None:
        base_date = date.today()

    # Midnight of the simulated day, already shifted back by days_offset.
    day_start = pd.Timestamp(base_date).normalize() - pd.Timedelta(days=days_offset)
    # Largest offset that still belongs to the simulated day (23:59:59.999999).
    last_instant = pd.Timedelta(days=1) - pd.Timedelta(microseconds=1)

    if ts_occurr == ts_init:
        return day_start
    if ts_occurr == ts_end:
        return day_start + last_instant

    delta_ms = (ts_end - ts_init).total_seconds() * 1000
    occurr_delta_ms = (ts_occurr - ts_init).total_seconds() * 1000

    # day in ms : ms of simulated day = x : ms of ts_occurr from init
    x = DAY_MS * occurr_delta_ms / delta_ms

    # Occurrences outside [ts_init, ts_end] wrap around within the same day.
    # Building the offset as a Timedelta (instead of formatting and re-parsing
    # an "H:M:S" string) avoids the invalid "…:60.000000" seconds field that
    # appears when the seconds component rounds up to 60.
    offset = pd.Timedelta(milliseconds=x % DAY_MS).round('us')

    # Rounding (or float error in the modulo) must not spill into the next day.
    offset = min(offset, last_instant)

    return day_start + offset



# Sanity check: the occurrence sits exactly halfway through a two-minute
# interval, so it should land on 12:00:00 of the day five days before today.
x = tf_hour_mapper(
    ts_init=pd.to_datetime("2022-05-19 13:34:18.951007"),
    ts_end=pd.to_datetime("2022-05-19 13:36:18.951007"),
    ts_occurr_str="2022-05-19 13:35:18.951007",
    days_offset=5,
)
x

Timestamp('2022-06-22 12:00:00')

In [3]:
def read_workload_csv(filepath) -> pd.DataFrame:
    """Read a workload CSV and return it without its two leading columns.

    Parameters
    ----------
    filepath : str or path-like
        Location of the CSV file.

    Returns
    -------
    pandas.DataFrame
        The file contents from the third column onwards.

    Raises
    ------
    FileNotFoundError
        If ``filepath`` does not exist; the message names the missing path.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Workload CSV not found: {filepath}")

    df = pd.read_csv(filepath)

    # Keep everything from the third column onwards.
    # NOTE(review): the previous comment said "drop id column", but two columns
    # are dropped -- presumably a saved index column plus the id; confirm
    # against the input files.
    return df.iloc[:, 2:]

In [4]:
def convert_and_store_workload(folder, filename, days_offset):
    """Rescale a workload's timestamps onto a single 24-hour day and save it.

    Reads ``{folder}/{filename}``, maps every value of its ``timestamp``
    column through ``tf_hour_mapper`` (earliest timestamp -> start of day,
    latest -> end of day, shifted back by ``days_offset`` days) and writes the
    result to ``{folder}/sim/<name>_sim.csv``, where ``<name>`` is ``filename``
    without its extension. The ``sim`` folder is created when missing.

    Parameters
    ----------
    folder : str
        Directory containing the input CSV.
    filename : str
        Name of the input CSV inside ``folder``.
    days_offset : int
        Number of days passed on to ``tf_hour_mapper``.
    """
    path_to_csv = f"{folder}/{filename}"

    # create a folder for the simulated workloads if not exists
    sim_folder = f"{folder}/sim"
    if not os.path.exists(sim_folder):
        # exist_ok guards against the folder appearing between check and create
        os.makedirs(sim_folder, exist_ok=True)
        display(f"Folder {sim_folder} created")

    display(f"Converting {filename} with day offset of {days_offset}")
    workload = read_workload_csv(path_to_csv)

    # Parse each timestamp on its own (values may not all share one textual
    # format), so ordering and min/max below are chronological rather than
    # lexicographic on the raw strings.
    parsed_ts = workload['timestamp'].map(pd.to_datetime)
    workload = workload.assign(timestamp=parsed_ts).sort_values(by=['timestamp'], kind='stable')

    # identify start and end timestamps
    ts_init = workload['timestamp'].min() # init -> 00:00:00
    ts_end = workload['timestamp'].max()  # end -> 23:59:59

    # CONVERT TIMESTAMPS TO 24 HOUR FORMAT
    sim_ts = [tf_hour_mapper(ts_init, ts_end, ts, days_offset)
              for ts in workload['timestamp']]

    # replace the old timestamp column, keeping the new one as first column
    workload = workload.drop(columns='timestamp')
    workload.insert(0, 'timestamp', sim_ts)

    # store; strip the real extension instead of assuming it is 4 characters.
    # The DataFrame index is written as the first CSV column (pandas default).
    stem, _ext = os.path.splitext(filename)
    workload.to_csv(f'{sim_folder}/{stem}_sim.csv')

In [13]:
# Convert workloads 101..114. Each one gets its own day offset so that the
# highest id ends up one day back and every lower id one further day back.
folder = "../postgres/data/rr_week/rt"

# The folder is the same for every workload, so it is checked once up front.
is_exist = os.path.exists(folder)
if not is_exist:
    # Create a new directory because it does not exist
    os.makedirs(folder)
    display(f"Folder {folder} created")

ids = np.arange(101, 114 +1)
last_id = ids[-1]

for i in ids:
    day_offset = last_id - i + 1
    filename = f"workload_{i}.csv"
    convert_and_store_workload(folder, filename, day_offset)

'Converting workload_101.csv with day offset of 14'

'Converting workload_102.csv with day offset of 13'

'Converting workload_103.csv with day offset of 12'

'Converting workload_104.csv with day offset of 11'

'Converting workload_105.csv with day offset of 10'

'Converting workload_106.csv with day offset of 9'

'Converting workload_107.csv with day offset of 8'

'Converting workload_108.csv with day offset of 7'

'Converting workload_109.csv with day offset of 6'

'Converting workload_110.csv with day offset of 5'

'Converting workload_111.csv with day offset of 4'

'Converting workload_112.csv with day offset of 3'

'Converting workload_113.csv with day offset of 2'

'Converting workload_114.csv with day offset of 1'