In [None]:
import ast
import copy
import json
import re
from datetime import datetime

import matplotlib
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

In [None]:
# Output backends: AFM fonts for PostScript, the 14 core fonts for PDF, and
# LaTeX for all text rendering (requires a working LaTeX installation).
matplotlib.rcParams.update({
    'ps.useafm': True,
    'pdf.use14corefonts': True,
    'text.usetex': True,
})

# Font sizes shared by every figure in this notebook.
text_font_size = 16
marker_font_size = 11
label_font_size = 15
axes_font_size = 12

# Same keys that plt.rc('<group>', <name>=...) would set, written out in full.
matplotlib.rcParams.update({
    'font.size': text_font_size,
    'axes.labelsize': axes_font_size,
    'xtick.labelsize': label_font_size,
    'ytick.labelsize': label_font_size,
    'legend.fontsize': label_font_size,
})

In [None]:
# Input files, resolved relative to the notebook's working directory.
# DATA_FILE_JSON is read with json.load and indexed by job id further down;
# DATA_FILE_SUMMARY is the line-oriented summary parsed in the next cell.
DATA_FILE_JSON = "data.json"
DATA_FILE_SUMMARY = "summary"

In [None]:
# Load the summary to identify files to follow
#
# Assumes summary format is in this order
#     trivial:    -> all trivial moves
#                 order of:
#                     job [src_level]->[dest_level] [src_table]->[dest_table]
#                     R: [zone_resets]
#     flush:      -> all flushes
#                 order of:
#                     job [table]
#                     R: [zone_resets]
#     compaction: -> all compactions
#                 order of:
#                     job src_level->dest_level [src_tables] | [merge_tables] -> [dest_tables]
#                     D: [table_deletes]
#                     R: [zone_resets]
#
# Result: summary_dict[section][job] -> dict of the fields parsed for that job.


def _parse_list_literal(text):
    """Parse the span from the first "[" to the first "]" of ``text`` as a list.

    ast.literal_eval is used instead of eval(): the summary file is data, so
    only Python literals are accepted and nothing in it can be executed.
    """
    start = text.find("[")
    end = text.find("]") + 1
    return ast.literal_eval(text[start:end].strip())


summary_dict = {"trivial": {}, "flush": {}, "compaction": {}}

# Jobs whose "R:" line has not been seen yet. "R:" closes (pops) the most
# recent job, "D:" only annotates it.
job_stack = []

# Section currently being parsed: None until the "trivial:" header is seen,
# then "trivial" -> "flush" -> "compaction".
section = None

with open(DATA_FILE_SUMMARY) as sum_file:
    for line in sum_file:
        if not line.strip():
            # Blank lines carry no record; previously they raised IndexError.
            continue

        if section == "trivial":
            if "flush:" in line:
                section = "flush"
            elif "R" in line:
                summary_dict["trivial"][job_stack.pop()]["resets"] = _parse_list_literal(line)
            else:
                line_split = line.split()
                job = line_split[0]
                levels = [int(number) for number in re.findall(r'\d+', line_split[1])]
                summary_dict["trivial"][job] = {
                    "src_level": levels[0],
                    "dest_level": levels[1],
                    # Tables are identical as the moves are trivial.
                    # NOTE(review): this takes the FIRST [...] on the line; if the
                    # levels are literally bracketed in the file this is the source
                    # level, not the table list - confirm against a real summary.
                    "tables": _parse_list_literal(line),
                }
                job_stack.append(job)

        elif section == "flush":
            if "compaction:" in line:
                section = "compaction"
            elif "R" in line:
                summary_dict["flush"][job_stack.pop()]["resets"] = _parse_list_literal(line)
            else:
                job = line.split()[0]
                summary_dict["flush"][job] = {
                    # Single quoted table, e.g. ['31']: skip "['" and drop "'" before "]"
                    "table": int(line[line.find("[") + 2:line.find("]") - 1]),
                }
                job_stack.append(job)

        elif section == "compaction":
            if "R" in line:
                summary_dict["compaction"][job_stack.pop()]["resets"] = _parse_list_literal(line)
            elif "D" in line:
                # Deletes belong to the job still awaiting its "R:" line.
                summary_dict["compaction"][job_stack[-1]]["deletes"] = _parse_list_literal(line)
            else:
                line_split = line.split()
                job = line_split[0]
                levels = [int(number) for number in re.findall(r'\d+', line_split[1])]
                # "<job> <levels> [src] | [merge] -> [dest]"
                table_split = line.split("|")
                info = table_split[1].split("->")
                summary_dict["compaction"][job] = {
                    "src_level": levels[0],
                    "dest_level": levels[1],
                    "src_tables": _parse_list_literal(table_split[0]),
                    "merge_tables": ast.literal_eval(info[0].strip()),
                    "dest_tables": ast.literal_eval(info[1].strip()),
                }
                job_stack.append(job)

        # Checked last so the header line itself is never parsed as a record.
        if "trivial:" in line:
            section = "trivial"

In [None]:
# Load the json data (per-job records, indexed by job id further down).
# The context manager closes the handle once parsing is done; the previous
# bare open() left it open for the lifetime of the kernel.
with open(DATA_FILE_JSON) as json_file:
    json_data = json.load(json_file)

## Class Definitions

In [None]:
class Zone:
    """A single zone as it appears in one time unit of the timeline.

    Attributes:
        zone: zone number.
        temperature: label used to pick the plot colour (e.g. a CURSEG_* hint,
            'RESET_ZONE' or 'INVALID_ZONE').
        status: one of "Valid", "Invalid", "Reset".
        files: list holding the file the zone was created with.
    """

    def __init__(self, zone, temperature, status, file):
        self.zone, self.temperature = zone, temperature
        self.status = status  # Valid, Invalid, Reset
        self.files = [file]

    def __str__(self):
        fields = (
            f"Zone Number: {self.zone}",
            f"Temp: {self.temperature}",
            f"Status: {self.status}",
            f"Files: {self.files}",
        )
        return ", ".join(fields) + "\n"

    def changestatus(self, status):
        """Overwrite the zone's status."""
        self.status = status

In [None]:
class TimeUnit:
    """One column of the timeline: an operation plus the zones and files it shows.

    ``time`` is stored as ``timestamp`` (seconds relative to the previous unit)
    and ``timestamp_object`` keeps the value the next unit's delta is taken from.
    """

    def __init__(self, id, job, operation, src, srclevel, destlevel, mergetables, time, timestamp_object):
        # What happened
        self.id = id
        self.job = job
        self.operation = operation
        # Which tables and levels were involved
        self.srctables = src
        self.mergetables = mergetables
        self.srclevel = srclevel  # M for memtable, X for non RocksDB op, otherwise the level number
        self.destlevel = destlevel
        # When it happened
        self.timestamp = time
        self.timestamp_object = timestamp_object
        # Filled in afterwards through addzone / addfile
        self.zones = []
        self.files = []

    def __str__(self):
        header = (
            f"ID: {self.id}, Job: {self.job}, OP: {self.operation}, Time: {self.timestamp}, "
            f"Level: {self.srclevel}->{self.destlevel} Files: {self.files}, Source Files: {self.srctables}, "
            f"Merged Files: {self.mergetables}\n"
        )
        zone_count = f"Total Zones: {len(self.zones)}\n"
        return header + zone_count + "".join(str(zone) for zone in self.zones)

    def addfile(self, file):
        """Record ``file`` unless it is the empty placeholder."""
        if file == "":
            return
        self.files.append(file)

    def hasfile(self, filename):
        """Return True when ``filename`` has been recorded for this unit."""
        return any(filename == file for file in self.files)

    def addzone(self, zoneid, temperature, status, file):
        """Append a new Zone and record its file if it is new and non-empty."""
        self.zones.append(Zone(zoneid, temperature, status, file))
        if file not in self.files and file != "":
            self.files.append(file)

    def haszone(self, zoneid):
        """Return True when a zone numbered ``zoneid`` is present."""
        return any(zoneid == zone.zone for zone in self.zones)

    def getzone(self, zoneid):
        """Return the first zone numbered ``zoneid``, or None."""
        return next((zone for zone in self.zones if zoneid == zone.zone), None)

    def updatezonestatus(self, zoneid, status):
        """Set ``status`` on every zone numbered ``zoneid``."""
        for zone in self.zones:
            if zone.zone == zoneid:
                zone.changestatus(status)

    def setoldzones(self, oldzones, isreset):
        """Carry the previous unit's zones over into this one.

        Zones already marked 'RESET_ZONE' are dropped. When ``isreset`` is true
        the remaining zones keep their temperature and status; otherwise they
        are carried over as 'INVALID_ZONE' / "Invalid". Carried zones have no
        file attached.
        """
        for zone in oldzones:
            if zone.temperature == 'RESET_ZONE':
                continue
            if isreset:
                self.addzone(zone.zone, zone.temperature, zone.status, "")
            else:
                self.addzone(zone.zone, "INVALID_ZONE", "Invalid", "")

In [None]:
def hastable(file, list):
    """Return True when ``file`` equals any entry of ``list`` after int() conversion.

    Both sides are converted with int(), so 31 and '31' are the same table.
    """
    wanted = int(file) if list else None  # original never converts ``file`` for an empty list
    return any(wanted == int(candidate) for candidate in list)

## Set configuration variables for plotting

In [None]:
# Upper bound on the number of time units (figure columns) that constructdata
# builds. It emits one unit per matching flush or compaction and, when the job
# lists zone resets, one extra "reset" unit right after it; it stops as soon as
# this many units exist.
MAX_TIME_UNITS = 4 # Set the number of time units to show (i.e., operations; flush, compaction, trivial, F2FS GC)
# SST table number whose lifetime is followed; it seeds the list of traced files.
TRACE_FILE = 31 # Number of the SST file to trace

In [None]:
timeline = []

_TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S.%f'


def _is_table_file(filename, table):
    """Return True when ``filename`` is the SST file numbered ``table``.

    The number in front of ".sst" is compared as an integer. The previous
    substring test (f"0{table}.sst" in filename) also accepted other tables
    whose number merely ends in the same digits, e.g. table 31 matched
    "001031.sst".
    """
    match = re.search(r'(\d+)\.sst', filename)
    return match is not None and int(match.group(1)) == int(table)


def _append_unit(timeline, timectr, job, operation, src, srclevel, destlevel, mergetables, isreset):
    """Create the TimeUnit for ``job``, append it to ``timeline`` and return it.

    The unit's time is 0 for the first unit and otherwise the number of seconds
    between this job's "datetime" and the previous unit's timestamp. Zones of
    the previous unit are carried over through TimeUnit.setoldzones.
    """
    timestamp = datetime.strptime(json_data[job]["datetime"], _TIMESTAMP_FORMAT)
    if timectr == 0:
        elapsed = 0
    else:
        elapsed = (timestamp - timeline[timectr - 1].timestamp_object).total_seconds()

    unit = TimeUnit(timectr, job, operation, src, srclevel, destlevel, mergetables,
                    elapsed, timestamp)
    timeline.append(unit)
    if timectr > 0:
        unit.setoldzones(timeline[timectr - 1].zones, isreset)
    return unit


def _add_table_zones(unit, job, table, files, maxzone):
    """Add to ``unit`` each zone that holds ``table`` after ``job``.

    Zones are read from the job's "f2fs_file_snapshot_after" entries. A table
    that contributed at least one zone is added to ``files`` (once) so later
    compactions involving it are followed too. Returns the updated highest
    zone number.
    """
    for entry in json_data[job]["f2fs_file_snapshot_after"]:
        if not _is_table_file(entry["filename"], table):
            continue
        for zone in entry["hint"]:
            if unit.haszone(zone[0]):
                continue
            unit.addzone(zone[0], zone[1], "Valid", table)
            if table not in files:
                files.append(table)
            maxzone = max(maxzone, zone[0])
    return maxzone


# Ignore trivial moves as they do not modify anything in the storage
def constructdata(timeline):
    """Fill ``timeline`` with the operations that touch TRACE_FILE or its descendants.

    All flushes in summary_dict are visited first, then all compactions. A
    flush is recorded when it wrote a traced table; a compaction when one of
    its source or merge tables is traced, in which case its destination tables
    become traced as well. A job with zone resets is followed by an extra
    "reset" unit. Building stops once MAX_TIME_UNITS units exist.

    Args:
        timeline: list to append TimeUnit objects to; expected to be empty,
            because unit ids double as indices into it.

    Returns:
        The highest zone number added as a "Valid" zone (0 if none).
    """
    maxzone = 0  # highest zone to plot to
    timectr = 0
    files = [TRACE_FILE]

    for op, item in summary_dict.items():
        if op not in ("flush", "compaction"):
            continue

        for job, data in item.items():
            if op == "flush":
                traced = next((file for file in files if file == data["table"]), None)
                if traced is None:
                    continue
                unit = _append_unit(timeline, timectr, job, "Flush", [traced], "M", "0", [], False)
                maxzone = _add_table_zones(unit, job, traced, files, maxzone)
            else:
                involved = any(
                    hastable(file, data["src_tables"]) or hastable(file, data["merge_tables"])
                    for file in files
                )
                if not involved:
                    continue
                unit = _append_unit(timeline, timectr, job, "Compaction", data["src_tables"],
                                    data["src_level"], data["dest_level"], data["merge_tables"],
                                    False)
                for destination in data["dest_tables"]:
                    maxzone = _add_table_zones(unit, job, destination, files, maxzone)

            # TODO: table deletes (data["deletes"]) are not represented as time units yet.

            timectr += 1
            if timectr >= MAX_TIME_UNITS:
                return maxzone

            if len(data["resets"]) > 0:
                # Resets get a unit of their own; previous zones keep their state.
                reset_unit = _append_unit(timeline, timectr, job, "reset", "X", "X", "X", [], True)
                for reset in data["resets"]:
                    reset_unit.addzone(reset, "RESET_ZONE", "Reset", "")
                timectr += 1
                if timectr >= MAX_TIME_UNITS:
                    return maxzone

    return maxzone


maxzone = constructdata(timeline)
for unit in timeline:
    print(unit)

In [None]:
# Draw the timeline built above and save it as a PDF.
#
# Layout (data coordinates): every time unit occupies a column that is
# `gridsize` wide with a 0.5 gap to the next one. Inside a column:
#   y = 0.1 .. 0.6   one rectangle per zone 0..maxzone, coloured by temperature
#   y = 2.1 .. 2.6   one rectangle per file of the time unit
#   y >= 4           operation label, elapsed time and arrows
# The three grey bands and dashed lines at the end label these rows.
fig = plt.figure(figsize=(min(MAX_TIME_UNITS, len(timeline))*2.5, 5), facecolor="1") # For debugging made it gray and show ticks so we see things. drop later!
ax = fig.add_axes([0, 0, 1, 1], frameon=False, aspect=1, xticks=[], yticks=[])

# Zone temperature label -> fill colour of the zone rectangle
color_mapping = {
    'CURSEG_WARM_DATA': 'ORANGE',
    'CURSEG_HOT_DATA': 'RED',
    'CURSEG_COLD_DATA': 'CYAN',
    'RESET_ZONE': 'GREEN',
    'INVALID_ZONE': 'MAGENTA'
}

gridsize = 2
# print(maxzone)
for timepoint, timeunit in zip(range(len(timeline)), timeline):
    # Add all zones at time point
#     print(timeunit)
    # NOTE(review): raises ZeroDivisionError when maxzone is 0 (no zone above 0 was found).
    zonewidth = gridsize/maxzone
    # NOTE(review): filewidth is only (re)assigned when the unit has files; otherwise the
    # value from the previous iteration is reused (NameError if there was none).
    if len(timeunit.files) > 0:
        filewidth = (gridsize / len(timeunit.files))
    # file -> (x, y) of the lower-left corner of its rectangle in this column
    files = {}
    if timeunit.operation == "reset":
        # NOTE(review): `x` and `file` are not assigned anywhere in this branch. They hold
        # whatever the file loop of an earlier non-reset iteration left behind, so this
        # branch raises NameError if no non-reset unit was drawn before it - confirm the
        # intended position of the "Zone Reset" box.
        ax.add_patch(Rectangle((gridsize*timepoint+ 0.5*timepoint+x*filewidth+0.2, 2.1), filewidth*0.9, 0.5,
                     edgecolor = 'black',
                     facecolor = 'white',
                     fill=True, zorder=1,
                     lw=1))
        if filewidth > 0.20:
            ax.annotate("Zone Reset",
                xy=(gridsize*timepoint+ 0.5*timepoint+x*filewidth+0.2 + filewidth*0.9*0.5, 2.1+0.25), ha='center', va='center')
        files[file] = (gridsize*timepoint+ 0.5*timepoint+x*filewidth+0.2, 2.1)
        # Zone row: white for zones this unit does not know about
        for i in range(maxzone+1):
            color = color_mapping[timeunit.getzone(i).temperature] if timeunit.haszone(i) else 'white'

            ax.add_patch(Rectangle((i*zonewidth+0.1 + gridsize*timepoint + 0.5*timepoint, 0.1), zonewidth, 0.5,
                         edgecolor = 'black',
                         facecolor = color,
                         fill=True, zorder=1,
                         lw=1))
            if timeunit.haszone(i):
                zonefiles = timeunit.getzone(i).files
                for file in zonefiles:
                    # (zx, zy): top centre of the zone rectangle
                    zx = i*zonewidth+0.1 + gridsize*timepoint + 0.5*timepoint + 0.5 * zonewidth
                    zy = 0.6
                    print('...', i, zonefiles, file)  # debug output
                    filewidth = gridsize
                    fx =  gridsize*timepoint+ 0.5*timepoint+x*filewidth+0.2
                    if fx < zx and fx + filewidth*0.9 > zx:
                        fx = zx
                    else:
                        fx = fx + filewidth*0.9*0.5
                    # NOTE(review): fy is built from the same expression as the x position
                    # above, not from a y coordinate - looks like a copy/paste; confirm.
                    fy = gridsize*timepoint+ 0.5*timepoint+x*filewidth+0.2
                    # Only zones that were reset are connected to the "Zone Reset" box
                    if color == 'GREEN':
                        ax.plot([zx,fx], [zy, fy-0.6], color=color)
    else:
        # All timestamps that are not zone resets
        skip = 0  # never changed below, so (x-skip) is always x
        for x, file in zip(range(len(timeunit.files)), timeunit.files):
            ax.add_patch(Rectangle((gridsize*timepoint+ 0.5*timepoint+(x-skip)*filewidth+0.2, 2.1), filewidth*0.9, 0.5,
                         edgecolor = 'black',
                         facecolor = 'white',
                         fill=True, zorder=1,
                         lw=1))
            # The file number is only written when the box is wide enough
            if filewidth > 0.20:
                ax.annotate(str(file),
                    xy=(gridsize*timepoint+ 0.5*timepoint+(x-skip)*filewidth+0.2 + filewidth*0.9*0.5, 2.1+0.25), ha='center', va='center')
            files[file] = (gridsize*timepoint+ 0.5*timepoint+(x-skip)*filewidth+0.2, 2.1)
        # Zone row: white for zones this unit does not know about
        for i in range(maxzone+1):
            color = color_mapping[timeunit.getzone(i).temperature] if timeunit.haszone(i) else 'white'

            ax.add_patch(Rectangle((i*zonewidth+0.1 + gridsize*timepoint + 0.5*timepoint, 0.1), zonewidth, 0.5,
                         edgecolor = 'black',
                         facecolor = color,
                         fill=True, zorder=1,
                         lw=1))
            if timeunit.haszone(i):
                zonefiles = timeunit.getzone(i).files
                for file in zonefiles:
                    # Zones carried over from the previous unit have "" as their file
                    if file != "":
                        # (zx, zy): top centre of the zone rectangle
                        zx = i*zonewidth+0.1 + gridsize*timepoint + 0.5*timepoint + 0.5 * zonewidth
                        zy = 0.6
                        print('...', i, zonefiles, file)  # debug output
                        fx = files[file][0]
                        # Drop the line straight up when the zone centre lies under the
                        # file box, otherwise aim at the centre of the box
                        if fx < zx and fx + filewidth*0.9 > zx:
                            fx = zx
                        else:
                            fx = fx + filewidth*0.9*0.5
                        fy = files[file][1]
                        # Invalid and reset zones are not linked to a file
                        if color != "MAGENTA" and color != "GREEN":
                            ax.plot([zx,fx], [zy, fy], color=color)
        # Arrow from an earlier column's file row up to this column's operation label
        if timepoint != 0:
    #             print('timepoint', (timepoint-1)*gridsize)
    #             ax.plot([0.2+gridsize*0.5 + (timepoint-1)*gridsize*1.25+0.1, 
    #                      0.2+gridsize*0.5 + (timepoint)*gridsize*1.25],
    #                     [2.6, 4],
    #                     color='black')
            if timeline[timepoint - 1].operation == "reset":
                # Previous column is a reset: start the arrow 2.5 (one column) further left
                ax.arrow(x=0.2+gridsize*0.5 + (timepoint-1)*gridsize*1.25+0.1 - 2.5, 
                        dx=-0.15 + 0.2+gridsize*0.5 + (timepoint)*gridsize*1.25 - (0.2+gridsize*0.5 + (timepoint-1)*gridsize*1.25+0.1) + 2.5,
                        y=2.6, dy=1.4-0.13+0.1,
                        color='black', head_width=0.15, head_length=0.13)
            else:
                ax.arrow(x=0.2+gridsize*0.5 + (timepoint-1)*gridsize*1.25+0.1, 
                         dx=-0.15 + 0.2+gridsize*0.5 + (timepoint)*gridsize*1.25 - (0.2+gridsize*0.5 + (timepoint-1)*gridsize*1.25+0.1),
                        y=2.6, dy=1.4-0.13+0.1,
                        color='black', head_width=0.15, head_length=0.13)
        # Downward arrow from the operation label to the file row, plus the label itself.
        # srclevel "M" marks a flush from the memtable.
        if str(timeunit.srclevel) == "M":
            ax.arrow(x=0.2+gridsize*0.5+ timepoint*gridsize*1.25, 
            dx=0, y=4, dy=-1.4+0.13, color='black', head_width=0.15, head_length=0.13)
            ax.annotate(f"{str(timeunit.operation)}\n\nmem-$>$L{str(timeunit.destlevel)}",
            xy=(0.2+gridsize*0.5+ timepoint*gridsize*1.25,4.5), ha='center', va='center')
        else:
            ax.arrow(x=0.2+gridsize*0.5+ timepoint*gridsize*1.25, 
            dx=0, y=4, dy=-1.4+0.13, color='black', head_width=0.15, head_length=0.13)
            ax.annotate(f"{str(timeunit.operation)}\n{str(timeunit.srctables)} + {str(timeunit.mergetables)}\n \
    L{str(timeunit.srclevel)}-$>$L{str(timeunit.destlevel)}",
            xy=(0.2+gridsize*0.5+ timepoint*gridsize*1.25,4.5), ha='center', va='center')
        # Seconds since the previous time unit
        ax.annotate(f"{str(round(timeunit.timestamp, 2))}s", xy=(0.2+gridsize*0.5+ timepoint*gridsize*1.25,5.1), ha='center', va='center')
        


# ZNS zones annotation
plt.plot([-1,min(MAX_TIME_UNITS, len(timeline))*2.5], [1.4,1.4], linestyle='dashed', color="black")
ax.add_patch(Rectangle((-1, 0.01), min(MAX_TIME_UNITS, len(timeline))*2.5+1, 0.8,
                     edgecolor = 'black',
                     facecolor = 'gray',
                     fill=True, zorder=0,
                     lw=1, alpha=0.15))
ax.annotate("  ZNS\nZones", xy=(-1, 0), xytext=(-0.9, 0.2))

# F2FS annotation
plt.plot([-1,min(MAX_TIME_UNITS, len(timeline))*2.5], [3.3,3.3], linestyle='dashed', color="black")
ax.add_patch(Rectangle((-1, 1.95), min(MAX_TIME_UNITS, len(timeline))*2.5+1, 0.8,
                     edgecolor = 'black',
                     facecolor = 'gray',
                     fill=True, zorder=0,
                     lw=1, alpha=0.15))
ax.annotate('F2FS', xy=(-1, 0), xytext=(-0.9, 2.3))

# RocksDB annotation
ax.add_patch(Rectangle((-1, 4.1), min(MAX_TIME_UNITS, len(timeline))*2.5+1, 0.8,
                     edgecolor = 'black',
                     facecolor = 'gray',
                     fill=True, zorder=0,
                     lw=1, alpha=0.15))
ax.annotate('RocksDB', xy=(-1, 0), xytext=(-0.9, 4.45))

ax.set_ylim(bottom=0, top=5.1)
ax.set_xlim(-1)

# Legend: one patch per colour used in color_mapping
handles = []
handles.append(mpatches.Patch(color="RED", label="HOT Zone"))
handles.append(mpatches.Patch(color="ORANGE", label="WARM Zone"))
handles.append(mpatches.Patch(color="CYAN", label="COLD Zone"))
handles.append(mpatches.Patch(color="GREEN", label="Reset Zone"))
handles.append(mpatches.Patch(color="MAGENTA", label="Deleted file"))

ax.legend(loc=(0,1.1), handles=handles, ncol=5)

# plt.show()
# Output name contains the traced table number
plt.savefig(f"RocksDB-{TRACE_FILE}_sst-timeline.pdf", bbox_inches="tight")