# Imports

In [None]:
# imports
import json
import pprint

import sys, getopt
import os
import glob
import math
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from datetime import datetime

# Configs

In [None]:
# configs
# Filesystem the run used; selects the DB path and which snapshot parser runs.
filesystem_type = 'f2fs'
# Directory holding the RocksDB LOG, the bpftrace reset trace and the snapshots.
data_dir = '/tmp/rocksdb/f2fs/'

# ZNS (true for the used device/namespace)
# Divisor that maps a device address to a zone index in the passes below.
cap = 128 * 1024  # == 131072
# Multiplier applied to the SIZE field of F2FS extents.
page_size = 512
# Level recorded for tables that were not written by a compaction job.
level0 = 0

# Gather data

In [None]:
# Database directory per filesystem; anything unlisted uses the db_bench default.
db_path_dict = {'f2fs': '/mnt/f2fs/db0/'}
db_path = db_path_dict.get(filesystem_type, '/rocksdbtest/dbbench/')

# Containers filled by the parsing passes below.
compactions, table_levels, level_table = {}, {}, {}
jobs, flushes, trivial_compactions = [], [], []

In [None]:
# Load the RocksDB LOG as a list of lines with trailing whitespace removed.
log_data = []
with open(f'{data_dir}/LOG', 'r') as f:
    log_data = [raw_line.rstrip() for raw_line in f]

In [None]:
# first pass: create compactions
# Every "compaction_start..." event line becomes one entry in `compactions`,
# keyed by its job id; later passes fill in the empty lists.
for log_data_line in log_data:
    if ': "compaction_start' not in log_data_line:
        continue

    # The JSON payload starts at the first '{'; the timestamp is the first
    # space-separated token of the line.
    compaction = json.loads(log_data_line[log_data_line.find('{'):])
    date = log_data_line.split(' ')[0]
    datetime_object = datetime.strptime(date, '%Y/%m/%d-%H:%M:%S.%f')

    # The first listed score level is taken as the level being compacted.
    compaction_level = compaction['scores_level'][0]
    jobs.append(compaction['job'])

    entry = {
        'type': 'compaction',
        'scores': compaction['scores_'],
        'scores_levels': compaction['scores_level'],
        'high_score': compaction['score'],
        'level': compaction_level,
        'from': compaction[f'files_L{compaction_level}'],
        # No files on the next level means the key is absent from the event.
        'into': compaction.get(f'files_L{compaction_level+1}', []),
    }

    # Each descriptor is split on '-' into level tag, size, id and
    # compacting flag; only the second character of the level tag is used.
    file_snapshot = []
    for descriptor in compaction['files']:
        fields = descriptor.split('-')
        file_snapshot.append({
            'level': int(fields[0][1]),
            'size': fields[1],
            'id': fields[2],
            'compacting': fields[3],
        })
    entry['file_snapshot'] = file_snapshot

    # Placeholders populated by the later passes.
    for placeholder in ('creation', 'deletion',
                        'zenfs_file_snapshot_before', 'zenfs_file_snapshot_after',
                        'zones_snapshot_before', 'zones_snapshot_after'):
        entry[placeholder] = []
    entry['datetime'] = datetime_object

    compactions[compaction['job']] = entry

In [None]:
# second pass: flush pass
# Every "flush_finished" event registers a job of type 'flush' in `compactions`.
for log_data_line in log_data:
    if 'job":' not in log_data_line or "flush_finished" not in log_data_line:
        continue

    creation = json.loads(log_data_line[log_data_line.find('{'):])
    jobs.append(creation['job'])

    # The timestamp is the first space-separated token of the line.
    date = log_data_line.split(' ')[0]
    datetime_object = datetime.strptime(date, '%Y/%m/%d-%H:%M:%S.%f')

    flush_entry = {'type': 'flush'}
    # Placeholders populated by the later passes.
    for placeholder in ('creation',
                        'f2fs_file_snapshot_before', 'f2fs_file_snapshot_after',
                        'zones_snapshot_before', 'zones_snapshot_after'):
        flush_entry[placeholder] = []
    flush_entry['datetime'] = datetime_object

    compactions[creation['job']] = flush_entry

In [None]:
# third pass: assign table creations/deletions to jobs
# Creations are recorded both on the owning job and in `table_levels`, which
# maps an .sst path to a list of (job id, level) pairs. Deletions are recorded
# on compaction jobs only.
for log_data_line in log_data:
    if 'job":' not in log_data_line:
        continue

    if "table_file_creation" in log_data_line:
        # Determine name of file
        table_creation = json.loads(log_data_line[log_data_line.find('{'):])
        job_id = table_creation['job']
        table_file_id = str(table_creation['file_number'])
        # Zero-pad to at least six digits. zfill leaves longer numbers alone,
        # whereas slicing '000000' by a negative length prepended five zeros.
        table_file_name = f'{db_path}{table_file_id.zfill(6)}.sst'

        # Assign to table/flush dictionary
        levels = table_levels.setdefault(table_file_name, [])
        job_info = compactions.get(job_id)
        if job_info is not None and job_info['type'] == 'compaction':
            # Compaction output is recorded one level below the compacted level.
            job_info['creation'].append(table_file_id)
            levels.append((job_id, job_info['level'] + 1))
        else:
            # Unknown jobs and non-compaction jobs are recorded at level0.
            levels.append((job_id, level0))
            if job_info is not None and job_info['type'] == 'flush':
                flushes.append((job_id, table_file_id))
                job_info['creation'].append(table_file_id)

    if "table_file_deletion" in log_data_line:
        table_file_deletion = json.loads(log_data_line[log_data_line.find('{'):])
        table_file_deletion_id = table_file_deletion['file_number']
        job_id = table_file_deletion['job']
        # Only compaction entries carry a 'deletion' list.
        if job_id not in compactions or compactions[job_id]['type'] != 'compaction':
            continue
        compactions[job_id]['deletion'].append(table_file_deletion_id)


In [None]:
# fourth pass: trivial moves to higher levels
# Every "trivial_move" event becomes a job of type 'trivial_move', and each
# moved table gets a new (job id, destination level) entry in `table_levels`.
for log_data_line in log_data:
    if 'job":' not in log_data_line or "trivial_move" not in log_data_line:
        continue

    # Load
    trivial_move = json.loads(log_data_line[log_data_line.find('{'):])
    job_id = trivial_move['job']
    next_level = trivial_move['destination_level']
    date = log_data_line.split(' ')[0]
    datetime_object = datetime.strptime(date, '%Y/%m/%d-%H:%M:%S.%f')

    # Monkey patch the json so that we can read the files: the 'files' value is
    # re-embedded in a JSON document and parsed again.
    # NOTE(review): assumes the result is a list of file numbers - confirm
    # against the logger that produced this LOG.
    tmp_moved_file_ids = trivial_move['files']
    moved_file_ids = json.loads("{" + f'"hack": {tmp_moved_file_ids}' + "}")['hack']

    compactions[job_id] = {
        'type': 'trivial_move',
        'creation': moved_file_ids,
        # Source level of the move.
        'level': next_level - 1,
        'datetime': datetime_object,
    }

    # Create names for all files
    for moved_file_id in moved_file_ids:
        table_file_id = str(moved_file_id)
        # Zero-pad to at least six digits. zfill leaves longer numbers alone,
        # whereas slicing '000000' by a negative length prepended five zeros.
        table_file_name = f'{db_path}{table_file_id.zfill(6)}.sst'
        table_levels.setdefault(table_file_name, []).append((job_id, next_level))
        trivial_compactions.append((job_id, moved_file_id, next_level))

    # Assign to jobs
    jobs.append(job_id)

In [None]:
# Fifth pass: zone resets
# Read (timestamp, zone) pairs from the bpftrace trace, then attach to every
# job the resets that happened before it and after the previous job.
zone_resets = []
with open(f'{data_dir}/bpftrace_reset', 'r') as f:
    reset_data = [d.rstrip() for d in f.readlines()]
    # The first line of the trace is skipped.
    for reset_data_line in reset_data[1:]:
        fields = reset_data_line.split(' ')
        # This trace separates the fraction with ':' where the LOG uses '.'.
        datetime_object = datetime.strptime(fields[0], '%Y/%m/%d-%H:%M:%S:%f')
        # The third field divided by `cap` gives the zone index.
        zone = int(int(fields[2]) / cap)
        zone_resets.append((datetime_object, zone))

sorted_jobs = sorted(jobs)

# Single forward sweep: `ptr_resets` only advances, so each reset is handed to
# the first job (in job-id order) whose timestamp is later than the reset.
ptr_resets = 0
for job_id in sorted_jobs:
    job_info = compactions[job_id]
    job_info['past_resets'] = []
    while ptr_resets < len(zone_resets) and zone_resets[ptr_resets][0] < job_info['datetime']:
        reset_time, reset_zone = zone_resets[ptr_resets][0], zone_resets[ptr_resets][1]
        job_info['past_resets'].append((f'{reset_time}', reset_zone))
        ptr_resets += 1
    # Replace the datetime by its string form once the comparison is done.
    new_date = job_info['datetime']
    job_info['datetime'] = f'{new_date}'


In [None]:
# ZenFS snapshot file parsing
# For every compaction job, load the file and zone snapshots taken before and
# after it. Each file record maps the zones it touches to the summed length of
# its extents in that zone.
if filesystem_type == 'zenfs':
    # Extent 'start' values are divided by this to obtain a zone index.
    # NOTE(review): 512 is hard-coded here rather than taken from page_size.
    zone_span = cap * 512

    for job in jobs:
        # We only have snapshot support for compactions
        if compactions[job]['type'] != 'compaction':
            continue

        for phase in ('before', 'after'):
            # Parse the file snapshot. The file is read exactly once; reading
            # the handle a second time yields '' and cannot be decoded.
            with open(f'{data_dir}/files_{phase}_compaction_{job}', 'r') as f:
                zenfs_files = json.load(f)['files']

            file_snapshot = []
            for zenfs_file in zenfs_files:
                # Zone occupancy: accumulate extent lengths per zone.
                zone_occupancy = {}
                for extent in zenfs_file['extents']:
                    zone = int(extent['start'] / zone_span)
                    zone_occupancy[zone] = zone_occupancy.get(zone, 0) + extent['length']
                file_snapshot.append({
                    'filename': zenfs_file['filenames'][0]['filename'],
                    'hint': zenfs_file['hint'],
                    'size': zenfs_file['size'],
                    'zones': zone_occupancy,
                })
            compactions[job][f'zenfs_file_snapshot_{phase}'] = file_snapshot

            # Parse zone info: state plus write pointer relative to zone start.
            with open(f'{data_dir}/zones_{phase}_compaction_{job}', 'r') as f:
                zone_list = json.load(f)['zone_list']
            compactions[job][f'zones_snapshot_{phase}'] = [
                {'state': z['state'], 'wp': z['wp'] - z['slba']}
                for z in zone_list
            ]

In [None]:
# F2FS snapshot file parsing
def _parse_f2fs_snapshot(snapshot_text, show_hints=False):
    """Invert one F2FS snapshot dump into a list of per-file records.

    The dump lists, per zone, the entries found in it. This reverses that
    index so that every file lists the zones its extents cover.

    Args:
        snapshot_text: JSON text with a top-level 'zone_file_mappings' list.
        show_hints: when true, print each (zone, ZONE_TYPE) pair as it is read.

    Returns:
        A list of dicts with the keys 'filename', 'hint', 'size' and 'zones'.
        'hint' holds one (zone, zone_type, extent_size) tuple per zone step of
        every extent; 'zones' holds the zone indices without duplicates, in
        first-seen order.

    Raises:
        ValueError: if an extent covers a zone for which no ZONE_TYPE entry
            exists at that zone or any lower one.
    """
    mappings = json.loads(snapshot_text)['zone_file_mappings']
    files_by_name = {}
    zone_types = {}

    # Reverse indexing
    for mapping in mappings:
        for entry in mapping['files']:
            # Special entry that carries the zone type, not an extent. It is
            # stored under the mapping's zone number minus one.
            if 'ZONE_TYPE' in entry:
                typed_zone = int(mapping['zone']) - 1
                zone_types[typed_zone] = entry['ZONE_TYPE']
                if show_hints:
                    print(typed_zone, entry['ZONE_TYPE'])
            # Normal extent data
            if 'Type' in entry and entry['Type'] == 'EXTENT':
                start_hex = entry['PBAS']
                end_hex = entry['PBAE']
                # Right-pad the shorter hex string with '0' so both have the
                # same number of digits before conversion.
                width = max(len(start_hex), len(end_hex))
                extent_start = int(start_hex.ljust(width, '0'), 16)
                extent_end = int(end_hex.ljust(width, '0'), 16)
                extent_size = int(entry['SIZE'], 16) * page_size

                # Initialize file (note that a file can be present in multiple extents)
                record = files_by_name.setdefault(entry['FILE_NAME'], {
                    'filename': entry['FILE_NAME'],
                    'hint': [],
                    'size': 0,
                    'zones': [],
                })
                record['size'] += extent_size

                # Walk the extent in steps of `cap`, recording every zone hit.
                # The zone type (second tuple field) is resolved further down.
                for address in range(extent_start, extent_end + 1, cap):
                    zone = int(address / cap)
                    record['zones'].append(zone)
                    record['hint'].append((zone, 0, extent_size))

    # Resolve each zone's type from the nearest ZONE_TYPE entry at or below it.
    lowest_typed_zone = min(zone_types, default=None)
    for record in files_by_name.values():
        resolved_hints = []
        for zone, _, extent_size in record['hint']:
            ptr = zone
            while ptr not in zone_types:
                # Without this bound the search would never terminate.
                if lowest_typed_zone is None or ptr < lowest_typed_zone:
                    raise ValueError(
                        f"no ZONE_TYPE entry at or below zone {zone} "
                        f"for file {record['filename']}"
                    )
                ptr -= 1
            resolved_hints.append((zone, zone_types[ptr], extent_size))
        record['hint'] = resolved_hints
        # Remove zone duplicates
        record['zones'] = list(dict.fromkeys(record['zones']))

    # Remove duplicate layer of indexing
    return list(files_by_name.values())


if filesystem_type == 'f2fs':
    for job in jobs:
        # We only support compaction and flush snapshots for now
        if compactions[job]['type'] not in ('compaction', 'flush'):
            continue
        job_name = 'compaction_' if compactions[job]['type'] == 'compaction' else 'flush_'

        with open(f'{data_dir}/before_{job_name}{job}', 'r') as f:
            compactions[job]['f2fs_file_snapshot_before'] = _parse_f2fs_snapshot(f.read())

        with open(f'{data_dir}/after_{job_name}{job}', 'r') as f:
            f2fs_snapshot = f.read()
        if f2fs_snapshot == '':
            # NOTE(review): this stops processing of ALL remaining jobs, not
            # just this one - confirm that `continue` is not what is wanted.
            print('empty')
            break
        compactions[job]['f2fs_file_snapshot_after'] = _parse_f2fs_snapshot(
            f2fs_snapshot, show_hints=True
        )

        # Zone info: state plus write pointer relative to the zone start.
        with open(f'{data_dir}/zones_before_{job_name}{job}', 'r') as f:
            zone_data = f.read()
        compactions[job]['zones_snapshot_before'] = [
            {'state': z['state'], 'wp': z['wp'] - z['slba']}
            for z in json.loads(zone_data)['zone_list']
        ]
        with open(f'{data_dir}/zones_after_{job_name}{job}', 'r') as f:
            zone_data = f.read()
        compactions[job]['zones_snapshot_after'] = [
            {'state': z['state'], 'wp': z['wp'] - z['slba']}
            for z in json.loads(zone_data)['zone_list']
        ]

# Score recreation

In [None]:
# Base size used to normalise the per-level byte totals below
# (an earlier setting was 256 * 1048576).
max_bytes_base = 1024 * 1024

# For every compaction job, recompute the per-level scores from the snapshot
# taken before the job and print them next to the scores found in the LOG.
for job in jobs:
    job_info = compactions[job]
    if job_info.get('type') != 'compaction':
        continue

    # Scores from the LOG, reordered so that index == level.
    their_score = [0] * len(job_info['scores'])
    for position, logged_score in enumerate(job_info['scores']):
        their_score[job_info['scores_levels'][position]] = logged_score
    print("their score", their_score)

    # Slot 0 collects [bytes, file count]; slots 1..4 collect bytes only.
    score = [[0, 0], 0, 0, 0, 0]
    for snapshot_file in job_info['f2fs_file_snapshot_before']:
        level = -1
        file_path = snapshot_file['filename']
        if '.sst' not in file_path:
            continue

        # Only tables that also appear in the job's own file snapshot count.
        filename_id = str(int(file_path.split('/')[-1].split('.sst')[0]))
        if not any(comp['id'] == filename_id for comp in job_info['file_snapshot']):
            continue

        # Newest level assignment made by this job or an earlier one.
        for assigning_job, assigned_level in reversed(table_levels[file_path]):
            if assigning_job <= job:
                level = assigned_level
                break

        if level > 0:
            score[level] += snapshot_file['size']
        if level == 0:
            score[level][0] += snapshot_file['size']
            score[level][1] += 1

    # Level 0: the larger of file count / 4 and bytes / base.
    l0_bytes, l0_files = score[0]
    score[0] = max(l0_files / 4, l0_bytes / max_bytes_base)

    # Level n >= 1: bytes / (4 ** (n - 1) * base).
    for lvl in range(1, len(score)):
        score[lvl] = score[lvl] / ((4 ** (lvl - 1)) * max_bytes_base)
    print("our score", score)

    dif = [abs(score[i] - their_score[i]) for i in range(len(job_info['scores']))]
    print("diff", dif)

# Printing

In [None]:
# Bare expression so the notebook renders the mapping from .sst path to its
# list of (job id, level) pairs.
table_levels

In [None]:
# Text summary of all jobs, grouped by type. 'R:' lists the zones reset before
# the job, 'D:' the tables a compaction deleted.
print('trivial:')
for job in jobs:
    job_info = compactions[job]
    if 'trivial_move' in job_info['type']:
        level = job_info['level']
        moved = job_info['creation']
        print('    ', job, f'L{level}->L{level+1}:', moved, '->', moved)
        print('     R:', [reset[1] for reset in job_info['past_resets']])

print('flush:')
for job in jobs:
    job_info = compactions[job]
    if 'flush' in job_info['type']:
        print('    ', job, job_info['creation'])
        print('     R:', [reset[1] for reset in job_info['past_resets']])


print('compaction:')
for job in jobs:
    job_info = compactions[job]
    if 'compaction' in job_info['type']:
        level = job_info['level']
        print('    ', job, f'L{level}->L{level+1}:', job_info['from'], '|', job_info['into'], '->', job_info['creation'])
        print('     D:', job_info['deletion'])
        print('     R:', [reset[1] for reset in job_info['past_resets']])


# Generate JSON

In [None]:
# Write all collected job records to the file read by the timeline generator.
with open('../timeline-gen/data.json', 'w') as f:
    json.dump(compactions, f, indent=4)