In [1]:
import os
import json
import glob
import time
import pickle
import numpy as np
import networkx as nx
from matplotlib import pylab as plt
from plotly import express as px
from tqdm.notebook import tqdm
from time import time as timer

In [2]:
# Collect every raw heap dump under a directory tree, plus its JSON key file
def get_dataset_file_paths(path, deploy=False):
    """Recursively collect ``*.raw`` files under ``path`` and their key files.

    The key file of a raw file is looked up by dropping the last nine
    characters of the raw file's path and appending ``.json``.
    NOTE(review): nine characters matches a suffix such as ``-heap.raw`` --
    confirm against the dataset's naming scheme.

    Directories and files are visited in sorted order, so the returned lists
    are identical from run to run. This matters to callers that slice the
    result to a fixed number of entries.

    Args:
        path: Root directory; it and all of its sub-directories are searched.
        deploy: If ``False`` (default), only raw files whose key file exists
            are returned, and ``key_paths`` is aligned index-by-index with
            ``file_paths``. If ``True``, every raw file is returned and
            ``key_paths`` stays empty.

    Returns:
        Tuple ``(file_paths, key_paths)`` of lists of path strings.
    """
    file_paths = []
    key_paths = []

    # os.walk already yields each directory exactly once. Sorting replaces the
    # former set(), whose iteration order for strings varies between runs.
    directories = sorted(entry[0] for entry in os.walk(path))

    for directory in directories:
        raw_files = sorted(glob.glob(os.path.join(directory, '*.raw'), recursive=False))

        for file in raw_files:
            key_file = file[:-9] + ".json"
            if os.path.exists(key_file) and deploy is False:
                file_paths.append(file)
                key_paths.append(key_file)

            elif deploy is True:
                # Deployment mode: no key file is required or recorded.
                file_paths.append(file)

            else:
                print("Corresponding Key file does not exist for :%s" % file)

    return file_paths, key_paths

In [3]:
# Read one raw heap dump and its JSON key file from disk
def load_and_clean_heap(heap_path, json_path):
    """Load a heap dump as mutable bytes together with its parsed JSON file.

    Despite the name, the bytes are returned exactly as read: no cleaning,
    filtering or graph construction happens here.

    Args:
        heap_path: Path of the raw heap file, opened in binary mode.
        json_path: Path of the JSON file, opened in text mode.

    Returns:
        Tuple ``(heap, info)`` where ``heap`` is a ``bytearray`` holding the
        whole heap file and ``info`` is the object decoded by ``json.load``.
    """
    with open(heap_path, mode='rb') as raw_file:
        raw_bytes = raw_file.read()

    with open(json_path) as key_file:
        metadata = json.load(key_file)

    return bytearray(raw_bytes), metadata

In [4]:
def generate_histogram(heap_paths, json_paths):
    """Count adjacent byte pairs over a collection of heap dumps.

    Two 256x256 count matrices are accumulated over all heaps, both indexed
    as ``[first_byte][second_byte]``:

    * ``histogram`` counts non-overlapping pairs, i.e. the pairs that start
      at even offsets 0, 2, 4, ...
    * ``sliding_histogram`` counts every adjacent pair (offsets 0, 1, 2, ...).

    Args:
        heap_paths: Iterable of raw heap file paths.
        json_paths: Iterable of JSON file paths, matched to ``heap_paths`` by
            position. ``zip`` stops at the shorter of the two, so surplus
            entries are ignored.

    Returns:
        Tuple ``(histogram, sliding_histogram)`` of float64 arrays with shape
        ``(256, 256)``.
    """
    histogram = np.zeros((256, 256))
    sliding_histogram = np.zeros((256, 256))

    # A bare zip() has no length, so tqdm cannot show a total or an ETA.
    # Materialising the pairs gives the progress bar a known size.
    path_pairs = list(zip(heap_paths, json_paths))

    for heap_path, json_path in tqdm(path_pairs):
        # The JSON file is still loaded, so a missing or invalid key file
        # fails exactly as before, but its content is not needed for counting.
        heap, _ = load_and_clean_heap(heap_path=heap_path, json_path=json_path)

        # Fewer than two bytes means there is no pair to count.
        if len(heap) < 2:
            continue

        data = np.frombuffer(heap, dtype=np.uint8)

        # pair_codes[i] encodes the pair (data[i], data[i + 1]) as
        # first * 256 + second; the maximum 65535 still fits in uint16.
        pair_codes = (data[:-1].astype(np.uint16) << 8) | data[1:]

        # Row-major reshape maps code first * 256 + second to [first][second].
        sliding_histogram += np.bincount(pair_codes, minlength=65536).reshape(256, 256)
        # Every second code corresponds to a pair starting at an even offset.
        histogram += np.bincount(pair_codes[::2], minlength=65536).reshape(256, 256)

    return histogram, sliding_histogram

In [29]:
# Maximum number of heap/key pairs to analyse, and the dataset root directory.
block_size = 20000
root = "../Smart-VMI/data/new/"

# Discover the dataset, then keep at most `block_size` entries of each list.
heap_paths, json_paths = (
    found[:block_size] for found in get_dataset_file_paths(root)
)

In [30]:
# Accumulate both byte-pair count matrices over the selected heap dumps.
histogram, sliding_histogram = generate_histogram(heap_paths, json_paths)

0it [00:00, ?it/s]

In [31]:
# Raw count of the (0x00, 0x00) pair, printed before any clipping.
print(histogram[0][0])

# Clip into a copy rather than in place: writing into `histogram` would
# overwrite the raw counts, so re-running this cell would print the clipped
# value instead of the real one. The floor of 1 keeps log() finite for pairs
# that never occur; the cap of 50000 bounds the colour range.
histogram_clipped = np.clip(histogram, a_min=1, a_max=50000)

fig = px.imshow(np.log(histogram_clipped))
fig.update_layout(
    autosize=False,
    width=1200,
    height=1200,
    title="Byte-pair histogram (non-overlapping pairs), log of clipped counts",
)
# Rows of the matrix are indexed by the first byte, columns by the second.
fig.update_xaxes(title_text="second byte")
fig.update_yaxes(title_text="first byte")
fig.show()
# NOTE(review): absolute, machine-specific output path -- consider a location
# relative to the notebook so the cell runs on other machines.
fig.write_html("/home/christofer/PycharmProjects/PointerKex/histogram.html")

1016595514.0


In [33]:
# Raw count of the (0x00, 0x00) pair, printed before any clipping.
print(sliding_histogram[0][0])

# Clip into a copy rather than in place: writing into `sliding_histogram`
# would overwrite the raw counts, so re-running this cell would print the
# clipped value instead of the real one. The floor of 1 keeps log() finite for
# pairs that never occur; the cap of 50000 bounds the colour range.
sliding_histogram_clipped = np.clip(sliding_histogram, a_min=1, a_max=50000)

fig = px.imshow(np.log(sliding_histogram_clipped))
fig.update_layout(
    autosize=False,
    width=1200,
    height=1200,
    title="Byte-pair histogram (sliding window, all adjacent pairs), log of clipped counts",
)
# Rows of the matrix are indexed by the first byte, columns by the second.
fig.update_xaxes(title_text="second byte")
fig.update_yaxes(title_text="first byte")
fig.show()
# NOTE(review): absolute, machine-specific output path -- consider a location
# relative to the notebook so the cell runs on other machines.
fig.write_html("/home/christofer/PycharmProjects/PointerKex/sliding_histogram.html")

1984779426.0
