In [1]:
import matplotlib.pyplot as plt
import numpy as np
import sys
import os 
import csv
from math import log2


# Put the parent directory first on the module search path, make it the
# working directory, and print the resulting working directory.
# NOTE(review): both '..' paths are relative, so every re-run of this cell
# moves the working directory up one more level — run it exactly once per
# kernel session.
# NOTE(review): a relative sys.path entry is presumably resolved against the
# working directory in effect at import time (i.e. after the chdir below) —
# confirm that the project modules are found through the intended entry.
sys.path.insert(0, '..')
os.chdir('..')
print(os.getcwd())

# IPython autoreload extension, mode 2, so edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

/home/andrew/constrained-padding-sequences


In [2]:
from pwod import run_pwod
from pfs import run_pfs
from mvmd import run_mvmd
from bdk import run_bdk

from utils import compute_c_vals
from load_dataset import load_dataset

In [3]:
def set_marker(pt):
    """Return the matplotlib marker symbol for a method label.

    The label is matched by substring, in priority order:
    'ourAlg' -> 'D', 'noDistAlg' or 'pwod' -> 'x', 'bdkAlg' -> 'o'.
    Any label matching none of these gets '+'.
    """
    # Ordered (substrings, marker) table; the first row with a hit wins.
    marker_table = (
        (('ourAlg',), 'D'),
        (('noDistAlg', 'pwod'), 'x'),
        (('bdkAlg',), 'o'),
    )
    for needles, symbol in marker_table:
        if any(needle in pt for needle in needles):
            return symbol
    return '+'

### Autocomplete Dataset

In [4]:
# Select the dataset used by the following cells; `l` is the second argument
# later passed to run_mvmd.
dataset = 'autocomplete'
l = 3

# Only the first of the seven values returned by load_dataset is kept.
# NOTE(review): the meaning of the positional arguments `False` and `4` is not
# visible in this notebook — confirm against load_dataset.
vertices, _, _,_,_,_,_ = load_dataset(dataset, False, 4)

In [None]:
# Collect the (max, mean) pad factor of every padding method on `dataset`.
# Both dicts are keyed by the label that is later drawn on the plot.
max_pad_overheads, mean_pad_overheads = {}, {}

for c in [1.05, 1.25, 1.5, 2.0]:
    pad_scheme_pfs = run_pfs(dataset, c)['pad_scheme']
    pad_scheme_pwod, _ = run_pwod(dataset, c)

    # Keep only the v[0][0] entry of every scheme value.
    pad_scheme_flat_pfs = {k: v[0][0] for k, v in pad_scheme_pfs.items()}
    pad_scheme_flat_pwod = {k: v[0][0] for k, v in pad_scheme_pwod.items()}

    # The 'root' entry is dropped before the pad factors are computed.
    del pad_scheme_flat_pfs['root']
    del pad_scheme_flat_pwod['root']

    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pfs, vertices)
    max_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = mean_pad_overhead

    # FIX: this entry used to be computed from pad_scheme_flat_pfs again, so
    # \noDistAlg silently duplicated \ourAlg and the run_pwod result was
    # never used.
    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pwod, vertices)
    max_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = mean_pad_overhead

pad_scheme_bdk = run_bdk(dataset, num_trials=1000)['pad_scheme']
pad_scheme_flat_bdk = {k: v[0][0] for k, v in pad_scheme_bdk.items()}
del pad_scheme_flat_bdk['root']

max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_bdk, vertices)
max_pad_overheads['\\bdkAlg'] = max_pad_overhead
mean_pad_overheads['\\bdkAlg'] = mean_pad_overhead

# run_mvmd already reports its pad factors as a (max, mean) pair.
pad_factors_mvmd = run_mvmd(dataset, l)['pad_factors']
max_pad_overhead, mean_pad_overhead = pad_factors_mvmd

# NOTE(review): the label hard-codes 3 while the run uses `l` — keep the two in sync.
max_pad_overheads['\\lDivAlg{3}'] = max_pad_overhead
mean_pad_overheads['\\lDivAlg{3}'] = mean_pad_overhead

Set parameter Username
Academic license - for non-commercial use only - expires 2025-05-28
Gurobi's optimization method runtime (in seconds): 7.103011608123779


In [None]:
# ----------- For Autocomplete Only
# Scatter every method at (max pad factor, mean pad factor) and write its
# label next to the point.

f, ax = plt.subplots(figsize=(6.4, 5.2))

for method, max_overhead in max_pad_overheads.items():
    ax.scatter(max_overhead, mean_pad_overheads[method], color='black', marker=set_marker(method))

# Hand-tuned label offsets (in data units) that keep the labels readable.
for method, x_pt in max_pad_overheads.items():
    y_pt = mean_pad_overheads[method]
    label = method
    if 'bdkAlg' in method:
        x_pt -= 7
    if 'lDivAlg' in method:
        x_pt -= 11
        if '3' in method:
            y_pt -= 0.01
    # FIX: the labels read '...\padFactor=1.25)' (the f-string consumes the
    # braces), so the old test for 'padFactor}=1.25)' could never match and
    # this nudge was dead code.
    if 'noDistAlg' in method and 'padFactor=1.25)' in method:
        y_pt += 0.01
    ax.annotate(label, (x_pt + 1, y_pt - 0.007), fontsize=18)

# LaTeX alternatives for the axis labels: '\\res{\\padFactor}' and
# '\\avg{\\res{\\padFactor}}'.
ax.set_xlabel('Max Pad Factor', fontsize=20)
ax.set_ylabel('Mean Pad Factor', fontsize=20)
ax.tick_params(axis='both', labelsize=16)
ax.grid(linestyle='dotted')
# FIX: the former plt.legend() call was removed — no artist carries a label
# (the methods are annotated directly), so it only produced a warning.
ax.set_title("Pad Factor Comparison")
plt.show()

### Wikipedia Dataset

In [None]:
# Select the dataset used by the following cells; `l` is the second argument
# later passed to run_mvmd.
dataset = 'wikipedia'
l = 3

# Only the first of the seven values returned by load_dataset is kept.
# NOTE(review): the meaning of the positional arguments `False` and `4` is not
# visible in this notebook — confirm against load_dataset.
vertices, _, _,_,_,_,_ = load_dataset(dataset, False, 4)

In [None]:
# Collect the (max, mean) pad factor of every padding method on `dataset`.
# Both dicts are keyed by the label that is later drawn on the plot.
max_pad_overheads, mean_pad_overheads = {}, {}

for c in [1.05, 1.25, 1.5, 2.0]:
    pad_scheme_pfs = run_pfs(dataset, c)['pad_scheme']
    pad_scheme_pwod, _ = run_pwod(dataset, c)

    # Keep only the v[0][0] entry of every scheme value.
    pad_scheme_flat_pfs = {k: v[0][0] for k, v in pad_scheme_pfs.items()}
    pad_scheme_flat_pwod = {k: v[0][0] for k, v in pad_scheme_pwod.items()}

    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pfs, vertices)
    max_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = mean_pad_overhead

    # FIX: this entry used to be computed from pad_scheme_flat_pfs again, so
    # \noDistAlg silently duplicated \ourAlg and the run_pwod result was
    # never used.
    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pwod, vertices)
    max_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = mean_pad_overhead

pad_scheme_bdk = run_bdk(dataset, num_trials=1000)['pad_scheme']
pad_scheme_flat_bdk = {k: v[0][0] for k, v in pad_scheme_bdk.items()}

max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_bdk, vertices)
max_pad_overheads['\\bdkAlg'] = max_pad_overhead
mean_pad_overheads['\\bdkAlg'] = mean_pad_overhead

# run_mvmd already reports its pad factors as a (max, mean) pair.
pad_factors_mvmd = run_mvmd(dataset, l)['pad_factors']
max_pad_overhead, mean_pad_overhead = pad_factors_mvmd

# NOTE(review): the label hard-codes 3 while the run uses `l` — keep the two in sync.
max_pad_overheads['\\lDivAlg{3}'] = max_pad_overhead
mean_pad_overheads['\\lDivAlg{3}'] = mean_pad_overhead

In [None]:
# --------- For WIKIPEDIA Only
# Main axes: every method at (max pad factor, mean pad factor).
# Inset axes: zoom on the \ourAlg / \noDistAlg points near the origin.

f, ax = plt.subplots(figsize=(6.4, 5.2))

for method, max_overhead in max_pad_overheads.items():
    ax.scatter(max_overhead, mean_pad_overheads[method], color='black', marker=set_marker(method))

ax_ins = ax.inset_axes(
    [0.15, 0.15, 0.45, 0.5],
    xlim=(1, 2.2), ylim=(1, 1.5)
)

for side in ('bottom', 'top', 'right', 'left'):
    ax_ins.spines[side].set_color('grey')

# FIX: the annotation loop used to be nested inside the per-point loop, so
# every inset label was drawn once per method (stacked on itself) and the
# inner loop shadowed `method`. Each point is now drawn and labelled once.
for method, max_overhead in max_pad_overheads.items():
    mean_overhead = mean_pad_overheads[method]
    if 'ourAlg' in method or 'noDistAlg' in method or 'pwod' in method:
        ax_ins.scatter(max_overhead, mean_overhead, color='black', marker=set_marker(method))
    if not ('ourAlg' in method or 'noDistAlg' in method):
        continue

    # Hand-tuned label offsets (in data units).
    x_pt = max_overhead + 0.1
    y_pt = mean_overhead
    label = method

    if 'ourAlg' in method:
        y_pt -= 0.05
    # FIX: the labels read '...\padFactor=<c>)' (the f-string consumes the
    # braces), so the old tests for 'padFactor}=<c>' could never match and
    # the nudges below were dead code.
    if 'padFactor=1.25' in method:
        y_pt -= 0.02
    if 'padFactor=1.5' in method:
        if 'noDistAlg' in method:
            y_pt += 0.01
        if 'ourAlg' in method:
            y_pt += 0.02
    if 'padFactor=2' in method:
        x_pt -= 0.95
        if 'ourAlg' in method:
            x_pt += 0.1
            y_pt -= 0.02
    ax_ins.annotate(label, (x_pt, y_pt), fontsize=18)

ax.indicate_inset_zoom(ax_ins, edgecolor='black')
ax_ins.tick_params(axis='both', labelsize=14)

# Labels for the remaining methods go on the main axes.
for method, x_pt in max_pad_overheads.items():
    if 'ourAlg' in method or 'noDistAlg' in method:
        continue
    y_pt = mean_pad_overheads[method]
    label = method
    if 'bdkAlg' in method:
        x_pt -= 7.5
        y_pt -= 0.2
    if 'lDivAlg' in method:
        x_pt -= 12
        y_pt -= 0.05
    if 'lDivAlg{3}' in method:
        y_pt -= 0.1
    if 'lDivAlg{4}' in method:
        y_pt -= 0.1
    ax.annotate(label, (x_pt + 0.5, y_pt), fontsize=18)

ax.tick_params(axis='both', labelsize=16)
ax.set_xlabel('Max Pad Factor', fontsize=20)
ax.set_ylabel('Mean Pad Factor', fontsize=20)

plt.show()

### Linode from index Dataset

In [None]:
# Select the dataset used by the following cells; `l` is the second argument
# later passed to run_mvmd.
dataset = 'linode_from_index'
l = 3

# All seven values returned by load_dataset are bound to names here.
# NOTE(review): the meaning of the positional arguments `False` and `4` is not
# visible in this notebook — confirm against load_dataset.
vertices, vertices_subset, sequences, prefix_closed_sequences, max_length, edges, Q = load_dataset(dataset, False, 4)

In [None]:
# Collect the (max, mean) pad factor of every padding method on `dataset`.
# Both dicts are keyed by the label that is later drawn on the plot.
max_pad_overheads, mean_pad_overheads = {}, {}

for c in [1.05, 1.25, 1.5, 2.0]:
    pad_scheme_pfs = run_pfs(dataset, c)['pad_scheme']
    pad_scheme_pwod, _ = run_pwod(dataset, c)

    # Keep only the v[0][0] entry of every scheme value.
    pad_scheme_flat_pfs = {k: v[0][0] for k, v in pad_scheme_pfs.items()}
    pad_scheme_flat_pwod = {k: v[0][0] for k, v in pad_scheme_pwod.items()}

    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pfs, vertices)
    max_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\ourAlg (\\padFactor={c})'] = mean_pad_overhead

    # FIX: this entry used to be computed from pad_scheme_flat_pfs again, so
    # \noDistAlg silently duplicated \ourAlg and the run_pwod result was
    # never used.
    max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_pwod, vertices)
    max_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = max_pad_overhead
    mean_pad_overheads[f'\\noDistAlg (\\padFactor={c})'] = mean_pad_overhead

# NOTE(review): this dataset uses num_trials=10 — confirm this trial count
# is intentional.
pad_scheme_bdk = run_bdk(dataset, num_trials=10)['pad_scheme']
pad_scheme_flat_bdk = {k: v[0][0] for k, v in pad_scheme_bdk.items()}

max_pad_overhead, mean_pad_overhead = compute_c_vals(pad_scheme_flat_bdk, vertices)
max_pad_overheads['\\bdkAlg'] = max_pad_overhead
mean_pad_overheads['\\bdkAlg'] = mean_pad_overhead

# run_mvmd already reports its pad factors as a (max, mean) pair.
pad_factors_mvmd = run_mvmd(dataset, l)['pad_factors']
max_pad_overhead, mean_pad_overhead = pad_factors_mvmd

# NOTE(review): the label hard-codes 3 while the run uses `l` — keep the two in sync.
max_pad_overheads['\\lDivAlg{3}'] = max_pad_overhead
mean_pad_overheads['\\lDivAlg{3}'] = mean_pad_overhead

In [None]:
# ---------- FOR LINODE PLOT
# Main axes: every method at (max pad factor, mean pad factor).
# Inset axes: zoom on the \ourAlg / \noDistAlg points near the origin.

f, ax = plt.subplots(figsize=(6.4, 5.2))

for method, max_overhead in max_pad_overheads.items():
    ax.scatter(max_overhead, mean_pad_overheads[method], color='black', marker=set_marker(method))

ax_ins = ax.inset_axes(
    [0.1, 0.4, 0.5, 0.5],
    xlim=(1, 2.2), ylim=(1, 1.6)
)

for side in ('bottom', 'top', 'right', 'left'):
    ax_ins.spines[side].set_color('grey')

# FIX: the annotation loop used to be nested inside the per-point loop, so
# every inset label was drawn once per method (stacked on itself) and the
# inner loop shadowed `method`. Each point is now drawn and labelled once.
for method, max_overhead in max_pad_overheads.items():
    mean_overhead = mean_pad_overheads[method]
    if 'ourAlg' in method or 'noDistAlg' in method or 'pwod' in method:
        ax_ins.scatter(max_overhead, mean_overhead, color='black', marker=set_marker(method))
    if not ('ourAlg' in method or 'noDistAlg' in method):
        continue

    # FIX: the labels read '...\padFactor=2.0)' (the f-string consumes the
    # braces), so the old tests for 'padFactor}=2' could never match and the
    # nudges for the c=2.0 labels were dead code.
    is_pad_factor_2 = 'padFactor=2' in method

    # Hand-tuned label offsets (in data units).
    x_pt = max_overhead
    y_pt = mean_overhead
    label = method

    x_pt -= 0.42
    if is_pad_factor_2:
        x_pt -= 0.85

    if 'ourAlg' in method:
        y_pt -= 0.05
        if is_pad_factor_2:
            y_pt += 0.04
            x_pt += 0.1

    if 'noDistAlg' in method and is_pad_factor_2:
        y_pt -= 0.05

    if '1.5' in method:
        y_pt += 0.025
        if 'noDistAlg' in method:
            y_pt += 0.025

    if '1.25' in method and 'ourAlg' in method:
        y_pt -= 0.02
    ax_ins.annotate(label, (x_pt + 0.5, y_pt), fontsize=18)

ax.indicate_inset_zoom(ax_ins, edgecolor='grey')
ax_ins.tick_params(axis='both', labelsize=14)

# Labels for the remaining methods go on the main axes.
for method, x_pt in max_pad_overheads.items():
    if 'ourAlg' in method or 'noDistAlg' in method:
        continue
    y_pt = mean_pad_overheads[method]
    label = method
    if 'bdkAlg' in method:
        x_pt -= 1500
        y_pt -= 1
    if 'lDivAlg' in method:
        x_pt -= 2500
        y_pt -= 1
    ax.annotate(label, (x_pt + 0.5, y_pt), fontsize=18)

ax.set_yscale('linear')
# LaTeX alternatives for the axis labels: '\\res{\\padFactor}' and
# '\\avg{\\res{\\padFactor}}'.
ax.set_xlabel('Max Pad Factor', fontsize=20)
ax.set_ylabel('Mean Pad Factor', fontsize=20)
ax.tick_params(axis='both', which='major', labelsize=16)
plt.show()