In [59]:
import os

# Data collection. Nesting is {sample size: {time: [estimates]}}; every output
# file contributes one estimate per line, and line n is stored under time n.
NUM_TIMES = 198110  # time keys run from 1 to NUM_TIMES inclusive
SAMPLE_SIZES = [5000, 10000, 20000, 30000, 40000]
OUTPUT_DIR = "./output"

times = list(range(1, NUM_TIMES + 1))
base = {ssize: {time: [] for time in times} for ssize in SAMPLE_SIZES}
impr = {ssize: {time: [] for time in times} for ssize in SAMPLE_SIZES}

# Note: output files are named {input file}_{b / i}_{ssize}_{iteration}.
# sorted() only makes the order of appended estimates reproducible across runs.
for file in sorted(os.listdir(OUTPUT_DIR)):
    path = os.path.join(OUTPUT_DIR, file)
    # Skip directories and hidden entries (e.g. .ipynb_checkpoints, .DS_Store).
    if file.startswith(".") or not os.path.isfile(path):
        continue

    # Split from the right so underscores inside {input file} cannot shift the
    # algorithm / sample-size fields.
    fields = file.rsplit("_", 3)
    if len(fields) != 4:
        raise ValueError(f"unexpected output file name: {file!r}")
    _, kind, ssize, _ = fields

    # The file name is constant for the whole file, so pick the target once
    # instead of re-parsing it for every line.
    target = (base if kind == 'b' else impr)[int(ssize)]
    with open(path, 'r') as f:
        for n, est in enumerate(f, start=1):
            target[n].append(int(est.strip()))

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# helper methods
def getmins(data, ssize):
    """Return the smallest estimate at each time step for one sample size.

    For every ``t`` in the module-level ``times`` sequence, takes
    ``min(data[ssize][t])`` and returns the results as a list in that order.
    """
    return [min(data[ssize][step]) for step in times]

def getmax(data, ssize):
    """Return the largest estimate at each time step for one sample size.

    For every ``t`` in the module-level ``times`` sequence, takes
    ``max(data[ssize][t])`` and returns the results as a list in that order.
    """
    return [max(data[ssize][step]) for step in times]

def getquartile(data, ssize, quartile):
    """Return a percentile of the estimates at each time step.

    ``quartile`` is passed straight to ``np.percentile`` as the percentile
    to compute (the notebook calls this with 25, 50 and 75). One value is
    produced for every ``t`` in the module-level ``times`` sequence, in order.
    """
    return [np.percentile(data[ssize][step], quartile) for step in times]

def _plot_summary_panel(ax, data, alabel, ssize, slabel, colors, with_labels):
    """Scatter the five per-time-step summary series for ``data`` onto ``ax``.

    Legend labels are attached only when ``with_labels`` is true, so that a
    figure-level legend built from several panels lists each series once.
    """
    # (legend label, lazily evaluated series, index into the colour map)
    series = (
        ("Minimum", lambda: getmins(data, ssize), 0),
        ("Maximum", lambda: getmax(data, ssize), 7),
        ("1st Quartile", lambda: getquartile(data, ssize, 25), 2),
        ("Median", lambda: getquartile(data, ssize, 50), 3),
        ("3rd Quartile", lambda: getquartile(data, ssize, 75), 4),
    )
    for label, compute, color_index in series:
        label_kwargs = {"label": label} if with_labels else {}
        ax.scatter(times, compute(), s=5, color=colors[color_index], **label_kwargs)

    ax.set_xlabel("Time")
    ax.set_ylabel("Estimate for Number of Triangles")
    ax.set_title(f"{alabel} Triest Estimate vs. Time for Sample Size {slabel}")


def doubleplot(data1, alabel1, data2, alabel2, ssize, slabel):
    """Show two side-by-side scatter panels of estimate statistics over time.

    Each panel plots the minimum, maximum, 1st quartile, median and 3rd
    quartile of the estimates at every time step for sample size ``ssize``.

    Parameters:
        data1, data2: nested dicts indexed as ``data[ssize][time]``, plotted
            on the left and right panel respectively.
        alabel1, alabel2: text placed at the start of each panel's title.
        ssize: sample-size key used to index both data sets.
        slabel: display form of the sample size used in the titles.
    """
    colors = plt.get_cmap('Set3').colors
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

    # Only the left panel carries legend labels; the shared figure legend
    # below picks them up, and both panels use the same colours.
    _plot_summary_panel(ax1, data1, alabel1, ssize, slabel, colors, with_labels=True)
    _plot_summary_panel(ax2, data2, alabel2, ssize, slabel, colors, with_labels=False)

    fig.legend(loc="upper center", bbox_to_anchor=(0.5, 1.05), ncol=5)

    plt.tight_layout()
    plt.show()

Plots: Triest Base vs. Improved Algorithm

In [None]:
# One Base-vs-Improved figure per sample size; the title label is the size
# rendered with thousands separators (e.g. 5000 -> "5,000").
for sample_size in (5000, 10000, 20000, 30000, 40000):
    doubleplot(base, "Base", impr, "Improved", sample_size, f"{sample_size:,}")