In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
# import matplotlib.ticker as tickeri
import pickle
import numpy as np
import os
from scipy.stats import norm
from typing import List
from ExpPipeline import loadResults
from VoteEnsemble import MoVE, ROVE
plt.ioff()

<contextlib.ExitStack at 0x7f0bb41618d0>

In [2]:
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

def plotAverage(baseObjAvg: List, 
                MoVEObjAvg: List,
                ROVEObjAvg: List, 
                ROVEsObjAvg: List, 
                baggingObjAvg: List,
                sampleSizeList: List, 
                filePath: str,
                xLogScale: bool = True,
                yLogScale: bool = False):
    fig, ax = plt.subplots()
    ax.plot(sampleSizeList, baseObjAvg, marker = 'o', color = default_colors[0], linestyle = '-', label = 'base')
    if len(MoVEObjAvg) > 0 and len(MoVEObjAvg[0]) > 0 and len(MoVEObjAvg[0][0]) > 0:
        ax.plot(sampleSizeList, [MoVEObjAvg[i][0][0] for i in range(len(sampleSizeList))], marker = 's', color = default_colors[1], linestyle = '--', label = MoVE.__name__)
    ax.plot(sampleSizeList, [ROVEObjAvg[i][0][0] for i in range(len(sampleSizeList))], marker = 's', color = default_colors[2], linestyle = '--', label = ROVE.__name__)
    ax.plot(sampleSizeList, [ROVEsObjAvg[i][0][0] for i in range(len(sampleSizeList))], marker = 's', color = default_colors[3], linestyle = '--', label = f"{ROVE.__name__}s")
    if len(baggingObjAvg) > 0:
        ax.plot(sampleSizeList, baggingObjAvg, marker = 'o', color = default_colors[4], linestyle = '-.', label = 'Bagging')

    ax.set_xlabel('sample size', size = 16)
    ax.set_ylabel('cost', size = 16)
    if xLogScale:
        ax.set_xscale('log')
    if yLogScale:
        ax.set_yscale('log')
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    ax.grid()
    # ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=5))
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=3 + (len(baggingObjAvg) > 0), fontsize = 14, frameon = False)
    
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    os.makedirs(os.path.dirname(filePath), exist_ok = True)
    fig.savefig(filePath, dpi = 500)
    
def error(stdArray, numReplicates, confidenceLevel):
    return norm.ppf(0.5 + confidenceLevel / 2) * np.asarray(stdArray) / np.sqrt(numReplicates)
    
def plotAvgWithError(baseObjList: List, 
                     MoVEObjList: List,
                     ROVEObjList: List, 
                     ROVEsObjList: List, 
                     baggingObjList: List,
                     numReplicates: int,
                     confidenceLevel: float,
                     sampleSizeList: List, 
                     filePath: str,
                     xLogScale: bool = True,
                     yLogScale: bool = False):
    baseObjAvg = np.array([np.mean(baseObjList[i]) for i in range(len(sampleSizeList))])
    baseObjStd = np.array([np.std(baseObjList[i]) for i in range(len(sampleSizeList))])
    baseObjError = error(baseObjStd, numReplicates, confidenceLevel)
    MoVEObjAvg = []
    MoVEObjStd = []
    MoVEObjError = []
    if len(MoVEObjList) > 0 and len(MoVEObjList[0]) > 0 and len(MoVEObjList[0][0]) > 0 and len(MoVEObjList[0][0][0]) > 0:
        MoVEObjAvg = np.array([np.mean(MoVEObjList[i][0][0]) for i in range(len(sampleSizeList))])
        MoVEObjStd = np.array([np.std(MoVEObjList[i][0][0]) for i in range(len(sampleSizeList))])
        MoVEObjError = error(MoVEObjStd, numReplicates, confidenceLevel)
    ROVEObjAvg = np.array([np.mean(ROVEObjList[i][0][0]) for i in range(len(sampleSizeList))])
    ROVEObjStd = np.array([np.std(ROVEObjList[i][0][0]) for i in range(len(sampleSizeList))])
    ROVEObjError = error(ROVEObjStd, numReplicates, confidenceLevel)
    ROVEsObjAvg = np.array([np.mean(ROVEsObjList[i][0][0]) for i in range(len(sampleSizeList))])
    ROVEsObjStd = np.array([np.std(ROVEsObjList[i][0][0]) for i in range(len(sampleSizeList))])
    ROVEsObjError = error(ROVEsObjStd, numReplicates, confidenceLevel)
    baggingObjAvg = []
    baggingObjStd = []
    baggingObjError = []
    if len(baggingObjList) > 0:
        baggingObjAvg = np.array([np.mean(baggingObjList[i]) for i in range(len(sampleSizeList))])
        baggingObjStd = np.array([np.std(baggingObjList[i]) for i in range(len(sampleSizeList))])
        baggingObjError = error(baggingObjStd, numReplicates, confidenceLevel)
    
    if yLogScale:
        globalMin = min(baseObjAvg.min(), ROVEObjAvg.min(), ROVEsObjAvg.min())
        lb = baseObjAvg - baseObjError
        globalMin = min(globalMin, lb[lb > 0].min())
        lb = ROVEObjAvg - ROVEObjError
        globalMin = min(globalMin, lb[lb > 0].min())
        lb = ROVEsObjAvg - ROVEsObjError
        globalMin = min(globalMin, lb[lb > 0].min())
        if len(MoVEObjAvg) > 0:
            globalMin = min(globalMin, MoVEObjAvg.min())
            lb = MoVEObjAvg - MoVEObjError
            globalMin = min(globalMin, lb[lb > 0].min())
        if len(baggingObjAvg) > 0:
            globalMin = min(globalMin, baggingObjAvg.min())
            lb = baggingObjAvg - baggingObjError
            globalMin = min(globalMin, lb[lb > 0].min())
        globalMin /= 2
    else:
        globalMin = 0
    
    fig, ax = plt.subplots()
    markersize = None
    ax.errorbar(sampleSizeList, baseObjAvg, yerr = [baseObjAvg - np.maximum(globalMin, baseObjAvg - baseObjError), baseObjError], marker = 'o', markersize = markersize, capsize = 5, color = default_colors[0], linestyle = '-', label = 'base')
    if len(MoVEObjAvg) > 0:
        ax.errorbar(np.array(sampleSizeList) * 1.03, MoVEObjAvg, yerr = [MoVEObjAvg - np.maximum(globalMin, MoVEObjAvg - MoVEObjError), MoVEObjError], marker = 's', markersize = markersize, capsize = 5, color = default_colors[1], linestyle = '--', label = MoVE.__name__)
    ax.errorbar(np.array(sampleSizeList) * 1.03, ROVEObjAvg, yerr = [ROVEObjAvg - np.maximum(globalMin, ROVEObjAvg - ROVEObjError), ROVEObjError], marker = 's', markersize = markersize, capsize = 5, color = default_colors[2], linestyle = '--', label = ROVE.__name__)
    ax.errorbar(np.array(sampleSizeList) * 1.06, ROVEsObjAvg, yerr = [ROVEsObjAvg - np.maximum(globalMin, ROVEsObjAvg - ROVEsObjError), ROVEsObjError], marker = 's', markersize = markersize, capsize = 5, color = default_colors[3], linestyle = '--', label = f"{ROVE.__name__}s")
    if len(baggingObjAvg) > 0:
        ax.errorbar(np.array(sampleSizeList) * 1.09, baggingObjAvg, yerr = [baggingObjAvg - np.maximum(globalMin, baggingObjAvg - baggingObjError), baggingObjError], marker = 's', markersize = markersize, capsize = 5, color = default_colors[4], linestyle = '-.', label = 'Bagging')

    ax.set_xlabel('sample size', size = 16)
    ax.set_ylabel('cost', size = 16)
    if xLogScale:
        ax.set_xscale('log')
    if yLogScale:
        ax.set_yscale('log')
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    ax.grid()
    # ax.xaxis.set_major_locator(ticker.MaxNLocator(nbins=5))
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=3 + (len(baggingObjAvg) > 0), fontsize = 14, frameon = False)
    
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    os.makedirs(os.path.dirname(filePath), exist_ok = True)
    fig.savefig(filePath, dpi = 500)

def plotCDF(baseObjList: List, 
            MoVEObjList: List,
            ROVEObjList: List, 
            ROVEsObjList: List, 
            baggingObjList: List,
            filePath: str,
            xLogScale: bool = False,
            yLogScale: bool = True):
    fig, ax = plt.subplots()

    def getCDF(sequence):
        xList = []
        yList = []
        for num in sorted(sequence):
            if len(xList) == 0:
                xList.append(num)
                yList.append(1 / len(sequence))
            elif num > xList[-1]:
                xList.append(num)
                yList.append(yList[-1] + 1 / len(sequence))
            else:
                yList[-1] += 1 / len(sequence)
        
        tailList = []
        for i in range(len(yList)):
            if i == 0:
                tailList.append(1)
            else:
                tailList.append(1 - yList[i - 1])

        return xList, tailList

    xList, yList = getCDF(baseObjList)
    ax.plot(xList, yList, color = default_colors[0], linestyle = '-', label = 'base', linewidth = 2)
    if len(MoVEObjList) > 0 and len(MoVEObjList[0]) > 0 and len(MoVEObjList[0][0]) > 0:
        xList, yList = getCDF(MoVEObjList[0][0])
        ax.plot(xList, yList, color = default_colors[1], linestyle = '-.', label = MoVE.__name__, linewidth = 2)
    xList, yList = getCDF(ROVEObjList[0][0])
    ax.plot(xList, yList, color = default_colors[2], linestyle = '--', label = ROVE.__name__, linewidth = 2)
    xList, yList = getCDF(ROVEsObjList[0][0])
    ax.plot(xList, yList, color = default_colors[3], linestyle = '--', label = f"{ROVE.__name__}s", linewidth = 2)
    if len(baggingObjList) > 0:
        xList, yList = getCDF(baggingObjList)
        ax.plot(xList, yList, color = default_colors[4], linestyle = '-.', label = 'Bagging', linewidth = 2)
    
    ax.set_xlabel('cost', size = 16)
    ax.set_ylabel('tail prob.', size = 16)
    if xLogScale:
        ax.set_xscale('log')
    if yLogScale:
        ax.set_yscale('log')
    ax.tick_params(axis='x', labelsize=14)
    ax.tick_params(axis='y', labelsize=14)
    ax.grid()
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=3 + (len(baggingObjList) > 0), fontsize = 14, frameon = False)
    # # Create a legend using the first subplot
    # handles, labels = ax.get_legend_handles_labels()

    # # Place the combined legend outside the subplots
    # fig.legend(handles, labels, loc = 'upper center', bbox_to_anchor = (0.5, 0.95), fontsize = 'small')
    fig.tight_layout(rect=[0, 0, 1, 0.95])
    os.makedirs(os.path.dirname(filePath), exist_ok = True)
    fig.savefig(filePath, dpi = 500)

In [6]:
def plotSyntheticExp(expDir: str):
    expName = os.path.basename(expDir)
    resultPath = os.path.join(expDir, "evalResults.pkl")
    (
        baseObjList, 
        MoVEObjList, 
        ROVEObjList, 
        ROVEsObjList, 
        baseObjAvg, 
        MoVEObjAvg, 
        ROVEObjAvg, 
        ROVEsObjAvg,
        sampleSizeList, 
        kList, 
        BList, 
        k12List, 
        B12List, 
        numReplicates,
    ) = loadResults(resultPath)
    baggingObjAvg = []
    baggingObjList = []
    
    # if expDir.endswith("NN_d30_l4_ES_WB"):
    if False:
        fileList = [
            "1024_2048.pkl",
            "4096_8192.pkl",
            "16384_32768.pkl",
            "65536.pkl",
        ]
        data = {}
        for file in fileList:
            with open(os.path.join(expDir, file), "rb") as f:
                data.update(pickle.load(f))
        
        for size in sorted(data):
            valueDict = data[size]
            baggingObjList.append([valueDict[idx] for idx in sorted(valueDict)])
            baggingObjAvg.append(np.mean(baggingObjList[-1]))

    if len(baggingObjAvg) > 0 and len(baggingObjAvg) != len(sampleSizeList):
        raise ValueError("incorrect size for bagging objective avg list")
    if len(baggingObjList) > 0 and len(baggingObjList) != len(sampleSizeList):
        raise ValueError("incorrect size for bagging objective list")
    plotAvgWithError(
        baseObjList,
        MoVEObjList,
        ROVEObjList,
        ROVEsObjList,
        baggingObjList,
        numReplicates,
        0.95,
        sampleSizeList,
        os.path.join(expDir, f"{expName}_plots/{expName}_avg.png"),
        yLogScale=False
    )
    plotAvgWithError(
        baseObjList,
        MoVEObjList,
        ROVEObjList,
        ROVEsObjList,
        baggingObjList,
        numReplicates,
        0.95,
        sampleSizeList,
        os.path.join(expDir, f"{expName}_plots/{expName}_avg_ylog.png"),
        yLogScale=True
    )

    # if expDir.endswith("NN_d30_l4_ES_WB"):
    for i in range(len(sampleSizeList)):
        plotCDF(
            baseObjList[i],
            MoVEObjList[i],
            ROVEObjList[i],
            ROVEsObjList[i],
            baggingObjList[i] if len(baggingObjList) > 0 else [],
            os.path.join(expDir, f"{expName}_plots/{expName}_cdf_{int(np.log2(sampleSizeList[i]))}.png"),
            xLogScale = False,
            yLogScale = True
        )
        plotCDF(
            baseObjList[i],
            MoVEObjList[i],
            ROVEObjList[i],
            ROVEsObjList[i],
            baggingObjList[i] if len(baggingObjList) > 0 else [],
            os.path.join(expDir, f"{expName}_plots/{expName}_cdf_{int(np.log2(sampleSizeList[i]))}_xlog.png"),
            xLogScale = True,
            yLogScale = True
        )

In [4]:
def plotRealExp(expDir: str):
    expName = os.path.basename(expDir)
    resultPath = os.path.join(expDir, "evalResults.pkl")
    (
        baseObjList, 
        MoVEObjList, 
        ROVEObjList, 
        ROVEsObjList, 
        baseObjAvg, 
        MoVEObjAvg, 
        ROVEObjAvg, 
        ROVEsObjAvg,
        sampleSizeList, 
        kList, 
        BList, 
        k12List, 
        B12List, 
        numReplicates,
    ) = loadResults(resultPath)
    baggingObjList = []
    
    plotCDF(
        baseObjList[0],
        MoVEObjList[0],
        ROVEObjList[0],
        ROVEsObjList[0],
        baggingObjList[0] if len(baggingObjList) > 0 else [],
        os.path.join(expDir, f"{expName}_plots/{expName}_cdf.png"),
        xLogScale = False,
        yLogScale = True
    )
    plotCDF(
        baseObjList[0],
        MoVEObjList[0],
        ROVEObjList[0],
        ROVEsObjList[0],
        baggingObjList[0] if len(baggingObjList) > 0 else [],
        os.path.join(expDir, f"{expName}_plots/{expName}_cdf_xlog.png"),
        xLogScale = True,
        yLogScale = True
    )

In [125]:
masterDir = "/home/hqian/ResearchProjects/BaggingSAA/ExpData/NNData/RealData"
expDirList = os.listdir(masterDir)
for expDir in expDirList:
    plotRealExp(os.path.join(masterDir, expDir))

In [6]:
masterDir = "/home/hqian/ResearchProjects/BaggingSAA/ExpData/NNData/SyntheticData"
expDirList = os.listdir(masterDir)
for expDir in expDirList:
    plotSyntheticExp(os.path.join(masterDir, expDir))

In [7]:
masterDir = "/home/hqian/ResearchProjects/BaggingSAA/ExpData/LRData"
expDirList = os.listdir(masterDir)
for expDir in expDirList:
    plotSyntheticExp(os.path.join(masterDir, expDir))

  fig, ax = plt.subplots()


In [8]:
plotSyntheticExp("/home/hqian/ResearchProjects/BaggingSAA/ExpData/Port_d10")
plotSyntheticExp("/home/hqian/ResearchProjects/BaggingSAA/ExpData/Port_d50")