In [1]:
import os
from itertools import cycle

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

from tqdm.notebook import tqdm

from CP import *
from INIT import *
from Logger import *
from QRF import QRF
from QR import QR
from Load import init as init_real
from Load import *
from L2MVE import L2MVE
from MV import MV

# Initialise plotly's offline notebook renderer. Per the plotly documentation,
# connected=True loads plotly.js from a CDN instead of embedding it in the notebook.
plotly.offline.init_notebook_mode(connected=True)
# Typeset matplotlib text through LaTeX (needs a working LaTeX installation).
plt.rcParams['text.usetex'] = True

KeyboardInterrupt: 

In [None]:
# ---------------------------------------------------------------------------
# Model hyper-parameters; bundled into `model_params` further down.
# ---------------------------------------------------------------------------
dim = 64        # stored as model_params["hidden_dim"]
drop = 0.2      # stored as model_params["drop"]
batch = 64      # also the size handed to MinibatchSampler in `routine`
epochs = 300
layers = 3      # stored as model_params["hidden_layers"]
rate = 5e-4
decay = 1e-6
target = "cpu"  # forwarded to every model's train() call
act = "ReLU"

# One seed per repetition of the experiment. The full pool is kept for
# reference; only the first ten are used by the experiment loop.
seeds = [13, 2, 47, 1, 15, 31, 89, 666, 3, 43, 5197, 558213, 4236, 410, 74888, 1563, 1794, 711489, 156874, 123,
    744741,442262,53750,550509,751836,73427,316551,321489,264450,691340,
    256212,248195,322953,469088,515349,717046,904096,600809,566875,335409,
    128274,11299,40740,135231,78276,268373,148066,569507,212406,584182]
seeds = seeds[:10]

# ---------------------------------------------------------------------------
# Evaluation settings.
# ---------------------------------------------------------------------------
alpha = 0.1         # the plots draw 1 - alpha as the target coverage
var_bins = 3        # passed to Logger; also the number of subplot rows
var_dim = 0         # stored as data_params["var_dim"]
var_control = 1     # stored as data_params["lambda"]; also passed to getStatistics

plot_clusters = False

# ---------------------------------------------------------------------------
# Data source. A truthy `feature_choice` selects the synthetic branch below.
# ---------------------------------------------------------------------------
datasource = "mean"
feature_choice = "uniform"
# NOTE(review): ConditionalNormal / getStatistics come from the project's star
# imports; presumably they build the synthetic sampler -- confirm.
sampler = ConditionalNormal(*getStatistics(datasource, var_control))
mean_func, var_func = sampler.getFunctions()
features = 10
model_params = {"act": act, "hidden_dim": dim, "drop": drop, "rate": rate, "epochs": epochs, "hidden_layers": layers, "batch": batch, "verbose": False, "decay": decay}
data_params = {"size": 1000, "dim": features, "lambda": var_control, "var_dim": var_dim, "high": 10, "low": 0}
extra = "high_coupling" if data_params["lambda"] >= 1 else "low_coupling"

# To run on a real dataset instead, uncomment the two lines below.
# datasource = "crime2"
# feature_choice = None

if feature_choice:
    datasource += "_" + sampler.description() + "_" + extra
    folder = "./PLOTS/" + str(data_params["size"]) + "/" + feature_choice + "/DIM" + str(features) + "/"
else:
    folder = "./PLOTS/REAL/"

# The output directory is nested and may not exist on a fresh checkout; create
# it up front so this cell and every later write_image() call can write to it.
os.makedirs(folder, exist_ok = True)

# Record the exact settings next to the plots they produced.
with open(folder + datasource + "-settings.txt", "w") as file:
    file.write("Data: " + str(data_params))
    file.write("\n\n")
    file.write("Model: " + str(model_params))

In [None]:
def routine(datasource, feature_choice, seed, data_params, model_params):
    """Train all interval estimators on one data split and evaluate them.

    Parameters
    ----------
    datasource : str
        Dataset identifier passed to ``init_real``. It is prefixed with
        ``"synth"`` when ``feature_choice`` is truthy.
    feature_choice : str or None
        Truthy selects the synthetic data branch; falsy selects real data.
    seed : int
        Forwarded to ``init_real`` and to the ``Logger``.
    data_params : dict
        Not used by this function; kept so existing callers keep working.
    model_params : dict
        Passed as ``params`` to every model constructor.

    Returns
    -------
    Logger
        The logger after all models were registered and ``run()`` was called.

    Notes
    -----
    Reads the notebook-level globals ``folder``, ``var_bins``, ``alpha``,
    ``batch``, ``target`` and ``epochs``.
    """
    # A single flag drives both the data-loading branch and the Logger. The
    # original used truthiness for the former and `!= None` for the latter,
    # which disagree for an empty string.
    synthetic = bool(feature_choice)

    if synthetic:
        data = init_real("synth" + datasource, seed = seed, to_torch = True)
    else:
        data = init_real(datasource, seed = seed, to_torch = True)

    logger = Logger(data, folder, datasource, seed, var_bins = var_bins, alpha = alpha, synthetic = synthetic)

    X_train, y_train = data["X_train"], data["y_train"]
    input_dim = X_train.shape[1]

    # Models are built and trained strictly one after another, in the original
    # order, so any shared random state is consumed in the same sequence.
    # The starred variants use alpha, the unstarred ones 2 * alpha.
    qr = QR(input = input_dim, alpha = alpha, params = model_params)
    qr.train(X_train, y_train, MinibatchSampler(batch), target = target, epochs = epochs)
    qr2 = QR(input = input_dim, alpha = 2 * alpha, params = model_params)
    qr2.train(X_train, y_train, MinibatchSampler(batch), target = target, epochs = epochs)

    # QRF.train is called without an `epochs` argument, as in the original.
    qrf = QRF(alpha = alpha, params = model_params)
    qrf.train(X_train, y_train, MinibatchSampler(batch), target = target)
    qrf2 = QRF(alpha = 2 * alpha, params = model_params)
    qrf2.train(X_train, y_train, MinibatchSampler(batch), target = target)

    mv = MV(input = input_dim, params = model_params)
    mv.train(X_train, y_train, MinibatchSampler(batch), target = target, epochs = epochs)

    kg = L2MVE(input = input_dim, params = model_params)
    kg.train(X_train, y_train, MinibatchSampler(batch), target = target, epochs = epochs)

    logger.register(qr, "QR*")
    logger.register(qr2, "QR")
    logger.register(qrf, "QRF*")
    logger.register(qrf2, "QRF")

    # Registered without an explicit label.
    logger.register(mv)
    logger.register(kg)

    logger.run()
    return logger

In [None]:
# Per-seed results; each list gets one entry per seed and is stacked into an
# array (seed on axis 0) once all runs have finished.
r2s_marginal, coverages_marginal, widths_marginal = [], [], []
columns_marginal = None

r2s_conditional, coverages_conditional, widths_conditional = [], [], []
columns_conditional = []

ks_names = None
ks = []

for i, s in enumerate(tqdm(seeds)):
    logger = routine(datasource, feature_choice, s, data_params, model_params)

    result = logger.to_array()
    result_conditional = logger.to_array_conditional()

    # Entries 0-2 are the R2 / coverage / width values, entry 3 the column names.
    for store, values in zip((r2s_marginal, coverages_marginal, widths_marginal), result[:3]):
        store.append(values)
    columns_marginal = result[3]

    for store, values in zip((r2s_conditional, coverages_conditional, widths_conditional), result_conditional[:3]):
        store.append(values)
    columns_conditional = result_conditional[3]

    ks_values, ks_names = logger.run_KS()
    ks.append(ks_values)

# Turn every list of per-seed arrays into a single array with the seed on axis 0.
r2s_marginal, coverages_marginal, widths_marginal = (
    np.stack(runs, axis = 0) for runs in (r2s_marginal, coverages_marginal, widths_marginal)
)
r2s_conditional, coverages_conditional, widths_conditional = (
    np.stack(runs, axis = 0) for runs in (r2s_conditional, coverages_conditional, widths_conditional)
)
ks = np.stack(ks, axis = 0)

  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Models that exist in a starred and an unstarred flavour; the filter below
# keeps only the starred "-base" column for them.
mergers = ["QR", "QRF", "ClusterQR"]

# A column is kept when it is the starred base column of a merger model, or
# when it has no star and is not the unstarred base column of a merger model.
starred_base = [m + "*-base" for m in mergers]
plain_base = [m + "-base" for m in mergers]
index = [
    pos for pos, name in enumerate(columns_marginal)
    if name in starred_base or ("*" not in name and name not in plain_base)
]

# Drop the star from the kept names and split each at "-" into
# (legend label, tick label).
columns = [columns_marginal[pos].replace("*", "") for pos in index]
columntext = [(parts[0], parts[1]) for parts in (label.split("-") for label in columns)]

# Restrict the stacked results (seed on axis 0) to the kept columns.
r2s = r2s_marginal[:, index]
coverages = coverages_marginal[:, index]
widths = widths_marginal[:, index]

# One row per seed, one column per kept name.
r2_data = pd.DataFrame(r2s, columns = columns)
cov_data = pd.DataFrame(coverages, columns = columns)
width_data = pd.DataFrame(widths, columns = columns)

# Background colour per block of four consecutive columns.
bg_colors = cycle(plotly.colors.qualitative.Pastel2)

# The coverage y-axis spans the observed range, rounded outwards to one decimal.
cov_low = np.floor(np.min(coverages) * 10) / 10
cov_high = np.ceil(np.max(coverages) * 10) / 10
fig = go.Figure(layout = go.Layout(height = 750), layout_yaxis_range = [cov_low, cov_high])
fig2 = go.Figure(layout = go.Layout(height = 750))

for i, model in enumerate(columns):
    starts_block = i % 4 == 0

    # The line-colour cycle restarts at every block, so the k-th column of
    # each block always gets the same colour.
    if starts_block:
        colors = cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)
    color = next(colors)

    for figure, frame in ((fig, cov_data), (fig2, width_data)):
        figure.add_trace(go.Violin(y = frame[model], x0 = i, spanmode = "hard", showlegend = False, line_color = color))

    if starts_block:
        # Shade the block and add an invisible marker trace that only serves
        # as its legend entry.
        bg_color = next(bg_colors)
        block_label = columntext[i][0]
        fig.add_vrect(x0 = i-0.5, x1 = i+3.5, fillcolor = bg_color, layer = "below", line_width = 0, opacity = 0.5, name = model)
        fig.add_trace(go.Scatter(x=[None], y=[None], mode="markers", name = block_label, marker = dict(size = 7, color = bg_color, symbol = 'square')))
        fig2.add_vrect(x0 = i-0.5, x1 = i+3.5, fillcolor = bg_color, layer = "below", line_width = 0, opacity = 0.5)
        fig2.add_trace(go.Scatter(x=[None], y=[None], mode="markers", name = block_label, marker = dict(size = 7, color = bg_color, symbol = 'square')))

# Dotted horizontal line at the nominal coverage 1 - alpha.
fig.add_trace(go.Scatter(x = [-.5, len(columns)-.5], y = [1 - alpha, 1 - alpha], line = dict(dash = 'dot', color = "red"), name = "Target coverage", marker = {"opacity": 0}, showlegend = False))

# Tick labels/positions, padded with an empty label half a step beyond each end.
text = [""] + [pair[1] for pair in columntext] + [""]
vals = [-.5] + list(range(len(columns))) + [len(cov_data.columns) - 0.5]

fig.update_layout(width = 750, margin = dict(l = 0, r = 0, t = 70, b = 0), title = r'$\text{PI coverage (}\alpha=' + str(alpha) + r'\text{)}$')
fig2.update_layout(width = 750, margin = dict(l = 0, r = 0, t = 70, b = 0), title = r'$\text{PI width (}\alpha=' + str(alpha) + r'\text{)}$')
for figure in (fig, fig2):
    figure.update_layout(xaxis = {"range": [vals[0], vals[-1]], "tickangle": 45, "tickvals": vals, "ticktext": text})

# Export both figures as SVG and PNG, then display them.
for figure, suffix in ((fig, "_covs"), (fig2, "_widths")):
    for extension in (".svg", ".png"):
        figure.write_image(folder + datasource + suffix + extension, scale = 5)
fig.show()
fig2.show()

# R2 figure: only the first column of every block of four is drawn, each in
# its own colour together with a legend-only marker trace.
colors = cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)
fig = go.Figure(layout = go.Layout(height = 750))
for i in range(0, len(columns), 4):
    model = columns[i]
    color = next(colors)
    fig.add_trace(go.Scatter(x=[None], y=[None], mode="markers", name = model.split("-")[0], marker = dict(size = 7, color = color, symbol = 'square')))
    fig.add_trace(go.Violin(y = r2_data[model], x0 = i, spanmode = "hard", showlegend = False, line_color = color))

# Ticks only at the plotted positions (every fourth entry, skipping the padding
# at both ends), with two units of margin on either side.
block_vals = vals[1:-1:4]
block_text = text[1:-1:4]
fig.update_layout(width = 750, margin = dict(l = 0, r = 0, t = 70, b = 0), title = r'$R^2\text{-value}$')
fig.update_layout(xaxis = {"range": [block_vals[0]-2, block_vals[-1]+2], "tickangle": 45, "tickvals": block_vals, "ticktext": block_text})

for extension in (".svg", ".png"):
    fig.write_image(folder + datasource + "_r2s" + extension, scale = 5)
fig.show()

In [None]:
# Number of consecutive columns that form one block in the conditional figures.
conditional_scores = 7

# One subplot row per variance bin for coverage (fig), width (fig2) and R2
# (fig3); fig_ks compares pairs of bins instead.
variance_titles = ["Low variance", "Medium variance", "High variance"]
fig = make_subplots(rows = var_bins, cols = 1, subplot_titles = list(variance_titles))
fig2 = make_subplots(rows = var_bins, cols = 1, subplot_titles = list(variance_titles))
fig3 = make_subplots(rows = var_bins, cols = 1, subplot_titles = list(variance_titles))
fig_ks = make_subplots(rows = var_bins, cols = 1, subplot_titles = ["Low - Medium", "Medium - High", "Low - High"])

# Same selection rule as for the marginal results: keep the starred base
# column of each merger model, plus every unstarred column that is not the
# unstarred base column of a merger model.
starred_base = [m + "*-base" for m in mergers]
plain_base = [m + "-base" for m in mergers]
index = [
    pos for pos, name in enumerate(columns_conditional)
    if name in starred_base or ("*" not in name and name not in plain_base)
]

# Star-free names, split at "-" into (legend label, tick label).
columns = [columns_conditional[pos].replace("*", "") for pos in index]
columntext = [(parts[0], parts[1]) for parts in (label.split("-") for label in columns)]

# Keep only the selected columns (axis 1) of the stacked conditional results.
r2s = r2s_conditional[:, index, :]
coverages = coverages_conditional[:, index, :]
widths = widths_conditional[:, index, :]

# Tick labels/positions, padded with an empty label half a step beyond each end.
text = [""] + [pair[1] for pair in columntext] + [""]
vals = [-.5] + list(range(len(columns))) + [len(columns) - 0.5]

# Slice i along the last axis of the results is drawn in subplot row i + 1 of
# the coverage (fig), width (fig2) and R2 (fig3) figures.
for i in range(coverages.shape[-1]):
    row = i + 1
    bg_colors = cycle(plotly.colors.qualitative.Pastel2)

    r2_data = pd.DataFrame(r2s[:, :, i], columns = columns)
    cov_data = pd.DataFrame(coverages[:, :, i], columns = columns)
    width_data = pd.DataFrame(widths[:, :, i], columns = columns)
    panels = ((fig, cov_data), (fig2, width_data), (fig3, r2_data))

    for j, model in enumerate(columns):
        starts_block = j % conditional_scores == 0

        # Restart the line colours at every block of `conditional_scores` columns.
        if starts_block:
            colors = cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)
        color = next(colors)

        for figure, frame in panels:
            figure.add_trace(go.Violin(y = frame[model], x0 = j, spanmode = "hard", line_color = color, showlegend = False), row = row, col = 1)

        if not starts_block:
            continue

        # Shade the block in this row; legend entries are added once (first row only).
        bg_color = next(bg_colors)
        for figure, frame in panels:
            figure.add_vrect(x0 = j-0.5, x1 = j+(conditional_scores-0.5), fillcolor = bg_color, layer = "below", line_width = 0, opacity = 0.5, row = row, col = 1)
            if i == 0:
                figure.add_trace(go.Scatter(x=[None], y=[None], mode="markers", name = columntext[j][0], marker = dict(size = 7, color = bg_color, symbol = 'square')), row = 1, col = 1)

    # Dotted horizontal line at the nominal coverage 1 - alpha.
    fig.add_trace(go.Scatter(x = [-.5, len(columns)-.5], y = [1 - alpha, 1 - alpha], line = dict(dash = 'dot', color = "red"), name = "Target coverage", marker = {"opacity": 0}, showlegend = False), row = row, col = 1)

# x-axes: identical ticks in every row; labels are hidden everywhere except in
# the bottom row, where they are rotated by 45 degrees.
figparams = {"xaxis" + str(k+1): {"range": [vals[0], vals[-1]], "tickvals": vals, "ticktext": text, "showticklabels": False} for k in range(var_bins-1)}
figparams.update({"xaxis" + str(var_bins): {"tickangle": 45, "range": [vals[0], vals[-1]], "tickvals": vals, "ticktext": text}})

# The coverage figure additionally pins each row's y-range to the observed
# range of that slice, rounded outwards to one decimal.
coverage_params = {"yaxis" + str(k+1): {"range": [np.floor(np.min(coverages[:, :, k]) * 10) / 10, np.ceil(np.max(coverages[:, :, k]) * 10) / 10]} for k in range(var_bins)}
coverage_params.update(figparams)

margins = dict(l = 0, r = 0, t = 70, b = 0)
exports = (
    (fig, coverage_params, r'$\text{Conditional PI coverage (}\alpha=' + str(alpha) + r'\text{)}$', "_conditional_coverage-"),
    (fig2, figparams, r'$\text{Conditional PI width (}\alpha=' + str(alpha) + r'\text{)}$', "_conditional_widths-"),
    (fig3, figparams, r'$\text{Conditional }R^2\text{-value}$', "_conditional_r2s-"),
)

# Apply the layout, export as SVG and PNG, and display, one figure at a time.
for figure, axis_params, title, suffix in exports:
    figure.update_layout(axis_params)
    figure.update_layout(width = 1000, margin = margins, title = title)
    for extension in (".svg", ".png"):
        figure.write_image(folder + datasource + suffix + str(alpha) + extension, scale = 3)
    figure.show()

# Column selection for the KS results: keep the starred base column of each
# merger model, plus every unstarred column that is not the unstarred base
# column of a merger model.
starred_base = [m + "*-base" for m in mergers]
plain_base = [m + "-base" for m in mergers]
index = [
    pos for pos, name in enumerate(ks_names)
    if name in starred_base or ("*" not in name and name not in plain_base)
]

# Star-free names, split at "-" into (legend label, tick label).
columns = [ks_names[pos].replace("*", "") for pos in index]
columntext = [(parts[0], parts[1]) for parts in (label.split("-") for label in columns)]

# Selected columns only; seed on axis 0, one slice per subplot row on the last axis.
ks_ = ks[:, index, :]

# Tick labels/positions, padded with an empty label half a step beyond each end.
text = [""] + [pair[1] for pair in columntext] + [""]
vals = [-.5] + list(range(len(columns))) + [len(columns) - 0.5]

# Scales the figure width below.
# NOTE(review): derived from len(text) // 4 although blocks are 3 columns wide here -- confirm intent.
groups = len(text) // 4

# Number of consecutive columns that share one background band in this figure.
ks_block = 3

with open(folder + datasource + "_ks.txt", "w+") as file:

    # One pass per slice of the last axis; slice i goes to subplot row i + 1.
    # (The original handled the final slice in a copy of this loop body.)
    for i in range(ks_.shape[-1]):
        row = i + 1
        bg_colors = cycle(plotly.colors.qualitative.Pastel2)
        ks_data = pd.DataFrame(ks_[:, :, i], columns = columns)

        for j, model in enumerate(columns):
            starts_block = j % ks_block == 0

            # Restart the line colours at every block.
            if starts_block:
                colors = cycle(plotly.colors.DEFAULT_PLOTLY_COLORS)
            color = next(colors)

            fig_ks.add_trace(go.Violin(y = ks_data[model], x0 = j, spanmode = "hard", line_color = color, showlegend = False), row = row, col = 1)

            if starts_block:
                bg_color = next(bg_colors)
                # row/col are given explicitly: add_vrect defaults to every
                # non-empty subplot, which stacked a new semi-transparent band
                # on the earlier rows each time a later row was drawn.
                fig_ks.add_vrect(x0 = j-0.5, x1 = j+(ks_block-0.5), fillcolor = bg_color, layer = "below", line_width = 0, opacity = 0.5, row = row, col = 1)

                # Legend-only marker, added once (first row).
                if i == 0:
                    fig_ks.add_trace(go.Scatter(x=[None], y=[None], mode="markers", name = columntext[j][0], marker = dict(size = 7, color = bg_color, symbol = 'square')), row = row, col = 1)

            # Fraction of seeds whose value for this column exceeds 0.05.
            # NOTE(review): name and number are written without a separator,
            # and the file does not say which slice a line belongs to.
            file.write(model + str(np.mean(ks_data[model] > 0.05)) + "\n")

# y-range of every row: observed range of the plotted values, rounded outwards
# to one decimal. (The original pinned only the first var_bins-1 rows and used
# the unfiltered `ks` array, unlike the conditional coverage figure.)
figparams = {"yaxis" + str(k+1): {"range": [np.floor(np.min(ks_[:, :, k]) * 10) / 10, np.ceil(np.max(ks_[:, :, k]) * 10) / 10]} for k in range(ks_.shape[-1])}
# x-axes: identical ticks in every row; labels shown only in the bottom row.
figparams.update({"xaxis" + str(k+1): {"range": [vals[0], vals[-1]], "tickvals": vals, "ticktext": text, "showticklabels": False} for k in range(var_bins-1)})
figparams.update({"xaxis" + str(var_bins): {"tickangle": 45, "range": [vals[0], vals[-1]], "tickvals": vals, "ticktext": text}})
fig_ks.update_layout(figparams)
fig_ks.update_layout(width = 300 * groups, margin = dict(l = 0, r = 0, t = 50, b = 0), title = r'$\text{KS statistic}$')

fig_ks.write_image(folder + datasource + "_ks.svg", scale = 3)
fig_ks.write_image(folder + datasource + "_ks.png", scale = 3)
fig_ks.show()