In [1]:
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
import numpy as np
import pandas as pd

from math import log
from statistics import geometric_mean
import csv
import itertools

py.offline.init_notebook_mode(connected=True)

In [2]:
MIN_NGS_ADDED = 1
BASE = "base"
BASE_HEUR = "base-heur"
TIMEOUT = 900
PROCESSING = "first"

LIMS = {}
LIMS["ground"] = (20, 900)
LIMS["time"] = (0.1, TIMEOUT*1.5)
LIMS["ctime"] = (0.1, TIMEOUT*1.5)


LIMS["csolve"] = (0.1, 900)
LIMS["mem"] = (5_000, 16_800_000)
LIMS["choices"] = (1, 1_000_000_000)
LIMS["conflicts"] = (1, 1_000_000_000)

file_names = {"time": "horizons/results-time.csv",
              "choices": "horizons/results-choices.csv",
              "conflicts": "horizons/results-conflicts.csv",
              "status": "horizons/results-status.csv"
             }
              

In [3]:
def get_dom(instance):
	inst = instance.split("/")

	if "ipc-" in instance:
		#return inst[-4] + "-" + inst[-2]
		return "rintanen"

	if "hard" in instance:
		#return inst[-4] + "-" + inst[-3]
		return "hard"
		
	elif "easy" in instance:
		#return inst[-4] + "-" + inst[-3]
		return "easy"
	
	elif "instance" in instance:
		return inst[-3]
	
	elif "asp" in instance:
		return inst[-2]

	raise RuntimeError(f"No dom for instances {instance}")

In [4]:
def read_data(fname, stat_name="value"):
    data = {}
    with open(fname, "r") as _f:
        scalings = []
        
        config_col = []
        instance_col = []
        scaling_col = []
        domain_col = []
        value_col = []
        horizon_col = []
        
        for line in csv.reader(_f, delimiter=","):
            # parse scalings, should happen at the very start
            if line[0] == "scalings":
                scalings = [int(val) for val in line[1:]]
                
            # parse a new config
            # config is at the moment the same, just different horizon
            # so horizon is extracted from it
            elif len(line) == 1 and "learning-" in line[0]:
                # split first element by - and get the last one which should be the horizon number
                split = line[0].split("-")
                horizon = int(split[-1])
                config = "-".join(split[:-1])

            else:
                domain = line[0].split("/")[0]
                instance = line[0].split("/")[1]
                
                if len(line[0].split("/")) != 2:
                    print("len not 2? instance has / in it!)", instance)
                    print("breaking...")
                    break
                
                #TODO: add row to df with instance, scaling, domain, value of stat
                
                for scaling, val in itertools.zip_longest(scalings, line[1:], fillvalue=""):
                    config_col.append(config)
                    instance_col.append(instance)
                    scaling_col.append(scaling)
                    domain_col.append(domain)
                    value_col.append(val)
                    horizon_col.append(horizon)
    
    data = pd.DataFrame()
    data["config"] = config_col
    data["instance"] = instance_col
    data["scaling"] = scaling_col
    data["domain"] = domain_col
    data[stat_name] = value_col
    data["horizon"] = horizon_col
                
    return data

In [5]:
def plotly_scatter_horizons(data, horizons, stat, scaling, status, sorted_by):

    print(horizons)

    # get only the wanted configs
    pattern = "|".join(sorted_by)
    print(pattern)
    data = data[data["config"].str.contains(pattern, case=False)]
    
    # get only wanted status
    data = data[data["status"].isin(status)]
    
    # get only wanted horizons
    data = data[(data["horizon"] >=horizons[0]) & (data["horizon"] <=horizons[1]) ]
    
    # get only wanted scaling
    # Can not be multiple because of how we make sure we have a the correct scaling vs scaling 0
    # Since we sort, if we have multiple scalings then the sort will not align the scalings with the base
    # can only have 1 scaling for the sort to work
    # maybe there is a work around it?
    df = data[data["scaling"] == scaling]
    
    base_df = data[data["scaling"] == 0]
    
    layout = go.Layout(
        title=dict(
            text=f"Scatter plot - horizons {horizons[0]} - {horizons[-1]} <br> Scaling of {scaling} nogoods <br> {len(df.index)} instances",
            y=0.9,
            x=0.5,
            xanchor="center",
            yanchor="top"
        ),
        yaxis=dict(
            title=f"{stat} with added nogoods",
            type="log",
            range=[log(v,10) for v in LIMS[stat]]
        ),
        xaxis=dict(
            title=f"{stat} with base",
            type="log",
            range=[log(v,10) for v in LIMS[stat]]
        ),
        width=1400,
        height=1000,
    )
    
    
    df_grouped = df.groupby("domain")
    base_df_grouped = base_df.groupby("domain")

    data = []
    for dom in sorted(set(df["domain"])):
        """
        df = grouped.get_group(dom)
        base_gmean = geometric_mean([float(v)+0.1 for v in df['x']])
        base_avg = df['x'].mean()
        config_gmean = geometric_mean([float(v)+0.1 for v in df['y']])
        config_avg = df['y'].mean()
        ng_avg = df["ngadded"].mean()
        print(f"{stat:10}  :  {dom:50}  :  {len(df.index):4}  :  base : {base_gmean:12.3f} : base avg  : {base_avg:10.2f} config: {config_gmean:12.3f} : config avg  : {config_avg:10.2f}  :  avg ng added: {ng_avg:12.2f}")
        """
        
        df_grouped_dom = df_grouped.get_group(dom)
        base_df_grouped_dom = base_df_grouped.get_group(dom)
        
        trace = go.Scatter(
            x=base_df_grouped_dom.sort_values(by=["config", "instance", "horizon"])[stat],
            y=df_grouped_dom.sort_values(by=["config", "instance", "horizon"])[stat],
            mode="markers",
            name=f"{dom}",   
        )
        
        data.append(trace)
        
    fig = go.Figure(data=data, layout=layout)

    if stat in LIMS:
        limits = LIMS[stat]
        fig.add_shape(type="line",x0=0,y0=0, x1=limits[1],y1=limits[1],
                line=dict(color='Red'),
                xref='x',
                yref='y')

    py.offline.iplot(fig)

In [6]:
DATAFRAMES = {}
FRAME = read_data(file_names["time"], "time")
FRAME.sort_values(by=["config", "domain", "instance", "horizon", "scaling"], ignore_index=True, inplace=True)
for stat in file_names.keys():
    DATAFRAMES[stat] = read_data(file_names[stat], stat)
    DATAFRAMES[stat].sort_values(by=["config","domain", "instance", "horizon", "scaling"], ignore_index=True, inplace=True)
    FRAME[stat] = DATAFRAMES[stat][stat]
    
FRAME.head(5)

# do the sorting and then add all stats to ONE frame!!

Unnamed: 0,config,instance,scaling,domain,time,horizon,choices,conflicts,status
0,learning-hor-hor-lbd,instance-22.lp,0,blocks-strips-typed,0.474,5,1,0,UNSATISFIABLE
1,learning-hor-hor-lbd,instance-22.lp,500,blocks-strips-typed,0.462,5,1,0,UNSATISFIABLE
2,learning-hor-hor-lbd,instance-22.lp,1000,blocks-strips-typed,0.477,5,1,0,UNSATISFIABLE
3,learning-hor-hor-lbd,instance-22.lp,1500,blocks-strips-typed,0.472,5,1,0,UNSATISFIABLE
4,learning-hor-hor-lbd,instance-22.lp,0,blocks-strips-typed,0.545,10,1,0,UNSATISFIABLE


In [7]:
layout = widgets.Layout(width='400px')

min_ngs = widgets.BoundedIntText(min=0, max=1000, value=1, step=10, description="Minimum nogoods added")

stats = [s for s in DATAFRAMES.keys()]
stat_widget = widgets.Select(
    options=stats,
    value=stats[0],
    description="Stat"
)


scalings = DATAFRAMES[list(DATAFRAMES.keys())[0]]["scaling"].unique()
scalings = sorted(scalings)[1:]
scaling_widget = widgets.Select(
    options=scalings,
    value=scalings[0],
    description="Scaling"
)

status_widget = widgets.SelectMultiple(
    options=["SATISFIABLE", "UNKNOWN", "UNSATISFIABLE"],
    value=["SATISFIABLE", "UNSATISFIABLE"],
    description="Status"
)

sorted_by_widget = widgets.SelectMultiple(
    options=["lbd", "size"],
    value=["lbd", "size"],
    description="Sorted by"
)

horizons = DATAFRAMES[list(DATAFRAMES.keys())[0]]["horizon"].unique()
horizons = [(int(h), int(h)) for h in sorted(horizons)]
horizon_widget = widgets.SelectionRangeSlider(
    options=horizons,
    index=(0, len(horizons)-1),
    description='Horizons',
)


widgets.interactive(lambda stat, scaling, horizons, status, sorted_by: plotly_scatter_horizons(FRAME, horizons, stat, scaling, status, sorted_by), \
                    horizons=horizon_widget, stat=stat_widget, scaling=scaling_widget, status=status_widget, sorted_by=sorted_by_widget)


interactive(children=(Select(description='Stat', options=('time', 'choices', 'conflicts', 'status'), value='ti…