In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pickle
import torch
from models import *

In [4]:
from sklearn.preprocessing import MinMaxScaler

dataset = pd.read_csv('expense_prediction.csv').iloc[:, 1:]
random.seed(1)
dataset = dataset.sample(n=1000)
y = np.array(dataset.iloc[:, -5:], dtype=np.float32)
X = dataset.iloc[:, 0:dataset.shape[1] - 5]
X['Household Head Sex'] = [1 if x == 'Male' else 0 for x in X['Household Head Sex']]
scaler = MinMaxScaler()
X[X.columns] = scaler.fit_transform(X[X.columns])
X = np.array(X, dtype=np.float32)
X = np.nan_to_num(X)

In [5]:
from sklearn.metrics import pairwise_distances

def infy_pairwise_distance(a, b):
    return max(abs(a - b))

pairwise_distances(X, X, metric=infy_pairwise_distance).mean()

0.7567898416526057

In [25]:
list(filter(lambda x: 'c' in x, ['cat', 'dog', 'cow']))

['cat', 'cow']

In [15]:
results_baseline

Unnamed: 0,fold,alpha,beta,mse train corrected,mse test corrected,mse train,mse test,violations train,violations test,runtime
0,1,0.49,0.49,35.43887,36.104168,27.141169,29.021078,1717,419,307.74808
1,2,0.49,0.49,28.851023,28.95071,23.054962,23.457022,1861,518,289.812781
2,3,0.49,0.49,29.933538,34.708954,24.165188,24.072674,1927,453,290.633487
3,4,0.49,0.49,33.53762,33.81579,26.210526,25.786938,2484,616,298.880504
4,5,0.49,0.49,31.505772,26.455896,24.514729,23.190573,2270,578,321.818637


In [18]:
folders = range(1, 6)
processed_baseline_results = []
for f in folders:
    results_baseline = pd.read_csv('{}/baseline_output.csv'.format(f)).iloc[:, 1:]
    for i in range(results_baseline.shape[0]):
        processed_baseline_results.append([f, i + 1, results_baseline.iloc[i, :]['mse test corrected'], 
                                           results_baseline.iloc[i, :]['violations test'],
                                          results_baseline.iloc[i, :]['runtime']])
processed_baseline_results = pd.DataFrame(processed_baseline_results)
processed_baseline_results.columns = ['exp num', 'fold', 'test mse', 'test violations', 'runtime']
processed_baseline_results['test violations'] = 1 - processed_baseline_results['test violations']/(0.2*X.shape[0])
processed_baseline_results

Unnamed: 0,exp num,fold,test mse,test violations,runtime
0,1,1,31.619476,0.957988,226.154778
1,1,2,34.02646,0.911751,161.598614
2,1,3,26.707281,0.945916,178.732382
3,1,4,41.539165,0.918874,163.32671
4,1,5,34.257244,0.921409,231.214278
5,2,1,33.1607,0.932878,326.237035
6,2,2,22.972904,0.954125,318.909791
7,2,3,20.042006,0.906922,306.087187
8,2,4,33.179405,0.954487,311.105004
9,2,5,27.86648,0.972837,335.978246


In [19]:
print(np.mean(processed_baseline_results['test mse']), 
      np.std(processed_baseline_results['test mse']),
      np.mean(processed_baseline_results['test violations']), 
      np.std(processed_baseline_results['test violations']),
      np.mean(processed_baseline_results['runtime']), 
      np.std(processed_baseline_results['runtime']))

30.50678116 6.946569139036114 0.9350411666706908 0.020153035838644615 227.3252305984497 70.98404593450617


In [6]:
def data_processing(data):
    data.dropna(inplace=True)
    data.reset_index(drop=True, inplace=True)
    categorical_features = list(data.select_dtypes(include='object').columns)
    categorical_features = list(set(categorical_features))
    numerical_features = [c for c in data.columns if c not in categorical_features]
    scaler = MinMaxScaler()
    data[numerical_features] = scaler.fit_transform(data[numerical_features])

    for _c in categorical_features:
        data[_c] = pd.Categorical(data[_c])
    df_transformed = pd.get_dummies(data, drop_first=True)
    return df_transformed, scaler


In [29]:
def get_adi(model, X, delta = 0.1):
    torch.onnx.export(model,                 # model being run
                  torch.tensor(X[:5]),       # model input (or a tuple for multiple inputs)
                  "baseline.onnx",           # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})
    onnx_model = onnx.load("baseline.onnx")
    onnx.checker.check_model(onnx_model)
    
    adi = 0
    for instance in X:
        network = Marabou.read_onnx(filename="baseline.onnx")
        income = (instance[-1] - scaler.min_[-1]) / scaler.scale_[-1]
        inputVars = network.inputVars[0][0]
        outputVars = network.outputVars[0][0]
        ranges = [[0, 1]]*len(inputVars)
        
        for i in range(len(instance)):
            network.setLowerBound(inputVars[i], max(ranges[i][0], instance[i] - delta))
            network.setUpperBound(inputVars[i], min(ranges[i][1], instance[i] + delta))
        
        print(outputVars)
        network.addInequality([13, 14, 15, 16, 17], [1, 1, 1, 1, 1], income)
        options = Marabou.createOptions(verbosity = 2)
        vals = network.solve(options = options)
        
        if vals[0] == 'sat':
            adi += 1
    return adi/len(X)


In [None]:
import onnx
from maraboupy import Marabou

for e in folders:
    output = []
    for f in range(0, 5):
        model = torch.load(str(e) + '/baseline_model_fold_{}.pt'.format(f + 1))
        for delta in [0.1, 0.01]:
            adi = get_adi(model, X, delta=delta)
            output.append([e, f + 1, delta, adi])
    output = pd.DataFrame(output)
    output.columns = ['experiment', 'fold', 'delta', 'adi']
    output.to_csv('{}/adi.csv'.format(e))


Instructions for updating:
non-resource variables are not supported in the long term
[13 14 15 16 17]
sat
input 0 = 0.10000000298023223
input 1 = 0.0
input 2 = 0.0
input 3 = 0.0
input 4 = 0.004347825050353998
input 5 = 0.2333333432674408
input 6 = 0.9
input 7 = 0.1666666805744171
input 8 = 0.06666667163372039
input 9 = 0.9
input 10 = 0.10270270109176635
input 11 = 0.0
input 12 = 0.08049484491348266
output 0 = 7.694975485768444
output 1 = -0.6482752357201098
output 2 = -1.3776447719864426
output 3 = 3.0654909380336197
output 4 = -0.6429259687985285
[13 14 15 16 17]
sat
input 0 = 0.0
input 1 = 0.0
input 2 = 0.0
input 3 = 0.0
input 4 = 0.0
input 5 = 0.2333333432674408
input 6 = 0.03370328184019276
input 7 = 0.3000000059604645
input 8 = 0.06666667163372039
input 9 = 0.9
input 10 = 0.3324324429035187
input 11 = 0.1
input 12 = 0.0
output 0 = 5.569320632382849
output 1 = -0.5909043461873373
output 2 = -1.1303321553050063
output 3 = 2.0327780656132832
output 4 = -0.6159801429442924
[13 14 15 1

In [31]:
for delta in [0.1, 0.01]:
    print("adi for delta = {} is {} +/- {}".format(delta, np.mean(output[output.delta == delta].adi.tolist()), np.std(output[output.delta == delta].adi.tolist())))

adi for delta = 0.1 is 0.9772000000000001 +/- 0.003709447398198285
adi for delta = 0.01 is 0.732 +/- 0.03442673379802389


In [4]:
processed_deepsade_results = []
for f in folders:
    results_deepsade = pd.read_csv('{}/deepsade_output_ls.csv'.format(f)).iloc[:, 1:]
    for i in range(results_deepsade.shape[0]):
        processed_deepsade_results.append([f, i + 1, results_deepsade.iloc[i, :]['mse test corrected'], 
                                           results_deepsade.iloc[i, :]['violations test'],
                                          results_deepsade.iloc[i, :]['runtime']])
processed_deepsade_results = pd.DataFrame(processed_deepsade_results)
processed_deepsade_results.columns = ['exp num', 'fold', 'test mse', 'test violations', 'runtime']
processed_deepsade_results['test violations'] = 1 - processed_deepsade_results['test violations']/(0.2*X.shape[0])
processed_deepsade_results

Unnamed: 0,exp num,fold,test mse,test violations,runtime
0,1,1,37.805508,1.0,90858.374725
1,1,2,45.888878,1.0,56351.230809
2,1,3,41.398438,1.0,84622.409244
3,1,4,33.252407,1.0,140165.838116
4,1,5,32.5652,1.0,123710.788937
5,2,1,36.304775,1.0,164471.193083
6,2,2,48.61942,1.0,128328.553013
7,2,3,38.017624,1.0,59940.337054
8,2,4,30.562143,1.0,124155.943672
9,2,5,39.932716,1.0,95958.132902


In [5]:
print(np.mean(processed_deepsade_results['test mse']), 
      np.std(processed_deepsade_results['test mse']),
      np.mean(processed_deepsade_results['test violations']), 
      np.std(processed_deepsade_results['test violations']),
     np.mean(processed_deepsade_results['runtime']), 
      np.std(processed_deepsade_results['runtime']))

38.36308504 4.594864396113996 1.0 0.0 102341.94467770576 40198.77885169761
