## Import Packages 

In [2]:
from spo.data import trivialsurgery, dataset
from spo.model import trivialSurgeryModel
from spo.twostage import sklearnPred
from spo.eval import calUnambSPO

In [3]:
import os
import time

import pandas as pd
from matplotlib import pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tqdm import tqdm

## Build Optimizer

In [4]:
# Optimization model for the trivial-surgery problem (built by trivialSurgeryModel,
# not a shortest-path model, despite the `sp_` prefix of the variable name).
# NOTE(review): the meaning of `k` is not visible in this notebook - confirm against
# trivialSurgeryModel's signature. `num_surgeries` is also passed to genData below.
k = 10
num_surgeries = 15
sp_model = trivialSurgeryModel(k, num_surgeries)

Academic license - for non-commercial use only - expires 2021-08-04
Using license file /Users/jmosseri/gurobi.lic


## Build Predictor

In [5]:
# Linear-regression predictor: a scikit-learn LinearRegression wrapped together with
# the optimization model by sklearnPred. The wrapper is used below through its
# fit(x, c) / predict(x) methods.
lr = LinearRegression()
lr_twostage = sklearnPred(lr, sp_model)

In [6]:
# Random-forest predictor: same sklearnPred wrapper as the linear model, around a
# RandomForestRegressor. random_state=135 pins the forest's randomness so reruns match.
rf = RandomForestRegressor(random_state=135)
rf_twostage = sklearnPred(rf, sp_model)

## Experiments

### Training Set Size = 100, Noise Half−width = 0 

In [7]:
# set up: configuration read by the experiment loops of this section
n = 100 # number of training samples (the loops generate n+1000 samples and hold out 1000 for testing)
p = 5 # feature size, passed as the second argument to genData
e = 0 # noise half-width, passed to genData as noise_width
degs = [1, 2, 4, 6] # values swept as genData's `deg` argument; also the columns of the result tables

In [8]:
# init DataFrame: empty result tables with one column per degree in `degs`.
# df1_lr holds the linear-regression losses and df1_rf the random-forest losses;
# both are populated by the experiment loops below.
df1_lr = pd.DataFrame(columns = degs)
df1_rf = pd.DataFrame(columns = degs)

In [9]:
# Experiment: linear-regression predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df1_lr.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        lr_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = lr_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df1_lr = pd.DataFrame(records, columns=degs)

Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 1238.68it/s]


Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 1586.80it/s]


Evaluate....


100%|██████████| 1000/1000 [00:02<00:00, 455.38it/s]


Normalized SPO Loss: 0.00%

Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 1446.23it/s]


Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 1620.20it/s]


Evaluate....


100%|██████████| 1000/1000 [00:02<00:00, 487.15it/s]


Normalized SPO Loss: 0.05%

Optimizing for optDataset...


100%|██████████| 100/100 [00:00<00:00, 1441.00it/s]


Optimizing for optDataset...


100%|██████████| 1000/1000 [00:00<00:00, 1674.28it/s]


Evaluate....


100%|██████████| 1000/1000 [00:02<00:00, 437.16it/s]


KeyboardInterrupt: 

In [None]:
# Box plot of the linear-regression losses in df1_lr: one green box per degree column.
box_style = {'facecolor': 'g', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df1_lr, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 100,\nNoise Half−width = 0')
plt.show()

In [None]:
# Experiment: random-forest predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df1_rf.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        rf_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = rf_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df1_rf = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the random-forest losses: one red box per degree column.
# Plots df1_rf - the original cell plotted df1_lr here, so the "random forest"
# figure silently repeated the linear-regression results.
plt.figure(figsize=(12,6))
plt.boxplot(df1_rf, boxprops=dict(facecolor='r', color='k'), medianprops=dict(color='k'), patch_artist=True)
plt.xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
plt.xticks(ticks=[1,2,3,4], labels=[1,2,4,6], fontsize=12)
plt.ylabel('Normalized SPO Loss', fontsize=16)
plt.yticks(fontsize=12)
plt.ylim(0, 0.5)
plt.title('Training Set Size = 100,\nNoise Half−width = 0')
plt.show()

### Training Set Size = 100, Noise Half−width = 0.5 

In [None]:
# set up: configuration read by the experiment loops of this section
n = 100 # number of training samples (the loops generate n+1000 samples and hold out 1000 for testing)
p = 5 # feature size, passed as the second argument to genData
e = 0.5 # noise half-width, passed to genData as noise_width
degs = [1, 2, 4, 6] # values swept as genData's `deg` argument; also the columns of the result tables

In [None]:
# init DataFrame: empty result tables with one column per degree in `degs`.
# df2_lr holds the linear-regression losses and df2_rf the random-forest losses;
# both are populated by the experiment loops below.
df2_lr = pd.DataFrame(columns = degs)
df2_rf = pd.DataFrame(columns = degs)

In [None]:
# Experiment: linear-regression predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df2_lr.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        lr_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = lr_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df2_lr = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the linear-regression losses in df2_lr: one green box per degree column.
box_style = {'facecolor': 'g', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df2_lr, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 100,\nNoise Half−width = 0.5')
plt.show()

In [None]:
# Experiment: random-forest predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df2_rf.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        rf_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = rf_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df2_rf = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the random-forest losses in df2_rf: one red box per degree column.
box_style = {'facecolor': 'r', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df2_rf, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 100,\nNoise Half−width = 0.5')
plt.show()

### Training Set Size = 1000, Noise Half−width = 0

In [None]:
# set up: configuration read by the experiment loops of this section
n = 1000 # number of training samples (the loops generate n+1000 samples and hold out 1000 for testing)
p = 5 # feature size, passed as the second argument to genData
e = 0 # noise half-width, passed to genData as noise_width
degs = [1, 2, 4, 6] # values swept as genData's `deg` argument; also the columns of the result tables

In [None]:
# init DataFrame: empty result tables with one column per degree in `degs`.
# df3_lr holds the linear-regression losses and df3_rf the random-forest losses;
# both are populated by the experiment loops below.
df3_lr = pd.DataFrame(columns = degs)
df3_rf = pd.DataFrame(columns = degs)

In [None]:
# Experiment: linear-regression predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df3_lr.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        lr_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = lr_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df3_lr = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the linear-regression losses in df3_lr: one green box per degree column.
box_style = {'facecolor': 'g', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df3_lr, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 1000,\nNoise Half−width = 0')
plt.show()

In [None]:
# Experiment: random-forest predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df3_rf.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        rf_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = rf_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df3_rf = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the random-forest losses in df3_rf: one red box per degree column.
box_style = {'facecolor': 'r', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df3_rf, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 1000,\nNoise Half−width = 0')
plt.show()

### Training Set Size = 1000, Noise Half−width = 0.5

In [None]:
# set up: configuration read by the experiment loops of this section
n = 1000 # number of training samples (the loops generate n+1000 samples and hold out 1000 for testing)
p = 5 # feature size, passed as the second argument to genData
e = 0.5 # noise half-width, passed to genData as noise_width
degs = [1, 2, 4, 6] # values swept as genData's `deg` argument; also the columns of the result tables

In [None]:
# init DataFrame: empty result tables with one column per degree in `degs`.
# df4_lr holds the linear-regression losses and df4_rf the random-forest losses;
# both are populated by the experiment loops below.
df4_lr = pd.DataFrame(columns = degs)
df4_rf = pd.DataFrame(columns = degs)

In [None]:
# Experiment: linear-regression predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df4_lr.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        lr_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = lr_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df4_lr = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the linear-regression losses in df4_lr: one green box per degree column.
box_style = {'facecolor': 'g', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df4_lr, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 1000,\nNoise Half−width = 0.5')
plt.show()

In [None]:
# Experiment: random-forest predictor, 8 seeded trials for every degree in `degs`.
# Each trial contributes one row {deg: normalized SPO loss} to df4_rf.
n_test = 1000  # held-out samples generated on top of the n training samples
records = []  # one dict per trial, turned into a DataFrame after the loop
for i in range(8):
    row = {}
    for deg in degs:
        # generate data (seed=i makes every trial reproducible)
        x, c = trivialsurgery.genData(n + n_test, p, num_surgeries, deg=deg, noise_width=e, seed=i)
        # data split: exactly n_test rows are held out, the remaining n are used for training
        x_train, x_test, c_train, c_test = train_test_split(x, c, test_size=n_test, random_state=i)
        # build data set
        sp_dataset_train = dataset.optDataset(sp_model, x_train, c_train)
        sp_dataset_test = dataset.optDataset(sp_model, x_test, c_test)
        # training
        rf_twostage.fit(sp_dataset_train.x, sp_dataset_train.c)
        # prediction
        c_test_pred = rf_twostage.predict(sp_dataset_test.x)
        # eval: sum the per-sample SPO loss over the whole test set
        loss = 0
        print('Evaluate....')
        time.sleep(1)  # presumably keeps the print above from interleaving with the tqdm bar
        for j in tqdm(range(n_test)):
            c_pred_i = c_test_pred[j]
            c_true_i = sp_dataset_test.c[j]
            z_true_i = sp_dataset_test.z[j,0]
            loss += calUnambSPO(sp_model, c_pred_i, c_true_i, z_true_i)
        # normalize by the summed absolute values of the test set's z
        # (presumably the optimal objective values - confirm in optDataset)
        loss /= abs(sp_dataset_test.z).sum()
        time.sleep(1)
        print('Normalized SPO Loss: {:.2f}%'.format(loss * 100))
        row[deg] = loss
        print()
    records.append(row)
# Build the table once: DataFrame.append was removed in pandas 2.0, and rebuilding
# from `records` means re-running this cell no longer stacks duplicate rows.
df4_rf = pd.DataFrame(records, columns=degs)

In [None]:
# Box plot of the random-forest losses in df4_rf: one red box per degree column.
box_style = {'facecolor': 'r', 'color': 'k'}
median_style = {'color': 'k'}
fig, ax = plt.subplots(figsize=(12, 6))
ax.boxplot(df4_rf, patch_artist=True, boxprops=box_style, medianprops=median_style)
ax.set_xlabel('Deg', fontsize=16)
# box positions 1..4 are relabelled with the degree each column stands for
ax.set_xticks([1, 2, 3, 4])
ax.set_xticklabels([1, 2, 4, 6], fontsize=12)
ax.set_ylabel('Normalized SPO Loss', fontsize=16)
plt.setp(ax.get_yticklabels(), fontsize=12)
ax.set_ylim(0, 0.5)
ax.set_title('Training Set Size = 1000,\nNoise Half−width = 0.5')
plt.show()

## Save File 

In [None]:
# Write every result table to ./res/ts/ as CSV (index=False: only the degree columns are saved).
# File names encode predictor (lr/rf), training-set size (n100/n1000) and whether noise was added.
# Create the output directory first: to_csv does not create missing parent
# directories, so a fresh checkout would otherwise fail on the first write.
os.makedirs('./res/ts', exist_ok=True)
df1_lr.to_csv('./res/ts/lr_n100.csv', index=False)
df1_rf.to_csv('./res/ts/rf_n100.csv', index=False)
df2_lr.to_csv('./res/ts/lr_n100_noise.csv', index=False)
df2_rf.to_csv('./res/ts/rf_n100_noise.csv', index=False)
df3_lr.to_csv('./res/ts/lr_n1000.csv', index=False)
df3_rf.to_csv('./res/ts/rf_n1000.csv', index=False)
df4_lr.to_csv('./res/ts/lr_n1000_noise.csv', index=False)
df4_rf.to_csv('./res/ts/rf_n1000_noise.csv', index=False)