In [1]:
# Fetch the smart_predict_optimize repository; the next cell imports its HelperFunctions module.
!git clone https://github.com/michaelzcjia/smart_predict_optimize.git

Cloning into 'smart_predict_optimize'...
remote: Enumerating objects: 79, done.[K
remote: Counting objects: 100% (79/79), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 79 (delta 31), reused 65 (delta 18), pack-reused 0[K
Unpacking objects: 100% (79/79), done.


In [2]:
from smart_predict_optimize.HelperFunctions import *
import time
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import pickle


In [3]:
def problem_size_experiment(params, noise, degree, sigma, iterations=30):
    '''
    Runs the direct and SGD solvers for every combination of the given
    parameters and collects runtimes and losses for each repetition.

    input:
        dict{str:list} params: dictionary of parameter values to experiment with. Must specify 'n', 'p', and 'grid_dim'
        float noise: multiplicative noise term applied to cost vector, sampled from uniform distribution in [1-noise, 1+noise]
        int degree: polynomial degree of generated cost vector. When degree=1, expected value of c is linear in x. Degree > 1 controls the amount of model misspecification.
        float sigma: scalar used to fill a length-p array that is passed on to generate_data
        int iterations: number of freshly generated datasets solved per (n, p, grid_dim) combination

    returns: pandas.DataFrame indexed by (n, p, grid_dim, metric) with one column per
        iteration. The metrics are: direct_runtime, SGD_runtime, SPO_loss_direct,
        SPO_loss_SGD, SPO_plus_loss_direct and SPO_plus_loss_SGD. Runtimes are
        elapsed seconds measured with time.perf_counter().
    '''

    # Metric names in the order in which their rows appear in the result.
    metric_names = (
        'direct_runtime',
        'SGD_runtime',
        'SPO_loss_direct',
        'SPO_loss_SGD',
        'SPO_plus_loss_direct',
        'SPO_plus_loss_SGD',
    )

    experimental_results = {}

    # For each parameter combo solve the problem instance and record results
    for n, p, grid_dim in itertools.product(params['n'], params['p'], params['grid_dim']):

        # create sigma of length p
        sigma_arr = np.full(p, sigma)

        # One list per metric; each list receives one entry per iteration.
        metrics = {name: [] for name in metric_names}

        # The shortest path constraints depend only on grid_dim, so build them once per combo.
        A, b = CreateShortestPathConstraints(grid_dim)

        for i in range(iterations):
            print(n, p, grid_dim, i)

            # Generate the dataset
            X, C = generate_data(n, p, grid_dim, sigma_arr, noise, degree)

            # Run the direct solution and record the elapsed time.
            # perf_counter is monotonic and high resolution, unlike time.time().
            start_direct = time.perf_counter()
            B_direct = DirectSolution(A, b, X, C)
            metrics['direct_runtime'].append(time.perf_counter() - start_direct)

            # Run the SGD solution and record the elapsed time
            start_sgd = time.perf_counter()
            B_SGD = GradientDescentSolution(A, b, X, C, batch_size=10, epsilon=0.001)
            metrics['SGD_runtime'].append(time.perf_counter() - start_sgd)

            # Record losses (evaluated outside the timed sections, on the same data used for fitting)
            solver = ShortestPathSolver(A, b)
            metrics['SPO_loss_direct'].append(SPOLoss(solver, X, C, B_direct))
            metrics['SPO_loss_SGD'].append(SPOLoss(solver, X, C, B_SGD))
            metrics['SPO_plus_loss_direct'].append(SPOplusLoss(solver, X, C, B_direct))
            metrics['SPO_plus_loss_SGD'].append(SPOplusLoss(solver, X, C, B_SGD))

        # store results from all iterations, keyed by (n, p, grid_dim, metric)
        for name in metric_names:
            experimental_results[(n, p, grid_dim, name)] = metrics[name]

    # Tuple keys become MultiIndex columns; transposing turns them into the row index.
    experimental_results = pd.DataFrame(experimental_results).transpose()
    experimental_results.index.names = ['n', 'p', 'grid_dim', 'metric']
    return experimental_results

In [5]:
# Experiment 1: sweep the number of samples n from 100 to 1000 in steps of 100,
# with p and grid_dim both held at 5.
# NOTE(review): the saved output under this cell prints n=50, which is not in this
# list, so it looks like it came from an earlier run. Re-run before relying on it.
params = {
    "n": list(range(100, 1001, 100)),
    "p": [5],
    "grid_dim": [5],
}
noise = 0.25
degree = 3
sigma = 0.2

experiment1 = problem_size_experiment(
    params, noise, degree, sigma, iterations=30
)

50 5 5 0
Converged after 119 steps
50 5 5 1
Converged after 84 steps
50 5 5 2
Converged after 125 steps
50 5 5 3
Converged after 92 steps
50 5 5 4
Converged after 108 steps
50 5 5 5
Converged after 129 steps
50 5 5 6
Converged after 98 steps
50 5 5 7
Converged after 112 steps
50 5 5 8
Converged after 112 steps
50 5 5 9
Converged after 94 steps
50 5 5 10
Converged after 100 steps
50 5 5 11
Converged after 79 steps
50 5 5 12
Converged after 108 steps
50 5 5 13
Converged after 131 steps
50 5 5 14
Converged after 112 steps
50 5 5 15
Converged after 146 steps
50 5 5 16
Converged after 140 steps
50 5 5 17
Converged after 106 steps
50 5 5 18
Converged after 102 steps
50 5 5 19
Converged after 110 steps
50 5 5 20
Converged after 130 steps
50 5 5 21
Converged after 135 steps
50 5 5 22
Converged after 115 steps
50 5 5 23
Converged after 124 steps
50 5 5 24
Converged after 106 steps
50 5 5 25
Converged after 132 steps
50 5 5 26
Converged after 118 steps
50 5 5 27
Converged after 125 steps
50 5 5 

In [5]:
# Persist the experiment results to Google Drive (requires the Colab runtime).
from google.colab import drive

drive.mount('/content/gdrive')

# The output path is relative, so it lands on the mounted drive only when the
# working directory is /content.
# The context manager makes sure the file is flushed and closed after the dump.
with open('gdrive/MyDrive/SPOresults.pkl', 'wb') as results_file:
    pickle.dump(experiment1, results_file)

Mounted at /content/gdrive
