In [54]:
import numpy as np
import matplotlib.pyplot as plt
import time

from ParallelSolve import gurobi_portfolio, majority_vote

### Given: budget, price vector ($v_i$), mean return vector ($\mu_i$).

Objective (minimize the conditional value at risk, CVaR, at the 95% level): $\min_{x,\,c}\; c + \frac{1}{0.05}\,\mathbb{E}\Big[\big(-\sum_i \xi_i v_i x_i - c\big)_+\Big]$

subject to

$\sum_i x_i v_i \le \text{budget}$

$\sum_i \mu_i v_i x_i \ge 1.5 \cdot \text{budget}$

$x_i \ge 0$, $x_i$ integer

In [67]:
# Seeded generator shared by the sampling calls in this notebook
rng = np.random.default_rng(seed=2024)


def sample_func(rng, distribution_name, **dist_params):
    """Draw random samples from the distribution called `distribution_name`.

    Parameters
    ----------
    rng : numpy.random.Generator
        Generator used for every draw.
    distribution_name : str
        Either "sym_pareto" (handled here) or the name of a sampling method
        available on `rng` (e.g. "pareto", "multivariate_normal").
    **dist_params
        For "sym_pareto": the keys 'a' and 'size'. Otherwise the keyword
        arguments are forwarded unchanged to the `rng` method.

    Returns
    -------
    The drawn samples. For "sym_pareto" this is the difference of two
    independent `rng.pareto(a, size)` draws shifted by `a` (intended to give a
    symmetric distribution with mean `a`). For "pareto" the raw draw is
    shifted by 1; any other distribution is returned as drawn.

    Raises
    ------
    ValueError
        If `distribution_name` is neither "sym_pareto" nor an attribute of `rng`.
    """
    if distribution_name == "sym_pareto":
        shape_param = dist_params['a']
        n_draws = dist_params['size']
        # Two independent draws: one for the right tail, one mirrored to the left.
        right_tail = rng.pareto(shape_param, n_draws)
        left_tail = -rng.pareto(shape_param, n_draws)
        return right_tail + left_tail + shape_param

    if not hasattr(rng, distribution_name):
        raise ValueError(f"Unsupported distribution: {distribution_name}")

    draws = getattr(rng, distribution_name)(**dist_params)
    if distribution_name == "pareto":
        # rng.pareto is shifted by one before being returned
        return draws + 1
    return draws


def generate_covariance_matrix(m, rng=None):
    """Build a random symmetric positive semi-definite m-by-m matrix.

    The matrix is A @ A.T where A has entries drawn uniformly from [0, 1).

    Parameters
    ----------
    m : int
        Dimension of the matrix.
    rng : numpy.random.Generator, optional
        Generator used to draw A. When omitted, the global NumPy random state
        is used (the original behaviour), which is not reproducible unless the
        global state has been seeded by the caller.

    Returns
    -------
    numpy.ndarray of shape (m, m)
    """
    if rng is None:
        A = np.random.rand(m, m)
    else:
        A = rng.random((m, m))
    # A @ A.T is symmetric and positive semi-definite by construction.
    cov_matrix = np.dot(A, A.T)
    return cov_matrix

In [56]:
# function that evaluates the objective (no need to do a second stage gurobi)
def calculate_cvar(sample_xi, p, x, alpha=0.95):
    """Empirical conditional value at risk of the loss of portfolio `x`.

    Parameters
    ----------
    sample_xi : array-like, one row per scenario and one column per asset
        Sampled values that are multiplied by the position values.
    p : array-like
        Per-asset prices.
    x : array-like
        Per-asset quantities (the decision being evaluated).
    alpha : float, default 0.95
        Quantile level that defines the value-at-risk threshold.

    Returns
    -------
    float
        Mean of the losses that are at or above the `alpha`-quantile of the
        losses, where loss = -(sample_xi @ (x * p)).
    """
    position_values = np.asarray(x) * np.asarray(p)
    losses = -np.dot(sample_xi, position_values)
    var_threshold = np.quantile(losses, alpha)
    # Use >= rather than >: when the quantile coincides with the largest loss
    # (e.g. all losses equal) a strict comparison selects nothing and the mean
    # of the empty tail would be NaN.
    tail_losses = losses[losses >= var_threshold]
    cvar = np.mean(tail_losses)

    return cvar

In [57]:
# SAA and majority vote comparison
def comparison(mu, dist_paras,p, b,B,number_of_iterations,ratio,sample_number, rng):
    """Solve the portfolio problem with SAA and with majority vote on fresh samples.

    For every sample size in `sample_number`, `number_of_iterations` independent
    samples are drawn. On each sample `majority_vote` is called twice with
    `gurobi_portfolio`: once as `(sample, 1, n, ...)` (stored as the SAA
    result) and once as `(sample, B, int(n*ratio), ...)` (stored as the
    majority-vote result).
    NOTE(review): the meaning of the 2nd/3rd arguments of `majority_vote`
    (presumably number of subsamples and subsample size) is defined in
    ParallelSolve - confirm there.

    Parameters
    ----------
    mu : mean vector; passed to the solver and, for the normal case, to the sampler.
    dist_paras : dict with keys "name" ("multivariate_normal" or "sym_pareto")
        and "paras" (covariance matrix, or one Pareto parameter per asset).
    p, b : forwarded unchanged to `gurobi_portfolio` through `majority_vote`.
    B, ratio : second argument and subsample fraction of the majority-vote call.
    number_of_iterations : repetitions per sample size.
    sample_number : iterable of sample sizes.
    rng : generator handed to `sample_func`.

    Returns
    -------
    (SAA_list, majority_list) : two lists indexed as [sample size][iteration].

    Raises
    ------
    ValueError
        If the distribution name is not supported (raised when sampling starts).
    """
    dist_name = dist_paras["name"]
    dist_args = dist_paras["paras"]

    SAA_list, majority_list = [], []
    for n in sample_number:
        saa_runs, vote_runs = [], []
        for _ in range(number_of_iterations):
            if dist_name == "multivariate_normal":
                sample_n = sample_func(rng, dist_name, size=n, mean=mu, cov=dist_args)
            elif dist_name == "sym_pareto":
                # One independent column per asset, stacked to shape (n, len(mu)).
                columns = [
                    sample_func(rng, dist_name, size=n, a=dist_args[k])
                    for k in range(len(mu))
                ]
                sample_n = np.vstack(columns).T
            else:
                raise ValueError(f"Unsupported distribution: {dist_name}")

            saa_runs.append(majority_vote(sample_n, 1, n, gurobi_portfolio, p, mu, b))
            vote_runs.append(
                majority_vote(sample_n, B, int(n*ratio), gurobi_portfolio, p, mu, b)
            )

        SAA_list.append(saa_runs)
        majority_list.append(vote_runs)
    return SAA_list, majority_list

def evaluation(SAA_list, majority_list, mu, dist_paras, p, number_of_iterations, sample_number, large_number_sample, rng):
    """Average out-of-sample CVaR of the SAA and majority-vote solutions.

    One evaluation sample of size `large_number_sample` is drawn and reused for
    every solution. For each sample size the CVaR (via `calculate_cvar`) of the
    `number_of_iterations` stored solutions is averaged.

    Parameters
    ----------
    SAA_list, majority_list : solutions indexed as [sample size][iteration].
    mu : mean vector (used for sampling in the normal case and for the number
        of assets in the Pareto case).
    dist_paras : dict with keys "name" and "paras", as used for sampling.
    p : price vector passed to `calculate_cvar`.
    number_of_iterations : number of stored solutions per sample size.
    sample_number : sequence of sample sizes (only its length is used here).
    large_number_sample : size of the evaluation sample.
    rng : generator handed to `sample_func`.

    Returns
    -------
    (SAA_obj_list, majority_obj_list) : one averaged objective per sample size.

    Raises
    ------
    ValueError
        If the distribution name is not supported.
    """
    dist_name = dist_paras["name"]
    dist_args = dist_paras["paras"]

    if dist_name == "multivariate_normal":
        large_sample = sample_func(rng, dist_name, size=large_number_sample, mean=mu, cov=dist_args)
    elif dist_name == "sym_pareto":
        # One independent column per asset, stacked to (large_number_sample, len(mu)).
        columns = [
            sample_func(rng, dist_name, size=large_number_sample, a=dist_args[k])
            for k in range(len(mu))
        ]
        large_sample = np.vstack(columns).T
    else:
        raise ValueError(f"Unsupported distribution: {dist_name}")

    SAA_obj_list = []
    majority_obj_list = []
    for size_idx in range(len(sample_number)):
        saa_total = sum(
            calculate_cvar(large_sample, p, SAA_list[size_idx][run])
            for run in range(number_of_iterations)
        )
        vote_total = sum(
            calculate_cvar(large_sample, p, majority_list[size_idx][run])
            for run in range(number_of_iterations)
        )
        SAA_obj_list.append(saa_total/number_of_iterations)
        majority_obj_list.append(vote_total/number_of_iterations)
    return SAA_obj_list, majority_obj_list

def figure_plot(SAA_obj_list, majority_obj_list,sample_number):
    """Plot the SAA and majority-vote objective values against the sample sizes.

    Parameters
    ----------
    SAA_obj_list, majority_obj_list : objective values, one per sample size.
    sample_number : sample sizes used on the x-axis.

    Shows the figure and returns None.
    """
    # Line styling shared by both curves
    shared_style = dict(markeredgecolor='none', linestyle='solid', linewidth=2)

    _, ax = plt.subplots()
    ax.plot(sample_number, SAA_obj_list, marker='o', color='blue', label='SAA', **shared_style)
    ax.plot(sample_number, majority_obj_list, marker='s', color='red', label='Majority Vote', **shared_style)
    ax.set_xlabel('Number of samples', size=20)
    ax.set_ylabel('Objective', size=20)
    ax.legend(loc='lower right')
    plt.show()

In [None]:
# This cell runs a single simulation for the symmetric pareto distribution.
# The problem parameters are drawn from the seeded generator `rng` (instead of
# the unseeded global np.random state) so the experiment can be reproduced.
m = 8
mu = rng.uniform(2, 5, m) # which is also the mean of the symmetric pareto distribution
dist_paras_pareto = {"name": "sym_pareto", "paras": mu}

p = rng.uniform(0, 1, m)
b = rng.uniform(1, 3)

B = 400
number_of_iterations = 10 # Number of iterations for each sample size (use to take average)
ratio = 0.1
sample_number = np.array([2**i for i in range(5, 16)])
large_number_sample = 1000000

SAA_list_pareto, majority_list_pareto = comparison(mu, dist_paras_pareto, p, b, B, number_of_iterations, ratio, sample_number, rng)
SAA_obj_list_pareto, majority_obj_list_pareto = evaluation(SAA_list_pareto, majority_list_pareto, mu, dist_paras_pareto, p, number_of_iterations, sample_number, large_number_sample,rng)
figure_plot(SAA_obj_list_pareto, majority_obj_list_pareto, sample_number)

In [None]:
# This cell runs a single simulation for the multivariate normal distribution.
# mu, p and b are drawn from the seeded generator `rng` (instead of the
# unseeded global np.random state) so the experiment can be reproduced.
m = 8
mu = rng.uniform(2, 5, m) # mean vector of the multivariate normal distribution
# NOTE: called this way, generate_covariance_matrix draws from the global NumPy
# random state, so the covariance matrix is not controlled by `rng`.
cov_matrix = generate_covariance_matrix(m)
dist_paras_normal = {"name": "multivariate_normal", "paras": cov_matrix}

p = rng.uniform(0, 1, m)
b = rng.uniform(1, 3)

B = 400
number_of_iterations = 10 # Number of iterations for each sample size (use to take average)
ratio = 0.1
sample_number = np.array([2**i for i in range(5, 16)])
large_number_sample = 1000000

SAA_list_normal, majority_list_normal = comparison(mu, dist_paras_normal, p, b, B, number_of_iterations, ratio, sample_number, rng)
SAA_obj_list_normal, majority_obj_list_normal = evaluation(SAA_list_normal, majority_list_normal, mu, dist_paras_normal, p, number_of_iterations, sample_number, large_number_sample, rng)
figure_plot(SAA_obj_list_normal, majority_obj_list_normal, sample_number)

In [73]:
# script for finding repeatedly run experiments to find good parameters (pareto case)
def find_parameters(m,B,number_of_iterations,ratio,sample_number,large_number_sample, rng, num_trials=30):
    """Run repeated random experiments (symmetric Pareto case) and rank them.

    Each trial draws a random problem instance (mu, p, b), runs `comparison`
    and `evaluation` on it, and scores it by the sum, over the upper half of
    the sample sizes, of (majority - SAA) / majority. Trials are returned in
    descending order of that score.
    NOTE(review): the sign of the score depends on the sign of the majority
    objective - confirm that a larger score is the desired ranking.

    Parameters
    ----------
    m : number of assets (length of mu and p).
    B, number_of_iterations, ratio, sample_number : forwarded to `comparison`.
    large_number_sample : evaluation sample size, forwarded to `evaluation`.
    rng : numpy.random.Generator used both for drawing the problem instances
        and for sampling, so a seeded generator makes the search reproducible.
    num_trials : int, default 30
        Number of random problem instances to try.

    Returns
    -------
    list of dict
        One dict per trial with keys "parameters" ([mu, p, b]), 'SAA_list',
        'majority_list', 'SAA_obj_list' and 'majority_obj_list'.
    """
    upper_half = range(len(sample_number)//2, len(sample_number))

    scored_trials = []
    for _ in range(num_trials):
        # Draw the instance from the supplied generator rather than the
        # unseeded global np.random state.
        mu = rng.uniform(2, 5, m)
        dist_paras_pareto = {"name": "sym_pareto", "paras": mu}
        p = rng.uniform(0, 1, m)
        b = rng.uniform(1, 4)

        SAA_list, majority_list = comparison(mu, dist_paras_pareto, p, b, B, number_of_iterations, ratio, sample_number, rng)
        SAA_obj_list, majority_obj_list = evaluation(SAA_list, majority_list, mu, dist_paras_pareto, p, number_of_iterations, sample_number, large_number_sample, rng)

        prop = sum((majority_obj_list[i] - SAA_obj_list[i])/majority_obj_list[i] for i in upper_half)
        scored_trials.append((prop, [mu.tolist(), p.tolist(), b], SAA_list, majority_list, SAA_obj_list, majority_obj_list))

    # Sort once after all trials; the sort is stable, so ties keep trial order.
    scored_trials.sort(key = lambda trial: trial[0], reverse = True)

    results = [
        {
            "parameters": params,
            'SAA_list': SAA_list,
            'majority_list': majority_list,
            'SAA_obj_list': SAA_obj_list,
            'majority_obj_list': majority_obj_list
        }
        for _, params, SAA_list, majority_list, SAA_obj_list, majority_obj_list in scored_trials
    ]

    return results

In [74]:
# Run the parameter search (symmetric Pareto case) and report its wall-clock time.
tic = time.time()

# Search configuration; every value below is passed to find_parameters.
m = 6
B = 400
number_of_iterations = 20
ratio = 0.1
# Sample sizes 2**5 ... 2**15
sample_number = np.array([2**i for i in range(5, 16)])
large_number_sample = 1000000
results = find_parameters(m, B, number_of_iterations, ratio, sample_number, large_number_sample, rng)

print(f"time = {time.time() - tic} secs")

Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13
Set parameter Username
Academic license - for non-commercial use only - expires 2025-03-13

In [None]:
# Print the parameters of every trial returned by find_parameters and plot
# its SAA / majority-vote objective curves.
for trial in results:
    trial_parameters = trial['parameters']
    print(f"parameters: {trial_parameters}")
    figure_plot(trial['SAA_obj_list'], trial['majority_obj_list'], sample_number)
    print("\n")

In [105]:
# Display one previously saved parameter set: mu, the distribution parameters, p and b.
# The values from that earlier run are kept in the comment block below for reference.
mu, dist_paras_normal, p, b

# (array([4.57825599, 2.59539243, 3.63529675, 3.52406055, 3.24507009,
#         4.72188993, 4.3935796 , 3.42740159]),
#  {'name': 'multivariate_normal',
#   'paras': array([[4.023997  , 2.40310981, 2.20044386, 2.17927984, 3.04517019,
#           3.13852272, 3.0678499 , 2.57802196],
#          [2.40310981, 1.92007872, 1.40783953, 1.35288294, 1.87754314,
#           2.0765766 , 2.12928102, 1.8373703 ],
#          [2.20044386, 1.40783953, 1.90733334, 1.76914087, 1.6352554 ,
#           1.70005219, 2.06525868, 1.8454927 ],
#          [2.17927984, 1.35288294, 1.76914087, 1.88579515, 1.63004875,
#           1.62722018, 1.93102905, 1.83989155],
#          [3.04517019, 1.87754314, 1.6352554 , 1.63004875, 3.2080927 ,
#           2.61255376, 2.67144999, 1.98725729],
#          [3.13852272, 2.0765766 , 1.70005219, 1.62722018, 2.61255376,
#           2.95382297, 2.66526416, 1.99690051],
#          [3.0678499 , 2.12928102, 2.06525868, 1.93102905, 2.67144999,
#           2.66526416, 3.37273352, 2.01893774],
#          [2.57802196, 1.8373703 , 1.8454927 , 1.83989155, 1.98725729,
#           1.99690051, 2.01893774, 2.27869206]])},
#  array([0.00780965, 0.44311801, 0.82218091, 0.64466618, 0.39305453,
#         0.48080274, 0.57539239, 0.49682178]),
#  1.505581876905086)

(array([4.57825599, 2.59539243, 3.63529675, 3.52406055, 3.24507009,
        4.72188993, 4.3935796 , 3.42740159]),
 {'name': 'multivariate_normal',
  'paras': array([[4.023997  , 2.40310981, 2.20044386, 2.17927984, 3.04517019,
          3.13852272, 3.0678499 , 2.57802196],
         [2.40310981, 1.92007872, 1.40783953, 1.35288294, 1.87754314,
          2.0765766 , 2.12928102, 1.8373703 ],
         [2.20044386, 1.40783953, 1.90733334, 1.76914087, 1.6352554 ,
          1.70005219, 2.06525868, 1.8454927 ],
         [2.17927984, 1.35288294, 1.76914087, 1.88579515, 1.63004875,
          1.62722018, 1.93102905, 1.83989155],
         [3.04517019, 1.87754314, 1.6352554 , 1.63004875, 3.2080927 ,
          2.61255376, 2.67144999, 1.98725729],
         [3.13852272, 2.0765766 , 1.70005219, 1.62722018, 2.61255376,
          2.95382297, 2.66526416, 1.99690051],
         [3.0678499 , 2.12928102, 2.06525868, 1.93102905, 2.67144999,
          2.66526416, 3.37273352, 2.01893774],
         [2.57802196, 1.8

In [None]:
# A good configuration under normal distribution
# m = 5
# mu = np.array([2.26833837, 2.99724413, 2.05634484, 2.22041616, 2.27772837])

# cov_matrix = np.array([[1.53992965, 1.45799569, 1.33721954, 1.29951018, 1.27703683],
#  [1.45799569, 1.46349095, 1.28561251, 1.11928718, 1.2290614 ],
#  [1.33721954, 1.28561251, 2.59160798, 1.50136609, 1.95532864],
#  [1.29951018, 1.11928718, 1.50136609, 1.58527353, 1.23659311],
#  [1.27703683, 1.2290614,  1.95532864, 1.23659311, 1.59685207]])

# p = np.array([0.21428391, 0.09577878, 0.82863508, 0.27561237, 0.88738926])
# b = 2.3308601852993753

In [9]:
# code from before
# Testing: SAA and majority vote comparison - parameters setup
# NOTE(review): `gurobi_SSKP` and `optimal_eval` are not imported or defined in
# the visible part of this notebook; they must be brought into scope before
# this cell can run.

sample_number = np.array([2**i for i in range(10, 17)])
number_of_iterations = 10 # Number of iterations for each sample size (use to take average)
m = 3 # Number of items


# Generate samples for evaluation of optimal SAA and majority vote
large_number_sample = 1000000 
a_ls = np.full(m, 2) #np.random.rand(m)+1 # Pareto distribution parameter
arrays_list = []
for i in range(m):
    # sample_func takes the generator as its first argument
    arrays_list.append(sample_func(rng, 'pareto', size=large_number_sample, a=a_ls[i]))
large_sample = np.vstack(arrays_list).T

parameter_list = []
# NOTE(review): SAA_list_test / majority_list_test are never filled below.
SAA_list_test = []
majority_list_test = []
SAA_obj_list_test = []
majority_obj_list_test = []

for num_test in range(10):
    print(num_test)
    # Draw the instance from the seeded generator so the test is reproducible
    c = float(rng.uniform(3, 4, size=1)[0]) # Cost 
    q = float(rng.uniform(0, 2, size=1)[0]) # Budget
    r = rng.uniform(3, 4, size=m) # Rewards
    
    SAA_obj = 0
    majority_obj = 0
    flag = 0
    for n in sample_number:
        # Reset the accumulators for every sample size; otherwise the average
        # of the previous sample size leaks into the current one.
        SAA_obj = 0
        majority_obj = 0
        SAA_intermediate = []
        majority_intermediate = []
        for j in range(number_of_iterations):
            arrays_list = []
            for i in range(m):
                arrays_list.append(sample_func(rng, 'pareto', size=n, a=a_ls[i]))
            sample_n = np.vstack(arrays_list).T
            SAA = majority_vote(sample_n, 1, n, gurobi_SSKP, r,c,q)
            SAA_intermediate.append(SAA)
            SAA_obj += optimal_eval(large_sample, SAA, r, c, q)

            majority = majority_vote(sample_n, 300, int(n/10), gurobi_SSKP, r,c,q)
            majority_intermediate.append(majority)
            majority_obj += optimal_eval(large_sample, majority, r, c, q)

        SAA_obj = SAA_obj/number_of_iterations
        majority_obj = majority_obj/number_of_iterations

        # Discard this parameter set as soon as SAA scores higher than majority vote
        if SAA_obj > majority_obj:
            flag = 1
            break

    if flag == 1:
        continue 
    
    # Values recorded here are the averages for the last sample size
    parameter_list.append([m, c, q, r])
    SAA_obj_list_test.append(SAA_obj)
    majority_obj_list_test.append(majority_obj)

0
Set parameter Username
Academic license - for non-commercial use only - expires 2025-02-20
Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (mac64[arm] - Darwin 21.3.0 21D62)

CPU model: Apple M1 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 1024 rows, 1027 columns and 4096 nonzeros
Model fingerprint: 0xe3b60d0c
Variable types: 1024 continuous, 3 integer (3 binary)
Coefficient statistics:
  Matrix range     [1e-04, 1e+02]
  Objective range  [3e-03, 3e+00]
  Bounds range     [1e+00, 1e+00]
  RHS range        [2e+00, 2e+00]
Found heuristic solution: objective -0.0000000
Found heuristic solution: objective 4.6809689
Presolve removed 1024 rows and 1027 columns
Presolve time: 0.00s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 0.00 seconds (0.00 work units)
Thread count was 1 (of 10 available processors)

Solution count 4: 4.68097 4.68097 4.68097 -0 

Optimal solution found (tolerance 1.00e-04

TypeError: 'NoneType' object is not iterable