In [1]:
from IPython.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

In [2]:
import numpy as np
from desdeo_emo.EAs import RVEA, NSGAIII
from desdeo_problem.testproblems.TestProblems import test_problem_builder
from desdeo_problem import DataProblem
from desdeo_tools.utilities import fast_non_dominated_sort, hypervolume_indicator

import matplotlib.pyplot as plt
import sklearn
from pyDOE import lhs
import pandas as pd
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ExpSineSquared, WhiteKernel, RationalQuadratic, DotProduct, ConstantKernel, Matern

## Assignment 4

## Task 1 

Implement probability of selection for single objective optimization. * (refer eqn.
12, 13, 14)

In [3]:
#np.set_printoptions(precision=4,suppress=True) # easier to check values rn when we dont care too much about the exactness

In [4]:
# in minimization think probability of being wrong, in max probability of acceptance
# probability of sample A dominating sample B in max is P(A > B) is
def prob_selection(a, b, sigma_a, sigma_b):
    s = sigma_a / sigma_b
    m = (a - b) / sigma_b
    
    top = -2.5*m / np.sqrt(2 + 2*s**2)
    bot = 1 + np.exp(top)
    return 1 / bot

# b clearly dominates
a = np.array([1.,1.,3.])
b = np.array([2.,5.,1.])
sigma_a = 0.01
sigma_b = 0.01

prob_selection(a, b, sigma_a, sigma_b)

array([5.16642063e-055, 7.12457641e-218, 1.00000000e+000])

In [5]:
def prob_sel2(a, b):
    s = 1.
    m = (a - b) / 1.

    bot = 0.8*np.sqrt(2 + 2*s**2)
    return 0.5*(1 + np.tanh(m/bot))
    
    
    
# a1 clearly dominates b1, b1 clearly dominates a2
a = np.array([1.,1.,3.])
b = np.array([2.,5.,1.])


prob_sel2(a, b)

array([0.22270014, 0.00669285, 0.92414182])

So we can see that probability of being wrong with first items (1 and 2) by choosing a is 0.22. Again for second items (1,5) thats 0.007.  And also with 3rd items (3,1) its 0.92. So we can clearly see its working properly.

## Task 2

Implement probability of dominance and ranking. * (refer eqn. 15, 17, 18, 22)

- Generate 50 samples from DTLZ2 problem (3 objectives, 5 decision variables) using LHS sampling
- Build Kriging surrogates using the generated samples
- Resample or generate 20 data points randomly
- Rank the 20 points using probability of dominance ranking

In [6]:
dtlz2_3 = test_problem_builder("DTLZ2", n_of_variables=10, n_of_objectives=3)

In [7]:
def create_samples(dec_dim, samples):
    # create samples 
    x = lhs(dec_dim, samples)     
    return np.asarray(x)

In [8]:
x = create_samples(10, 50)
print(x.shape)
eval_results = dtlz2_3.evaluate(x)
y = eval_results.objectives
print(y[:5])

(50, 10)
[[0.01036054 0.00255694 1.5652018 ]
 [0.62344626 1.2807436  0.46982219]
 [0.01682817 1.52689439 0.39748128]
 [0.26087877 1.02314909 1.24290418]
 [0.90098103 0.79452953 0.81241821]]


In [9]:
print(y[:,0])

[0.01036054 0.62344626 0.01682817 0.26087877 0.90098103 1.62355111
 0.11561917 0.39517427 0.08310203 1.36062989 0.44934034 0.38784565
 0.45693609 0.77361781 1.05700672 1.20362712 0.22638598 0.87220131
 1.83892285 0.00881717 0.69219821 1.06703121 0.07237437 0.797057
 0.45301648 0.02760147 0.31132831 0.10404651 0.09819365 0.76201001
 0.81810897 0.88686998 0.21299806 1.50798702 0.03296605 1.15602488
 0.73294866 1.61554898 0.74984241 0.48151025 1.98142186 0.47598442
 0.90259875 0.83644563 1.16286381 1.15428215 0.54893222 1.25284948
 0.33861398 0.74537751]


In [10]:
## build surrogates

kernel = 1.0 * Matern(length_scale=1.0, length_scale_bounds=(1e-1, 20.0), nu=1.5)
    
y1 = y[:,0]
gpr1 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y1)

y2 = y[:,1]
gpr2 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y2)

y3 = y[:,2]
gpr3 = GaussianProcessRegressor(kernel,n_restarts_optimizer=1,random_state=7).fit(x, y3)

In [11]:
## resampling

x_samples = create_samples(10, 20)

# predict
y1_sample, y1_std = gpr1.predict(x_samples, return_std=True) 
print(np.mean(y1_std))

# predict
y2_sample, y2_std = gpr2.predict(x_samples, return_std=True) 
print(np.mean(y2_std))
# predict
y3_sample, y3_std = gpr3.predict(x_samples, return_std=True) 
print(np.mean(y3_std))

y_samples = np.column_stack((y1_sample, y2_sample, y3_sample))
#y_samples.reshape(1,-1)
y_samples.shape

0.1755047725529455
0.20721492660547117
0.1591923856873339


(20, 3)

In [12]:
#print(y_samples)

In [13]:
def sum_of_rank_pos(n):
    return n*(n - 1)/2

sum_of_rank_pos(y_samples.shape[0])

190.0

In [14]:
# Papers example, just to test the code
A = [0.2, 0.9]  # tässä on a1 a2. kun lasketaan prod P (Ai < bi) i to n, tulee P(A < B)
B = [0.3, 0.5]
C = [0.5, 0.3]
D = [0.8, 0.2]
E = [0.85, 0.5]
F = [0.9, 0.95]

sols = np.array([A, B, C, D, E, F])
print(sols.shape)
print(sum_of_rank_pos(sols.shape[0]))

(6, 2)
15.0


In [15]:
simples = np.array([1.,2.,3.,3.,5.,6.,6.5])

In [16]:
sum_of_rank_pos(simples.shape[0])

21.0

In [28]:
def prob_sel(a, b, sigma_a, sigma_b): 
    s = sigma_a / sigma_b
    #s = 1.
    m = (a - b) / sigma_b
    bot = 0.8*np.sqrt(2 + 2*s**2)
    return 0.5*(1 + np.tanh(m/bot))


def smaller(a, b, sigma_a, sigma_b):
    p = 0
    props = prob_sel(a, b, sigma_a, sigma_b)
    p = np.prod(props)
    return p


def bigger(a, b, sigma_a, sigma_b):
    p = 0
    props = np.ones_like(a.shape[0]) - prob_sel(a, b, sigma_a, sigma_b)
    p = np.prod(props)
    return p

def equals(a, b, sigma_a, sigma_b):
    s = smaller(a, b, sigma_a, sigma_b)
    b = bigger(a, b, sigma_a, sigma_b)
    return 1. - s - b

# eq 21: give fitnesses, return rankings 
def eq21(f, sigma_a, sigma_b):
    ri = []
    for j in range(f.shape[0]):
        summa = 0
        for i in range(f.shape[0]):
            summa += smaller(f[j], f[i], sigma_a, sigma_b)
        #summa -= 0.5 # remove from here if using it in eq22
        ri.append(summa)
    return np.asarray(ri)

def eq22(f, sigma_a, sigma_b):
    ri = []
    for j in range(f.shape[0]):
        summa = 0
        for i in range(f.shape[0]):
            summa += equals(f[j], f[i], sigma_a, sigma_b)
        summa *= 0.5
        summa -= 0.5 # remove from here if using it in eq21
        ri.append(summa)
    return np.asarray(ri)

# just calls eq21 and eq22 and sums them
def mop_ranking(f, sigma_a, sigma_b):
    return eq21(f, sigma_a, sigma_b) + eq22(f, sigma_a, sigma_b)

###  Testing with paper example

In [34]:
sigma_a, sigma_b = 0.0001, 0.0001 # no uncertainty

res = mop_ranking(sols, sigma_a, sigma_b)
res

array([2.  , 1.75, 1.5 , 1.5 , 3.25, 5.  ])

Same results as in the paper with 0 uncertainty.

In [35]:
sigma_a, sigma_b = 1., 1. # uncertainty of 1. 

res = mop_ranking(sols, sigma_a, sigma_b)
res

array([2.4563904 , 2.18422086, 2.18354484, 2.36772444, 2.68053434,
       3.12758511])

With sigma = 1, results are quite not like in the paper.. but

In [36]:
sigma_a, sigma_b = 0.1, 0.1 # uncertainty of 0.1. 

res = mop_ranking(sols, sigma_a, sigma_b)
res

array([2.29061645, 1.63406034, 1.43656564, 1.90739326, 3.19884553,
       4.53251878])

with sigma 0.1, it gives pretty much the same results as in the paper.

Assuming it works fine enough to continue, although not sure what is the problem.

## Ranking y_samples

In [23]:
s = []
sigma_a, sigma_b = np.mean(y1_std), np.mean(y2_std) # what about y3_std?
y_samples

res = mop_ranking(y_samples, sigma_a, sigma_b)
res

array([ 9.42993203,  9.07658494,  9.2839752 ,  9.12258372,  9.46986736,
        9.64007296,  9.61675007, 10.46343574,  9.41097698,  9.45228593,
        8.974647  ,  9.14702022,  9.91579433, 10.17114515,  9.56704969,
        9.86783602,  9.53670408,  9.56365501,  9.34873224,  8.94095134])

In [25]:
np.sum(res)

190.0

In [26]:
sum_of_rank_pos(y_samples.shape[0])

190.0