# Introduction


**What?** Introduction to Facebook Nevergrad



# Import modules

In [1]:
import nevergrad as ng
import numpy as np

# Getting rid of the warning messages
import warnings
warnings.filterwarnings("ignore")

# Explore the available optimiser

In [2]:
for OPTIMISER in sorted(ng.optimizers.registry.keys()):
    print(OPTIMISER)

ASCMADEthird
AdaptiveDiscreteOnePlusOne
AlmostRotationInvariantDE
AnisotropicAdaptiveDiscreteOnePlusOne
AvgMetaRecenteringNoHull
BO
BOSplit
BayesOptimBO
CM
CMA
CMandAS2
CMandAS3
CauchyLHSSearch
CauchyOnePlusOne
CauchyScrHammersleySearch
ChainCMAPowell
ChainDiagonalCMAPowell
ChainMetaModelPowell
ChainMetaModelSQP
ChainNaiveTBPSACMAPowell
ChainNaiveTBPSAPowell
Cobyla
DE
DiagonalCMA
DiscreteBSOOnePlusOne
DiscreteDoerrOnePlusOne
DiscreteLenglerOnePlusOne
DiscreteOnePlusOne
DoubleFastGADiscreteOnePlusOne
EDA
ES
FCMA
GeneticDE
HaltonSearch
HaltonSearchPlusMiddlePoint
HammersleySearch
HammersleySearchPlusMiddlePoint
HullAvgMetaRecentering
HullAvgMetaTuneRecentering
LHSSearch
LargeHaltonSearch
LhsDE
MetaModel
MetaRecentering
MetaTuneRecentering
MixES
MultiCMA
MultiDiscrete
MultiScaleCMA
MutDE
NGO
NGOpt
NGOpt10
NGOpt12
NGOpt13
NGOpt14
NGOpt4
NGOpt8
NGOptBase
NaiveIsoEMNA
NaiveTBPSA
NelderMead
NoisyBandit
NoisyDE
NoisyDiscreteOnePlusOne
NoisyOnePlusOne
NonNSGAIIES
ORandomSearch
OScrHammersleySea

# Your first unconstrained optimisation

In [3]:
def square(x):
    return sum((x - 0.5) ** 2)

# optimization on x as an array of shape (2,)
optimizer = ng.optimizers.NGOpt(parametrization = 2, budget = 100)
recommendation = optimizer.minimize(square)  # best value
print(recommendation.value)
# >>> [0.49999998 0.50000004]

[0.5        0.49999995]


In [4]:
dir(recommendation)

['_LAYER_LEVEL',
 '__add__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__pow__',
 '__radd__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__rpow__',
 '__rtruediv__',
 '__setattr__',
 '__sizeof__',
 '__slotnames__',
 '__str__',
 '__sub__',
 '__subclasshook__',
 '__truediv__',
 '__weakref__',
 '_call_deeper',
 '_check_frozen',
 '_constraint_checkers',
 '_dimension',
 '_frozen',
 '_generation',
 '_get_name',
 '_internal_get_standardized_data',
 '_internal_set_standardized_data',
 '_layer_index',
 '_layered_del_value',
 '_layered_get_value',
 '_layered_mutate',
 '_layered_recombine',
 '_layered_sample',
 '_layered_set_value',
 '_layers',
 '_losses',
 '_meta',
 '_name',
 '_new_with_data_layer',
 '_on_layer_added',
 '_r

# Your first constrained optimisation


- “Cheap” means that we do not try to reduce the number of calls to such constraints. 
- We basically repeat mutations until we get a satisfiable point.
- Let us say that we want to minimize:
>((x[0]-.5)^2 + (x[1]-.5)^2)
- under the constraint 
>(x[0] >= 1)



In [5]:
def square(x):
    return sum((x - 0.5) ** 2)

optimizer = ng.optimizers.NGOpt(parametrization = 2, budget = 100)
# define a constraint on first variable of x:
optimizer.parametrization.register_cheap_constraint(lambda x: x[0] >= 1)

recommendation = optimizer.minimize(square, verbosity = 2)
print("Final value:", recommendation.value)
# >>> [1.00037625, 0.50683314]

Launching 1 jobs with new suggestions
Updating fitness with value 5.585201021332205
99 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 5.585201021332205, count: 1, parameter: Array{(2,)}:[ 1.80181506 -1.47242961]>
Launching 1 jobs with new suggestions
Updating fitness with value 4.357677657530252
98 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 4.357677657530252, count: 1, parameter: Array{(2,)}:[ 1.46437718 -1.35139254]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.4136244411939989
97 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.4136244411939989, count: 1, parameter: Array{(2,)}:[1.00832898 0.10601257]>
Launching 1 jobs with new suggestions
Updating fitness with value 3.2300195275554304
96 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.4136244411939989, count: 1, parameter: Array{(2,)}:[1.00832898 0.10601257]>
Launchin

Updating fitness with value 0.6904816189512507
60 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2577173466897536, count: 1, parameter: Array{(2,)}:[1.00282619 0.56987966]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.5898897900789983
59 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2577173466897536, count: 1, parameter: Array{(2,)}:[1.00282619 0.56987966]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.30266547453971976
58 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2577173466897536, count: 1, parameter: Array{(2,)}:[1.00282619 0.56987966]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.5405196033297542
57 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2577173466897536, count: 1, parameter: Array{(2,)}:[1.00282619 0.56987966]>
Launching 1 jobs with new suggestions
Updatin

Updating fitness with value 0.264945657994561
21 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2565290764041153, count: 1, parameter: Array{(2,)}:[1.00216277 0.56604265]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.2757214399533214
20 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.2565290764041153, count: 1, parameter: Array{(2,)}:[1.00216277 0.56604265]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.25646995535425476
19 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.25646995535425476, count: 1, parameter: Array{(2,)}:[1.00601568 0.52044709]>
Launching 1 jobs with new suggestions
Updating fitness with value 0.2915118149015592
18 remaining budget and 0 running jobs
Current pessimistic best is: MultiValue<mean: 0.25646995535425476, count: 1, parameter: Array{(2,)}:[1.00601568 0.52044709]>
Launching 1 jobs with new suggestions
Updati

# Multi-objective optimisation


- According to the official documentation this option is not stable, not robust, not scalable and not optimal.
- It is unclear to me if this is still the case and further investigation will need to be carried out.



In [6]:
def multiobjective(x):
    return [np.sum(x ** 2), np.sum((x - 1) ** 2)]

print("Example: ", multiobjective(np.array([1.0, 2.0, 0])))
# >>> Example: [5.0, 2.0]

optimizer = ng.optimizers.CMA(parametrization=3, budget=100)

# for all but DE optimizers, deriving a volume out of the losses,
# it's not strictly necessary but highly advised to provide an
# upper bound reference for the losses (if not provided, such upper
# bound is automatically inferred with the first few "tell")
optimizer.tell(ng.p.MultiobjectiveReference(), [5, 5])
# note that you can provide a Parameter to MultiobjectiveReference,
# which will be passed to the optimizer

optimizer.minimize(multiobjective, verbosity = 0)

# The function embeds its Pareto-front:
print("Pareto front:")
for param in sorted(optimizer.pareto_front(), key=lambda p: p.losses[0]):
    print(f"{param} with losses {param.losses}")

# >>> Array{(3,)}:[0. 0. 0.] with loss [0. 3.]
#     Array{(3,)}:[0.39480968 0.98105712 0.55785803] with loss [1.42955333 0.56210368]
#     Array{(3,)}:[1.09901515 0.97673712 0.97153943] with loss [3.10573857 0.01115516]

# It can also provide subsets:
print("Random subset:", optimizer.pareto_front(2, subset="random"))
print("Loss-covering subset:", optimizer.pareto_front(2, subset="loss-covering"))
print("Domain-covering subset:", optimizer.pareto_front(2, subset="domain-covering"))
print("EPS subset:", optimizer.pareto_front(2, subset="EPS"))

Example:  [5.0, 2.0]
Pareto front:
Array{(3,)}:[0.03229814 0.05926466 0.12404622] with losses [0.01994293 2.58872491]
Array{(3,)}:[0.13930313 0.09500046 0.01775613] with losses [0.02874573 2.5246263 ]
Array{(3,)}:[0.23857447 0.09909974 0.07771459] with losses [0.07277809 2.2420005 ]
Array{(3,)}:[0.04546177 0.12369125 0.25323448] with losses [0.081494 2.236719]
Array{(3,)}:[0.05429401 0.25665873 0.21388397] with losses [0.1145679  2.06489447]
Array{(3,)}:[0.24503426 0.31927256 0.0260395 ] with losses [0.16265481 1.98196218]
Array{(3,)}:[0.33750681 0.24970508 0.19711778] with losses [0.2151189  1.64645954]
Array{(3,)}:[0.15928702 0.47201215 0.21161361] with losses [0.29294814 1.6071226 ]
Array{(3,)}:[0.67776098 0.37213942 0.12104515] with losses [0.61249962 1.27060851]
Array{(3,)}:[0.43094016 0.34455746 0.60361598] with losses [0.66878151 0.91055431]
Array{(3,)}:[0.63523424 0.59251202 0.23971277] with losses [0.81205524 0.87713719]
Array{(3,)}:[0.9888735  0.67214548 0.1836611 ] with loss

# Bounded continous variables and discrete variables

In [7]:
def fake_training(learning_rate: float, batch_size: int, architecture: str) -> float:
    # optimal for learning_rate=0.2, batch_size=4, architecture="conv"
    return (learning_rate - 0.2)**2 + (batch_size - 4)**2 + (0 if architecture == "conv" else 10)

# Instrumentation class is used for functions with multiple inputs
# (positional and/or keywords)
parametrization = ng.p.Instrumentation(
    # a log-distributed scalar between 0.001 and 1.0
    learning_rate=ng.p.Log(lower=0.001, upper=1.0),
    # an integer from 1 to 12
    batch_size=ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    # either "conv" or "fc"
    architecture=ng.p.Choice(["conv", "fc"])
)

optimizer = ng.optimizers.NGO(parametrization=parametrization, budget=100)
recommendation = optimizer.minimize(fake_training)

# show the recommended keyword arguments of the function
print(recommendation.kwargs)

{'learning_rate': 0.16074961067847227, 'batch_size': 4, 'architecture': 'conv'}


# References


- https://facebookresearch.github.io/nevergrad/optimization.html 
- https://reposhub.com/python/deep-learning/facebookresearch-nevergrad.html    



# Conclusions


- Still very little documentation online.
- It seems to have an edge for truly gradient-free multi-objective problems.

