Tout d'abord on crée la fonction

In [20]:
import numpy as np
import torch
torch.set_default_dtype(torch.float64)
from scipy.sparse.linalg import LinearOperator

class Problem() :
    def __init__(self,U,V,mu,nu,nb_messages=None,varepsilon=None,q=None) :
        ## L'ordre est (etat,message,action)
        self.V=V # Utilité du sender de taille (etat,action)
        self.mu=mu # Prior du sender de taille (etat)
        self.U=U # Utilité du reciever de taille (etat,action)
        self.nu=nu # Prior du reciever de taille (etat)
        self.varepsilon=varepsilon # Multiplicateur d'attache à une stratégie a priori (réel)
        self.q=q # Stratégie à priori du receiver de taille (messages,action)
        self.nb_messages=nb_messages
        self.check()
        self.size=self.nb_messages*self.nb_states # size of the input vector f is a function of \mathbb R^n to \mathbb R
        self.shape=(self.nb_states,self.nb_messages)
    def check(self) :
        self.nb_states, self.nb_actions=self.U.shape
        self.debug_shape(self.V,[self.nb_states, self.nb_actions])
        self.debug_shape(self.mu,[self.nb_states,])
        self.debug_shape(self.nu,[self.nb_states,])
        if self.q is not None :
            m,a=self.q.shape
            if self.nb_messages is None :
                self.nb_messages=m
            self.debug_shape(q,[self.nb_messages,self.nb_actions])
        if self.nb_messages is None :
            self.nb_messages=self.nb_actions
        if self.q is None and self.varepsilon is not None :
            self.q=np.ones((self.nb_messages,self.nb_actions))/self.nb_actions
    def debug_shape(self,vect,target_shape) :
        if not list(vect.shape)==target_shape :
            print('Found a vector of size ', vect.shape, 'expected ',target_shape)
            assert False
    def verbose(self,pi) :
        print('We have (state,message,action)=',self.nb_states,self.nb_messages,self.nb_actions)
        self.debug_shape(pi,[self.nb_states,self.nb_messages])
        theta=self.compute_theta(self.compute_g(pi))
        self.debug_shape(theta,[self.nb_messages,self.nb_actions])
        print('We have 1=',pi.sum(axis=1))
        print('We have 1=',theta.sum(axis=1))
        print('theta',theta)
        print('objective',self.objective(pi))
    def compute_g(self,pi):
        denominator=(pi*self.nu[:,None]).sum(axis=0)
        self.debug_shape(denominator,[self.nb_messages])
        g=(pi[:,:,None]*self.nu[:,None,None]*self.U[:,None,:]).sum(axis=0)
        self.debug_shape(g,[self.nb_messages,self.nb_actions])
        return g/denominator[:,None]  
    def compute_theta(self,g):
        assert self.varepsilon is not None
        max_g,_=g.max(axis=1)
        exp=torch.exp((g-max_g[:,None])/self.varepsilon)
        self.debug_shape(exp,[self.nb_messages,self.nb_actions])
        theta=self.q*exp
        denom=theta.sum(axis=1)
        return theta/denom[:,None]
    def objective(self,pi):
        self.debug_shape(pi,[self.nb_states,self.nb_messages])
        g = self.compute_g(pi) # Calculer g en utilisant pi, U, et nu
        theta = self.compute_theta(g) # Calculer theta en utilisant g, q, et epsilon
        return (theta[None,:,:]*pi[:,:,None]*self.mu[:,None,None]*self.V[:,None,:]).sum()
    def project(self,x):
        pi=torch.from_numpy(x)
        pi_projected=torch.zeros_like(pi)
        for i in range(pi.shape[0]):
            row = pi[i, :]
            sorted_row, _ = torch.sort(row, descending=True)
            cumulative_sum = torch.cumsum(sorted_row, dim=0)
            # Calcul de rho
            rho = torch.nonzero(sorted_row * torch.arange(1, len(row) + 1) > (cumulative_sum - 1), as_tuple=False).max()
            # Calcul du seuil theta
            theta = (cumulative_sum[rho] - 1) / (rho + 1)
            # Projection sur le simplexe
            pi_projected[i, :] = torch.clamp(row - theta, min=0)
        return pi_projected.numpy()
    def project_tangent(self,x,d) :
        d2=d-d.mean(axis=1)[:,None]
        d2[(x==0)*(d2<0)] =0. 
        d2[(x==1)*(d2>0)] =0. 
        return d2
    def value(self,x) : # returns the value of the function at point x wich is numpy
        assert x.shape==self.shape
        pi=torch.from_numpy(x).requires_grad_(True)
        f=self.objective(pi)
        f.backward()
        df=pi.grad
        return -f.item(),-df.numpy()
        

Pour faire bien on lance un test de dérivée numérique

In [21]:
alpha = 0.7
beta = 0.9
epsilon = 1e-1
U = torch.tensor([[1.0, 0.0],
                  [0.0, 1.0]])
V = torch.tensor([[0.0, 1.0],
                  [0.0, 1.0]])
mu = torch.tensor([alpha, 1 - alpha])
nu = mu
q = torch.tensor([beta, 1 - beta]).expand(2,2)
pi0 = torch.tensor([[4/7, 3/7],
                  [0, 1]])

P=Problem(U,V,mu,mu,varepsilon=epsilon,q=q)
P.verbose(pi0)
print('### test de la fonction value')
x=pi0.numpy()
print(P.value(x))
    

We have (state,message,action)= 2 2 2
We have 1= tensor([1., 1.])
We have 1= tensor([1., 1.])
theta tensor([[9.9999e-01, 5.0444e-06],
        [9.0000e-01, 1.0000e-01]])
objective tensor(0.0600)
### test de la fonction value
(-0.060002017764477666, array([[-3.53108784e-06,  5.60000000e-01],
       [-3.17796378e-05, -3.00000000e-01]]))


In [22]:
U = torch.tensor([[1.0, 0.0],
                  [0.0, 1.0],[0.0, 1.0]])
V = torch.tensor([[0.0, 1.0],
                  [0.0, 1.0],[0.0, 1.0]])
mu = torch.tensor([alpha, 0.5*(1 - alpha),0.5*(1 - alpha)])
nu = mu
q = torch.tensor([beta, 1 - beta]).expand(4,2)
pi0 = torch.tensor([[4/7,4/7,0,0],
                  [3/14,3/14,0.5,0.5],[3/14,3/14,0.5,0.5]])
P=Problem(U,V,mu,mu,varepsilon=epsilon,q=q)
P.verbose(pi0)

We have (state,message,action)= 3 4 2
We have 1= tensor([1.1429, 1.4286, 1.4286])
We have 1= tensor([1., 1., 1., 1.])
theta tensor([[9.9992e-01, 8.0434e-05],
        [9.9992e-01, 8.0434e-05],
        [4.0843e-04, 9.9959e-01],
        [4.0843e-04, 9.9959e-01]])
objective tensor(0.3000)


In [23]:
def deriv_num(function,a,d) :
    """test numerically the derivative and the Hessian of a function.       
    Parameters
    ----------
    function : instance of a class
        The function to be tested it must have the following methods, where x is a 1d vector
        of size n
            -- function.eval(x) : evaluation of J at point x, must return a float
            -- function.grad(x) : evaluation of the gradient of J at point x, must a 1d vector of size n
    a : 1d vector of size n
        Point at which the numerical derivatives are evaluated
    d : 1d vector of size n
        Direction in which the numerical derivatives are evaluated
    
   Ouput 
   -----
   This function does not have an output, it prints a string s.
    """        
    eps_range=[0.1**(i+1) for i in range(12)]
    f0,df0=function.value(a)
    for eps in  eps_range:
        s='eps {:1.1e}'.format(eps)
        f,_=function.value(a+eps*d)
        ratio=(f-f0)/(eps*(df0*d).sum()) 
        s+=' grad: ({:1.1e} =? 0)'.format(np.abs(ratio-1)) 
        print(s)

U = torch.tensor([[1.0, 0.0],
                  [0.0, 1.0],[0.0, 1.0]])
V = torch.tensor([[0.0, 1.0],
                  [0.0, 1.0],[0.0, 1.0]])
mu = torch.tensor([alpha, 0.5*(1 - alpha),0.5*(1 - alpha)])
nu = mu
q = torch.tensor([beta, 1 - beta]).expand(4,2)
pi0 = torch.tensor([[4/7,4/7,0,0],
                  [3/14,3/14,0.5,0.5],[3/14,3/14,0.5,0.5]])
P=Problem(U,V,mu,mu,varepsilon=epsilon,q=q)

np.random.seed(42)
a=np.random.randn(P.size).reshape(P.shape)
d=np.random.randn(P.size).reshape(P.shape)
deriv_num(P,a,d)

eps 1.0e-01 grad: (9.8e-01 =? 0)
eps 1.0e-02 grad: (5.7e-02 =? 0)
eps 1.0e-03 grad: (5.5e-03 =? 0)
eps 1.0e-04 grad: (5.5e-04 =? 0)
eps 1.0e-05 grad: (5.5e-05 =? 0)
eps 1.0e-06 grad: (5.5e-06 =? 0)
eps 1.0e-07 grad: (5.5e-07 =? 0)
eps 1.0e-08 grad: (4.3e-08 =? 0)
eps 1.0e-09 grad: (2.4e-07 =? 0)
eps 1.0e-10 grad: (1.5e-06 =? 0)
eps 1.0e-11 grad: (7.8e-06 =? 0)
eps 1.0e-12 grad: (4.4e-05 =? 0)


On va maintenant charger une méthode linéaire de Wolfe

In [24]:
def dot(a,b) :
    return (a*b).sum()
    
def ls_wolfe(x,function,step,descent,f,df) :
    step_min,step_max=0.,np.inf
    scal=dot(df,descent)
    if scal > 0 :
        print('WARNING with scal',scal)
    step2=step
    eps1,eps2=1.e-4,0.9
    i=0
    while i<100 :
        i=i+1
        x2=function.project(x+step2*descent)
        f2,df2=function.value(x2)
        if dot(x2-x,df) >=0 :
            print('We have a problem',dot(x2-x,df),dot(descent,df))
        if f2>f+eps1*dot(x2-x,df) : # step is too big, decrease it
            step_max=step2
            step2=0.5*(step_min+step_max)
        else :
            if dot(df2,x2-x) < eps2*dot(df,x2-x) : # step is too small, increase it
                step_min=step2
                step2=min(0.5*(step_min+step_max),2*step_min)
            else :
                return x2,f2,df2,step2
    print('We do not exit Wolfe')
    print(f2>f+eps1*step2*scal,dot(df2,descent) < eps2*scal)
    return x2,f2,df2,step2
    

In [25]:

def optimize(function,itermax = 1000,tol=1.e-6,verbose=True):
    np.random.seed(42)
    x=np.random.randn(function.size).reshape(function.shape)
    x=function.project(x)
    list_costs=[]
    list_grads=[]
    nbiter = 0
    f,df=function.value(x)
    df_tangent=function.project_tangent(x,-df)
    norm_grad=np.linalg.norm(df_tangent)
    err=2*tol
    if verbose :  
        print('iter={:4d} f={:1.3e} df={:1.3e}'.format(nbiter,f,err))
    list_costs.append(f)
    list_grads.append(norm_grad)
    while (err > tol) and (nbiter < itermax):
        descent=-df
        x_old=np.copy(x)
        x,f,df,step = ls_wolfe(x, function,1., descent,f,df)
        norm_grad = np.linalg.norm(function.project_tangent(x,-df))
        list_costs.append(f)
        list_grads.append(norm_grad)
        err=norm_grad
        nbiter+=1
        if verbose : 
            print('iter={:4d} f={:1.3e} err={:1.3e} s={:1.3e}'.format(nbiter,f,err,step))
        if (err <= tol):
            if verbose : print("Success !!! Algorithm converged !!!")
            return x,list_costs,list_grads
    if verbose : print("FAILED to converge")

Et on obtient


In [26]:
alpha = 0.7
beta = 0.9
epsilon = 1e-3
U = torch.tensor([[1.0, 0.0],
                  [0.0, 1.0]])
V = torch.tensor([[0.0, 1.0],
                  [0.0, 1.0]])
mu = torch.tensor([alpha, 1 - alpha])
nu = mu
q = torch.tensor([beta, 1 - beta]).expand(2,2)
pi0 = torch.tensor([[4/7, 3/7],
                  [0, 1]])
P=Problem(U,V,mu,mu,varepsilon=epsilon,q=q)
x,costs,grad=optimize(P,tol=1.e-5,verbose=True)

iter=   0 f=-4.091e-01 df=2.000e-05
iter=   1 f=-5.873e-01 err=6.871e+00 s=6.875e-01
iter=   2 f=-5.896e-01 err=4.949e-01 s=1.953e-03
iter=   3 f=-5.933e-01 err=4.305e-01 s=1.562e-02
iter=   4 f=-5.938e-01 err=5.884e-01 s=7.812e-03
iter=   5 f=-5.940e-01 err=8.065e-02 s=1.953e-03
iter=   6 f=-5.940e-01 err=4.448e-02 s=3.906e-03
iter=   7 f=-5.940e-01 err=2.858e-02 s=3.906e-03
iter=   8 f=-5.940e-01 err=1.714e-02 s=3.906e-03
iter=   9 f=-5.940e-01 err=1.078e-02 s=3.906e-03
iter=  10 f=-5.940e-01 err=6.605e-03 s=3.906e-03
iter=  11 f=-5.940e-01 err=4.118e-03 s=3.906e-03
iter=  12 f=-5.940e-01 err=2.540e-03 s=3.906e-03
iter=  13 f=-5.940e-01 err=1.577e-03 s=3.906e-03
iter=  14 f=-5.940e-01 err=9.754e-04 s=3.906e-03
iter=  15 f=-5.940e-01 err=6.048e-04 s=3.906e-03
iter=  16 f=-5.940e-01 err=3.744e-04 s=3.906e-03
iter=  17 f=-5.940e-01 err=2.320e-04 s=3.906e-03
iter=  18 f=-5.940e-01 err=1.437e-04 s=3.906e-03
iter=  19 f=-5.940e-01 err=8.901e-05 s=3.906e-03
iter=  20 f=-5.940e-01 err=5.513e

In [27]:
pi=x.reshape([P.nb_states,P.nb_messages])
print(pi)

[[0.57916921 0.42083079]
 [0.         1.        ]]
