# Projet de Machine Learning

In [1]:
from tools import *
import math as math
%matplotlib inline



In [2]:
class OptimFunc:
    def __init__(self,f=None,grad_f=None,dim=2):
        self.f=f
        self.grad_f=grad_f
        self.dim=dim
    def init(self,low=-1,high=1):
        return random.random(self.dim)*(high-low)+low

class GradientDescent:
    def __init__(self,optim_f,eps=1e-4,max_iter=5000):
        self.optim_f=optim_f
        self.eps=eps
        self.max_iter=max_iter
    def reset(self):
        self.i=0
        self.w = self.optim_f.init()
        self.log_w=np.array(self.w)
        self.log_f=np.array(self.optim_f.f(self.w))
        self.log_grad=np.array(self.optim_f.grad_f(self.w))
    def optimize(self,reset=True):
        if reset:
            self.reset()
        while not self.stop():
            self.w = self.w - self.get_eps()*self.optim_f.grad_f(self.w)
            self.log_w=np.vstack((self.log_w,self.w))
            self.log_f=np.vstack((self.log_f,self.optim_f.f(self.w)))
            self.log_grad=np.vstack((self.log_grad,self.optim_f.grad_f(self.w)))
            #if self.i%200==0:
                #print(self.i," iterations ",self.log_f[self.i],self.log_grad[self.i],self.log_w[self.i])
                #,self.score(self.data,self.y),self.w.dot(self.w))
            self.i+=1
        print("Valeur de f: ",self.log_f[self.i],"\nValeur du gradient: ",self.log_grad[self.i],
                  "\nValeur de w: " ,self.log_w[self.i])
    def stop(self):
        return (self.i>2) and (self.max_iter and (self.i>self.max_iter))
    def get_eps(self):
        return self.eps

def gen_arti(centerx=1,centery=1,sigma=0.1,nbex=1000,data_type=0,eps=0.02):
    """ Generateur de donnees,
        :param centerx: centre des gaussiennes
        :param centery:
        :param sigma: des gaussiennes
        :param nbex: nombre d'exemples
        :param data_type: 0: melange 2 gaussiennes, 1: melange 4 gaussiennes, 2:echequier
        :param eps: bruit dans les donnees
        :return: data matrice 2d des donnnes,y etiquette des donnnees
    """
    if data_type==0:
         #melange de 2 gaussiennes
         xpos=np.random.multivariate_normal([centerx,centery],np.diag([sigma,sigma]),nbex//2)
         xneg=np.random.multivariate_normal([-centerx,-centery],np.diag([sigma,sigma]),nbex//2)
         data=np.vstack((xpos,xneg))
         y=np.hstack((np.ones(nbex//2),-np.ones(nbex//2)))
    if data_type==1:
        #melange de 4 gaussiennes
        xpos=np.vstack((np.random.multivariate_normal([centerx,centery],np.diag([sigma,sigma]),nbex//4),
                        np.random.multivariate_normal([-centerx,-centery],np.diag([sigma,sigma]),nbex//4)))
        xneg=np.vstack((np.random.multivariate_normal([-centerx,centery],np.diag([sigma,sigma]),nbex//4),
                        np.random.multivariate_normal([centerx,-centery],np.diag([sigma,sigma]),nbex//4)))
        data=np.vstack((xpos,xneg))
        y=np.hstack((np.ones(nbex//2),-np.ones(nbex//2)))

    if data_type==2:
        #echiquier
        data=np.reshape(np.random.uniform(-4,4,2*nbex),(nbex,2))
        y=np.ceil(data[:,0])+np.ceil(data[:,1])
        y=2*(y % 2)-1
    # un peu de bruit
    data[:,0]+=np.random.normal(0,eps,nbex)
    data[:,1]+=np.random.normal(0,eps,nbex)
    # on mélange les données
    idx = np.random.permutation((range(y.size)))
    data=data[idx,:]
    y=y[idx]
    return data,y

    

In [21]:
def f_K(x, y, sigma=1):
    return math.exp(-pow(np.linalg.norm(x-y), 2)/(2*pow(sigma,2)))

def HA(a, S, dataset):

    Kql= np.zeros((N,N))
    Kal = np.zeros((N,N))
    f = []
    a[0] = np.random.randint(0,1)
    for l,x in enumerate(dataset):
        fl = a[0]*f_K(x, x)
        for j,e in enumerate(S):
            fl += a[j]*K[l,j]
    
        f.append(fl)

    for i,x in enumerate(S):
        for j,y in enumerate(S):
            Kql[i,j] = f_K(x,y)
            
    Kql = np.asmatrix(np.asarray(Kql))
    f = np.asarray(f)
    a = np.asarray(a)
    
    return sum(y*f - np.log(1+np.exp(f)))+lamda/2 * a.T * Kql * a

def H(x, S, a_prec, dataset):

    a = a_optim(x, S, a_prec) 
    return HA(a, S, dataset)

def a_optim(x, S, a_prec):

    f = K*a_prec
    p = np.multiply(np.asarray(np.exp(f)),1/np.asarray((1+np.exp(f))))
    x = np.asarray(p*(1-p))
    W = np.asmatrix(np.zeros((x.shape[0], x.shape[0])))
    
    for i,element in enumerate(x):
        W[i,i] = element
        
    z = K*a_prec+np.linalg.inv(W)*(y-p)
    return np.linalg.inv(K.T*W*K + lamda*K) * K.T*W*z 

def ivm(dataset, y):

    N = dataset.shape[0]
    a = np.zeros((N,1))
    S = []
    
    lamda = 3
    
    Hk = 1
    Hk_1 = 0

    while abs(Hk-Hk_1)/Hk > 0.01:
    
        h = np.zeros((1, N))
        for i in range(N):
            S_b = S
            S_b.append(dataset[i])
            print H(dataset[i], S_b, a, dataset)
            h[i] = H(dataset[i], S_b, a, dataset)

        xl_opt = np.argmin(h)
        
        Hk_1 = Hk
        Hk = h[xl_opt]
        
        print xl_opt
        print Hk
        print Hk_1
        print dataset[xl_opt,:]
        
        S = S.append(dataset[xl_opt, :])
        np.delete(dataset, xl_opt)

        # update de a
        f = K*a_prec
        p = np.multiply(np.asarray(np.exp(f)),1/np.asarray((1+np.exp(f))))
        x = np.asarray(p*(1-p))
        W = np.asmatrix(np.zeros((x.shape[0], x.shape[0])))

        for i,element in enumerate(x):
            W[i,i] = element
        z = K*a_prec+np.linalg.inv(W)*(y-p)
        a = np.linalg.inv(K.T*W*K + lamda*K) * K.T*W*z 
        
    print S


In [22]:
data, y = gen_arti()
N = data.shape[0]
K = np.asmatrix(np.zeros((N,N)))
lamda=1
for i,x in enumerate(data):
    for j,e in enumerate(data):
        K[i,j] = f_K(x,e)

y = y.reshape((y.shape[0], 1))
ivm(data, y)

[[-693.14718056 -693.14718056]]
[[-693.14718056 -693.14718056]]
[[-693.14718056 -693.14718056]]


ValueError: could not broadcast input array from shape (2) into shape (1000)

In [None]:
def grad_HA(a, S):
    
    K = np.zeros((N,N))
    Kql= np.zeros((N,N))
    Kal = np.zeros((N,N))
    
    for i,x in enumerate(dataset):
        for j,y in enumerate(dataset):
            K[i,j] = f_K(x,y)
                
    a[0] = np.random.randint(0,1)
    for l,x in enumerate(dataset):
        fl = a[0]*f_K(x, x)
        for j,e in enumerate(S):
            fl += a[j]*K(x,e)
    
        f.append(fl)

    a = np.asarray(a)

    for i,x in enumerate(S):
        for j,y in enumerate(S):
            Kql[i,j] = f_K(x,y)
            
    Kql = np.asmatrix(np.asarray(Kql))
    f = np.asarray(f)
    a = np.asarray(a)
    
    Kal = np.asmatrix(np.asarray(Kal))
    for i,x in enumerate(dataset):
        for j,y in enumerate(S):
            Kal[i,j] = f_K(x,y)
            
    grad = zeros(1, Kal.shape[0])
    for i in range(0, Kal.shape[0]):
        grad(i) = Kal[i,i]*np.exp(sum(Kal*a))/(1+np.exp(sum(Kal*a)))
        
    return -Kal.T*y + lamda*Kql*a + grad
