In [1]:
import numpy as np
import matplotlib.pyplot as plt


In [28]:
# Loading data

data_matrix_train, COP_train, data_matrix_test, COP_test, names = np.load('data_center_data_matrix.npy', allow_pickle=True)


### Question 3.1

Si $Aw = b$ alors $(Aw)_t = \tilde{x}(t)^Tw_1+w_0-y(t)\tilde{x}(t)^Tw_2 = b_t =y(t)$. En isolant les $y(t)$ le résultat est immédiat et $y(t) = \frac{w_1^T\tilde{x}(t)+w_0}{w_2^T\tilde{x}(t)+1}$ et car $\tilde{x}(t)^Tw_i = w_i^T\tilde{x}(t)$ pour $i = 1,2$

In [29]:
# Constructing matrices for min_w ||A w - b||_2**2

matrix_mean = np.mean(data_matrix_train, axis=0)
M = data_matrix_train - matrix_mean
matrix_std = np.std(M, axis=0)
M = M / matrix_std

A = np.hstack([M, np.ones((M.shape[0],1)), -(M.T * COP_train[:,3]).T])
b = COP_train[:,3]


### Question 2


In [5]:
w_train, residuals_train, _, _ = np.linalg.lstsq(A, b, rcond=None)

print("Solution for w_train:", w_train)
print("Residuals_train:", residuals_train)

Solution for w_train: [-0.00927821  0.08309371 -0.03672704 ...  0.01980595 -0.03057174
 -0.01188614]
Residuals_train: []


In [7]:
# Constructing matrices for the test set

M_test = (data_matrix_test - matrix_mean) / matrix_std
A_test = np.hstack([M_test, np.ones((M_test.shape[0],1)), -(M_test.T * COP_test[:,3]).T])
b_test = COP_test[:,3]


# Loading raw data
import pandas as pd
data = pd.read_csv('Raw_Dataset_May.csv')

def name_to_subcategory_and_details(col_name):
    if np.isreal(col_name):
        col_name = names[col_name]
    indices = np.nonzero((data['NAME'] == col_name).values)[0]
    if len(indices) > 0:
        subcategory = data['SUBCATEGORY'].iloc[[indices[0]]].values[0]
        details = data['DETAILS'].iloc[[indices[0]]].values[0]
        return subcategory, details
    else:
        print('unknown name')

### Question 3.3

Le résultat obtenu est assez grand, l'approximation n'est pas convenable

In [48]:
w_test, residuals_test, _, _ = np.linalg.lstsq(A_test, b_test, rcond=None)

print("Solution for w_train:", w_train)
MSE = np.sum((A_test@w_train-b_test)**2)/A_test.shape[0]
print("MSE : " + str(MSE))

Solution for w_train: [-0.00927821  0.08309371 -0.03672704 ...  0.01980595 -0.03057174
 -0.01188614]
MSE : 780.898479352339


### Question 3.4


Le minimiseur de $\frac{1}{2}(||Aw-b||^2+\lambda||w||^2)$ est obtenu par $\hat w^{(rdg)} = (A^T A + \lambda I)^{-1}A^Tb$

In [49]:
lambda_val = 100
A_train_reg = np.vstack([A, np.sqrt(lambda_val) * np.eye(A.shape[1])])
b_train_reg = np.concatenate([b, np.zeros(A.shape[1])])

AtA = A.T@A
inv = np.linalg.inv(AtA + lambda_val*np.identity(AtA.shape[0]))
Atb = A.T@b

w_train_reg = inv@Atb

print("Solution for w_train_reg:", w_train_reg)

MSE1 = np.sum((A_test@w_train-b_test)**2)/A_test.shape[0]
MSE2 = np.sum((A_test@w_train_reg-b_test)**2)/A_test.shape[0]
print("MSE without regularization : " + str(MSE1))
print("MSE with regularization " + str(MSE2))


Solution for w_train_reg: [-0.01238313  0.05780406 -0.00127775 ...  0.01590558 -0.03567897
  0.0131232 ]
MSE without regularization : 780.898479352339
MSE with regularization 301.0548280877943


La regularisation améliore la qualité de la solution

### Question 3.5

On a $\nabla f_1 = A^T(Aw-b) + \lambda w$ Puis $\nabla^2 f_1 = (A^TA+\lambda I_d)$. Or pour tout $X$, $X^T(A^TA + \lambda I_d)X \geq 0$ ($\lambda \geq 0$) donc $f_1$ est convexe.

### Question 3.6

On remarque que le gradient de $f_1$ est L-lipschitzien avec $L = A^TA+\lambda$. On choisira donc $\frac 1 L$ comme pas d'après le cours

On commence les itérations avec $w =0$

In [16]:
lambda_val = 100
L = lambda_val + np.linalg.norm(A.transpose()@A)
compt = 0
w = np.zeros(A.shape[1])
grad = A.transpose()@(A@w-b) + lambda_val*w
while np.linalg.norm(grad) >1 :
  print("iter : " + str(compt) + ", grad value: " + str(np.linalg.norm(grad)))
  compt+=1
  w -= 1/L*grad
  grad = A.transpose()@(A@w-b) + lambda_val*w
print(compt)


[1;30;43mLe flux de sortie a été tronqué et ne contient que les 5000 dernières lignes.[0m
iter : 113760, grad value: 1.1939494487999578
iter : 113761, grad value: 1.1939067914955968
iter : 113762, grad value: 1.1938641358464088
iter : 113763, grad value: 1.193821481852205
iter : 113764, grad value: 1.1937788295133127
iter : 113765, grad value: 1.1937361788292795
iter : 113766, grad value: 1.1936935298001954
iter : 113767, grad value: 1.1936508824260776
iter : 113768, grad value: 1.1936082367068865
iter : 113769, grad value: 1.1935655926422393
iter : 113770, grad value: 1.1935229502322457
iter : 113771, grad value: 1.1934803094769202
iter : 113772, grad value: 1.1934376703762088
iter : 113773, grad value: 1.1933950329298069
iter : 113774, grad value: 1.193352397137817
iter : 113775, grad value: 1.1933097630001042
iter : 113776, grad value: 1.1932671305165998
iter : 113777, grad value: 1.1932244996872432
iter : 113778, grad value: 1.1931818705120418
iter : 113779, grad value: 1.1931392

Il a fallu 118759 itérations.

### Question 4.1

On a $f_2 = \frac 1 2 ||Aw-b||^2$ et $g_2 = \lambda ||w||_1$

$\nabla f_2 = A^T(Aw-b)$

$prox_{g_2}(x) = argmin_y (g_2(y) + \frac 1 2 ||x-y||^2) = (sgn(x_i)max((|x_i|-\lambda),0))_i$

### Question 4.2

In [62]:

lambda_Val = 200
L = np.linalg.norm(A.transpose()@A)

def prox(x):
  return np.sign(x) * np.maximum(np.abs(x) - lambda_Val, 0)

def f2(x):
    return 1/2 * np.linalg.norm(A@x-b)**2

x = np.random.random_integers(0,300,A.shape[1])
grad = A.transpose()@(A@x-b)
compt2 = 0
while np.sum(np.abs(x - prox(x-grad/L))) > 0.0001:
  compt2 += 1
  print("iter : " + str(compt2) + ", F2 value: " + str((f2(x) + lambda_Val*np.sum(np.abs(x)))))
  x = prox(x-grad/L)
  print(x)
  grad = A.transpose()@(A@x-b)


iter : 1, F2 value: 6289502442013.152
[ 0.         38.06829709 37.73150629 ... 16.93392648  0.
  0.        ]
iter : 2, F2 value: 53602855524.37931
[ 0.  0.  0. ...  0. -0. -0.]


  x = np.random.random_integers(0,300,A.shape[1])
