In [1]:
# Add the parent directory to the Python import path.
# Without this line the notebook cannot import the project code (the `src` package).
import sys; sys.path.insert(0, '..')

import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import IntSlider, interact
from math import sqrt

from sklearn.preprocessing import scale

from tqdm import tqdm, trange # For progress bar
%matplotlib inline
%load_ext autoreload
%autoreload 2

from src.models.CostModel import LogisticModel, RMSEModel

from src.machinery.GradientDescent import ArtemisDescent, FL_VanillaSGD
from src.machinery.GradientUpdateMethod import ArtemisUpdate
from src.machinery.Parameters import *
from src.machinery.PredefinedParameters import *

from src.utils.ErrorPlotter import *
from src.utils.Constants import *
from src.utils.DataClustering import *
from src.utils.DataPreparation import build_data_logistic, add_bias_term
from src.utils.Utilities import pickle_loader, pickle_saver
from src.utils.runner.RunnerUtilities import *
from src.utils.runner.ResultsOfSeveralDescents import ResultsOfSeveralDescents

# Prefix of the pickle files that this notebook saves and reloads
# (filename + "-iid-obj_min", filename + "-iid-descent").
filename = "rcv1"

# Number of devices the dataset is split across; also passed as `nb_devices`
# to the descent parameters and to the results container.
nb_devices_for_the_run = 10

In [2]:
import os

import numpy as np
import cyanure as cyan
import scipy.sparse

# Location of the rcv1 archive (about 1Gb, n=781265, p=47152).
# The original author's path is kept as the default so existing runs are unchanged;
# set the RCV1_NPZ_PATH environment variable to run the notebook on another machine.
rcv1_path = os.environ.get(
    "RCV1_NPZ_PATH",
    '/home/constantin/OneDrive/Documents/Etudes/Thèse/dataset/rcv1/rcv1.npz',
)

# NOTE(review): allow_pickle=True unpickles objects stored in the archive, which can
# execute arbitrary code -- only load archives coming from a trusted source.
# The context manager closes the underlying .npz file once both arrays have been read.
with np.load(rcv1_path, allow_pickle=True) as data:
    Y_data = data['y']
    X_data = data['X']

# `.all()` is used to unwrap the stored object (presumably an object array holding
# a SciPy sparse matrix -- TODO confirm). Transposing the CSC matrix yields
# an n x p matrix in CSR format.
X_data = scipy.sparse.csc_matrix(X_data.all()).T

# Normalize the rows of X in-place, without performing any copy.
cyan.preprocess(X_data, normalize=True, columns=False)

In [3]:
# Made explicit: `torch` otherwise only reaches this cell through the star imports.
import torch

# Split the dataset into contiguous shards of equal size, one shard per device.
# Features stay SciPy sparse matrices; labels become float64 torch tensors.
X_merged = X_data
Y_merged = torch.tensor(Y_data, dtype=torch.float64)
number_of_items = X_merged.shape[0]
# Integer division: the trailing `number_of_items % nb_devices_for_the_run` points
# are not assigned to any device.
number_of_items_by_devices = number_of_items // nb_devices_for_the_run
print("Number of points by devices: ", number_of_items_by_devices)

X, Y = [], []
for i in range(nb_devices_for_the_run):
    start = number_of_items_by_devices * i
    stop = number_of_items_by_devices * (i + 1)

    # Slice and convert while staying sparse. Going through a dense array
    # (`.A` / torch.tensor) allocates rows x 47152 float64 values per device
    # only to re-sparsify them, and `.A` is no longer available on sparse
    # matrices in recent SciPy releases.
    device_features = X_merged[start:stop].tocsc().astype(np.float64)
    # A dense round-trip would drop explicitly stored zeros; do the same here.
    device_features.eliminate_zeros()
    X.append(device_features)

    # Keep the first label column of every row. `.clone()` gives each device
    # its own tensor instead of a view on Y_merged.
    Y.append(Y_merged[start:stop][:, 0].clone())
print("There is " + str(len(X)) + " devices.")

# Adding a columns of "1" to take into account a potential bias.
#X = add_bias_term(X)
dim_notebook = X[0].shape[1]
for x in X:
    print("Number of points on this device:", x.shape)

Number of points by devices:  2000
There is 10 devices.
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)
Number of points on this device: (2000, 47152)


In [4]:
# Number of features, read from the second axis of the full data matrix.
dim_notebook = X_data.shape[1]

In [5]:
# Sanity check: how are the two classes distributed on each device?
print("Diplaying the ratio between state 1 and state -1.")
print("If data is iid, the ratio should be close to 0.5")
for device_labels in Y:
    # Number of labels exactly equal to 1 on this device.
    count_state_one = (device_labels == 1).sum().item()
    # Sum of the absolute label values; this equals the number of points
    # only if every label is +1 or -1.
    sum_abs_labels = abs(device_labels).sum().item()
    print("ratio of state 1 on this device: ", abs(count_state_one / sum_abs_labels))

Diplaying the ratio between state 1 and state -1.
If data is iid, the ratio should be close to 0.5
ratio of state 1 on this device:  0.461
ratio of state 1 on this device:  0.4915
ratio of state 1 on this device:  0.4465
ratio of state 1 on this device:  0.472
ratio of state 1 on this device:  0.47
ratio of state 1 on this device:  0.481
ratio of state 1 on this device:  0.4675
ratio of state 1 on this device:  0.4625
ratio of state 1 on this device:  0.4565
ratio of state 1 on this device:  0.4755


In [6]:
# Inspect one row (index 5) of the first device's feature matrix.
X[0][5]

<1x47152 sparse matrix of type '<class 'numpy.float64'>'
	with 29 stored elements in Compressed Sparse Column format>

In [12]:
%%time
import gc

# Trigger a garbage collection before starting the reference run.
gc.collect()

# Reference run: FL_VanillaSGD with the settings below (stochastic=False,
# quantization_param=0, no momentum, bidirectional=False) on the logistic model.
obj_min_by_N_descent = FL_VanillaSGD(
    Parameters(
        n_dimensions=dim_notebook,
        nb_devices=nb_devices_for_the_run,
        nb_epoch=5000,
        quantization_param=0,
        momentum=0.,
        verbose=True,
        cost_model=LogisticModel(),
        stochastic=False,
        bidirectional=False,
    )
)
obj_min_by_N_descent.set_data(X, Y)
obj_min_by_N_descent.run()

# The last recorded loss is pickled; the plotting cells reload it as `obj` and
# pass it to `res.get_loss` / `res.get_std`.
# (The former `obj_min_by_N = {}` initialisation was dead code: the name was
# always rebound to this scalar before being read.)
obj_min_by_N = obj_min_by_N_descent.losses[-1]
pickle_saver(obj_min_by_N, filename + "-iid-obj_min")

Computing Lipschitz constant ...
Done.
   it    |   obj   
      20 | 4.4318e-01
      40 | 3.6032e-01
      60 | 3.1553e-01
      80 | 2.8669e-01
Gradient Descent: execution time=5.486 seconds
Final loss :  0.2671044495199029

CPU times: user 33.5 s, sys: 292 ms, total: 33.7 s
Wall time: 11.3 s


In [13]:
# Evaluate the last model of the reference run on the data held by worker 0.
x = X[0]
w = obj_min_by_N_descent.model_params[-1]
mul = torch.FloatTensor(x.dot(w))

# A point is predicted to be in state 1 when sigmoid(<x_i, w>) > 0.5.
# Scores are flattened so that they align element-wise with the 1-D label
# vector Y[0], whatever the shape returned by the sparse product.
predicted_state_one = torch.sigmoid(mul).reshape(-1) > 0.5
nb_predicted_state_one = int(predicted_state_one.sum())
# A failure is a point predicted to be in state 1 whose true label is not 1.
failure = int((predicted_state_one & (Y[0] != 1)).sum())

# The count below is a number of *predictions*, not of labels.
print("Number of points predicted to be state 1: ", nb_predicted_state_one)
print("Failures:", failure)
if nb_predicted_state_one > 0:
    print("Percent of labels wrongly predicted to be state 1 for worker 0: "
          + str(failure / nb_predicted_state_one * 100) + "%")
else:
    # Avoid a ZeroDivisionError when the model never predicts state 1.
    print("No point of worker 0 was predicted to be state 1.")

Number of labels equal to 1:  868
Failures: 42
Percent of labels wrongly predicted to be state 1 for worker 0: 4.838709677419355%


In [None]:
%%time
# Run the descents for every entry of KIND_COMPRESSION on the same data and
# gather the results under the name reported by each parameter set.
# (The unused `X_number_of_bits` list of the previous version was removed: it
# was never read; the plots use the `X_number_of_bits` attribute of `res`.)
all_descent = {}
for type_params in tqdm(KIND_COMPRESSION):
    all_descent[type_params.name()] = multiple_run_descent(
        type_params, X, Y, model=LogisticModel(), use_averaging=True, nb_epoch=50
    )

# Wrap all runs in a single results object and persist it for the plotting cells.
res = ResultsOfSeveralDescents(all_descent, nb_devices_for_the_run)
pickle_saver(res, filename + "-iid-descent")

  0%|          | 0/5 [00:00<?, ?it/s]

SGD


### With Averaging

In [None]:
# Reload the pickled reference loss and the descent results.
# NOTE(review): pickle files must come from a trusted source.
obj = pickle_loader(f"{filename}-iid-obj_min")
res = pickle_loader(f"{filename}-iid-descent")

# Averaged loss against the default x-axis.
plot_error_dist(
    res.get_loss(obj, averaged=True),
    res.names,
    res.nb_devices_for_the_run,
    dim_notebook,
    all_error=res.get_std(obj, averaged=True),
    x_legend="Number of passes on data\n(Avg, iid)",
)

# Same curves, plotted against res.X_number_of_bits.
plot_error_dist(
    res.get_loss(obj, averaged=True),
    res.names,
    res.nb_devices_for_the_run,
    dim_notebook,
    x_points=res.X_number_of_bits,
    all_error=res.get_std(obj, averaged=True),
    x_legend="Communicated bits (Avg, iid)",
)


### Without Averaging

In [None]:
# Reload the pickled reference loss and the descent results.
# NOTE(review): pickle files must come from a trusted source.
obj = pickle_loader(f"{filename}-iid-obj_min")
res = pickle_loader(f"{filename}-iid-descent")

# Non-averaged loss against the default x-axis.
plot_error_dist(
    res.get_loss(obj),
    res.names,
    res.nb_devices_for_the_run,
    dim_notebook,
    x_legend="Number of passes on data (iid)",
    all_error=res.get_std(obj),
)

# Same curves, plotted against res.X_number_of_bits.
plot_error_dist(
    res.get_loss(obj),
    res.names,
    res.nb_devices_for_the_run,
    dim_notebook,
    x_points=res.X_number_of_bits,
    x_legend="Communicated bits (iid)",
    all_error=res.get_std(obj),
)