In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import import_ipynb
%matplotlib inline

In [15]:
from Data.frecency import sample, frecency_points
from Data.frecency import sample_suggestions_normal as sample_suggestions

In [5]:
# Display the reference frecency weights imported from Data.frecency.
frecency_points

array([120.,  84.,  60.,  36.,  12., 200., 140., 100.,  60.,  20., 140.,
        98.,  70.,  42.,  14.])

In [6]:
def svm_loss(preds, ys, delta=0):
    """Multiclass hinge (SVM) loss for a single sample.

    Parameters
    ----------
    preds : sequence of scores, one per candidate.
    ys : array whose argmax marks the correct candidate.
    delta : required margin between the correct score and every other score.

    Returns
    -------
    Sum over all *incorrect* candidates of max(0, score + delta - correct_score).
    """
    correct = ys.argmax()
    score_correct = preds[correct]

    loss = 0

    for i, pred in enumerate(preds):
        # The correct candidate must not compete against itself; otherwise it
        # contributes a constant `delta` to every sample's loss.
        if i == correct:
            continue
        loss += max(0, pred + delta - score_correct)

    return loss

In [7]:
def rank_accuracy(y, preds):
    """Fraction of samples whose top-scored candidate has the maximal label.

    For each (labels, scores) pair the candidate with the highest score is
    looked up in the labels; the sample counts as a hit when that label
    equals the largest label of the sample.
    """
    hits = sum(
        1.0
        for labels, scores in zip(y, preds)
        if labels[scores.argmax()] == labels.max()
    )
    return hits / len(y)

In [8]:
class ModelCheckpoint:
    """Callback that tracks the best model seen so far on freshly sampled validation data.

    Parameters
    ----------
    metric_fn : callable ``metric_fn(y_val, predictions)`` returning a score
        where larger is better.
    data_generator : callable ``data_generator(n)`` returning ``(X_val, y_val)``.
    num_sampled : number of validation samples requested on every call.

    Attributes
    ----------
    best_metric : highest metric observed so far (``-inf`` before the first call).
    best_model : independent snapshot of the model that achieved ``best_metric``.
    """

    def __init__(self, metric_fn, data_generator, num_sampled=10000):
        self.best_model = None
        self.best_metric = -np.inf
        self.metric_fn = metric_fn
        self.num_sampled = num_sampled
        self.data_generator = data_generator

    def __call__(self, model):
        """Evaluate `model` on new validation data and snapshot it if it is the best so far."""
        import copy

        X_val, y_val = self.data_generator(self.num_sampled)
        metric = self.metric_fn(y_val, model.predict(X_val))
        if metric > self.best_metric:
            self.best_metric = metric
            # Keep a deep copy: the training loop keeps mutating the live model
            # after this callback returns, so holding a plain reference would
            # make `best_model` always mirror the latest weights.
            self.best_model = copy.deepcopy(model)
            print("[ModelCheckpoint] New best model with %.5f validation accuracy" % metric)
        else:
            print("validation: %.3f accuracy" % metric)

In [9]:
import random

class Server:
    """Coordinator that owns the weight vector ``W`` and aggregates client updates."""

    def __init__(self, clients):
        """Store the client pool and initialise ``W`` as noisy frecency points.

        Uniform noise in [-150, 150) is added to ``frecency_points``, the result
        is cast to int32 and clipped from below at 1.
        """
        self.clients = clients

        noise = (np.random.random(size=len(frecency_points)) - 0.5) * 300
        perturbed = np.int32(frecency_points + noise)
        self.W = np.maximum(1, perturbed)

    def model_fit(self, optimizer, num_iterations, num_clients_per_iteration, constraints=[], callbacks=[]):
        """Run the training loop.

        Each iteration samples clients without replacement, averages their
        updates and losses, invokes the callbacks on the current (pre-step)
        model, adds the optimizer's int32-cast step to ``W`` and finally applies
        the constraints in order.

        Returns ``(update_list, W_list)``: the per-iteration tuples of client
        updates, and copies of ``W`` before training and after every iteration.
        """
        update_list = []
        W_list = [self.W.copy()]

        for round_number in range(1, num_iterations + 1):
            participants = random.sample(self.clients, num_clients_per_iteration)
            responses = [participant.request_update(self) for participant in participants]
            updates, losses = zip(*responses)
            update_list.append(updates)

            gradient = np.mean(updates, axis=0)
            loss = np.mean(losses, axis=0)
            print("[%d/%d] training loss across clients %.5f" % (round_number, num_iterations, loss))

            for callback in callbacks:
                callback(self)

            # In-place add keeps W an integer array; the step is truncated to int32.
            self.W += np.int32(optimizer(gradient))

            for constraint in constraints:
                self.W = constraint(self.W)

            W_list.append(self.W.copy())

        print("\n update_list: ",update_list)
        print("\n W_list", W_list)
        return update_list, W_list

    def predict(self, X):
        """Return a list with ``x.dot(W)`` for every element ``x`` of ``X``."""
        return [x.dot(self.W) for x in X]
    
class FrecencyConstraints:
    """Constraint that shifts a vector upward so that no entry is negative."""

    def __call__(self, gradient):
        """Return `gradient` minus its minimum when that minimum is negative, else unchanged values."""
        lowest = gradient.min()
        shift = lowest if lowest < 0 else 0
        return gradient - shift

In [10]:
def full_loss(model, loss_fn, X, y):
    """Average of ``loss_fn(prediction, target)`` over the model's predictions for X.

    The sum runs over the zipped (prediction, target) pairs and is divided by
    ``len(X)``.
    """
    total = 0
    for prediction, target in zip(model.predict(X), y):
        total = total + loss_fn(prediction, target)
    return total / len(X)

In [11]:
class ClientSide:
    """Simulated client that estimates a descent direction on its own local data.

    Parameters
    ----------
    data_generator : zero-argument callable returning ``(X, y)`` for this client.
    delta : hinge-loss margin forwarded to ``svm_loss``.
    """

    def __init__(self, data_generator, delta=0):
        self.data_generator = data_generator
        # Previously hard-coded to 0, which silently discarded the argument.
        self.delta = delta

    def _loss(self, model, X, y):
        """Mean hinge loss of `model` on (X, y) using this client's margin."""
        return full_loss(model, lambda preds, ys: svm_loss(preds, ys, self.delta), X, y)

    def request_update(self, model, eps=1):
        """Estimate the update for `model` by central finite differences.

        Each weight ``model.W[i]`` is probed at ``-eps`` and ``+eps`` and then
        restored. The i-th entry of the result is
        ``(loss(W_i - eps) - loss(W_i + eps)) / (2 * eps)``, i.e. the *negated*
        central-difference slope, so a positive entry means increasing that
        weight lowers the loss.

        Returns ``(update, loss)`` where `update` is a list with one entry per
        feature and `loss` is the loss at the unperturbed weights.
        """
        X, y = self.data_generator()
        loss = self._loss(model, X, y)

        num_features = X[0].shape[1]
        gradient = []

        for i in range(num_features):
            # Restore from a saved value rather than undoing with +=/-=, so an
            # integer W with fractional eps cannot drift, and restore even if a
            # loss evaluation raises.
            original = model.W[i]
            try:
                model.W[i] = original - eps
                loss_minus = self._loss(model, X, y)

                model.W[i] = original + eps
                loss_plus = self._loss(model, X, y)
            finally:
                model.W[i] = original

            gradient.append((loss_minus - loss_plus) / (2 * eps))
        return gradient, loss

## Optimizers

In [12]:
class RProp:
    """Sign-based optimizer with one adaptive, rounded step size per feature.

    A step size grows by factor `alpha` (capped at `max_value`) when the
    current and previous gradient entries share a sign, shrinks by factor
    `beta` (floored at `min_value`) when the signs differ, and is left alone
    when their product is zero. Step sizes are rounded on every call.
    """

    def __init__(self, learning_rate, num_features, min_value=0.0000001, max_value=50, alpha=1.2, beta=0.5):
        self.learning_rates = np.full(num_features, learning_rate)
        self.min = min_value
        self.max = max_value
        self.a = alpha
        self.b = beta

        self.t = 0
        self.last_gradient = None

    def __call__(self, gradient):
        """Adapt the step sizes and return ``step_size * sign(gradient)`` per feature."""
        # No adaptation on the first call: there is no previous gradient yet.
        if self.t >= 1:
            for idx in range(len(gradient)):
                agreement = gradient[idx] * self.last_gradient[idx]
                rate = self.learning_rates[idx]
                if agreement > 0:
                    self.learning_rates[idx] = min(rate * self.a, self.max)
                elif agreement < 0:
                    self.learning_rates[idx] = max(rate * self.b, self.min)

        self.learning_rates = np.round(self.learning_rates)

        self.t += 1
        self.last_gradient = gradient

        step = self.learning_rates * np.sign(gradient)
        print("\n RProp : ",step)
        return step



In [13]:
# Simulated client pool: 100 ClientSide instances. Each client's generator calls
# sample_suggestions with int(draw from Exponential(1.0)) + 1, which is always >= 1
# (presumably the number of samples that client holds; verify against Data.frecency).
clients = [ClientSide(lambda: sample_suggestions(np.int32(np.random.exponential(1.0)) + 1)) for _ in range(100)]

In [41]:
# Seed both generators: numpy drives the weight initialisation and the per-client
# sample-size draw, while the stdlib `random` module drives the client selection
# inside Server.model_fit.
np.random.seed(10)
random.seed(10)
opt = RProp(2., len(frecency_points), min_value=1, max_value=3, alpha=2., beta=0.6)
server = Server(clients)
# random.sample draws without replacement and raises ValueError when asked for
# more clients than the pool holds, so cap the request at the pool size.
updates, Ws = server.model_fit(optimizer=opt,
                               num_iterations=30,
                               num_clients_per_iteration=min(400, len(clients)),
                               constraints=[FrecencyConstraints()],
                               callbacks=[ModelCheckpoint(rank_accuracy, sample_suggestions, 1000)])

[1/30] training loss across clients 137.35913

rank_accuracy result 0.379
[ModelCheckpoint] New best model with 0.37900 validation accuracy

 RProp :  [-2.  2. -2. -2. -2.  2.  2. -2.  2. -2.  2. -2.  2. -2. -2.]
[2/30] training loss across clients 115.13448

rank_accuracy result 0.359
validation: 0.359 accuracy

 RProp :  [-3.  3. -3. -3. -3.  3.  3. -3.  3.  1.  3. -3.  3. -3. -3.]
[3/30] training loss across clients 112.06038

rank_accuracy result 0.445
[ModelCheckpoint] New best model with 0.44500 validation accuracy

 RProp :  [-3.  3. -3. -3. -3.  3.  3. -3. -2.  2. -2. -3.  3. -3. -3.]
[4/30] training loss across clients 105.96704

rank_accuracy result 0.453
[ModelCheckpoint] New best model with 0.45300 validation accuracy

 RProp :  [-3.  3. -3. -3. -3.  3.  3. -3.  1.  3. -3. -3.  3. -3. -3.]
[5/30] training loss across clients 95.87425

rank_accuracy result 0.493
[ModelCheckpoint] New best model with 0.49300 validation accuracy

 RProp :  [-3.  3. -3. -3.  0.  3.  3. -3.  2. 

In [42]:
# Write each iteration's client updates to its own zero-padded CSV file.
for i, iteration_updates in enumerate(updates):
    np.savetxt("updates-%.2d.csv" % i, iteration_updates, fmt="%.7f")

In [43]:
# Persist the weight history returned by model_fit (the initial weights followed by
# one snapshot per iteration), cast to int32 and written as integers.
np.savetxt("weights.csv", np.int32(Ws), fmt="%d")