In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [26]:
import import_ipynb

In [28]:
from Data.frecency import sample, frecency_points
from Data.frecency import sample_suggestions_normal as sample_suggestions

## Linear Regression

In [29]:
from sklearn.linear_model import LinearRegression

In [30]:
n = int(1e6)
print("n",n)
noise = np.random.normal(0, 2, size=(n))
print("\n noise",noise)
X, y = sample(n)
print("\nx",X);
print("\ny",y)
y += noise
print("\nnew y",y)

n 1000000

 noise [-0.38448855 -1.89836125 -5.21798474 ...  2.37600832 -1.36802302
 -0.89428946]

x [[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

y [ 36. 120.  12. ... 200.  84.  12.]

new y [ 35.61551145 118.10163875   6.78201526 ... 202.37600832  82.63197698
  11.10571054]


In [31]:
model = LinearRegression(fit_intercept=False)
print(model)
model.fit(X, y)

LinearRegression(fit_intercept=False)


In [32]:
model.coef_

array([119.9908454 ,  83.99445256,  59.99194624,  36.00201694,
        11.99813473, 199.98870842, 139.9914027 , 100.01628061,
        60.00103744,  19.99794161, 140.00620165,  98.01876691,
        69.99496774,  41.99725315,  13.99693151])

In [33]:
frecency_points

array([120.,  84.,  60.,  36.,  12., 200., 140., 100.,  60.,  20., 140.,
        98.,  70.,  42.,  14.])

In [34]:
zip(model.coef_, frecency_points)

<zip at 0x1d461bfa480>

In [36]:
def normalize(X):
    X_flat = flatten(X)
    print("flatten",X_flat)
    mu = X_flat.mean(axis=0)
    print("\nmu",mu)
    return [x - mu for x in X]

In [37]:
def flatten(X):
    X_flat = []

    for x in X:
        X_flat += list(x)
        
    return np.array(X_flat)

In [38]:
X = normalize(X)


flatten [0. 0. 0. ... 0. 0. 0.]

mu 0.06666666666666667


## Optimizers

In [39]:
class GradientDescent:
    def __init__(self, learning_rate):
        self.learning_rate = learning_rate
        
    def __call__(self, gradient):
        print("\nGradientDescent: ",self.learning_rate * gradient)
        return self.learning_rate * gradient
    
class AdaptiveGradientDescent:
    def __init__(self, learning_rate, num_features):
        self.learning_rates = np.full(num_features, learning_rate)
        self.min = 0.0000001
        self.max = 50
        self.a = 1.2
        self.b = 0.5
        
        self.t = 0
        self.last_gradient = None
    
    def __call__(self, gradient):
        if self.t >= 1:
            for i in range(len(gradient)):
                if gradient[i] * self.last_gradient[i] > 0:
                    self.learning_rates[i] = min(self.learning_rates[i] * self.a, self.max)
                elif gradient[i] * self.last_gradient[i] < 0:
                    self.learning_rates[i] = max(self.learning_rates[i] * self.b, self.min)
            
        self.t += 1
        self.last_gradient = gradient
        
        print("\n AdaptiveGradientDescent: ",self.learning_rates * gradient)
        return self.learning_rates * gradient
    

In [42]:
def svm_loss(preds, ys, delta=0):
    correct = ys.argmax()
    score_correct = preds[correct]
    
    loss = 0
    
    for i, pred in enumerate(preds):
        loss += max(0, pred + delta - score_correct)            
            
    return loss

In [2]:
def full_loss(model, loss_fn, X, y):
    preds = model.predict(X)
    return sum([loss_fn(pi, yi) for pi, yi in zip(preds, y)]) / len(X)

In [43]:
class ModelCheckpoint:
    def __init__(self, metric_fn, data_generator, num_sampled=10000):
        self.best_model = None
        self.best_metric = -np.inf
        self.metric_fn = metric_fn
        self.num_sampled = num_sampled
        self.data_generator = data_generator
    
    def __call__(self, model):
        X_val, y_val = self.data_generator(self.num_sampled)
        metric = self.metric_fn(y_val, model.predict(X_val))
        
        if metric > self.best_metric:
            self.best_metric = metric
            self.best_model = model
            print("[ModelCheckpoint] New best model with %.5f validation accuracy" % metric)
        else:
            print("validation: %.3f accuracy" % metric)

In [50]:
def rank_accuracy(y, preds):
    correct = 0.
    
    for yi, pi in zip(y, preds):
        if yi[pi.argmax()] == yi.max():
            correct += 1
    print("\nra",correct / len(y))        
    return correct / len(y)

In [51]:
class SVMRanking:
    def __init__(self, delta):
        self.delta = delta
        
    def fit(self, data_generator, optimizer, num_iterations=10, callbacks=[]):
        X, y = data_generator(1)
        num_features = X[0].shape[1]
        self.W = frecency_points + (np.random.random(size=(num_features)) - 0.5) * 100
        
        for j in range(num_iterations):
            X, y = data_generator(4000)
            
            preds = self.predict(X)
            gradient = np.zeros(num_features)

            for xi, pi, yi in zip(X, preds, y):
                correct = yi.argmax()
                score_correct = pi[correct]

                for i, predicted_score in enumerate(pi):
                    gradient -= xi[i] * max(0, predicted_score + self.delta - score_correct)
            
            gradient /= len(X)
            
            loss = np.mean([svm_loss(pi, yi) for pi, yi in zip(self.predict(X), y)])
            accuracy = rank_accuracy(y, model.predict(X))
            
            print("[%d/%d] training: %.5f loss, %.3f accuracy" % (j + 1, num_iterations, loss, accuracy))
            
            for callback in callbacks:
                callback(self)
            
            self.W += optimizer(gradient)
            
    def predict(self, X):
        preds = []
        
        for x in X:
            scores = x.dot(self.W)
            preds.append(scores)
        
        return preds

### Training

In [46]:
np.random.seed(0)
model = SVMRanking(delta=0.)
model.fit(data_generator=sample_suggestions,
          optimizer=GradientDescent(0.1),
          num_iterations=48,
          callbacks=[ModelCheckpoint(rank_accuracy, sample_suggestions)])

[1/48] training: 11.37885 loss, 0.704 accuracy
[ModelCheckpoint] New best model with 0.69890 validation accuracy

GradientDescent:  [-122.36405941 -107.48999198  -37.48290809    0.           -5.96792421
    0.          -19.25965145    0.          -27.58203727    0.
    0.           -3.79508718  -17.42396833    0.            0.        ]
[2/48] training: 20.77378 loss, 0.699 accuracy
validation: 0.680 accuracy

GradientDescent:  [   0.            0.         -152.35681685  -13.76368635  -36.32932907
    0.           -2.42519558  -54.79312979 -149.76243926    0.
    0.          -80.58033171 -117.61071445  -15.59179987    0.        ]
[3/48] training: 19.16303 loss, 0.877 accuracy
[ModelCheckpoint] New best model with 0.88550 validation accuracy

GradientDescent:  [   0.          -45.89852283    0.         -151.69075912 -141.82331544
    0.           -0.63079582    0.           -2.02847128  -84.95572452
    0.            0.            0.          -50.72393272  -97.13940778]
[4/48] training: 

In [47]:
np.random.seed(0)
opt = AdaptiveGradientDescent(0.1, len(frecency_points))
model = SVMRanking(delta=0.)
model.fit(data_generator=sample_suggestions,
          optimizer=opt,
          num_iterations=48,
          callbacks=[ModelCheckpoint(rank_accuracy,  sample_suggestions)])

[1/48] training: 11.37885 loss, 0.704 accuracy
[ModelCheckpoint] New best model with 0.69890 validation accuracy

 AdaptiveGradientDescent:  [-0.4078802  -0.35829997 -0.12494303  0.         -0.01989308  0.
 -0.06419884  0.         -0.09194012  0.          0.         -0.01265029
 -0.05807989  0.          0.        ]
[2/48] training: 11.67939 loss, 0.705 accuracy
[ModelCheckpoint] New best model with 0.69980 validation accuracy

 AdaptiveGradientDescent:  [-0.44666552 -0.47875831 -0.16872485  0.         -0.02125797  0.
 -0.06671128  0.         -0.12078476  0.          0.         -0.01425169
 -0.08437253  0.          0.        ]
[3/48] training: 12.47571 loss, 0.724 accuracy
[ModelCheckpoint] New best model with 0.73450 validation accuracy

 AdaptiveGradientDescent:  [-0.61520703 -0.60068602 -0.19197632  0.         -0.02829744  0.
 -0.08062199  0.         -0.16323485  0.          0.         -0.02252831
 -0.09395063  0.          0.        ]
[4/48] training: 10.88291 loss, 0.733 accuracy
va

## Federated Learning

In [59]:
import random
class Server:
    def __init__(self, clients):
        self.clients = clients
        
        num_features = len(frecency_points)
        self.W = np.int32(frecency_points + (np.random.random(size=(num_features)) - 0.5) * 100)
    
    def fit(self, optimizer, num_iterations, num_clients_per_iteration, constraints=[], callbacks=[]):
        for j in range(num_iterations):
            clients = random.sample(self.clients, num_clients_per_iteration)
            updates, losses = zip(*[client.request_update(self) for client in clients])
            
            gradient = np.mean(updates, axis=0)
            loss = np.mean(losses, axis=0)
            
            print("[%d/%d] training loss across clients %.5f" % (j + 1, num_iterations, loss))
            
            for callback in callbacks:
                callback(self)
            
            self.W += np.int32(optimizer(gradient))
            
            for constraint in constraints:
                self.W = constraint(self.W)
    
    def predict(self, X):
        preds = []
        
        for x in X:
            scores = x.dot(self.W)
            preds.append(scores)
        
        return preds

In [65]:
class Client:
    def __init__(self, data_generator, delta=0):
        self.data_generator = data_generator
        self.delta = 0
    
    def request_update(self, model, eps=1):
        X, y = self.data_generator()
        loss = full_loss(model, svm_loss, X, y)
        
        num_features = X[0].shape[1]
        gradient = []
        
        for i in range(num_features):
            model.W[i] -= eps
            loss1 = full_loss(model, svm_loss, X, y)
            
            model.W[i] += 2 * eps
            loss2 = full_loss(model, svm_loss, X, y)
            
            finite_difference = (loss1 - loss2) / (2 * eps)
            gradient.append(finite_difference)
            
            model.W[i] -= eps
        
        return gradient, loss

In [66]:
class FrecencyConstraints:
    def __call__(self, gradient):
        return gradient - min(0, gradient.min())

In [1]:
class RProp:
    def __init__(self, learning_rate, num_features, min_value=0.0000001, max_value=50, alpha=1.2, beta=0.5):
        self.learning_rates = np.full(num_features, learning_rate)
        self.min = min_value
        self.max = max_value
        self.a = alpha
        self.b = beta
        
        self.t = 0
        self.last_gradient = None
    
    def __call__(self, gradient):
        if self.t >= 1:
            for i in range(len(gradient)):
                if gradient[i] * self.last_gradient[i] > 0:
                    self.learning_rates[i] = min(self.learning_rates[i] * self.a, self.max)
                elif gradient[i] * self.last_gradient[i] < 0:
                    self.learning_rates[i] = max(self.learning_rates[i] * self.b, self.min)
            
        self.learning_rates = np.round(self.learning_rates)
            
        self.t += 1
        self.last_gradient = gradient
        
        print("\nRProp",self.learning_rates * np.sign(gradient))
        return self.learning_rates * np.sign(gradient)

In [70]:
clients = [Client(lambda: sample_suggestions(np.int32(np.random.exponential(1.0)) + 1)) for _ in range(5000)]

In [71]:
np.random.seed(10)
server = Server(clients)
server.fit(optimizer=GradientDescent(0.1),
          num_iterations=48,
           num_clients_per_iteration=400,
           constraints=[FrecencyConstraints()],
          callbacks=[ModelCheckpoint(rank_accuracy, sample_suggestions, 5000)])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[-19.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.]

 19.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[   0.    0.    0.    0.    0.    0.    0.    0.    0.    0. -147.    0.
    0.    0.    0.]

 147.0
[  0.           0.           0.         -16.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.          -2.33333333   0.        ]

 18.333333333333332
[0. 0. 0. 0. 0. 0. 0. 0. 0.

In [72]:
zip(frecency_points[ais], server.W[ais])

<zip at 0x1d40265c9c0>

In [73]:
np.random.seed(10)
opt = RProp(2., len(frecency_points), min_value=1, max_value=3, alpha=2., beta=0.6)
server = Server(clients)
server.fit(optimizer=opt,
          num_iterations=48,
           num_clients_per_iteration=400,
           constraints=[FrecencyConstraints()],
          callbacks=[ModelCheckpoint(rank_accuracy, sample_suggestions, 5000)])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[-19.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.]

 19.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

 0.0
[   0.    0.    0.    0.    0.    0.    0.    0.    0.    0. -147.    0.
    0.    0.    0.]

 147.0
[  0.           0.           0.         -16.           0.
   0.           0.           0.           0.           0.
   0.           0.           0.          -2.33333333   0.        ]

 18.333333333333332
[0. 0. 0. 0. 0. 0. 0. 0. 0.

In [74]:
opt.learning_rates

array([3., 2., 3., 3., 3., 2., 2., 3., 3., 2., 3., 3., 2., 3., 3.])