In [1]:
!pip install -r requirements.txt



# Linear Regression with Numpy and OpenFL

In [2]:
from typing import List, Union
import numpy as np
import random
import time
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 7, 5

# We will use MSE as loss function and Ridge weights regularization
![image.png](https://www.analyticsvidhya.com/wp-content/uploads/2016/01/eq5-1.png)

In [3]:
import jax
import jax.numpy as jnp
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# create a dataset with n_features
X, y = make_regression(n_samples = 50000, n_features=3)


In [4]:
X, X_test, y, y_test = train_test_split(X, y)

In [5]:
def mse_loss_function1(W, X, y):
    y_pred = jnp.dot(X, W)
    mse_error = y_pred - y
    return jnp.mean(jnp.square(mse_error))

def mse_loss_function2(W, X, Y):
    def squared_error(x, y):
        y_pred = jnp.dot(x, W)
        return jnp.inner(y-y_pred, y-y_pred)
    vectorized_square_error = jax.vmap(squared_error)
    return jnp.mean(vectorized_square_error(X, Y), axis=0)

# @jax.jit
def update(W, x, y, lr):
    W = W - lr * jax.grad(mse_loss_function2, 0)(W, x, y)
    return W

In [6]:
class LinearRegression:
    def __init__(self, n_feat: int) -> None:
        self.weights = jnp.ones(n_feat)
    
    def mse(self, X, y) -> float:
        return mse_loss_function2(self.weights, X, y)
 
    def predict(self, X):
        return jnp.dot(X, self.weights)
    
    def fit(self, X, Y, n_epochs, lr, silent):
        
        start_time = time.time()
        print('Training Loss at start - W,b: ', self.mse(X, Y))
        
        for i in range(n_epochs):
            self.weights = update(self.weights, X, y, lr)
            print(str(i), 'Training Loss: ', self.mse(X, Y))

        print("--- %s seconds ---" % (time.time() - start_time))

    

In [7]:
X.shape[1:]

(3,)

In [8]:
# lr_model = LinearRegression(X.shape[1:])
# lr = 0.001
# epochs = 5001

# print(f"Initil Testset MSE: {lr_model.mse(X_test,y_test)}")
# lr_model.fit(X[:,np.newaxis],y, epochs, lr, silent=True)
# print(f"Final Testset MSE: {lr_model.mse(X_test,y_test)}")
# print(f"Final parameters: {lr_model.weights}")

# Now we run the same training on federated data

## Connect to a Federation

In [9]:
# Create a federation
from openfl.interface.interactive_api.federation import Federation

# please use the same identificator that was used in signed certificate
client_id = 'frontend'
director_node_fqdn = 'localhost'
director_port = 50049

federation = Federation(
    client_id=client_id,
    director_node_fqdn=director_node_fqdn,
    director_port=director_port,
    tls=False
)

In [10]:
shard_registry = federation.get_shard_registry()
shard_registry

{'envoy_instance': {'shard_info': node_info {
    name: "envoy_instance"
  }
  shard_description: "Allowed dataset types are `train` and `val`"
  sample_shape: "3"
  target_shape: "1",
  'is_online': True,
  'is_experiment_running': False,
  'last_updated': '2022-06-02 21:44:20',
  'current_time': '2022-06-02 21:44:21',
  'valid_duration': seconds: 10,
  'experiment_name': 'ExperimentName Mock'}}

### Data

In [11]:
from openfl.interface.interactive_api.experiment import TaskInterface, DataInterface, ModelInterface, FLExperiment

class LinRegDataSet(DataInterface):
    def __init__(self, **kwargs):
        """Initialize DataLoader."""
        self.kwargs = kwargs
        pass

    @property
    def shard_descriptor(self):
        """Return shard descriptor."""
        return self._shard_descriptor
    
    @shard_descriptor.setter
    def shard_descriptor(self, shard_descriptor):
        """
        Describe per-collaborator procedures or sharding.
        
        This method will be called during a collaborator initialization.
        Local shard_descriptor  will be set by Envoy.
        """
        self._shard_descriptor = shard_descriptor
        self.train_set = shard_descriptor.get_dataset("train")
        self.val_set = shard_descriptor.get_dataset("val")

    def get_train_loader(self, **kwargs):
        """Output of this method will be provided to tasks with optimizer in contract."""
        return self.train_set

    def get_valid_loader(self, **kwargs):
        """Output of this method will be provided to tasks without optimizer in contract."""
        return self.val_set

    def get_train_data_size(self):
        """Information for aggregation."""
        return len(self.train_set)

    def get_valid_data_size(self):
        """Information for aggregation."""
        return len(self.val_set)
    
lin_reg_dataset = LinRegDataSet()

### Model

In [12]:
framework_adapter = 'custom_adapter.CustomFrameworkAdapter'
fed_model = LinearRegression(X.shape[1:])
MI = ModelInterface(model=fed_model, optimizer=None, framework_plugin=framework_adapter)

# Save the initial model state
initial_model = LinearRegression(X.shape[1:])



### Tasks
We need to employ a trick reporting metrics. OpenFL decides which model is the best based on an *increasing* metric.

In [13]:
TI = TaskInterface()

@TI.add_kwargs(**{'lr': 0.01,
                   'epochs': 101})
@TI.register_fl_task(model='my_model', data_loader='train_data', \
                     device='device', optimizer='optimizer')     
def train(my_model, train_data, optimizer, device, lr, epochs):
    X, Y = train_data[:,:-1], train_data[:,-1]
    my_model.fit(X[:,np.newaxis], Y, epochs, lr, silent=True)
    return {'train_MSE': my_model.mse(X, Y),}

@TI.register_fl_task(model='my_model', data_loader='val_data', device='device')
def validate(my_model, val_data, device):
    X, Y = val_data[:,:-1], val_data[:,-1] 
    return {'validation_MSE': my_model.mse(X, Y),}

### Run

In [14]:
experiment_name = 'linear_regression_experiment'
fl_experiment = FLExperiment(federation=federation, experiment_name=experiment_name,
                            )

In [15]:
fl_experiment.start(model_provider=MI, 
                    task_keeper=TI,
                    data_loader=lin_reg_dataset,
                    rounds_to_train=2,)



In [16]:
fl_experiment.stream_metrics()

### Optional: start tensorboard

In [17]:
%%script /bin/bash --bg
tensorboard --host $(hostname --all-fqdns | awk '{print $1}') --logdir logs

In [19]:
last_model = fl_experiment.get_last_model()
best_model = fl_experiment.get_best_model()
print(best_model.weights)
print(last_model.weights)
print(f"last model MSE: {last_model.mse(X,y)}")
print(f"best model MSE: {best_model.mse(X,y)}")

[ 0.18155295  1.0125793  -0.32810533]
[ 0.07516366  1.0139713  -0.5028201 ]
last model MSE: 16889.4375
best model MSE: 16839.392578125


### Evaluate results

In [None]:
n_cols = 20
n_samples = 4
interval = 240
x_start = 60
noise = 0.3

X = None

for rank in range(n_cols):
    np.random.seed(rank)  # Setting seed for reproducibility
    x = np.random.rand(n_samples, 1) * interval + x_start
    x *= np.pi / 180
    X = x if X is None else np.vstack((X,x))
    y = np.sin(x) + np.random.normal(0, noise, size=(n_samples, 1))
    plt.plot(x,y,'+')
    
X.sort()    
Y_hat = last_model.predict(X)
plt.plot(X,Y_hat,'--')