# 导入工具包

In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
import numpy as np
from sklearn.metrics import r2_score
from VFL_LinearRegression import *
from sklearn.metrics import mean_squared_error
from phe import paillier
%load_ext autoreload
%autoreload 2

# 使用普通线性回归

## 准备数据 
糖尿病数据集

In [2]:
# Load the diabetes regression dataset: feature matrix X and target vector y.
dataset = load_diabetes()
X, y = dataset.data, dataset.target
# Seed the split so that Restart & Run All reproduces the same partition,
# and therefore the same losses and metrics in the cells further down.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Append a constant column of ones as the LAST feature so that the last
# weight plays the role of the intercept.
X_train = np.column_stack((X_train, np.ones(len(X_train))))
X_test = np.column_stack((X_test, np.ones(len(X_test))))
# Print the shape of every split as a sanity check.
for split in [X_train, X_test, y_train, y_test]:
    print(split.shape)

(309, 11)
(133, 11)
(309,)
(133,)


In [3]:
# Hyper-parameters of the plaintext ridge-regression baseline.
config = {
    'lambda': 0.4,   # L2 regularisation coefficient
    'lr': 0.01,      # learning rate
    'n_iters': 10,   # number of training iterations
}

In [None]:
# Plaintext ridge regression trained with full-batch gradient descent.
# `weights` and `loss_history` are read again by later cells.
weights = np.zeros(X_train.shape[1])
loss_history = []
for epoch in range(config['n_iters']):
    # Residual of the current model; shared by the loss and the gradient.
    residual = X_train.dot(weights) - y_train
    # Objective evaluated BEFORE this iteration's update:
    # half the squared error plus half the L2 penalty.
    loss = 0.5 * np.sum(np.square(residual)) + 0.5 * config['lambda'] * np.sum(np.square(weights))
    loss_history.append(loss)
    # Gradient of the objective with respect to the weights.
    grad = X_train.T.dot(residual) + config['lambda'] * weights
    # The step is the learning rate times the gradient, divided by the sample count.
    weights = weights - config['lr'] * grad / len(X_train)
    print('*'*8, loss, '*'*8)
    print('weights:{}'.format(weights))

# 纵向联邦线性回归

## 准备数据集  
使用糖尿病数据集 
垂直划分  
B拥有标签

In [7]:
# Vertical (feature-wise) partition of the design matrix:
# party A receives columns 0-5, party B receives columns 6-10
# (column 10 is the constant ones column appended earlier).
idx_A = list(range(0, 6))
idx_B = list(range(6, 11))
XA_train, XA_test = X_train[:, idx_A], X_test[:, idx_A]
XB_train, XB_test = X_train[:, idx_B], X_test[:, idx_B]
# Print the shape of each party's blocks.
blocks = {
    'XA_train': XA_train,
    'XB_train': XB_train,
    'XA_test': XA_test,
    'XB_test': XB_test,
}
for block_name, block in blocks.items():
    print(block_name, block.shape)

XA_train (309, 6)
XB_train (309, 5)
XA_test (133, 6)
XB_test (133, 5)


# 训练过程

配置config

In [52]:
# Hyper-parameters handed to the federated clients. They match the values
# used by the plaintext baseline, which keeps the two loss curves comparable.
config = {
    'lambda': 0.4,   # L2 regularisation coefficient
    'lr': 0.01,      # learning rate
    'n_iters': 10,   # number of training iterations
}

初始化客户端对象

In [8]:
# Build the three participants of the vertical federated protocol.
# Party A is constructed from its feature block and the config only (no labels are passed).
clientA = ClientA(XA_train,config)
# Party B is constructed from its feature block, the training labels and the config.
clientB = ClientB(XB_train,y_train,config)
# Party C receives only the config; the protocol notes in this notebook describe it
# as the party that creates the key pair and decrypts (implementation not visible here).
clientC = ClientC(config)

建立连接

In [9]:
# Register every party with each of its peers under the peer's one-letter name.
# (`other_clinet` is the attribute name used by the client classes.)
parties = {'A': clientA, 'B': clientB, 'C': clientC}
for owner in parties.values():
    for peer_name, peer in parties.items():
        if peer is owner:
            # A party does not register itself.
            continue
        owner.other_clinet[peer_name] = peer

In [10]:
# Show the peer table of every party.
for party in (clientA, clientB, clientC):
    print(party.other_clinet)

{'B': <VFL_LinearRegression.ClientB object at 0x7fd129c08430>, 'C': <VFL_LinearRegression.ClientC object at 0x7fd129c12a90>}
{'A': <VFL_LinearRegression.ClientA object at 0x7fd129c12d30>, 'C': <VFL_LinearRegression.ClientC object at 0x7fd129c12a90>}
{'A': <VFL_LinearRegression.ClientA object at 0x7fd129c12d30>, 'B': <VFL_LinearRegression.ClientB object at 0x7fd129c08430>}


## 训练流程实现
一    初始化A的参数weights，初始化B的参数weights，C创建公钥和私钥，并将公钥发送给A，B

二    1、 A方计算[[u_a]] , [[L_a]]发送给B方

​        2、B方计算[[d]]发送给A, 计算[[L]]发给C

三    1、A方计算[[dL_a]]，将[[masked_dL_a]] 发送给C

​        2、B方计算[[dL_b]]，将[[masked_dL_b]]发送给C

​        3、C方解密[[L]]，并将[[masked_dL_a]]解密后发送给A，将[[masked_dL_b]]解密后发送给B

In [11]:
# One pass of the federated protocol per iteration. The calls must stay in this
# order because each task consumes what the previous one sent. The step
# descriptions below are translated from the notebook's protocol notes; the
# string arguments name the receiving parties (presumably resolved through
# each client's `other_clinet` table -- confirm in VFL_LinearRegression).
for i in range(config['n_iters']):
    # 1. C creates a key pair and distributes the public key to A and B
    clientC.task_1('A','B')
    # 2.1 A computes [[u_a]] and [[L_a]] and sends them to B
    clientA.task_1('B')
    # 2.2 B computes [[d]] and sends it to A; B computes [[L]] and sends it to C
    clientB.task_1('A','C')
    # 3.1 A computes [[dL_a]] and sends [[masked_dL_a]] to C
    clientA.task_2('C')
    # 3.2 B computes [[dL_b]] and sends [[masked_dL_b]] to C
    clientB.task_2('C')
    # 3.3 C decrypts [[L]]; decrypts [[masked_dL_a]] and sends it to A; [[masked_dL_b]] goes to B
    clientC.task_2('A','B')
    # 4.1 A and B update their models
    clientA.task_3()
    clientB.task_3()

******** 4482934.5 ********
A weights : [0.00885201 0.00596332 0.0180488  0.01786043 0.00522641 0.00451066]
B weights : [-0.01540872  0.01612476  0.01961885  0.0118286   1.53116505]
******** 4410796.718796585 ********
A weights : [0.01769672 0.01189607 0.03610017 0.03570789 0.01045204 0.00902136]
B weights : [-0.03079945  0.03222942  0.03920877  0.02366211  3.04699773]
******** 4340095.382033676 ********
A weights : [0.0265342  0.01779855 0.05415405 0.05354249 0.01567689 0.01353208]
B weights : [-0.04617234  0.04831417  0.05877004  0.03550044  4.54765157]
******** 4270801.867530334 ********
A weights : [0.03536451 0.02367106 0.07221042 0.07136434 0.02090095 0.01804282]
B weights : [-0.06152756  0.0643792   0.07830294  0.04734355  6.03327854]
******** 4202888.123426587 ********
A weights : [0.04418772 0.02951388 0.09026923 0.08917357 0.02612422 0.02255355]
B weights : [-0.07686529  0.08042468  0.09780773  0.05919137  7.50402913]
******** 4136326.656819311 ********
A weights : [0.0530038

In [21]:
# Element-wise gap between the plaintext loss curve and the one recorded by C.
plain_losses = np.array(loss_history)
federated_losses = np.array(clientC.loss_history)
plain_losses - federated_losses

array([ 0.00000000e+00, -9.31322575e-10, -9.31322575e-10, -9.31322575e-10,
        0.00000000e+00, -9.31322575e-10,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  4.65661287e-10])

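## 预测

说明：这里仅以 `lr=1e-2` 训练了 10 轮，模型远未收敛（损失仍在 4×10^6 量级），因此下面的 MSE 很大、R² 为负属于预期现象。该示例的重点是验证联邦训练与普通训练的损失一致（见上一个单元格），而非预测精度。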

In [22]:
# The joint prediction is the sum of each party's partial linear score.
partial_A = XA_test.dot(clientA.weights)
partial_B = XB_test.dot(clientB.weights)
y_pred = partial_A + partial_B

In [23]:
# Mean squared error of the federated model on the held-out split.
mean_squared_error(y_true=y_test, y_pred=y_pred)

25033.750801867864

In [40]:
# Coefficient of determination of the federated model on the held-out split.
r2_score(y_true=y_test, y_pred=y_pred)

-4.210138322736245