In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation,Flatten
from keras import regularizers

import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn import metrics
from keras.models import model_from_json



Using TensorFlow backend.


In [2]:
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE

# Class definitions

In [3]:
class Data():
    """Wrap a DataFrame and hand out fractional row slices of it.

    Slices can be returned as-is (``sample``) or oversampled with SMOTE
    (``sample_smote``). The frame is expected to contain a ``Class`` column
    holding the label.
    """

    def __init__(self,data):
        self.data=data
        # Most recent SMOTE-resampled slice; an empty frame until
        # sample_smote() has been called at least once.
        self.dataSmote=pd.DataFrame()

    def sample(self,start,end):
        """Return the rows between two fractional positions of the frame.

        ``start`` and ``end`` are fractions of ``len(self.data)``; the slice is
        positional and half-open: ``[int(size*start), int(size*end))``.
        """
        size=len(self.data)
        return self.data[int(size*start):int(size*end)]

    def getData(self):
        """Return the slice produced by the last ``sample_smote`` call."""
        # The original definition omitted ``self`` and therefore raised a
        # TypeError on every call.
        return self.dataSmote

    def sample_smote(self,start,end):
        """Return a SMOTE-oversampled, row-shuffled copy of a fractional slice.

        The slice is taken with ``sample(start, end)``, the ``Class`` column is
        used as the target, and the result is stored in ``self.dataSmote`` as
        well as returned. ``Class`` is the last column of the result.
        """
        data=self.sample(start,end)
        features=data.drop("Class",axis=1)
        labels=np.array(data["Class"])  # 1-D target

        smt=SMOTE()
        # imbalanced-learn renamed fit_sample to fit_resample; prefer the new
        # name and fall back so older installations keep working.
        resample=getattr(smt,"fit_resample",None) or smt.fit_sample
        X_res,y_res=resample(np.array(features),labels)

        # Name the feature columns from the features themselves and append the
        # label afterwards, so the labels stay correct even when "Class" is not
        # the last column of the input frame.
        balanced=pd.DataFrame(X_res,columns=list(features.columns))
        balanced["Class"]=np.asarray(y_res).ravel()

        # Shuffle rows so original and synthetic samples are interleaved.
        self.dataSmote=balanced.sample(frac=1)
        return self.dataSmote

In [4]:
class Aggregator():
    """Combine the weights of three banks into an updated global weight set.

    The contribution of each bank is fixed by ``wB1``, ``wB2`` and ``wB3``.
    """

    def __init__(self):
        # Per-bank mixing coefficients (they sum to 1.0).
        self.wB1=0.55
        self.wB2=0.35
        self.wB3=0.10

    def aggregate(self,delta,B1,B2,B3):
        """Return the global weights after applying the weighted bank update.

        Parameters
        ----------
        delta : sequence of array-like
            Current global weights, one entry per layer tensor.
        B1, B2, B3 : sequence of array-like
            Weights of the three banks, laid out like ``delta``.

        Returns
        -------
        list
            One array per entry of ``delta``, computed as
            ``delta + (wB1*B1 + wB2*B2 + wB3*B3 - delta)``.

        Raises
        ------
        ValueError
            If the four weight sequences do not have the same length.
        """
        if not (len(delta)==len(B1)==len(B2)==len(B3)):
            raise ValueError("all weight lists must contain the same number of entries")

        # Work entry by entry: layer tensors have different shapes, and packing
        # such a ragged list into a single ndarray is rejected by current NumPy.
        merged=[]
        for g,b1,b2,b3 in zip(delta,B1,B2,B3):
            g=np.asarray(g)
            blended=self.wB1*np.asarray(b1) + self.wB2*np.asarray(b2) + self.wB3*np.asarray(b3)
            # New arrays are created; the caller's inputs are not modified.
            merged.append(g+(blended-g))
        return merged
    

In [5]:
class Model():
    """Small fully connected binary classifier with save/load helpers.

    The network takes 30 input features and has Dense layers of 32, 16 and 8
    ReLU units followed by a single sigmoid output. It is compiled with the
    Adam optimizer, binary cross-entropy loss and the accuracy metric.
    """

    def __init__(self):
        self.input_shape=(30,)
        self.model = Sequential()
        self.model.add(Dense(32, activation='relu',input_shape=self.input_shape))
        self.model.add(Dense(16, activation='relu'))
        self.model.add(Dense(8, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))
        self.model.compile(optimizer='adam',   # original note: rmsprop
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

    def saveModel(self,json_path="model.json",weights_path="model.h5"):
        """Write the architecture (JSON) and the weights (HDF5) to disk.

        The default paths are the ones ``loadModel`` reads by default.
        """
        model_json = self.model.to_json()
        with open(json_path, "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        self.model.save_weights(weights_path)
        print("Saved model to disk")

    def loadModel(self,json_path="model.json",weights_path="model.h5"):
        """Rebuild a compiled model from the files written by ``saveModel``.

        Returns the loaded model; ``self.model`` is not modified here.
        """
        # A context manager closes the file even if reading fails.
        with open(json_path, 'r') as json_file:
            loaded_model_json = json_file.read()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights(weights_path)
        print("Loaded model from disk")
        loaded_model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
        return loaded_model

    def getModel(self):
        """Return the underlying Keras model."""
        return self.model

    def run(self,X,Y,validation_split=0,load=True,epochs=5):
        """Fit the model on ``X``/``Y``.

        When ``load`` is true, ``self.model`` is first replaced by the model
        stored on disk (see ``loadModel``), so training starts from the saved
        weights rather than from this instance's current ones.
        """
        if load:
            self.model=self.loadModel()
        self.model.fit(X,Y,epochs=epochs,validation_split=validation_split, verbose=1)

    def evaluate(self,X,Y):
        """Return the second value of ``model.evaluate`` times 100.

        With the compile settings used in this class that value is the
        accuracy, so the result is an accuracy percentage.
        """
        return self.model.evaluate(X,Y)[1]*100

    def loss(self,X,Y):
        """Return the first value of ``model.evaluate`` (the loss)."""
        return self.model.evaluate(X,Y)[0]

    def predict(self,X):
        """Return the model's predictions for ``X``."""
        return self.model.predict(X)

    def getLayers(self):
        """Return the list of layers of the underlying model."""
        return self.model.layers

    def getWeights(self):
        """Return the model weights as given by ``get_weights``."""
        return self.model.get_weights()

    def setWeights(self,weight):
        """Overwrite the model weights with ``weight``."""
        self.model.set_weights(weight)


In [6]:
class Bank(Model):
    """A ``Model`` bundled with its own data and a train/test partition of it."""

    def __init__(self,data,split_size=0):
        super().__init__()
        self.setData(data,split_size)

    def setData(self,data,split_size=0):
        """Replace the bank's data and recompute the train/test partition."""
        self.data=data
        self.split(split_size)

    def getData(self):
        """Return the DataFrame currently assigned to this bank."""
        return self.data

    def split(self,split_size):
        """Populate ``X_train``, ``X_test``, ``Y_train`` and ``Y_test``.

        Features are every column except ``Class``; the target is the
        single-column frame ``data[['Class']]``. A ``split_size`` of 0 means
        no hold-out: the train and test attributes both refer to the full
        data. Any other value is passed to ``train_test_split`` as
        ``test_size``.
        """
        features=self.data.drop(['Class'],axis=1)
        target=self.data[['Class']]

        if split_size != 0:
            parts=train_test_split(features, target, test_size=split_size)
        else:
            parts=(features,features,target,target)

        self.X_train, self.X_test, self.Y_train, self.Y_test = parts

# EDA

In [7]:
# Load the raw dataset from the working directory and preview the first rows.
data = pd.read_csv('creditcard.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [8]:
# (rows, columns) of the raw frame.
data.shape

(284807, 31)

In [9]:
# NOTE: this cell rebinds `data`; run it once, right after loading the CSV.
std_scaler = StandardScaler()  # created here but not used in this cell
rob_scaler = RobustScaler()

# Robust-scale Amount and Time. Each fit_transform call refits rob_scaler, so
# after this cell the scaler holds the fit for Time (the last call).
scaled_amount = pd.Series(
    rob_scaler.fit_transform(data['Amount'].values.reshape(-1,1)).ravel(),
    index=data.index,
    name='scaled_amount',
)
scaled_time = pd.Series(
    rob_scaler.fit_transform(data['Time'].values.reshape(-1,1)).ravel(),
    index=data.index,
    name='scaled_time',
)

# Replace the raw columns with their scaled versions, placed first.
data = pd.concat(
    [scaled_amount, scaled_time, data.drop(['Time','Amount'], axis=1)],
    axis=1,
)

# Shuffle the rows. The federated splits below are contiguous fractional
# slices of this frame, so the shuffle is seeded to make them repeatable.
data = data.sample(frac=1, random_state=42)

# Original note: the fraud class has 492 rows.
fraud_data = data.loc[data['Class'] == 1]
non_fraud_data = data.loc[data['Class'] == 0]

# Despite the name, this is simply every fraud row followed by every
# non-fraud row; no rows are dropped or rebalanced here.
normal_distributed_data = pd.concat([fraud_data, non_fraud_data])

# Shuffle dataframe rows
new_data = normal_distributed_data.sample(frac=1, random_state=42)

new_data.head()

Unnamed: 0,scaled_amount,scaled_time,V1,V2,V3,V4,V5,V6,V7,V8,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class
69188,-0.181793,-0.368719,-1.373669,1.681458,0.856925,0.084847,-0.905125,-1.214641,-0.008443,0.775643,...,0.004643,-0.150017,-0.583534,0.131279,0.688137,-0.13986,0.05967,0.124571,0.05017,0
183984,0.391253,0.485767,2.219304,-1.714562,-2.396642,-2.571874,1.136436,3.443811,-1.582553,0.790591,...,-0.292766,-0.148788,-0.091763,0.237288,0.688691,-0.141106,-0.144035,0.022049,-0.051637,0
170959,0.171872,0.419836,2.010373,0.000753,-1.952661,1.242297,0.655689,-0.580965,0.491131,-0.206686,...,-0.324693,0.017948,0.096671,-0.006121,0.369656,0.44849,-0.500643,-0.036769,-0.057618,0
150315,0.321246,0.097922,1.978214,0.038608,-1.75811,0.455132,0.650015,-0.286199,0.007624,-0.130452,...,-0.117415,-0.467669,-1.149514,0.25938,0.028174,-0.291914,0.13622,-0.104944,-0.044384,0
225015,1.760637,0.697494,-1.472941,0.252156,3.014367,1.315021,-1.38798,1.807696,-0.734495,-0.24941,...,-0.476991,0.88497,0.652405,-0.674504,0.057619,0.364067,-0.200854,0.118688,0.046721,0


In [10]:
#Using SMOTE

# dataSmote=np.array(data.drop("Class",axis=1))
# y=np.array(data[['Class']])
# smt=SMOTE()
# dataSmote,y=smt.fit_sample(dataSmote,y)
# dataSmote=pd.DataFrame(dataSmote)
# y=pd.DataFrame(y)
# dataSmote['Class']=y
# dataSmote=dataSmote.sample(frac=1)
# dataSmote.columns=list(data.columns)

In [11]:
# f, axes = plt.subplots(ncols=2, figsize=(20,6))

# sb.countplot('Class',data=data,ax=axes[0])
# axes[0].set_title('Fraud Distribution Original')

# sb.countplot('Class',data=dataSmote,ax=axes[1])
# axes[1].set_title('Fraud Distribution SMOTE')

# Federated Learning

In [12]:
# results collects evaluation scores keyed by a bank/round label;
# aggregator merges the bank weights into the global model.
results={}
aggregator=Aggregator()

In [13]:
datum=Data(data)

# Each dataset is a contiguous fractional slice of `data`, resampled with
# SMOTE, created in this order: global model, round-A banks 1-3, round-B
# banks 1-3, test set.
# NOTE(review): the test slice is SMOTE-resampled as well, so later scores on
# Data_Test include synthetic rows - confirm this is intended.
(Data_Global,
 Data_Model_1A, Data_Model_2A, Data_Model_3A,
 Data_Model_1B, Data_Model_2B, Data_Model_3B,
 Data_Test) = [
    datum.sample_smote(lo,hi)              #use datum.sample  if smote not required
    for lo,hi in (
        (0,0.1),
        (0.1,0.3), (0.3,0.45), (0.45,0.50),
        (0.50,0.70), (0.70,0.85), (0.85,0.90),
        (0.90,1),
    )
]

In [72]:
def getWeights(weights):
    """Flatten a sequence of weight arrays into a single 1-D array.

    Every entry of ``weights`` is flattened in row-major order and the pieces
    are joined in the order given. An empty ``weights`` yields an empty array.
    """
    # One vectorised concatenate replaces the element-by-element list building
    # (which also re-flattened every scalar in a second pass).
    flat_parts=[np.asarray(w).ravel() for w in weights]
    if not flat_parts:
        # np.concatenate rejects an empty list; keep the empty-array result.
        return np.array([])
    return np.concatenate(flat_parts)

def getAdversarialData(probabilities,data,gradient,result):
    """Build one feature row per prediction.

    Each row is laid out as
    ``[probability, *data row, *gradient, result]``, where the data row is
    picked by position to match the probability's position in the flattened
    ``probabilities``. The same ``gradient`` and ``result`` are appended to
    every row. The rows are returned as a NumPy array.
    """
    rows=[
        [prob, *np.array(data[pos:pos+1])[0], *gradient, result]
        for pos,prob in enumerate(probabilities.reshape(-1))
    ]
    return np.array(rows)


In [15]:
# Train the initial global model on Data_Global with a 20% hold-out.
# load=False: train the freshly built network instead of loading one from disk.
GlobalBank=Bank(Data_Global,0.2)
GlobalBank.run(GlobalBank.X_train, GlobalBank.Y_train,load=False)

# Score on the hold-out (Bank.evaluate returns a percentage).
results['BankG.1']=GlobalBank.evaluate(GlobalBank.X_test,GlobalBank.Y_test)

# Persist the global model; Bank.run loads these files by default.
GlobalBank.saveModel()







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Saved model to disk


In [77]:
# Build labelled rows from 10 consecutive 30-row chunks of Data_Model_1A.
# Each chunk trains a fresh Bank (run() loads the saved global model first);
# the rows combine its predictions, its inputs and the flattened difference
# between the global weights and the bank's weights.

# The global weights do not change inside the loop, so flatten them once.
global_flat=getWeights(GlobalBank.getWeights())

result=[]
for i in range(10):
    Bank1=Bank(Data_Model_1A[i*30:(i+1)*30])
    Bank1.run(Bank1.X_train,Bank1.Y_train)
    # Named "gradient" in the original; it is a weight difference.
    gradient=global_flat - getWeights(Bank1.getWeights())
    result.extend(
        getAdversarialData(Bank1.predict(Bank1.X_test),Bank1.X_test,gradient,True)
    )


Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [78]:
# One row per sample across all chunks processed above.
np.array(result).shape

(300, 1697)

In [67]:
# NOTE(review): `output` is not defined in any visible cell, so this cell
# raises NameError on Restart & Run All. It looks like leftover scratch work
# that depended on a since-removed cell - confirm, then define `output` or
# delete the cell.
temp=[]
temp.extend(output)
temp.extend(output)
np.array(temp).shape

(60, 1697)

In [64]:


# Round A: each bank trains on its own slice with a 20% hold-out.
# run() defaults to load=True, so every bank starts from the global model
# files currently on disk.
Bank1=Bank(Data_Model_1A,0.2)
Bank1.run(Bank1.X_train,Bank1.Y_train)

Bank2=Bank(Data_Model_2A,0.2)
Bank2.run(Bank2.X_train,Bank2.Y_train)

Bank3=Bank(Data_Model_3A,0.2)
Bank3.run(Bank3.X_train,Bank3.Y_train)

# Merge the three banks' weights into the global weights.
delta=aggregator.aggregate(GlobalBank.getWeights(),Bank1.getWeights(),Bank2.getWeights(),Bank3.getWeights())

# Install the merged weights and overwrite the model files on disk so the
# next round starts from them.
GlobalBank.setWeights(delta)
GlobalBank.saveModel()

Loaded model from disk
Train on 81882 samples, validate on 9098 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Train on 61411 samples, validate on 6824 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Train on 225177 samples, validate on 25020 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Saved model to disk


In [65]:
# Round A scores: each bank on its own hold-out, and the aggregated global
# model on the hold-out of Data_Global.
results['Bank1.1']=Bank1.evaluate(Bank1.X_test,Bank1.Y_test)
results['Bank2.1']=Bank2.evaluate(Bank2.X_test,Bank2.Y_test)
results['Bank3.1']=Bank3.evaluate(Bank3.X_test,Bank3.Y_test)
results['BankG.2']=GlobalBank.evaluate(GlobalBank.X_test,GlobalBank.Y_test)



In [66]:

# Round B: give each bank its second slice (20% hold-out) and retrain.
# run() reloads the global model files from disk before fitting.
Bank1.setData(Data_Model_1B,0.2)
Bank1.run(Bank1.X_train,Bank1.Y_train)

Bank2.setData(Data_Model_2B,0.2)
Bank2.run(Bank2.X_train,Bank2.Y_train)

Bank3.setData(Data_Model_3B,0.2)
Bank3.run(Bank3.X_train,Bank3.Y_train)



# Merge the three banks' weights into the global weights.
delta=aggregator.aggregate(GlobalBank.getWeights(),Bank1.getWeights(),Bank2.getWeights(),Bank3.getWeights())

# Install the merged weights and overwrite the model files on disk.
GlobalBank.setWeights(delta)
GlobalBank.saveModel()

Loaded model from disk
Train on 204706 samples, validate on 22746 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Train on 122823 samples, validate on 13648 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Loaded model from disk
Train on 61412 samples, validate on 6824 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Saved model to disk


In [67]:
# Round B scores: each bank on the hold-out of its second slice.
results['Bank1.2']=Bank1.evaluate(Bank1.X_test,Bank1.Y_test)
results['Bank2.2']=Bank2.evaluate(Bank2.X_test,Bank2.Y_test)
results['Bank3.2']=Bank3.evaluate(Bank3.X_test,Bank3.Y_test)




In [68]:
# Final score of the global model. split_size=0.9 is passed to
# train_test_split as test_size, so X_test/Y_test hold 90% of Data_Test.
GlobalBank.setData(Data_Test,0.9)
results['BankG.3']=GlobalBank.evaluate(GlobalBank.X_test,GlobalBank.Y_test)



# Non Federated Learning