In [2]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation,Flatten
from keras import regularizers

import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
from scipy import stats

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn import metrics
from keras.models import model_from_json

from tqdm import tqdm
import math

In [1]:
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE

Using TensorFlow backend.


# Class definitons

In [3]:
class Data():
    def __init__(self,data):
        self.data=data
        self.dataSmote=pd.DataFrame()
        
    def sample(self,start,end):
        size=len(self.data)
        return self.data[int(size*start):int(size*end)]
    
    def getData():
        return self.dataSmote

    def sample_smote(self,start,end):
        data=self.sample(start,end)
        self.dataSmote=np.array(data.drop("Class",axis=1))
        y=np.array(data[['Class']])
        smt=SMOTE()
        self.dataSmote,y=smt.fit_sample(self.dataSmote,y)
        self.dataSmote=pd.DataFrame(self.dataSmote)
        y=pd.DataFrame(y)
        self.dataSmote['Class']=y
        self.dataSmote=self.dataSmote.sample(frac=1)
        self.dataSmote.columns=list(data.columns)
        return self.dataSmote

In [4]:
class Aggregator():
    
    def __init__(self):
        self.wB1=0.55
        self.wB2=0.35
        self.wB3=0.10
    
    def aggregate(self,delta,B1,B2,B3):
        delta=np.array(delta)
        temp=(self.wB1*np.array(B1) + self.wB2*np.array(B2) + self.wB3*np.array(B3))
        temp-=delta
        delta+=temp

        return delta
    

In [5]:
class Model():
    
    def __init__(self):
        self.input_shape=(30,)
        self.model = Sequential()
        self.model.add(Dense(32, activation='relu',input_shape=self.input_shape))
        self.model.add(Dense(16, activation='relu'))
        self.model.add(Dense(8, activation='relu'))
        self.model.add(Dense(1, activation='sigmoid'))
        self.model.compile(optimizer='adam',   #rmsprop
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

    
    def saveModel(self):
        model_json = self.model.to_json()
        with open("model.json", "w") as json_file:
            json_file.write(model_json)
        # serialize weights to HDF5
        self.model.save_weights("model.h5")
        #print("Saved model to disk")
        
    def loadModel(self):
        json_file = open('model.json', 'r')
        loaded_model_json = json_file.read()
        json_file.close()
        loaded_model = model_from_json(loaded_model_json)
        # load weights into new model
        loaded_model.load_weights("model.h5")
        #print("Loaded model from disk")
        loaded_model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
        return loaded_model    
    
    def getModel(self):
        return self.model


    def run(self,X,Y,validation_split=0,load=True):
        if(load):
            self.model=self.loadModel()
        self.model.fit(X,Y,epochs=5,validation_split=validation_split, verbose=0)
        
    def evaluate(self,X,Y):
        return self.model.evaluate(X,Y)[1]*100
    
    def loss(self,X,Y):
        return self.model.evaluate(X,Y)[0]
    
    def predict(self,X):
        return self.model.predict(X)
        
    def getLayers(self):
        return self.model.layers
    
    def getWeights(self):
        return self.model.get_weights()
    
    def setWeights(self,weight):
        self.model.set_weights(weight)


In [6]:
class Bank(Model):
    
    def __init__(self,data,split_size=0):
        super().__init__()
        self.data=data
        self.split(split_size)
    
    def setData(self,data,split_size=0):
        self.data=data
        self.split(split_size)
        
    def getData(self):
        return self.data
    
    def split(self,split_size):
        X=self.data.copy()
        X.drop(['Class'],axis=1,inplace=True)
        Y=self.data[['Class']]

        if split_size == 0:
            self.X_train, self.X_test, self.Y_train, self.Y_test = X,X,Y,Y
        else:
            self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y, test_size=split_size)

In [7]:
def getWeights(weights):
    A=[]
    for i in weights:
        A.extend(np.array(i).flatten())
    B=[]
    for i in A:
        B.extend(i.flatten())

    
    return np.array(B)

def getAdversarialData(probabilities,data,gradient,result):
    output=[]
    for index,i in enumerate(probabilities.reshape(-1)):
        temp=[]
        temp.append(i)
        temp.extend(np.array(data[index:index+1])[0])
        temp.extend(gradient)
        temp.append(result)
        output.append(temp)
    return np.array(output)


# EDA

In [8]:
data = pd.read_csv('creditcard.csv')
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [9]:
data.shape

(284807, 31)

In [10]:
std_scaler = StandardScaler()
rob_scaler = RobustScaler()

data['scaled_amount'] = rob_scaler.fit_transform(data['Amount'].values.reshape(-1,1))
data['scaled_time'] = rob_scaler.fit_transform(data['Time'].values.reshape(-1,1))
data.drop(['Time','Amount'], axis=1, inplace=True)

scaled_amount = data['scaled_amount']
scaled_time = data['scaled_time']

data.drop(['scaled_amount', 'scaled_time'], axis=1, inplace=True)
data.insert(0, 'scaled_amount', scaled_amount)
data.insert(1, 'scaled_time', scaled_time)

data.head()

data = data.sample(frac=1)

# amount of fraud classes 492 rows.
fraud_data = data.loc[data['Class'] == 1]
non_fraud_data = data.loc[data['Class'] == 0]

normal_distributed_data = pd.concat([fraud_data, non_fraud_data])

# Shuffle dataframe rows
new_data = normal_distributed_data.sample(frac=1, random_state=42)

new_data.head()

Unnamed: 0,scaled_amount,scaled_time,V1,V2,V3,V4,V5,V6,V7,V8,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Class
112942,0.294138,-0.138958,1.096221,-0.334989,0.491418,1.222875,0.191806,1.934249,-0.607641,0.514896,...,-0.054194,-0.453975,-0.914777,-0.174166,-1.692185,0.625974,-0.42457,0.08131,0.00742,0
101244,-0.258506,-0.198816,1.288532,-1.070826,1.772919,-0.092079,-2.184398,-0.079061,-1.590632,0.235521,...,-0.021488,0.153245,0.77405,-0.042568,0.777695,0.41599,-0.064042,0.082693,0.029199,0
229941,3.682806,0.721672,-0.736715,-0.560197,1.868625,0.341622,-0.732814,1.253531,0.709524,0.242769,...,0.560364,0.46536,0.999584,0.481277,0.634388,-0.07813,-0.666859,-0.14726,-0.155888,0
153595,-0.29344,0.174661,2.009217,0.082927,-0.585626,1.061602,0.519582,0.873484,-0.540208,0.083716,...,-0.136204,-0.354148,-0.631028,0.267381,-0.447804,-0.217258,-0.9919,0.029769,-0.042619,0
164350,2.50779,0.37551,1.861103,-1.243445,-1.453612,-0.723378,-0.785562,-1.314244,-0.05624,-0.507137,...,0.396145,0.568211,1.275484,-0.18935,0.113098,0.14756,0.051938,-0.066289,-0.030371,0


In [11]:
results={}
aggregator=Aggregator()

In [28]:
datum=Data(data)

Data_Global=datum.sample_smote(0,0.1)              #use datum.sample  if smote not required
Data_Model_1A=datum.sample_smote(0.10,0.50)
Data_Model_2A=datum.sample_smote(0.50,0.80)
Data_Model_3A=datum.sample_smote(0.8,0.90)
# Data_Model_1B=datum.sample_smote(0.50,0.70)
# Data_Model_2B=datum.sample_smote(0.70,0.85)
# Data_Model_3B=datum.sample_smote(0.85,0.90)
Data_Test=datum.sample_smote(0.90,1)

In [29]:
GlobalBank=Bank(Data_Global,0.2)
GlobalBank.run(GlobalBank.X_train, GlobalBank.Y_train,load=False)

results['BankG.1']=GlobalBank.evaluate(GlobalBank.X_test,GlobalBank.Y_test)

GlobalBank.saveModel()




In [30]:
step=10
# no_iter=math.floor(Data_Model_3A.shape[0]/step)
no_iter=200 # for speed

In [16]:
result=[]
for i in tqdm(range(71,no_iter+1)):

    Bank1=Bank(Data_Model_1A[i*step:(i+1)*step])
    Bank1.run(Bank1.X_train,Bank1.Y_train)
    Bank1_gradient=getWeights(GlobalBank.getWeights()) - getWeights(Bank1.getWeights())
    result.extend(
        getAdversarialData(Bank1.predict(Bank1.X_test),Bank1.X_test,Bank1_gradient,True)
    )

    Bank2=Bank(Data_Model_2A[i*step:(i+1)*step])
    Bank2.run(Bank2.X_train,Bank2.Y_train)
    Bank2_gradient=getWeights(GlobalBank.getWeights()) - getWeights(Bank2.getWeights())
    result.extend(
        getAdversarialData(Bank2.predict(Bank2.X_test),Bank2.X_test,Bank2_gradient,True)
    )

    Bank3=Bank(Data_Model_3A[i*step:(i+1)*step])
    Bank3.run(Bank3.X_train,Bank3.Y_train)
    Bank3_gradient=getWeights(GlobalBank.getWeights()) - getWeights(Bank3.getWeights())
    result.extend(
        getAdversarialData(Bank3.predict(Bank3.X_test),Bank3.X_test,Bank3_gradient,True)
    )


    result.extend(
        getAdversarialData(Bank1.predict(Bank2.X_test),Bank2.X_test,Bank1_gradient,False)
    )

    result.extend(
        getAdversarialData(Bank1.predict(Bank3.X_test),Bank3.X_test,Bank1_gradient,False)
    )

    result.extend(
        getAdversarialData(Bank2.predict(Bank1.X_test),Bank1.X_test,Bank2_gradient,False)
    )

    result.extend(
        getAdversarialData(Bank2.predict(Bank3.X_test),Bank3.X_test,Bank2_gradient,False)
    )

    result.extend(
        getAdversarialData(Bank3.predict(Bank1.X_test),Bank1.X_test,Bank3_gradient,False)
    )

    result.extend(
        getAdversarialData(Bank3.predict(Bank2.X_test),Bank2.X_test,Bank3_gradient,False)
    )


    delta=aggregator.aggregate(GlobalBank.getWeights(),Bank1.getWeights(),Bank2.getWeights(),Bank3.getWeights())

    GlobalBank.setWeights(delta)
    GlobalBank.saveModel()

    if(i%10 == 0 and i!=0):
        result=np.array(result)
        advData=pd.DataFrame(result)

        loc='Adversarial_Input'
        loc+=str(i)
        loc+='.csv'

        advData.to_csv(loc,index=False)
        result=[]





  0%|          | 0/130 [00:00<?, ?it/s][A
  1%|          | 1/130 [00:03<07:03,  3.28s/it][A
  2%|▏         | 2/130 [00:11<12:01,  5.64s/it]


KeyboardInterrupt: 

# ADVERSARIAL MODEL

In [64]:
# advData=pd.read_csv('Adversarial_Input10.csv')
advData=pd.DataFrame()
for i in range(10,201,10):
    loc='Adversarial_Input'
    loc+=str(i)
    loc+='.csv'
    x=pd.read_csv(loc)
    advData=advData.append(x,ignore_index=True)
   


In [65]:
advData.shape

(18090, 1697)

In [194]:
from keras.layers import Conv2D, MaxPooling2D
def Model(input_shape):

    model = Sequential()

    model.add(Conv2D(64, 2,input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32,2))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())

    model.add(Dense(32, activation='relu',kernel_regularizer=regularizers.l2(0.01))) 
    model.add(Dropout(0.2))
    model.add(Dense(16, activation='relu'))
    model.add(Dropout(0.1))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    
    model.compile(optimizer='adam',   #rmsprop
                  loss= "binary_crossentropy",#[custom_loss()]
                  metrics=['acc']
                 )

    return model

In [82]:
from imblearn.under_sampling import RandomUnderSampler

In [191]:
X=advData.copy()
X.drop(['1696'],axis=1,inplace=True)
Y=advData[['1696']]


In [192]:
undersample = RandomUnderSampler(sampling_strategy='majority')
undersample = RandomUnderSampler(sampling_strategy=1)
X,Y = undersample.fit_resample(X,Y)
input_shape=(X.shape[0],X.shape[1],)
X=np.array(X)
X = X.reshape(X.shape[0],212,8,1)
input_shape=X.shape[1:]
Y=np.array(Y)

In [195]:
model=Model(input_shape)
model.fit(X,Y,epochs=10,validation_split=0.1, verbose=1)

Train on 10854 samples, validate on 1206 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x23717a4a7c8>