In [1]:
import tensorflow.keras as keras
import numpy as np
from sklearn.utils import resample
import pandas as pd
import pickle
import os
import csv
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import confusion_matrix,accuracy_score
from sklearn.linear_model import LogisticRegression
from statistics import mean
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from keras.regularizers import l2
MODEL_PATH = './model/'
DATA_PATH = './data/'

2022-08-16 12:36:35.496930: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-16 12:36:35.496982: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
target_dataset = pd.read_csv('../MIA/datasets/adultODS10K.csv', na_values=["?"])
shadow_dataset = pd.read_csv('../MIA/datasets/adultODS10K_to_25K.csv', na_values=["?"])
attack_test_dataset = pd.read_csv('../MIA/datasets/adultODS25K_to_30K.csv', na_values=["?"])

In [3]:
def read_data(data_name):
    with np.load(DATA_PATH + data_name) as f:
        train_x, train_y, test_x, test_y = [f['arr_%d' % i] for i in range(len(f.files))]
    return train_x, train_y, test_x, test_y

In [4]:
def load_target_data(dataset, test_ratio):
    x = dataset.iloc[:,0:dataset.shape[1]-1] # seperate the feature column from class label
    y = dataset.iloc[:,-1] # label column

    dim=x.shape[1] # number of feature columns

    #num of classes
    num_classes=2

    for j in range(0,dim):
        if x.iloc[:,j].dtypes == object:   # transform categorical variables
            x.iloc[:,j] = x.iloc[:,j].astype('category') # change datatype to categorical
            x.iloc[:,j] = x.iloc[:,j].cat.codes # change from category name to category number
        else:  #transform numrical variables to standard scaler form 
            sc = StandardScaler()  
            val=np.array(x.iloc[:,j]).reshape(-1,1)
            std_data = sc.fit_transform(val)
            std_data = pd.DataFrame(std_data)
            x.iloc[:,j]=std_data

    y = y.astype('category') # change label to categorical
    y = y.cat.codes # change from category name to number

    x=np.array(x)
    y=np.array(y)
    
    trainX,testX, trainY, testY = train_test_split(x, y, test_size=test_ratio, random_state=0, stratify=y)
    return (trainX, trainY), (testX, testY), dim

In [5]:
def build_simple_mlp(n_class,pix,d):

    model = Sequential()
    model.add(Dense(256, input_dim=pix))
    model.add(Activation("relu"))
    #model.add(Dropout(0.01))
    
    model.add(Dense(256, kernel_regularizer=l2(0.01)))
    model.add(Activation("relu"))
    #model.add(Dropout(0.01))
    
    
    #model.add(Dense(248))
    #model.add(Activation("relu"))
    #model.add(Dropout(0.01))

    model.add(Dense(64))
    model.add(Activation("relu"))
    model.add(Dropout(0.01))
    
    model.add(Dense(n_class, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    act_layer=3
    
    return model, act_layer

In [166]:
per_class_sample=5000
channel=0   
EPS=200
act_layer=3
n_class = 2
VERBOSE = 0
test_ratio = 0.3

(target_trainX, target_trainY), (target_testX, target_testY), dim = load_target_data(target_dataset, test_ratio)
model,_ = build_simple_mlp (n_class,dim,channel)
#get_trained_keras_models(model, (target_trainX, target_trainY), (target_testX, target_testY), num_models=1)
history = model.fit(target_trainX, target_trainY, epochs=EPS, batch_size=32, verbose=VERBOSE)
score = model.evaluate(target_testX, target_testY, verbose=VERBOSE)
print('\n', 'Model test accuracy:', score[1])


 Model test accuracy: 0.8266666531562805


In [169]:
def load_shadow_data(dataset, n_shadow, shadow_size, test_ratio):
    x = dataset.iloc[:,0:dataset.shape[1]-1] # seperate the feature column from class label
    y = dataset.iloc[:,-1] # label column

    dim=x.shape[1] # number of feature columns

    #num of classes
    num_classes=2

    for j in range(0,dim):
        if x.iloc[:,j].dtypes == object:   # transform categorical variables
            x.iloc[:,j] = x.iloc[:,j].astype('category') # change datatype to categorical
            x.iloc[:,j] = x.iloc[:,j].cat.codes # change from category name to category number
        else:  #transform numrical variables to standard scaler form 
            sc = StandardScaler()  
            val=np.array(x.iloc[:,j]).reshape(-1,1)
            std_data = sc.fit_transform(val)
            std_data = pd.DataFrame(std_data)
            x.iloc[:,j]=std_data

    y = y.astype('category') # change label to categorical
    y = y.cat.codes # change from category name to number

    x=np.array(x)
    y=np.array(y)
    
    shadow_indices = np.arange(len(dataset))
    
   
    for i in range(n_shadow):
        shadow_i_indices = np.random.choice(shadow_indices, shadow_size, replace=False)
        shadow_i_x, shadow_i_y = x[shadow_i_indices], y[shadow_i_indices]
        trainX,testX, trainY, testY = train_test_split(shadow_i_x, shadow_i_y, test_size=test_ratio, random_state=0, stratify=shadow_i_y)
        #print('shadow_i_trainX = ', trainX, 'shadow_i_trainY = ', trainY)
        
        np.savez(DATA_PATH + 'shadow_adult{}_data.npz'.format(i), train_x, train_y, test_x, test_y)

In [197]:
n_shadow_models = 1
shadow_data_size = 10000
test_ratio = 0.3

load_shadow_data(shadow_dataset, 10, 10000, 0.3)

In [194]:
def train_shadow_models(n_shadow, n_class, dim, channel):
    full_sm_train_pred=[]
    full_sm_train_class=[]
    
    full_sm_test_pred=[]
    full_sm_test_class=[]
    
    full_clz_train=[]
    full_clz_test=[]
    
    members=[]
    nonmembers=[]
    

    for j in range(n_shadow):
        
        print("Shadow Model ", j)
        
        print('Training shadow model {}'.format(j))
        data = read_data('shadow{}_data.npz'.format(j))
        x_shadow_train, y_shadow_train, x_shadow_test, y_shadow_test = data
                

        model,_ = build_simple_mlp (n_class,dim,channel)
            
        # fit model
        history = model.fit(x_shadow_train, y_shadow_train, epochs=EPS, batch_size=32, validation_data=(x_shadow_test, y_shadow_test), verbose=0)
    
        # evaluate model
        _, train_acc = model.evaluate(x_shadow_train, y_shadow_train, verbose=0)
        _, test_acc = model.evaluate(x_shadow_test, y_shadow_test, verbose=0)
        print("Shadow Train acc : ", (train_acc * 100.0),"Shadow Test acc : ", (test_acc * 100.0))

    
        #train SM
        sm_train_pred=model.predict(x_shadow_train, batch_size=32)
        sm_train_class=np.argmax(y_shadow_train,axis=0)
    
    
        #test SM
        sm_test_pred=model.predict(x_shadow_test, batch_size=32)
        sm_test_class=np.argmax(y_shadow_test,axis=0)
        
     
        full_sm_train_pred.append(sm_train_pred)        
        full_sm_train_class.append(sm_train_class)
        members.append(np.ones(len(sm_train_pred)))
        
        full_sm_test_pred.append(sm_test_pred)        
        full_sm_test_class.append(sm_test_class) 
        nonmembers.append(np.zeros(len(sm_test_pred)))

    print("\n train predic", full_sm_train_pred)
    print("\n trian class", full_sm_train_class)
    print("\n members", members)
    print("\n nonmembers", nonmembers)
    
    print("\n train class type ", type(full_sm_train_class))

    
    full_sm_train_pred = np.vstack(full_sm_train_pred)
    full_sm_train_class = [item for sublist in full_sm_train_class for item in sublist]
    members = [item for sublist in members for item in sublist]
    
    full_sm_test_pred = np.vstack(full_sm_test_pred)
    full_sm_test_class = [item for sublist in full_sm_test_class for item in sublist]
    
    nonmembers = [item for sublist in nonmembers for item in sublist]
    

    
    shadow_train_performance=(full_sm_train_pred, np.array(full_sm_train_class))
    shadow_test_performance=(full_sm_test_pred, np.array(full_sm_test_class))


    ###atack data preparation
    attack_x = (full_sm_train_pred,full_sm_test_pred)
    #attack_x = np.vstack(attack_x)
    
    attack_y = (np.array(members).astype('int32'),np.array(nonmembers).astype('int32'))
    #attack_y = np.concatenate(attack_y)
    #attack_y = attack_y.astype('int32')
    
    
    classes = (np.array(full_sm_train_class),np.array(full_sm_test_class))
    #classes = np.array([item for sublist in classes for item in sublist])


    attack_dataset = (attack_x,attack_y,classes)

            
    return  shadow_train_performance, shadow_test_performance, attack_dataset, x_shadow_train, y_shadow_train, x_shadow_test, y_shadow_test, model


In [198]:
per_class_sample=5000
channel=0   
EPS=200
act_layer=3
n_class = 2
VERBOSE = 0
test_ratio = 0.3

#train_shadow_models(n_shadow_models, n_class, dim, channel)

In [200]:
n_shadow = 1

In [214]:
full_sm_train_pred=[]
full_sm_train_class=[]

full_sm_test_pred=[]
full_sm_test_class=[]

full_clz_train=[]
full_clz_test=[]

members=[]
nonmembers=[]


for j in range(n_shadow):

    print("Shadow Model ", j)

    print('Training shadow model {}'.format(j))
    data = read_data('shadow{}_data.npz'.format(j))
    x_shadow_train, y_shadow_train, x_shadow_test, y_shadow_test = data

    print('x_shadow trian\n', x_shadow_train,'\n y_shadow trian\n', y_shadow_train, '\n x_shadow test\n', x_shadow_test, '\n y_shadow test\n', y_shadow_test)

    model,_ = build_simple_mlp (n_class,dim,channel)

    # fit model
    history = model.fit(x_shadow_train, y_shadow_train, epochs=EPS, batch_size=32, validation_data=(x_shadow_test, y_shadow_test), verbose=0)

    # evaluate model
    _, train_acc = model.evaluate(x_shadow_train, y_shadow_train, verbose=0)
    _, test_acc = model.evaluate(x_shadow_test, y_shadow_test, verbose=0)
    print("Shadow Train acc : ", (train_acc * 100.0),"Shadow Test acc : ", (test_acc * 100.0))


    #train SM
    sm_train_pred=model.predict(x_shadow_train, batch_size=32)
    sm_train_class=np.argmax(y_shadow_train,axis=1)


    #test SM
    sm_test_pred=model.predict(x_shadow_test, batch_size=32)
    sm_test_class=np.argmax(y_shadow_test,axis=1)


    full_sm_train_pred.append(sm_train_pred)        
    full_sm_train_class.append(sm_train_class)
    members.append(np.ones(len(sm_train_pred)))

    full_sm_test_pred.append(sm_test_pred)        
    full_sm_test_class.append(sm_test_class) 
    nonmembers.append(np.zeros(len(sm_test_pred)))

print("\n train predic", full_sm_train_pred)
print("\n trian class", full_sm_train_class)
print("\n members", members)
print("\n nonmembers", nonmembers)

print("\n train class type ", type(full_sm_train_class))


full_sm_train_pred = np.vstack(full_sm_train_pred)
full_sm_train_class = [item for sublist in full_sm_train_class for item in sublist]
members = [item for sublist in members for item in sublist]

full_sm_test_pred = np.vstack(full_sm_test_pred)
full_sm_test_class = [item for sublist in full_sm_test_class for item in sublist]

nonmembers = [item for sublist in nonmembers for item in sublist]



shadow_train_performance=(full_sm_train_pred, np.array(full_sm_train_class))
shadow_test_performance=(full_sm_test_pred, np.array(full_sm_test_class))


###atack data preparation
attack_x = (full_sm_train_pred,full_sm_test_pred)
#attack_x = np.vstack(attack_x)

attack_y = (np.array(members).astype('int32'),np.array(nonmembers).astype('int32'))
#attack_y = np.concatenate(attack_y)
#attack_y = attack_y.astype('int32')


classes = (np.array(full_sm_train_class),np.array(full_sm_test_class))
#classes = np.array([item for sublist in classes for item in sublist])


attack_dataset = (attack_x,attack_y,classes)


Shadow Model  0
Training shadow model 0
x_shadow trian
 [[ 0.66124743  2.         15.         ... -0.21952066 -0.07073478
   6.        ]
 [-1.391348    2.         15.         ... -0.21952066  0.26406476
   0.        ]
 [ 0.20511511  2.          1.         ... -0.21952066 -0.07073478
   6.        ]
 ...
 [ 0.5852254   2.         11.         ...  4.532243   -0.40553433
   6.        ]
 [-0.17499517  2.          9.         ...  4.532243    0.7662641
   6.        ]
 [-1.6194142   2.          1.         ... -0.21952066 -2.079532
   6.        ]] 
 y_shadow trian
 [1 0 0 ... 1 1 0] 
 x_shadow test
 [[-0.17499517  5.         11.         ... -0.21952066 -0.07073478
   6.        ]
 [-0.8591937   2.          8.         ... -0.21952066 -0.4892342
   6.        ]
 [-0.02295106  4.         11.         ... -0.21952066 -0.07073478
   6.        ]
 ...
 [ 0.35715923  3.         14.         ...  4.7588773   0.7662641
   6.        ]
 [ 1.421468    2.         15.         ... -0.21952066 -0.07073478
   6.    

AxisError: axis 1 is out of bounds for array of dimension 1

In [217]:
model.predict(x_shadow_train, batch_size=32)



array([[3.0479312e-04, 9.9969518e-01],
       [9.9995667e-01, 4.3250599e-05],
       [9.9862522e-01, 1.3747002e-03],
       ...,
       [1.1925322e-07, 9.9999982e-01],
       [1.2142958e-07, 9.9999982e-01],
       [9.9999994e-01, 4.5283450e-08]], dtype=float32)

In [None]:
full_sm_train_class = [item for sublist in full_sm_train_class for item in sublist]