In [None]:
import gurobipy as grb
import numpy as np
import timeit
import scipy
from sklearn.datasets import make_classification
import sklearn

In [None]:
def gen_data(imb_type='0', perc_wrong_y=0):
    """Load the nominal/client ``.mat`` files and build train and test sets.

    Reads ``nominal.mat`` and ``client1.mat`` .. ``client4.mat`` from the
    current working directory (each must hold arrays under the keys ``'x'``
    and ``'y'``), assigns the four client files to the four clients in a
    random order, and slices every array into a test part and per-client
    training parts.

    The first 800 nominal rows and the first 200 rows of each client file
    form the test set. The rows that follow are used for training: every
    client gets its own consecutive, non-overlapping slice of the nominal
    data plus a slice of "its" client file.

    Args:
        imb_type: Training-set layout, given as ``'0'``..``'3'`` (integers
            are accepted too):
            ``'0'`` no imbalance, ``'1'`` client imbalance,
            ``'2'`` class imbalance, ``'3'`` client and class imbalance.
        perc_wrong_y: Fraction of labels to flip via ``change_labels``.
            Applied only when greater than 0, and only after the test
            labels have been copied out, so test labels stay untouched.

    Returns:
        A 7-tuple ``(central_set, client_sets, test_set, central_set_norm,
        client_sets_norm, test_set_norm, Ng_vec)``:

        * ``central_set`` / ``test_set``: dicts with keys ``'x'``, ``'y'``.
        * ``client_sets``: dict with keys ``'x0'``, ``'y0'`` .. ``'x3'``,
          ``'y3'``.
        * ``*_norm``: the same dicts with the features passed through
          ``normalize_data``; labels are shared with the raw dicts.
        * ``Ng_vec``: list with the number of training rows per client.

    Raises:
        ValueError: If ``imb_type`` is not one of the four known layouts.
    """
    # Imported here because a bare ``import scipy`` does not guarantee that
    # the ``scipy.io`` submodule has been loaded.
    from scipy.io import loadmat

    n_test_nominal = 800  # leading nominal rows reserved for the test set
    n_test_client = 200   # leading rows of each client file reserved for test

    # (nominal rows, client-file rows) used for training, one pair per client.
    train_sizes_by_type = {
        # No imbalance.
        '0': [(100, 100), (100, 100), (100, 100), (100, 100)],
        # Client imbalance: 70 % / 15 % / 10 % / 5 % of the training rows.
        '1': [(280, 280), (60, 60), (40, 40), (20, 20)],
        # Class imbalance: 90 % nominal rows, 10 % client-file rows.
        '2': [(180, 20), (180, 20), (180, 20), (180, 20)],
        # Client and class imbalance combined.
        '3': [(504, 56), (108, 12), (72, 8), (36, 4)],
    }
    layout_key = str(imb_type)
    if layout_key not in train_sizes_by_type:
        raise ValueError(
            "imb_type must be one of '0', '1', '2', '3'; got %r" % (imb_type,)
        )
    train_sizes = train_sizes_by_type[layout_key]
    Ng_vec = [n_nom + n_cli for n_nom, n_cli in train_sizes]

    nominal = loadmat('nominal.mat')
    x_nom, y_nom = nominal['x'], nominal['y']

    # Random assignment of the four client files to the four clients.
    client_vec = np.random.permutation(4) + 1

    x_clients, y_clients = [], []
    for client_id in client_vec:
        client_data = loadmat('client' + str(client_id) + '.mat')
        # NOTE(review): only the features are shuffled, the labels are not.
        # That keeps x and y aligned only if all rows of a client file carry
        # the same label -- confirm against the data files.
        x_clients.append(shuffle_data(client_data['x']))
        y_clients.append(client_data['y'])

    # np.concatenate copies, so flipping labels below cannot leak into y_test.
    x_test = np.concatenate(
        [x_nom[:n_test_nominal]] + [x_c[:n_test_client] for x_c in x_clients],
        axis=0,
    )
    y_test = np.concatenate(
        [y_nom[:n_test_nominal]] + [y_c[:n_test_client] for y_c in y_clients],
        axis=0,
    )

    if perc_wrong_y > 0:
        y_nom = change_labels(perc_wrong_y, y_nom)
        y_clients = [change_labels(perc_wrong_y, y_c) for y_c in y_clients]

    # Walk through the nominal data with a running offset so that every
    # client receives the slice directly after the previous client's slice.
    x_train, y_train = [], []
    nom_start = n_test_nominal
    for (n_nom, n_cli), x_c, y_c in zip(train_sizes, x_clients, y_clients):
        nom_stop = nom_start + n_nom
        cli_stop = n_test_client + n_cli
        x_train.append(np.concatenate(
            [x_nom[nom_start:nom_stop], x_c[n_test_client:cli_stop]], axis=0))
        y_train.append(np.concatenate(
            [y_nom[nom_start:nom_stop], y_c[n_test_client:cli_stop]], axis=0))
        nom_start = nom_stop

    x_central = np.concatenate(x_train, axis=0)
    y_central = np.concatenate(y_train, axis=0)

    central_set = {'x': x_central, 'y': y_central}
    test_set = {'x': x_test, 'y': y_test}
    client_sets = {}
    for k, (x_k, y_k) in enumerate(zip(x_train, y_train)):
        client_sets['x' + str(k)] = x_k
        client_sets['y' + str(k)] = y_k

    # Compute limits: row 0 holds the per-feature maximum, row 1 the minimum.
    # NOTE(review): the limits are taken over every loaded sample (nominal
    # plus all client files); the original code referenced an undefined
    # ``x_all`` here -- confirm this is the intended population.
    x_all = np.concatenate([x_nom] + x_clients, axis=0)
    lims = np.vstack([x_all.max(axis=0), x_all.min(axis=0)]).astype(float)

    x_train_norm = [normalize_data(x_k, lims) for x_k in x_train]
    x_central_norm = normalize_data(x_central, lims)
    x_test_norm = normalize_data(x_test, lims)

    central_set_norm = {'x': x_central_norm, 'y': y_central}
    test_set_norm = {'x': x_test_norm, 'y': y_test}
    client_sets_norm = {}
    for k, (x_k_norm, y_k) in enumerate(zip(x_train_norm, y_train)):
        client_sets_norm['x' + str(k)] = x_k_norm
        client_sets_norm['y' + str(k)] = y_k

    return (central_set, client_sets, test_set,
            central_set_norm, client_sets_norm, test_set_norm, Ng_vec)
    
    
def shuffle_data(x_in):
    """Randomly reorder ``x_in`` along its first axis and return it.

    The reordering is performed in place by ``np.random.shuffle``, so the
    caller's array is modified; the returned object is that same array,
    not a copy.
    """
    shuffled = x_in  # same object: the shuffle below mutates the input
    np.random.shuffle(shuffled)
    return shuffled

def normalize_data(data_in, lims):
    """Min-max scale every column of ``data_in`` using the limits in ``lims``.

    Each column ``c`` is mapped to
    ``(data_in[:, c] - lims[1, c]) / (lims[0, c] - lims[1, c])``.

    Args:
        data_in: 2-D array-like of shape ``(rows, cols)``.
        lims: 2-D array-like whose row 0 holds the per-column upper limit
            and row 1 the per-column lower limit. It must provide at least
            ``cols`` columns; extra columns are ignored.

    Returns:
        A new float array of shape ``(rows, cols)``; ``data_in`` is not
        modified.

    Raises:
        IndexError: If ``lims`` has fewer columns than ``data_in``.
    """
    data = np.asarray(data_in, dtype=float)
    row, col = data.shape
    bounds = np.asarray(lims, dtype=float)
    if bounds.shape[1] < col:
        raise IndexError(
            "lims provides %d columns but data_in has %d"
            % (bounds.shape[1], col)
        )

    upper = bounds[0, :col]
    lower = bounds[1, :col]
    span = upper - lower
    # A column whose upper and lower limit coincide would divide by zero and
    # fill the output with NaN/inf. Dividing by 1 instead maps such a column
    # to ``data - lower`` (i.e. 0 for values equal to the limit).
    safe_span = np.where(span == 0, 1.0, span)

    return (data - lower) / safe_span

def change_labels(perc_wrong_y, y):
    """Flip the sign of a fraction of the labels in ``y``.

    ``round(perc_wrong_y * len(y))`` distinct positions along the first axis
    are drawn at random and the entries there are multiplied by -1. The
    positions are drawn without replacement so that exactly that many labels
    are flipped (drawing with replacement can pick a position twice, which
    silently lowers the number of flipped labels).

    Args:
        perc_wrong_y: Fraction of labels to flip, in ``[0, 1]``.
        y: NumPy array of labels. It is modified in place.

    Returns:
        The same array object ``y`` with the selected labels flipped.

    Raises:
        ValueError: If ``perc_wrong_y`` lies outside ``[0, 1]``.
    """
    if not 0 <= perc_wrong_y <= 1:
        raise ValueError(
            "perc_wrong_y must be a fraction in [0, 1]; got %r"
            % (perc_wrong_y,)
        )

    total_samples = len(y)
    n_wrong = int(round(perc_wrong_y * total_samples))
    if n_wrong == 0:
        # Nothing to flip (also covers an empty label array).
        return y

    idx_wrong = np.random.choice(total_samples, size=n_wrong, replace=False)
    y[idx_wrong] = y[idx_wrong] * -1

    return y