In [181]:
import csv
import sys
import random
import numpy as np

In [182]:
print(sys.version)

3.7.4 (v3.7.4:e09359112e, Jul  8 2019, 14:54:52) 
[Clang 6.0 (clang-600.0.57)]


In [317]:
male_data = np.genfromtxt('Data/MALE.csv', delimiter=',', skip_header=1)
female_data = np.genfromtxt('Data/FEMALE.csv', delimiter=',', skip_header=1)
mixed_data = np.genfromtxt('Data/MIXED.csv', delimiter=',', skip_header=1)

male_x = male_data[:,:-1]
male_y = male_data[:,-1:]
female_x = female_data[:,:-1]
female_y = female_data[:,-1:]
mixed_x = mixed_data[:,:-1]
mixed_y = mixed_data[:,-1:]

In [350]:
from sklearn.model_selection import train_test_split

male_x_train, male_x_test, male_y_train, male_y_test = \
train_test_split(male_x, male_y, test_size=0.10, random_state=random.randint(1,101))

male_x_dev = male_x_test[:100]
male_y_dev = male_y_test[:100]
male_x_test = male_x_test[100:]
male_y_test = male_y_test[100:]

female_x_train, female_x_test, female_y_train, female_y_test = \
train_test_split(female_x, female_y, test_size=0.10, random_state=random.randint(1,101))

female_x_dev = female_x_test[:100]
female_y_dev = female_y_test[:100]
female_x_test = female_x_test[100:]
female_y_test = female_y_test[100:]


mixed_x_train, mixed_x_test, mixed_y_train, mixed_y_test = \
train_test_split(mixed_x, mixed_y, test_size=0.04, random_state=random.randint(1,101))

mixed_x_dev = mixed_x_test[:100]
mixed_y_dev = mixed_y_test[:100]
mixed_x_test = mixed_x_test[100:]
mixed_y_test = mixed_y_test[100:]

In [351]:
print("Mixed test and train size: ")
print(mixed_x_test.shape)
print(mixed_x_train.shape)

print("Female test and train size: ")
print(female_x_test.shape)
print(female_x_train.shape)

print("Male test and train size: ")
print(male_x_test.shape)
print(male_x_train.shape)


Mixed test and train size: 
(193, 6)
(7011, 6)
Female test and train size: 
(341, 6)
(3963, 6)
Male test and train size: 
(266, 6)
(3288, 6)


In [366]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Ridge
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.model_selection import GridSearchCV

ridge = Ridge()
# {'hidden_layer_sizes': (16, 16, 16, 16, 16), 'learning_rate_init': 0.01, 'max_iter': 450}
# {'hidden_layer_sizes': (16, 16, 16, 16), 'learning_rate_init': 0.01, 'max_iter': 350}
# hidden_layer_sizes=(8, 8, 8, 8, 8, 8, 8, 8, 8), max_iter=250, activation = 'relu', learning_rate_init=.001)
neural_net = MLPClassifier(hidden_layer_sizes=(16,16,16,16,16), max_iter=350, activation = 'relu', learning_rate_init=.001)


In [None]:
from hypopt import GridSearch

# hyper param tuning for the ridge regression
def tune_ridge(x_train, y_train, x_dev, y_dev, x_test, y_test):
    params = {"alpha": [.0001, .001, .01, .1, 1, 10, 100, 150]}
    grid = GridSearch(model = ridge, param_grid = params)
    grid.fit(x_train, y_train, x_dev, y_dev)
    #print('Best score: ', grid.score(x_test, y_test))
    ridge.alpha = grid.best_params['alpha']
    return ridge
    
# hyper param tuning for the neural network
def tune_nn(x_train, y_train, x_dev, y_dev, x_test, y_test):
    params = {"learning_rate_init": [.001, .01, .1], "max_iter" : [350, 400, 450], "hidden_layer_sizes" : [(16,16,16,16,16), (8,8,8,8,8,8,8,8,8)]}
    grid = GridSearch(model = neural_net, param_grid = params)
    grid.fit(x_train, y_train.ravel(), x_dev, y_dev.ravel())
    #print('Best score: ', grid.score(x_test, y_test.ravel()))
    neural_net.learning_rate_init = grid.best_params['learning_rate_init']
    neural_net.max_iter = grid.best_params['max_iter']
    neural_net.hidden_layer_sizes = grid.best_params['hidden_layer_sizes']
    return neural_net
    

#model = tune_ridge(male_x_train, male_y_train, male_x_dev, male_y_dev, male_x_test, male_y_test)
model = tune_nn(male_x_train, male_y_train, male_x_dev, male_y_dev, male_x_test, male_y_test)
model.get_params()

In [367]:
@ignore_warnings(category=ConvergenceWarning)
def baseline(training_x, training_y, dev_x, dev_y, test_x, test_y):
    nn = tune_nn(training_x, training_y, dev_x, dev_y, test_x, test_y)
    nn.fit(training_x, training_y)
    rr = tune_nn(training_x, training_y, dev_x, dev_y, test_x, test_y)
    rr.fit(training_x, training_y)
    rr_predictions = rr.predict(test_x)
    nn_predictions = nn.predict(test_x)
    return rr_predictions, nn_predictions

def mse(rr_predictions, nn_predictions, test_y):
    mse_rr1 = mean_squared_error(test_y, rr_predictions)
    mse_nn1 = mean_squared_error(test_y, nn_predictions)
    print("Ridge regression mse: " + str(mse_rr1))
    print("Neural net mse: " + str(mse_nn1))

In [368]:
def get_subset(data):
    return data[:100]

In [369]:
# three possible splits for f-fold cross validation:
# { (male, female), (male, mixed), (female, mixed) }
def src_data_format(x1, x2, y1, y2):
    x = np.concatenate([x1, x2])
    y = np.concatenate([y1, y2]).ravel()
    return x, y

In [370]:
def tgt_data_format(x, y):
    return x[:100], y[:100].ravel()

In [371]:
def all_data_format(x1, x2, x3, y1, y2, y3):
    x3 = get_subset(x3)
    y3 = get_subset(y3)
    x = np.concatenate([x1, x2, x3])
    y = np.concatenate([y1, y2, y3]).ravel()
    return x, y

In [372]:
def weighted(x1, x2, x3, y1, y2, y3):
    source = np.concatenate([x1, x2])
    target = get_subset(x3)
    N = len(source)
    M = len(target)
    weight = int(N/M)
    x = np.concatenate([source, target])
    for i in range(1, weight):
        x = np.concatenate([x, target])
    return x

def update_weighted_y(x, y, y_data):
    while(len(y) < len(x)):
        y = np.concatenate([y, y_data[:100].ravel()])
    return y

In [404]:
def pred(src_x1, src_x2, src_y1, src_y2, tgt_x_train, tgt_x_dev, tgt_x_test, model):
    x, y = src_data_format(src_x1, src_x2, src_y1, src_y2)
    model.fit(x, y)
    
    pred = model.predict(tgt_x_train)
    train = np.column_stack((tgt_x_train, pred))
    
    pred = model.predict(tgt_x_dev)
    dev = np.column_stack((tgt_x_dev, pred))
    
    pred = model.predict(tgt_x_test)
    test = np.column_stack((tgt_x_test, pred))
    
    return model, train, dev, test

def make_predictions(model, train_x, train_y, dev_x, dev_y, test_x, test_y):
    model.fit(train_x, train_y.ravel())
    predictions = model.predict(test_x)
    return linint_mse(predictions, test_y)
    


In [398]:
from sklearn.linear_model import LinearRegression

# predictions are of length 100 from target dev data
def linint(srconly_pred, tgtonly_pred, target_dev_x, target_dev_y, target_test_x):
    combined_models = np.column_stack((srconly_pred,tgtonly_pred))
    model = LinearRegression()
    model.fit(combined_models, target_dev_y.ravel())
    model.predict
    
    combined_models = np.column_stack((tgtonly_pred,tgtonly_pred))
    prediction = model.predict(combined_models)
    return prediction
    
# calculates the mse for linint because linint returns only 1 predicter, not 2
def linint_mse(predictions, test):
    error = mean_squared_error(test, predictions)
    print("mse: " + str(error))

In [375]:
x1, y1 = src_data_format(male_x_train, female_x_train, male_y_train, female_y_train)
dev_x1, dev_y1 = src_data_format(male_x_dev, female_x_dev, male_y_dev, female_y_dev)

x2, y2 = src_data_format(male_x_train, mixed_x_train, male_y_train, mixed_y_train)
dev_x2, dev_y2 = src_data_format(male_x_dev, mixed_x_dev, male_y_dev, mixed_y_dev)

x3, y3 = src_data_format(female_x_train, mixed_x_train, female_y_train, mixed_y_train)
dev_x3, dev_y3 = src_data_format(female_x_dev, mixed_x_dev, female_y_dev, mixed_y_dev)

In [376]:
# baseline(training_x, training_y, dev_x, dev_y, test_x, test_y)

print("Mixed target SRCONLY model:")
rr_predictions, nn_predictions = baseline(x1, y1, dev_x1, dev_y1, mixed_x_test, mixed_y_test)
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target SRCONLY model:")
rr_predictions, nn_predictions = baseline(x2, y2, dev_x2, dev_y2, female_x_test, female_y_test)
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target SRCONLY model:")
rr_predictions, nn_predictions = baseline(x3, y3, dev_x3, dev_y3, male_x_test, male_y_test)
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target SRCONLY model:
Ridge regression mse: 114.92198704382011
Neural net mse: 153.21243523316062
Female target SRCONLY model:
Ridge regression mse: 164.3392558326437
Neural net mse: 155.42815249266863
Male target SRCONLY model:
Ridge regression mse: 157.55730189104148
Neural net mse: 137.38345864661653


In [379]:
x1, y1 = tgt_data_format(male_x_train, male_y_train)
x2, y2 = tgt_data_format(female_x_train, female_y_train)
x3, y3 = tgt_data_format(mixed_x_train, mixed_y_train)

In [380]:
print("Mixed target TGTONLY model:")
rr_predictions, nn_predictions = baseline(x3, y3, mixed_x_test)
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target TGTONLY model:")
rr_predictions, nn_predictions = baseline(x2, y2, female_x_test)
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target TGTONLY model:")
rr_predictions, nn_predictions = baseline(x1, y1, male_x_test)
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target SRCONLY model:
Ridge regression mse: 115.8301801875647
Neural net mse: 231.79274611398964
Female target SRCONLY model:
Ridge regression mse: 165.8498956004615
Neural net mse: 252.29912023460412
Male target TGTONLY model:
Ridge regression mse: 150.9510980233947
Neural net mse: 284.90977443609023


In [381]:
x1, y1 = all_data_format(male_x_train, female_x_train, mixed_x_train, male_y_train, female_y_train, mixed_y_train)
x2, y2 = all_data_format(male_x_train, mixed_x_train, female_x_train, male_y_train, mixed_y_train, female_y_train)
x3, y3 = all_data_format(female_x_train, mixed_x_train, male_x_train, female_y_train, mixed_y_train, male_y_train)

In [382]:
print("Mixed target ALL model:")
rr_predictions, nn_predictions = baseline(x1, y1, mixed_x_test)
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target ALL model:")
rr_predictions, nn_predictions = baseline(x2, y2, female_x_test)
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target ALL model:")
rr_predictions, nn_predictions = baseline(x3, y3, male_x_test)
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target ALL model:
Ridge regression mse: 114.7379111936112
Neural net mse: 115.35751295336787
Female target ALL model:
Ridge regression mse: 164.36480311855726
Neural net mse: 156.22287390029325
Male target ALL model:
Ridge regression mse: 157.33031150905555
Neural net mse: 132.30075187969925


In [309]:
# change the data groups to the all variables
x1 = weighted(male_x_train, female_x_train, mixed_x_train, male_y_train, female_y_train, mixed_y_train)
x2 = weighted(male_x_train, mixed_x_train, female_x_train, male_y_train, mixed_y_train, female_y_train)
x3 = weighted(female_x_train, mixed_x_train, male_x_train, female_y_train, mixed_y_train, male_y_train)

y1 = update_weighted_y(x1, y1, mixed_y_train)
y2 = update_weighted_y(x2, y2, female_y_train)
y3 = update_weighted_y(x3, y3, male_y_train)

In [310]:
print("Mixed target WEIGHTED model:")
rr_predictions, nn_predictions = baseline(x1, y1, mixed_x_test)
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target WEIGHTED model:")
rr_predictions, nn_predictions = baseline(x2, y2, female_x_test)
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target WEIGHTED model:")
rr_predictions, nn_predictions = baseline(x3, y3, male_x_test)
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target WEIGHTED model:
Ridge regression mse: 145.35193129758622
Neural net mse: 242.59904912836768
Female target WEIGHTED model:
Ridge regression mse: 171.46480823240492
Neural net mse: 274.9882697947214
Male target WEIGHTED model:
Ridge regression mse: 176.2156872351883
Neural net mse: 281.1127819548872


In [390]:
print("PRED mixed target: ")
rr, train, dev, test = pred(male_x_train, female_x_train, male_y_train, female_y_train, mixed_x_train, mixed_x_dev, mixed_x_test, ridge)    
make_predictions(rr, train, mixed_y_train, dev, mixed_y_dev, test, mixed_y_test)
nn, train, dev, test = pred(male_x_train, female_x_train, male_y_train, female_y_train, mixed_x_train, mixed_x_dev, mixed_x_test, neural_net)    
make_predictions(nn, train, mixed_y_train, dev, mixed_y_dev, test, mixed_y_test)

print("PRED female target: ")
rr, train, dev, test = pred(male_x_train, mixed_x_train, male_y_train, mixed_y_train, female_x_train, female_x_dev, female_x_test, ridge)    
make_predictions(rr, train, female_y_train, dev, female_y_dev, test, female_y_test)
nn, train, dev, test = pred(male_x_train, mixed_x_train, male_y_train, mixed_y_train, female_x_train, female_x_dev, female_x_test, neural_net)    
make_predictions(nn, train, female_y_train, dev, female_y_dev, test, female_y_test)

print("PRED male target: ")
rr, train, dev, test = pred(female_x_train, mixed_x_train, female_y_train, mixed_y_train, male_x_train, male_x_dev, male_x_test, ridge)    
make_predictions(rr, train, male_y_train, dev, male_y_dev, test, male_y_test)
nn, train, dev, test = pred(female_x_train, mixed_x_train, female_y_train, mixed_y_train, male_x_train, male_x_dev, male_x_test, neural_net)    
make_predictions(nn, train, male_y_train, dev, male_y_dev, test, male_y_test)

PRED mixed target: 
mse: 109.26973139802593
mse: 114.54922279792746
PRED female target: 
mse: 160.24704084160345
mse: 129.66275659824046
PRED male target: 
mse: 148.9383099879814
mse: 159.0563909774436




In [None]:
# Mixed target data formatting
x1, y1 = src_data_format(male_x_train, female_x_train, male_y_train, female_y_train)
rr_src1, nn_src1 = baseline(x1, y1, mixed_x_dev)

x1, y1 = tgt_data_format(mixed_x_train, mixed_y_train)
rr_tgt1, nn_tgt1 = baseline(x1, y1, mixed_x_dev)

# Female target data formatting
x2, y2 = src_data_format(male_x_train, mixed_x_train, male_y_train, mixed_y_train)
rr_src2, nn_src2 = baseline(x1, y1, female_x_dev)

x2, y2 = tgt_data_format(female_x_train, female_y_train)
rr_tgt2, nn_tgt2 = baseline(x1, y1, female_x_dev)

# Male target data formatting
x3, y3 = src_data_format(mixed_x_train, female_x_train, mixed_y_train, female_y_train)
rr_src3, nn_src3 = baseline(x1, y1, male_x_dev)

x3, y3 = tgt_data_format(male_x_train, male_y_train)
rr_tgt3, nn_tgt3 = baseline(x1, y1, male_x_dev)

In [None]:
print("Mixed target Ridge Regression LININT model:")
predictions = linint(rr_src1, rr_tgt1, mixed_x_dev, mixed_y_dev, mixed_x_test)
linint_mse(predictions, mixed_y_dev)

print("Mixed target Neural Net LININT model:")
predictions = linint(nn_src1, nn_tgt1, mixed_x_dev, mixed_y_dev, mixed_x_test)
linint_mse(predictions, mixed_y_dev)

print("Female target Ridge Regression LININT model:")
predictions = linint(rr_src2, rr_tgt2, female_x_dev, female_y_dev, female_x_test)
linint_mse(predictions, female_y_dev)

print("Female target Neural Net LININT model:")
predictions = linint(nn_src1, nn_tgt1, female_x_dev, female_y_dev, female_x_test)
linint_mse(predictions, female_y_dev)

print("Male target Ridge Regression LININT model:")
predictions = linint(rr_src3, rr_tgt3, male_x_dev, male_y_dev, male_x_test)
linint_mse(predictions, male_y_dev)

print("Male target Neural Net LININT model:")
predictions = linint(nn_src3, nn_tgt3, male_x_dev, male_y_dev, male_x_test)
linint_mse(predictions, male_y_dev)



In [204]:
# number is 1 2 3 denoting which positition x lies in the <x, i, i, i> vector
# male would be <x,x,0,0>, female <x,0,x,0> and mixed <x,0,0,x> 
def feda(source_x, number):
    new_data = []
    for row in source_x:
        expanded_row = []
        for i in row:
            expanded_row.append(i)
            if number == 1:
                expanded_row.append(i)
                expanded_row.append(0)
                expanded_row.append(0)
            elif number == 2:
                expanded_row.append(0)
                expanded_row.append(i)
                expanded_row.append(0)
            elif number == 3:
                expanded_row.append(0)
                expanded_row.append(0)
                expanded_row.append(i)
        new_data.append(expanded_row)
    data = np.array(new_data)
    return data
            

In [205]:
x1, y1 = tgt_data_format(male_x_train, male_y_train)
x2, y2 = tgt_data_format(female_x_train, female_y_train)
x3, y3 = tgt_data_format(mixed_x_train, mixed_y_train)

print("Mixed target FEDA TGTONLY model:")
rr_predictions, nn_predictions = baseline(feda(x3, 3), y3, feda(mixed_x_test, 3))
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target FEDA TGTONLY model:")
rr_predictions, nn_predictions =  baseline(feda(x2, 2), y2, feda(female_x_test, 2))
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target FEDA TGTONLY model:")
rr_predictions, nn_predictions = baseline(feda(x1,1), y1, feda(male_x_test, 1))
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target FEDA TGTONLY model:
Ridge regression mse: 138.86195035151235
Neural net mse: 317.2931854199683
Female target FEDA TGTONLY model:
Ridge regression mse: 138.95148769323373
Neural net mse: 176.97067448680352
Male target FEDA TGTONLY model:
Ridge regression mse: 157.41649525390852
Neural net mse: 196.11278195488723


In [206]:
feda_male = feda(male_x_train, 1)
feda_female = feda(female_x_train, 2)
feda_mixed = feda(mixed_x_train, 3)

print("Mixed target FEDA SRCONLY:")
x1, y1 = src_data_format(feda_male, feda_female, male_y_train, female_y_train)
x1_test = feda(mixed_x_test, 3)
rr_predictions, nn_predictions = baseline(x1, y1, x1_test)
mse(rr_predictions, nn_predictions, mixed_y_test)

print("Female target FEDA SRCONLY:")
x2, y2 = src_data_format(feda_male, feda_mixed, male_y_train, mixed_y_train)
x2_test = feda(female_x_test, 2)
rr_predictions, nn_predictions = baseline(x2, y2, x2_test)
mse(rr_predictions, nn_predictions, female_y_test)

print("Male target FEDA SRCONLY:")
x3, y3 = src_data_format(feda_female, feda_mixed, female_y_train, mixed_y_train)
x3_test = feda(male_x_test, 1)
rr_predictions, nn_predictions = baseline(x1, y1, x3_test)
mse(rr_predictions, nn_predictions, male_y_test)

Mixed target FEDA SRCONLY:
Ridge regression mse: 143.33396971646772
Neural net mse: 235.80348652931855
Female target FEDA SRCONLY:
Ridge regression mse: 177.21325080233805
Neural net mse: 197.0791788856305
Male target FEDA SRCONLY:
Ridge regression mse: 153.98807279747416
Neural net mse: 143.90977443609023


In [281]:
def find_similarity(source_x, source_y, target_x, threshold):
    x = []
    y = []
    for i in range(0, len(source_x)):
        src_row = source_x[i]
        similarity = 0
        for tgt_row in target_x[:100]:
            similarity += sum((src_row - tgt_row)**2)
        similarity /= 100
        if similarity < threshold:
            x.append(src_row)
            y.append(source_y[i])
    print(str(len(y)) + " samples were found to be similar to the target data.")
    return x, y

In [284]:
print("Mixed is the target:")
x_new1, y_new1 = find_similarity(male_x_train, male_y_train, mixed_x_train, 400)
x_new2, y_new2 = find_similarity(female_x_train, female_y_train, mixed_x_train, 400)

x1 = np.concatenate([mixed_x_train[:100], x_new1, x_new2])
y1 = np.concatenate([mixed_y_train[:100], y_new1, y_new2])

print("Female is the target:")
x_new1, y_new1 = find_similarity(male_x_train, male_y_train, female_x_train, 400)
x_new2, y_new2 = find_similarity(mixed_x_train, mixed_y_train, female_x_train, 400)

x2 = np.concatenate([female_x_train[:100], x_new1, x_new2])
y2 = np.concatenate([female_y_train[:100], y_new1, y_new2])

print("Male is the target:")
x_new1, y_new1 = find_similarity(female_x_train, female_y_train, male_x_train, 400)
x_new2, y_new2 = find_similarity(mixed_x_train, mixed_y_train, male_x_train, 400)

x2 = np.concatenate([male_x_train[:100], x_new1, x_new2])
y2 = np.concatenate([male_y_train[:100], y_new1, y_new2])

Mixed is the target:
1312 samples were found to be similar to the target data.
1295 samples were found to be similar to the target data.
Female is the target:
1793 samples were found to be similar to the target data.
2592 samples were found to be similar to the target data.
Male is the target:
1861 samples were found to be similar to the target data.
3408 samples were found to be similar to the target data.


In [285]:
print("-----MIXED TARGET-----")
print("TGT ELM:")
elm_poly.train(np.column_stack([mixed_y_train[:100], mixed_x_train[:100]]))
print(elm_poly.test(np.column_stack((mixed_y_test,mixed_x_test))).get("mse"))

print("TGT ELM with added threshold data:")
elm_poly.train(np.column_stack((y1,x1)))
print(elm_poly.test(np.column_stack((mixed_y_test,mixed_x_test))).get("mse"))
      
x1 = np.concatenate([mixed_x_train[:100], male_x_train, female_x_train])
y1 = np.concatenate([mixed_y_train[:100], male_y_train, female_y_train])
print("ALL ELM:")
elm_poly.train(np.column_stack((y1,x1)))
print(elm_poly.test(np.column_stack((mixed_y_test,mixed_x_test))).get("mse"))

-----MIXED TARGET-----
TGT ELM:
158.23582537456818
TGT ELM with added threshold data:
128.1621717383944
ALL ELM:
112.16138503843507


In [286]:
print("-----Female TARGET-----")
print("TGT ELM:")
elm_poly.train(np.column_stack([female_y_train[:100], female_x_train[:100]]))
print(elm_poly.test(np.column_stack((female_y_test, female_x_test))).get("mse"))

print("TGT ELM with added threshold data:")
elm_poly.train(np.column_stack((y2,x2)))
print(elm_poly.test(np.column_stack((female_y_test,female_x_test))).get("mse"))
      
x2 = np.concatenate([female_x_train[:100], male_x_train, mixed_x_train])
y2 = np.concatenate([female_y_train[:100], male_y_train, mixed_y_train])
print("ALL ELM:")
elm_poly.train(np.column_stack((y2,x2)))
print(elm_poly.test(np.column_stack((female_y_test,female_x_test))).get("mse"))

-----Female TARGET-----
TGT ELM:
139.43175060932026
TGT ELM with added threshold data:
127.4104173106588
ALL ELM:
109.62844405271157


In [287]:
print("-----Male TARGET-----")
print("TGT ELM:")
elm_poly.train(np.column_stack([male_y_train[:100], male_x_train[:100]]))
print(elm_poly.test(np.column_stack((male_y_test, male_x_test))).get("mse"))

print("TGT ELM with added threshold data:")
elm_poly.train(np.column_stack((y3,x3)))
print(elm_poly.test(np.column_stack((male_y_test,male_x_test))).get("mse"))
      
x3 = np.concatenate([male_x_train[:100], female_x_train, mixed_x_train])
y3 = np.concatenate([male_y_train[:100], female_y_train, mixed_y_train])
print("ALL ELM:")
elm_poly.train(np.column_stack((y3,x3)))
print(elm_poly.test(np.column_stack((male_y_test,male_x_test))).get("mse"))

-----Male TARGET-----
TGT ELM:
156.7855629084097
TGT ELM with added threshold data:
117.60682751845277
ALL ELM:
117.33856661143736


In [61]:
from elm import ELM
from sklearn.model_selection import ShuffleSplit, KFold, cross_val_score

x1, y1 = src_data_format(male_x_train, female_x_train, male_y_train, female_y_train)
x2, y2 = src_data_format(male_x_train, mixed_x_train, male_y_train, mixed_y_train)
x3, y3 = src_data_format(female_x_train, mixed_x_train, female_y_train, mixed_y_train)
hid_nums = [10, 50, 100, 250, 400]
'''
for number in hid_nums:
    print(number, end=' ')
    e = ELM(number)
    ave = 0
    for i in range(10):
        cv = KFold(n_splits=5, shuffle=True)
        scores = cross_val_score(e, x2.astype(int), y2.astype(int),cv=cv, scoring='accuracy', n_jobs=-1)
        ave += scores.mean()
    ave /= 10
    print("Accuracy: %0.3f " % (ave))
'''
elm = ELM(hid_num=250).fit(x1.astype(int), y1.astype(int))
preds = elm.predict(mixed_x_test.astype(int))
error = mean_squared_error(mixed_y_test.astype(int), preds)
print("ELM mse: " + str(error))

#print("ELM Accuracy %0.3f " % elm.score(mixed_x_test.astype(int), mixed_y_test.astype(int)))

ELM mse: 208.42472266244056


In [165]:
import elm.elmk
# first argument is a kernel function 
# second iscoefficient C of regularization,
# third is a list of arguments for the kernel function.
# safe values -> params = ["poly", 0.9993040203345642, [2.470566656543985, 2.4474257958252617]]
params = ["poly", 1.0722153359905189, [3.06744136499759, 2.3780118881296826]]
elm_poly = elm.ELMKernel(params)
params = ["linear", 250, []]
elm_linear = elm.ELMKernel(params)
params = ["rbf", 5.092921315186362, [-14.097889701989264]]
elm_rbf = elm.ELMKernel(params)

In [166]:
total_data = np.concatenate([np.column_stack((male_y_test,male_x_test)),\
                             np.column_stack((female_y_test,female_x_test)),\
                             np.column_stack((mixed_y_test,mixed_x_test))])
elm_poly.search_param(total_data, kf = ["rbf"])
elm_poly.search_param(total_data, kf = ["linear"])
elm_poly.search_param(total_data, kf = ["poly"])

elmk
##### Start search #####




Kernel function:  rbf  best cv value:  12.458890608230988
##### Search complete #####

Regressor Parameters

Regularization coefficient:  3.4605337430331766
Kernel Function:  rbf
Kernel parameters:  [-11.869577640638505]

CV error:  12.458890608230988

elmk
##### Start search #####
Kernel function:  linear  best cv value:  12.494790157920074
##### Search complete #####

Regressor Parameters

Regularization coefficient:  -1.4038826250230767
Kernel Function:  linear
Kernel parameters:  []

CV error:  12.494790157920074

elmk
##### Start search #####
Kernel function:  poly  best cv value:  11.138483353497202
##### Search complete #####

Regressor Parameters

Regularization coefficient:  2.497220939205878
Kernel Function:  poly
Kernel parameters:  [2.81682235152461, 2.269563054158114]

CV error:  11.138483353497202



In [280]:
x1, y1 = src_data_format(male_x_train, female_x_train, male_y_train, female_y_train)
x2, y2 = src_data_format(male_x_train, mixed_x_train, male_y_train, mixed_y_train)
x3, y3 = src_data_format(female_x_train, mixed_x_train, female_y_train, mixed_y_train)
elm_rbf.train(np.column_stack((y2,x2)))
print(elm_rbf.test(np.column_stack((female_y_test,female_x_test))).get("mse"))

141.5591255698434
