In [2]:
import csv
import sys
import numpy as np

In [3]:
# Print the version string of the Python interpreter running this notebook.
print(sys.version)

3.7.4 (v3.7.4:e09359112e, Jul  8 2019, 14:54:52) 
[Clang 6.0 (clang-600.0.57)]


In [48]:
def _load_features_and_target(csv_path):
    """Read one comma-separated file and split off its last column.

    The first line of the file is skipped as a header. Returns the full
    table, every column except the last (features), and the last column
    kept as a 2-D single-column slice (target).
    """
    table = np.genfromtxt(csv_path, delimiter=',', skip_header=1)
    features, target = table[:, :-1], table[:, -1:]
    return table, features, target


male_data, male_x, male_y = _load_features_and_target('Data/MALE.csv')
female_data, female_x, female_y = _load_features_and_target('Data/FEMALE.csv')
mixed_data, mixed_x, mixed_y = _load_features_and_target('Data/MIXED.csv')

In [5]:
from sklearn.model_selection import train_test_split

def _train_dev_test_split(features, target):
    """Split one domain's data into train / dev / test parts.

    10% of the rows are held out via ``train_test_split`` (seeded with
    ``random_state=1``). The first 100 held-out rows form the dev set and
    the remaining held-out rows form the test set.

    Returns ``(x_train, x_dev, x_test, y_train, y_dev, y_test)``.
    """
    x_train, x_heldout, y_train, y_heldout = train_test_split(
        features, target, test_size=0.1, random_state=1)
    return (x_train, x_heldout[:100], x_heldout[100:],
            y_train, y_heldout[:100], y_heldout[100:])


(male_x_train, male_x_dev, male_x_test,
 male_y_train, male_y_dev, male_y_test) = _train_dev_test_split(male_x, male_y)

(female_x_train, female_x_dev, female_x_test,
 female_y_train, female_y_dev, female_y_test) = _train_dev_test_split(female_x, female_y)

(mixed_x_train, mixed_x_dev, mixed_x_test,
 mixed_y_train, mixed_y_dev, mixed_y_test) = _train_dev_test_split(mixed_x, mixed_y)

In [6]:
# Three train/test groupings, each training on two domains and testing on
# the remaining one:
#   group1: train (male, female)  -> test mixed
#   group2: train (male, mixed)   -> test female
#   group3: train (female, mixed) -> test male
# Feature matrices are stacked row-wise; targets are flattened to 1-D.
group1_training_x = np.vstack((male_x_train, female_x_train))
group1_training_y = np.concatenate((male_y_train.ravel(), female_y_train.ravel()))
group1_test_x, group1_test_y = mixed_x_test, mixed_y_test

group2_training_x = np.vstack((male_x_train, mixed_x_train))
group2_training_y = np.concatenate((male_y_train.ravel(), mixed_y_train.ravel()))
group2_test_x, group2_test_y = female_x_test, female_y_test

group3_training_x = np.vstack((female_x_train, mixed_x_train))
group3_training_y = np.concatenate((female_y_train.ravel(), mixed_y_train.ravel()))
group3_test_x, group3_test_y = male_x_test, male_y_test

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import MLPClassifier

# Shared estimator instances; `baseline` refits both of them on every call.
regression_model = LinearRegression()

# random_state is pinned (same seed as the train/test split) so that the
# network's weight initialisation and batch shuffling are reproducible when
# the notebook is re-run from a fresh kernel.
# NOTE(review): this is a classifier, yet its predictions are scored with
# mean squared error against the target column — confirm that a classifier
# (rather than a regressor) is what is intended here.
neural_net = MLPClassifier(
    hidden_layer_sizes=(8, 8, 8, 8),
    max_iter=350,
    activation='logistic',
    random_state=1,
)

In [42]:
def baseline(training_x, training_y, test_x, test_y):
    """Fit both shared models and report their mean squared error on the test set.

    The module-level ``regression_model`` and ``neural_net`` are refit in
    place on ``(training_x, training_y)``, so any previous fit of those
    estimators is replaced. Both scores are printed and also returned, so
    callers that bind the result get the numbers instead of ``None``.

    Parameters
    ----------
    training_x : array-like
        Training feature matrix passed to ``fit``.
    training_y : array-like
        Training targets passed to ``fit``.
    test_x : array-like
        Feature matrix passed to ``predict``.
    test_y : array-like
        True targets the predictions are scored against.

    Returns
    -------
    tuple
        ``(linear_regression_mse, neural_net_mse)``.
    """
    regression_model.fit(training_x, training_y)
    neural_net.fit(training_x, training_y)

    mse_lr = mean_squared_error(test_y, regression_model.predict(test_x))
    mse_nn = mean_squared_error(test_y, neural_net.predict(test_x))

    print("Linear regression mse: " + str(mse_lr))
    print("Neural net mse: " + str(mse_nn))
    return mse_lr, mse_nn

In [43]:
# Source-only runs: train on each two-domain group, score on the test split
# of the domain that was left out of that group.
srconly_mixed_target = baseline(
    training_x=group1_training_x, training_y=group1_training_y,
    test_x=mixed_x_test, test_y=mixed_y_test)
srconly_female_target = baseline(
    training_x=group2_training_x, training_y=group2_training_y,
    test_x=female_x_test, test_y=female_y_test)
srconly_male_target = baseline(
    training_x=group3_training_x, training_y=group3_training_y,
    test_x=male_x_test, test_y=male_y_test)



Linear regression mse: 147.23280235872892
Neural net mse: 117.83835182250397
Linear regression mse: 187.36033295257312
Neural net mse: 190.63929618768327
Linear regression mse: 165.89154613676692
Neural net mse: 166.36466165413535


In [44]:
# Target-only runs: train on just the first 100 training rows of a domain,
# score on that same domain's test split.
tgtonly_male_target = baseline(
    training_x=male_x_train[:100], training_y=male_y_train[:100].ravel(),
    test_x=male_x_test, test_y=male_y_test)
tgtonly_female_target = baseline(
    training_x=female_x_train[:100], training_y=female_y_train[:100].ravel(),
    test_x=female_x_test, test_y=female_y_test)
tgtonly_mixed_target = baseline(
    training_x=mixed_x_train[:100], training_y=mixed_y_train[:100].ravel(),
    test_x=mixed_x_test, test_y=mixed_y_test)

Linear regression mse: 197.5487341809297
Neural net mse: 163.15037593984962
Linear regression mse: 185.17158690855237
Neural net mse: 321.4046920821114
Linear regression mse: 159.44789235986846
Neural net mse: 194.58161648177497


In [11]:
# "All" training sets: each two-domain group extended with the first 100
# training rows of the domain it is tested on.
group1_training_x_all = np.vstack((group1_training_x, mixed_x_train[:100]))
group1_training_y_all = np.hstack((group1_training_y, mixed_y_train[:100].ravel()))

group2_training_x_all = np.vstack((group2_training_x, female_x_train[:100]))
group2_training_y_all = np.hstack((group2_training_y, female_y_train[:100].ravel()))

group3_training_x_all = np.vstack((group3_training_x, male_x_train[:100]))
group3_training_y_all = np.hstack((group3_training_y, male_y_train[:100].ravel()))

In [12]:
# "All" runs: train on the group data plus 100 target-domain rows, score on
# the target domain's test split.
all_mixed_target = baseline(
    training_x=group1_training_x_all, training_y=group1_training_y_all,
    test_x=mixed_x_test, test_y=mixed_y_test)
all_female_target = baseline(
    training_x=group2_training_x_all, training_y=group2_training_y_all,
    test_x=female_x_test, test_y=female_y_test)
all_male_target = baseline(
    training_x=group3_training_x_all, training_y=group3_training_y_all,
    test_x=male_x_test, test_y=male_y_test)



Group one logistic regression mse: 147.25984490027892
Group one neural net mse: 123.17749603803486
Group one logistic regression mse: 186.9914600177854
Group one neural net mse: 180.57771260997066
Group one logistic regression mse: 165.90069417469795
Group one neural net mse: 159.13533834586465


In [13]:
def weighted(source_x, target_x, N, M):
    """Stack down-scaled source rows on top of (at most 100) target rows.

    Every value in ``source_x`` is multiplied by ``M / N``; the result is
    concatenated row-wise with the first 100 rows of ``target_x``.

    NOTE(review): the factor is applied to the source *feature values*.
    If the intent was to weight source examples in the training loss,
    that is a different operation — confirm which one is wanted.
    """
    scaled_source = source_x * (M / N)
    return np.concatenate([scaled_source, target_x[:100]])
    
# Build the weighted feature matrices: N is the row count of the group's
# training targets, M the row count of the 100-row target-domain slice.
group1_training_x_weighted = weighted(
    source_x=group1_training_x, target_x=mixed_x_train[:100],
    N=len(group1_training_y), M=len(mixed_y_train[:100]))
group2_training_x_weighted = weighted(
    source_x=group2_training_x, target_x=female_x_train[:100],
    N=len(group2_training_y), M=len(female_y_train[:100]))
group3_training_x_weighted = weighted(
    source_x=group3_training_x, target_x=male_x_train[:100],
    N=len(group3_training_y), M=len(male_y_train[:100]))

In [14]:
# Weighted runs: same targets as the "all" training sets, but with the
# weighted feature matrices as input.
weighted_mixed_target = baseline(
    training_x=group1_training_x_weighted, training_y=group1_training_y_all,
    test_x=mixed_x_test, test_y=mixed_y_test)
weighted_female_target = baseline(
    training_x=group2_training_x_weighted, training_y=group2_training_y_all,
    test_x=female_x_test, test_y=female_y_test)
weighted_male_target = baseline(
    training_x=group3_training_x_weighted, training_y=group3_training_y_all,
    test_x=male_x_test, test_y=male_y_test)

Group one logistic regression mse: 184.0583367908611
Group one neural net mse: 160.71790808240888
Group one logistic regression mse: 194.27084166642808
Group one neural net mse: 235.43108504398828
Group one logistic regression mse: 210.71997287277645
Group one neural net mse: 163.65413533834587


In [15]:
def feda(source_x, number):
    """Expand every feature into a 4-slot group ``<x, ., ., .>``.

    For each feature value ``x`` the output holds four consecutive columns:
    slot 0 is always ``x``, slot ``number`` (1, 2 or 3) is ``x`` again, and
    the other two slots are 0. Groups are laid out feature by feature, so a
    row ``[a, b]`` with ``number=1`` becomes ``[a, a, 0, 0, b, b, 0, 0]``.

    Parameters
    ----------
    source_x : array-like, 2-D
        One row per example, one column per feature.
    number : int
        1, 2 or 3 — which of the three trailing slots receives the copy
        of each feature.

    Returns
    -------
    numpy.ndarray
        Array with the same number of rows and four times as many columns.

    Raises
    ------
    ValueError
        If ``number`` is not 1, 2 or 3 (previously this silently produced
        rows without any extra slots), or if ``source_x`` is not 2-D.
    """
    if number not in (1, 2, 3):
        raise ValueError("number must be 1, 2 or 3, got " + repr(number))

    source = np.asarray(source_x)
    if source.ndim != 2:
        raise ValueError("source_x must be 2-D, got %d-D" % source.ndim)

    n_rows, n_features = source.shape
    # One 4-slot group per feature; slots not written below stay zero.
    slots = np.zeros((n_rows, n_features, 4), dtype=source.dtype)
    slots[:, :, 0] = source
    slots[:, :, number] = source
    # Flattening the last two axes yields the per-feature interleaved layout.
    return slots.reshape(n_rows, n_features * 4)
            

In [41]:
# Target-only runs on FEDA-expanded features (male -> slot 1, female -> slot 2,
# mixed -> slot 3), using the first 100 training rows of each domain.
tgtonly_male_target_feda = baseline(
    training_x=feda(male_x_train[:100], 1),
    training_y=male_y_train[:100].ravel(),
    test_x=feda(male_x_test, 1), test_y=male_y_test)
tgtonly_female_target_feda = baseline(
    training_x=feda(female_x_train[:100], 2),
    training_y=female_y_train[:100].ravel(),
    test_x=feda(female_x_test, 2), test_y=female_y_test)
tgtonly_mixed_target_feda = baseline(
    training_x=feda(mixed_x_train[:100], 3),
    training_y=mixed_y_train[:100].ravel(),
    test_x=feda(mixed_x_test, 3), test_y=mixed_y_test)

Group one logistic regression mse: 197.5487341809297
Group one neural net mse: 163.15037593984962
Group one logistic regression mse: 185.17158690855246
Group one neural net mse: 188.05571847507332
Group one logistic regression mse: 159.44789235986846
Group one neural net mse: 194.58161648177497


In [55]:
# Per-feature expansion used for each domain:
#   male   -> <x, x, 0, 0>
#   female -> <x, 0, x, 0>
#   mixed  -> <x, 0, 0, x>
feda_male = feda(male_x_train, 1)
feda_female = feda(female_x_train, 2)
feda_mixed = feda(mixed_x_train, 3)

# Two-domain training groups on the expanded features (targets flattened to 1-D).
group1_training_x_feda = np.vstack((feda_male, feda_female))
group1_training_y_feda = np.concatenate((male_y_train.ravel(), female_y_train.ravel()))
group2_training_x_feda = np.vstack((feda_male, feda_mixed))
group2_training_y_feda = np.concatenate((male_y_train.ravel(), mixed_y_train.ravel()))
group3_training_x_feda = np.vstack((feda_female, feda_mixed))
group3_training_y_feda = np.concatenate((female_y_train.ravel(), mixed_y_train.ravel()))

# Each group extended with the first 100 expanded training rows of the
# remaining (third) domain.
group1_training_x_all_feda = np.vstack((group1_training_x_feda, feda_mixed[:100]))
group1_training_y_all_feda = np.hstack((group1_training_y_feda, mixed_y_train[:100].ravel()))
group2_training_x_all_feda = np.vstack((group2_training_x_feda, feda_female[:100]))
group2_training_y_all_feda = np.hstack((group2_training_y_feda, female_y_train[:100].ravel()))
group3_training_x_all_feda = np.vstack((group3_training_x_feda, feda_male[:100]))
group3_training_y_all_feda = np.hstack((group3_training_y_feda, male_y_train[:100].ravel()))

In [56]:
# FEDA runs: train on the expanded group data plus 100 expanded target rows,
# score on the target domain's test split expanded with the same slot number.
feda_mixed_target = baseline(
    training_x=group1_training_x_all_feda, training_y=group1_training_y_all_feda,
    test_x=feda(mixed_x_test, 3), test_y=mixed_y_test)
feda_female_target = baseline(
    training_x=group2_training_x_all_feda, training_y=group2_training_y_all_feda,
    test_x=feda(female_x_test, 2), test_y=female_y_test)
feda_male_target = baseline(
    training_x=group3_training_x_all_feda, training_y=group3_training_y_all_feda,
    test_x=feda(male_x_test, 1), test_y=male_y_test)




Linear regression mse: 157.4846132342138
Neural net mse: 170.61965134706816
Linear regression mse: 177.9834077460488
Neural net mse: 212.09970674486803
Linear regression mse: 176.76540958121961
Neural net mse: 232.40225563909775


In [None]:
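# Pasted output text (same values as the In [12] results above), commented
# out so that this cell is valid Python when the notebook is run top to bottom.
# Group one logistic regression mse: 147.25984490027892
# Group one neural net mse: 123.17749603803486
# Group one logistic regression mse: 186.9914600177854
# Group one neural net mse: 180.57771260997066
# Group one logistic regression mse: 165.90069417469795
# Group one neural net mse: 159.13533834586465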