In [52]:
import torch
from torch.autograd import Variable
from sklearn.model_selection import KFold
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import json
import csv
from scipy.stats import spearmanr

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Record the PyTorch version this notebook was executed with.
print(torch.version.__version__)

0.4.1


In [62]:
# Seed PyTorch so that weight initialisation is repeatable across runs.
torch.manual_seed(1)

# Load the feature matrix and the target scores from JSON.
# A missing/unreadable file is reported and then re-raised: carrying on would
# either fail below with a NameError or, worse, silently reuse stale
# `features` / `scores` values left over in the kernel from an earlier run.
try:
    with open("./Features/Features_data/featuretest.json", encoding='UTF8') as f:
        features = json.load(f)
except EnvironmentError:
    print('No Feature File')
    raise

try:
    with open("./Features/Features_data/scoretest.json", encoding='UTF8') as f:
        scores = json.load(f)
except EnvironmentError:
    print("No Score File")
    raise

x = np.array(features)
print(x.shape)

# Targets as a column vector so they line up with the network's (n, 1) output.
y = np.reshape(np.array(scores), (-1, 1))
print(y.shape)

# Fail early, with a readable message, if the two files do not describe the
# same samples or the features are not a rectangular 2-D table.
assert x.ndim == 2, "features must form a 2-D (samples, features) array"
assert len(x) == len(y), "features and scores must have the same number of rows"

# Derive the network input width from the data instead of hard-coding it.
feature_num = x.shape[1]

# Fixed random_state: torch.manual_seed does not seed NumPy, so without it the
# shuffled folds (and every reported metric) would change on each run.
kf = KFold(n_splits=5, shuffle=True, random_state=1)

(1988, 16)
(1988, 1)


In [63]:
class Net(torch.nn.Module):
    """Feed-forward regressor: three ReLU hidden layers and a linear output layer."""

    def __init__(self, n_feature, n_hidden1, n_hidden2, n_hidden3, n_output):
        """Create the four linear layers with the given input, hidden and output widths."""
        super().__init__()
        make_layer = torch.nn.Linear
        # Layers are built in input-to-output order.
        self.hidden = make_layer(n_feature, n_hidden1)
        self.hidden2 = make_layer(n_hidden1, n_hidden2)
        self.hidden3 = make_layer(n_hidden2, n_hidden3)
        self.predict = make_layer(n_hidden3, n_output)

    def forward(self, x):
        """Apply each hidden layer followed by ReLU, then the un-activated output layer."""
        activations = x
        for hidden_layer in (self.hidden, self.hidden2, self.hidden3):
            activations = F.relu(hidden_layer(activations))
        return self.predict(activations)

In [64]:
# Grid search over the widths of the three hidden layers. Every configuration
# is scored by the Spearman rank correlation between true and predicted test
# targets, averaged over the folds produced by `kf`.
HIDDEN_SIZES = [10, 20, 40, 50]
LEARNING_RATE = 0.01
# The model is scored right after its 401st full-batch update (this is the
# `t == 400` checkpoint of the earlier 600-step loop). Training any further
# cannot influence the reported numbers, so it is not done.
N_EPOCHS = 401
# Predictions are clamped to [1, 5] before ranking
# (presumably the valid score range -- TODO confirm).
SCORE_MIN, SCORE_MAX = 1, 5


def _score_fold(train_index, test_index, n1, n2, n3):
    """Train a fresh Net on one fold and return (rho, pvalue) on its test split.

    Parameters
    ----------
    train_index, test_index : index arrays selecting rows of `x` / `y`.
    n1, n2, n3 : widths of the first, second and third hidden layer.

    Returns
    -------
    (rho, pvalue) : Spearman correlation and its p-value between the test
        targets and the clamped test predictions.
    """
    X_train = torch.from_numpy(x[train_index]).float()
    y_train = torch.from_numpy(y[train_index]).float()
    X_test = torch.from_numpy(x[test_index]).float()
    y_test = torch.from_numpy(y[test_index]).float()

    net = Net(n_feature=feature_num, n_hidden1=n1, n_hidden2=n2, n_hidden3=n3, n_output=1)
    optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE)
    loss_func = torch.nn.MSELoss()

    # Full-batch training: one optimiser step per epoch.
    for _ in range(N_EPOCHS):
        loss = loss_func(net(X_train), y_train)   # (prediction, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation only: no autograd graph is needed for the test predictions.
    with torch.no_grad():
        y_test_pred = torch.clamp(net(X_test), min=SCORE_MIN, max=SCORE_MAX)

    # Compute the correlation once and read both fields from the same result.
    result = spearmanr(y_test.numpy(), y_test_pred.numpy())
    return result.correlation, result.pvalue


# Named `best_rho` rather than `max` so the builtin max() is not shadowed for
# the remainder of the kernel session. Starting at -inf (not 0) keeps the
# result correct even if every configuration has a negative correlation.
best_rho = float("-inf")
for n1 in HIDDEN_SIZES:
    for n2 in HIDDEN_SIZES:
        for n3 in HIDDEN_SIZES:
            print("n_hidden1 : " + str(n1) + ", n_hidden2 : " + str(n2) + ", n_hidden3 : " + str(n3))

            fold_results = [
                _score_fold(train_index, test_index, n1, n2, n3)
                for train_index, test_index in kf.split(x)
            ]
            # Average over however many folds `kf` actually produced instead
            # of assuming a hard-coded 5.
            n_folds = len(fold_results)
            average_rho = sum(rho for rho, _ in fold_results) / n_folds
            average_pvalue = sum(pvalue for _, pvalue in fold_results) / n_folds

            print("average_rho : " + str(average_rho) + " average_pvalue : " + str(average_pvalue))
            if best_rho < average_rho:
                best_rho = average_rho
print(best_rho)

n_hidden1 : 10, n_hidden2 : 10, n_hidden3 : 10
average_rho : 0.6523601805263753 average_pvalue : 1.9690769149296373e-46
n_hidden1 : 10, n_hidden2 : 10, n_hidden3 : 20
average_rho : 0.6487646378190167 average_pvalue : 6.189556339787256e-44
n_hidden1 : 10, n_hidden2 : 10, n_hidden3 : 40
average_rho : 0.6550069289330999 average_pvalue : 3.1943823497890327e-49
n_hidden1 : 10, n_hidden2 : 10, n_hidden3 : 50
average_rho : 0.6486712897416882 average_pvalue : 1.0810746469377854e-39
n_hidden1 : 10, n_hidden2 : 20, n_hidden3 : 10
average_rho : 0.6499081911504673 average_pvalue : 3.033679841115183e-46
n_hidden1 : 10, n_hidden2 : 20, n_hidden3 : 20
average_rho : 0.65183662345208 average_pvalue : 9.729624050378587e-40
n_hidden1 : 10, n_hidden2 : 20, n_hidden3 : 40
average_rho : 0.6485518357216428 average_pvalue : 3.118376097509642e-46
n_hidden1 : 10, n_hidden2 : 20, n_hidden3 : 50
average_rho : 0.6577709437118031 average_pvalue : 3.716508414607915e-46
n_hidden1 : 10, n_hidden2 : 40, n_hidden3 : 10
