In [197]:
import torch
from torch.autograd import Variable
from sklearn.model_selection import KFold
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import json
import csv
from scipy.stats import spearmanr

%matplotlib inline

print(torch.version.__version__)

0.4.1


In [223]:
torch.manual_seed(1)
try:
    with open("./Features/Features_data/featuretest.json", encoding='UTF8') as f:
        features = json.load(f)
except EnvironmentError:
    print('No Feature File')

try:
    with open("./Features/Features_data/scoretest.json", encoding='UTF8') as f:
        scores = json.load(f)
except EnvironmentError:
    print("No Score File")

feature_num = 36
x = np.array(features)
print(x.shape)

y = np.array(scores)
y = np.reshape(y,(-1,1))
#y = torch.unsqueeze(y, dim = 1)
print(y.shape)
kf = KFold(n_splits = 5, shuffle = True)

(1988, 36)
(1988, 1)


In [216]:
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden1, n_hidden2, n_hidden3, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden1)   # hidden layer
        self.hidden2 = torch.nn.Linear(n_hidden1, n_hidden2)   # hidden layer
        self.hidden3 = torch.nn.Linear(n_hidden2, n_hidden3)   # hidden layer
        self.predict = torch.nn.Linear(n_hidden3, n_output)   # output layer

    def forward(self, x):
        x = F.relu(self.hidden(x))      # activation function for hidden layer
        x = F.relu(self.hidden2(x))      # activation function for hidden layer
        x = F.relu(self.hidden3(x))      # activation function for hidden layer
        x = self.predict(x)             # linear output
        return x

In [224]:
class Net(torch.nn.Module):
    def __init__(self, layer_list, n_output):
        super(Net, self).__init__()
        self.hidden_list = torch.nn.ModuleList([])
        for idx, value in enumerate(layer_list):
            if idx == len(layer_list) - 1:
                break
            else:
                self.hidden_list.append(torch.nn.Linear(value, layer_list[idx + 1]))
        self.predict = torch.nn.Linear(layer_list[len(layer_list) - 1], n_output) #output layer

    def forward(self, x):
        for idx, value in enumerate(self.hidden_list):
            x = F.relu(self.hidden_list[idx](x))
        x = self.predict(x)             # linear output
        return x

In [229]:
X_train = torch.from_numpy(x)
y_train = torch.from_numpy(y)
X_train, y_train = Variable(X_train.float()), Variable(y_train.float())

net = Net([feature_num, 20], 1)
optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

epoch = 10000
for t in range(epoch):
    y_train_pred = net(X_train)
    loss = loss_func(y_train_pred, y_train)     # must be (1. nn output, 2. target)
    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients
    
    y_train_bar = y_train.mean()
    sse_train = ((y_train - y_train_pred)**2).sum()
    sst_train = ((y_train - y_train_bar)**2).sum()
    train_r2 = 1 - sse_train/sst_train 
    
    if t % 1000 == 0:
        print(train_r2)
        print(loss.data.numpy())

torch.save(net, './model.pt')


tensor(-3.3016, grad_fn=<AddBackward>)
7.7249675
tensor(0.5273, grad_fn=<AddBackward>)
0.8488379
tensor(0.5213, grad_fn=<AddBackward>)
0.8597376
tensor(0.5439, grad_fn=<AddBackward>)
0.8190587
tensor(0.5504, grad_fn=<AddBackward>)
0.8074076
tensor(0.5471, grad_fn=<AddBackward>)
0.8133852
tensor(0.5612, grad_fn=<AddBackward>)
0.78799117
tensor(0.5675, grad_fn=<AddBackward>)
0.77664316
tensor(0.5664, grad_fn=<AddBackward>)
0.77868134
tensor(0.5672, grad_fn=<AddBackward>)
0.7771561


  "type " + obj.__name__ + ". It won't be checked "


In [227]:
max = 0       
epoch = 10000

for layer_num in range(1, 4):
    for layer_size in [10, 20, 30, 40]:
        sum_rho = 0
        sum_pvalue = 0
        sum_rsquare_train = 0
        sum_rsquare_test = 0
        hidden_list = [feature_num]
        print("the number of hidden layers : ", layer_num, " layer_size : ", layer_size)
        for i in range(layer_num):
            hidden_list.append(layer_size)
        print("hidden list : ", hidden_list)
        for train_index, test_index in kf.split(x):
            X_train, X_test = x[train_index], x[test_index]
            y_train, y_test = y[train_index], y[test_index]

            X_train = torch.from_numpy(X_train)
            X_test = torch.from_numpy(X_test)
            y_train = torch.from_numpy(y_train)
            y_test = torch.from_numpy(y_test)

            X_train, y_train = Variable(X_train.float()), Variable(y_train.float())
            X_test, y_test = Variable(X_test.float()), Variable(y_test.float())

            net = Net(hidden_list, 1)
            #net = Net(n_feature=feature_num, n_hidden1 = 50, n_hidden2 = 50, n_hidden3 = 50, n_output=1)     # define the network
            # print(net, net.hidden_list)  # net architecture
            optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
            loss_func = torch.nn.MSELoss()

            for t in range(epoch):
                y_train_pred = net(X_train)     # input x and predict based on x

                loss = loss_func(y_train_pred, y_train)     # must be (1. nn output, 2. target)
                optimizer.zero_grad()   # clear gradients for next train
                loss.backward()         # backpropagation, compute gradients
                optimizer.step()        # apply gradients

                y_test_pred = net(X_test)
                y_test_pred = torch.clamp(y_test_pred, min=1, max=5)

                y_train_bar = y_train.mean()
                sse_train = ((y_train - y_train_pred)**2).sum()
                sst_train = ((y_train - y_train_bar)**2).sum()

                y_test_bar = y_test.mean()
                sse_test = ((y_test - y_test_pred)**2).sum()
                sst_test = ((y_test - y_test_bar)**2).sum()

                train_r2 = 1 - sse_train/sst_train 
                test_r2 = 1 - sse_test/sst_test

                #if t % 1000 == 0:
                    #print('loss', loss)

                if t == epoch - 1:
                    #print(train_r2, test_r2)
                    #print(loss.data.numpy())
                    print(train_r2, test_r2)
                    sum_rsquare_train += train_r2
                    sum_rsquare_test += test_r2
                    print(spearmanr(y_test.detach().numpy(), y_test_pred.detach().numpy()))
                    sum_rho += spearmanr(y_test.detach().numpy(), y_test_pred.detach().numpy()).correlation
                    sum_pvalue += spearmanr(y_test.detach().numpy(), y_test_pred.detach().numpy()).pvalue
                    # print(y_test, y_test_pred)
                    '''
                    #draw excel validation answer - prediction scatterplot
                    y_test_np = y_test.data.numpy()
                    y_test_pred_np = y_test_pred.data.numpy()
                    np.savetxt(str(filenum) + "positive_test.csv", y_test_np, delimiter=",")
                    np.savetxt(str(filenum) + "positive_test_pred.csv", y_test_pred_np, delimiter=",")
                    filenum += 1
                    '''

                    #np.savetxt(str(filenum) + "train.txt", y_train.data.numpy() + y_train_pred.data.numpy())
                    #filenum += 1
                    #print(y_test, y_test_pred)
            #np.savetxt(str(foldnum) + "test.csv", ans, delimiter=",")
            #np.savetxt(str(foldnum) + "test_pred.csv",pred, delimiter=",")
            #foldnum += 1
        print(net)
        print("average_rho : " + str(sum_rho / kf.n_splits) + " average_pvalue : " + str(sum_pvalue / kf.n_splits))
        print("average_r_squared_train : " + str(sum_rsquare_train / kf.n_splits) + " average_r_squared_test : " + str(sum_rsquare_test / kf.n_splits))
        print()

the number of hidden layers :  1  layer_size :  10
hidden list :  [36, 10]
tensor(0.5222, grad_fn=<AddBackward>) tensor(0.4956, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.7098369753157742, pvalue=3.011293304938401e-62)
tensor(0.4769, grad_fn=<AddBackward>) tensor(0.4695, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6925227235265391, pvalue=3.962960877823901e-58)
tensor(0.5329, grad_fn=<AddBackward>) tensor(0.4007, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6341764889086049, pvalue=3.630564241291164e-46)
tensor(0.5245, grad_fn=<AddBackward>) tensor(0.4728, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6953771328814413, pvalue=1.2110944802914797e-58)
tensor(0.5426, grad_fn=<AddBackward>) tensor(0.3238, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6009033447014754, pvalue=2.498500109647576e-40)
Net(
  (hidden_list): ModuleList(
    (0): Linear(in_features=36, out_features=10, bias=True)
  )
  (predict): Linear(in_features=10, out_features=1, bias

tensor(0.6915, grad_fn=<AddBackward>) tensor(0.3598, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6370791569951617, pvalue=1.0607102368145199e-46)
tensor(0.6792, grad_fn=<AddBackward>) tensor(0.3824, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6498037303181136, pvalue=4.109863031810027e-49)
tensor(0.7121, grad_fn=<AddBackward>) tensor(0.3622, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6559350945536498, pvalue=2.569770122036786e-50)
tensor(0.6916, grad_fn=<AddBackward>) tensor(0.3350, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.6268777695572505, pvalue=9.720862999853885e-45)
tensor(0.7060, grad_fn=<AddBackward>) tensor(0.2159, grad_fn=<AddBackward>)
SpearmanrResult(correlation=0.5443578776752862, pvalue=5.258418502103776e-32)
Net(
  (hidden_list): ModuleList(
    (0): Linear(in_features=36, out_features=40, bias=True)
    (1): Linear(in_features=40, out_features=40, bias=True)
  )
  (predict): Linear(in_features=40, out_features=1, bias=True)
)
averag