In [218]:
import torch
import os
import codecs
import pickle

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils import data

import os
import random
import numpy as np
#from visdom import Visdom
import matplotlib.pyplot as plt

import data_process_test

In [219]:
def dataset_split_h(data, mask):  # data == tensor   mask == torch    b t input
    """Split `data` and `mask` into train / valid / test partitions.

    Partitioning is by position along dim 0: samples [0, 1341) are the test
    split, [1341, 2682) the validation split and [2682, end) the training
    split. For every partition only feature columns 3..52 are kept, and the
    column at position 36 of that window is then removed.

    Returns:
        The tuple (train_data, train_mask, valid_data, valid_mask,
        test_data, test_mask).
    """
    def _select(tensor, rows):
        # sample, timestep, feature
        window = tensor[rows, :, 3:53]
        # TODO
        # Discard the 36th feature of the selected window.
        keep = torch.arange(window.size(2)) != 36
        return window[:, :, keep]

    # Order matters: the caller unpacks train, valid, test.
    partitions = (slice(2682, None), slice(1341, 2682), slice(None, 1341))
    pieces = []
    for rows in partitions:
        pieces.append(_select(data, rows))
        pieces.append(_select(mask, rows))
    return tuple(pieces)

In [220]:
class Dataset(data.Dataset):
    """Index-aligned view over train / valid / test containers and their masks.

    The length is taken from the training container only, and indexing pulls
    the same position from all six stored containers.
    """

    def __init__(self, dataset, mask, valid_data, valid_mask, test_data, test_mask):
        """Keep references to the six containers; nothing is copied."""
        self.X, self.M = dataset, mask
        self.vx, self.vm = valid_data, valid_mask
        self.tx, self.tm = test_data, test_mask

    def __len__(self):
        """Total number of training samples."""
        return len(self.X)

    def __getitem__(self, index):
        """Return element `index` of every stored container as a 6-tuple."""
        # NOTE(review): the same index is applied to the valid/test containers,
        # so they must be at least as long as the training container.
        sources = (self.X, self.M, self.vx, self.vm, self.tx, self.tm)
        return tuple(source[index] for source in sources)

class TrainDataset(Dataset):
    """Training split: pairs every sample with its mask."""

    def __init__(self, data, mask):
        # The parent initialiser is not called; only these two attributes are set.
        self.data, self.mask = data, mask

    def __len__(self) -> int:
        """Number of samples in the split."""
        return len(self.data)

    def __getitem__(self, index: int):
        """Return the `(sample, mask)` pair stored at `index`."""
        sample, sample_mask = self.data[index], self.mask[index]
        return sample, sample_mask
    
class VaildDataset(Dataset):
    """Validation split: pairs every sample with its mask."""

    def __init__(self, data, mask):
        # The parent initialiser is not called; only these two attributes are set.
        self.data, self.mask = data, mask

    def __len__(self) -> int:
        """Number of samples in the split."""
        return len(self.data)

    def __getitem__(self, index: int):
        """Return the `(sample, mask)` pair stored at `index`."""
        sample, sample_mask = self.data[index], self.mask[index]
        return sample, sample_mask
    
class TestDataset(Dataset):
    """Test split: pairs every sample with its mask."""

    def __init__(self, data, mask):
        # The parent initialiser is not called; only these two attributes are set.
        self.data, self.mask = data, mask

    def __len__(self) -> int:
        """Number of samples in the split."""
        return len(self.data)

    def __getitem__(self, index: int):
        """Return the `(sample, mask)` pair stored at `index`."""
        sample, sample_mask = self.data[index], self.mask[index]
        return sample, sample_mask

In [221]:
class RNN(nn.Module):
    """Multi-layer `nn.RNN` followed by dropout and a linear read-out.

    The read-out maps the hidden state back to `input_size` features, so the
    output of one step can be fed back in as the next input.

    Args:
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        dropout: dropout probability, used both between recurrent layers and
            on the recurrent output before the read-out.
        input_size: number of features per timestep (also the output width).
    """

    # Number of leading timesteps treated as observed when generating.
    OBSERVED_STEPS = 24

    def __init__(self,hidden_size=64, num_layers=2, dropout=0.2,input_size=49):
        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.input_size = input_size
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=self.hidden_size,     # rnn hidden unit
            num_layers=self.num_layers,       # number of rnn layer
            dropout=self.dropout,
            batch_first=True,   # input & output are (batch, time_step, input_size)
        )
        self.drop_layer = nn.Dropout(p=self.dropout)
        self.out = nn.Linear(self.hidden_size, input_size)

    def forward(self, x, h_state=None, is_train=True, gen_length=None):
        """Run the network teacher-forced, or roll it out autoregressively.

        Args:
            x: input of shape (batch, time_step, input_size).
            h_state: optional initial hidden state, (n_layers, batch, hidden_size).
            is_train: when true, every step of `x` is fed to the network and
                one prediction per input step is returned. When false, the
                first 24 steps of `x` are copied to the output and the rest is
                generated one step at a time, starting from step 23.
            gen_length: total number of output timesteps when `is_train` is
                false (observed prefix included). Required in that mode.

        Returns:
            (output, h_state) with output of shape (batch, T, input_size),
            where T is `x.shape[1]` in training mode and `gen_length` otherwise.

        Raises:
            ValueError: in generation mode, if `gen_length` is missing or `x`
                has fewer than 24 timesteps.
        """
        if not is_train:
            return self._generate(x, h_state, gen_length)
        r_out, h_state = self.rnn(x, h_state)   # r_out: (batch, time_step, hidden_size)
        r_out = self.out(self.drop_layer(r_out))  # -> b t inputsize
        return r_out, h_state

    def _generate(self, x, h_state, gen_length):
        """Copy the observed prefix of `x`, then generate the remaining steps."""
        if gen_length is None:
            raise ValueError("gen_length is required when is_train=False")
        observed = self.OBSERVED_STEPS
        if x.shape[1] < observed:
            raise ValueError(
                "x must contain at least %d timesteps, got %d" % (observed, x.shape[1])
            )

        # Allocate on x's device/dtype instead of forcing CUDA.
        generated = x.new_zeros(x.shape[0], gen_length, self.input_size)
        # Copy every observed step, including the seed step (index 23), which
        # was previously left as zeros in the returned sequence.
        prefix = min(observed, gen_length)
        generated[:, :prefix] = x[:, :prefix]

        # NOTE(review): the roll-out starts from `h_state` as passed in; the
        # earlier observed steps are copied to the output but never fed
        # through the network.
        step = x[:, observed - 1:observed]   # b 1 input_size
        for t in range(observed, gen_length):
            step, h_state = self.rnn(step, h_state)   # b 1 hidden_size
            step = self.out(self.drop_layer(step))    # b 1 input_size
            generated[:, t] = step[:, 0]
        return generated, h_state

# In[6]:
class LSTM(nn.Module):
    """Multi-layer `nn.LSTM` followed by dropout and a linear read-out.

    The read-out maps the hidden state back to `input_size` features, so the
    output of one step can be fed back in as the next input.

    Args:
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        dropout: dropout probability, used both between recurrent layers and
            on the recurrent output before the read-out.
        input_size: number of features per timestep (also the output width).
    """

    # Number of leading timesteps treated as observed when generating.
    OBSERVED_STEPS = 24

    def __init__(self,hidden_size=64, num_layers=2, dropout=0.2,input_size=49):
        super(LSTM, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.input_size = input_size
        self.rnn = nn.LSTM(
            input_size=input_size,
            hidden_size=self.hidden_size,     # rnn hidden unit
            num_layers=self.num_layers,       # number of rnn layer
            dropout=self.dropout,
            batch_first=True,   # input & output are (batch, time_step, input_size)
        )
        self.drop_layer = nn.Dropout(p=self.dropout)
        self.out = nn.Linear(self.hidden_size, input_size)

    def forward(self, x, h_state=None, is_train=True, gen_length=None):
        """Run the network teacher-forced, or roll it out autoregressively.

        Args:
            x: input of shape (batch, time_step, input_size).
            h_state: optional initial state; for `nn.LSTM` this is the
                `(h, c)` pair, each (n_layers, batch, hidden_size).
            is_train: when true, every step of `x` is fed to the network and
                one prediction per input step is returned. When false, the
                first 24 steps of `x` are copied to the output and the rest is
                generated one step at a time, starting from step 23.
            gen_length: total number of output timesteps when `is_train` is
                false (observed prefix included). Required in that mode.

        Returns:
            (output, h_state) with output of shape (batch, T, input_size),
            where T is `x.shape[1]` in training mode and `gen_length` otherwise.

        Raises:
            ValueError: in generation mode, if `gen_length` is missing or `x`
                has fewer than 24 timesteps.
        """
        if not is_train:
            return self._generate(x, h_state, gen_length)
        r_out, h_state = self.rnn(x, h_state)   # r_out: (batch, time_step, hidden_size)
        r_out = self.out(self.drop_layer(r_out))  # -> b t inputsize
        return r_out, h_state

    def _generate(self, x, h_state, gen_length):
        """Copy the observed prefix of `x`, then generate the remaining steps."""
        if gen_length is None:
            raise ValueError("gen_length is required when is_train=False")
        observed = self.OBSERVED_STEPS
        if x.shape[1] < observed:
            raise ValueError(
                "x must contain at least %d timesteps, got %d" % (observed, x.shape[1])
            )

        # Allocate on x's device/dtype instead of forcing CUDA.
        generated = x.new_zeros(x.shape[0], gen_length, self.input_size)
        # Copy every observed step, including the seed step (index 23), which
        # was previously left as zeros in the returned sequence.
        prefix = min(observed, gen_length)
        generated[:, :prefix] = x[:, :prefix]

        # NOTE(review): the roll-out starts from `h_state` as passed in; the
        # earlier observed steps are copied to the output but never fed
        # through the network.
        step = x[:, observed - 1:observed]   # b 1 input_size
        for t in range(observed, gen_length):
            step, h_state = self.rnn(step, h_state)   # b 1 hidden_size
            step = self.out(self.drop_layer(step))    # b 1 input_size
            generated[:, t] = step[:, 0]
        return generated, h_state

# In[6]:
class GRU(nn.Module):
    """Multi-layer `nn.GRU` followed by dropout and a linear read-out.

    The read-out maps the hidden state back to `input_size` features, so the
    output of one step can be fed back in as the next input.

    Args:
        hidden_size: width of the recurrent hidden state.
        num_layers: number of stacked recurrent layers.
        dropout: dropout probability, used both between recurrent layers and
            on the recurrent output before the read-out.
        input_size: number of features per timestep (also the output width).
    """

    # Number of leading timesteps treated as observed when generating.
    OBSERVED_STEPS = 24

    def __init__(self,hidden_size=64, num_layers=2, dropout=0.2,input_size=49):
        super(GRU, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.input_size = input_size
        self.rnn = nn.GRU(
            input_size=input_size,
            hidden_size=self.hidden_size,     # rnn hidden unit
            num_layers=self.num_layers,       # number of rnn layer
            dropout=self.dropout,
            batch_first=True,   # input & output are (batch, time_step, input_size)
        )
        self.drop_layer = nn.Dropout(p=self.dropout)
        self.out = nn.Linear(self.hidden_size, input_size)

    def forward(self, x, h_state=None, is_train=True, gen_length=None):
        """Run the network teacher-forced, or roll it out autoregressively.

        Args:
            x: input of shape (batch, time_step, input_size).
            h_state: optional initial hidden state, (n_layers, batch, hidden_size).
            is_train: when true, every step of `x` is fed to the network and
                one prediction per input step is returned. When false, the
                first 24 steps of `x` are copied to the output and the rest is
                generated one step at a time, starting from step 23.
            gen_length: total number of output timesteps when `is_train` is
                false (observed prefix included). Required in that mode.

        Returns:
            (output, h_state) with output of shape (batch, T, input_size),
            where T is `x.shape[1]` in training mode and `gen_length` otherwise.

        Raises:
            ValueError: in generation mode, if `gen_length` is missing or `x`
                has fewer than 24 timesteps.
        """
        # The generation branch is taken before any pass over the full `x`.
        # Running the recurrent layer over all of `x` first would seed the
        # roll-out with a hidden state that has already seen the steps being
        # predicted, and would differ from the sibling RNN / LSTM classes.
        if not is_train:
            return self._generate(x, h_state, gen_length)
        r_out, h_state = self.rnn(x, h_state)   # r_out: (batch, time_step, hidden_size)
        r_out = self.out(self.drop_layer(r_out))  # -> b t inputsize
        return r_out, h_state

    def _generate(self, x, h_state, gen_length):
        """Copy the observed prefix of `x`, then generate the remaining steps."""
        if gen_length is None:
            raise ValueError("gen_length is required when is_train=False")
        observed = self.OBSERVED_STEPS
        if x.shape[1] < observed:
            raise ValueError(
                "x must contain at least %d timesteps, got %d" % (observed, x.shape[1])
            )

        # Allocate on x's device/dtype instead of forcing CUDA.
        generated = x.new_zeros(x.shape[0], gen_length, self.input_size)
        # Copy every observed step, including the seed step (index 23), which
        # was previously left as zeros in the returned sequence.
        prefix = min(observed, gen_length)
        generated[:, :prefix] = x[:, :prefix]

        # NOTE(review): the roll-out starts from `h_state` as passed in; the
        # earlier observed steps are copied to the output but never fed
        # through the network.
        step = x[:, observed - 1:observed]   # b 1 input_size
        for t in range(observed, gen_length):
            step, h_state = self.rnn(step, h_state)   # b 1 hidden_size
            step = self.out(self.drop_layer(step))    # b 1 input_size
            generated[:, t] = step[:, 0]
        return generated, h_state



def evaluate(model, data_x, data_y, mask, loss_fn):  # x (batch, time_step, input_size)
    """Generate a 48-step sequence from `data_y` and score steps 24..47.

    The model is called in generation mode on `data_y`, the prediction is
    multiplied by the first 48 timesteps of `mask`, and two losses are
    computed over timesteps 24..47: one on features [0, 47) and one on
    features [47, end). The ground truth is not masked.

    Args:
        model: callable as `model(seq, is_train=False, gen_length=...)`,
            returning `(prediction, state)`.
        data_x: unused; kept so existing call sites keep working.
        data_y: ground-truth tensor of shape (batch, time_step, input_size).
        mask: tensor broadcastable with the prediction after slicing to the
            first 48 timesteps.
        loss_fn: callable `(truth, prediction) -> loss`; it receives tensors
            laid out as (input, time_step, batch).

    Returns:
        (eval_loss_r, eval_loss_t, truth, prediction), the last two being
        timesteps 24..47 in (batch, time_step, input_size) layout.
    """
    length = 48
    # The model output is used directly; no pre-allocated (CUDA-only) buffer
    # is needed, so this also runs on CPU tensors.
    Pre_y, _ = model(data_y, is_train=False, gen_length=length)  # b t input
    Pre_y = torch.mul(Pre_y, mask[:, 0:length, :])

    truth_itb = data_y.permute(2, 1, 0)   # input t b
    pred_itb = Pre_y.permute(2, 1, 0)     # input t b
    eval_loss_r = loss_fn(truth_itb[:47, 24:length, :], pred_itb[:47, 24:length, :])
    eval_loss_t = loss_fn(truth_itb[47:, 24:length, :], pred_itb[47:, 24:length, :])
    return eval_loss_r, eval_loss_t, data_y[:, 24:length, :], Pre_y[:, 24:length, :]  # b t input

In [222]:
def read_dataset(path):
    """Load and return the object pickled in the file at `path`.

    The file is opened in binary mode and closed before returning, including
    when unpickling raises.

    SECURITY: `pickle.load` can execute arbitrary code embedded in the file;
    only use this on files from a trusted source.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)

In [223]:
# DataLoader settings shared by all three splits (no shuffling, main-process loading).
params = dict(batch_size=64, shuffle=False, num_workers=0)

# Load the pickled (dataset, mask) pair and carve out the splits.  # b t input
dataset, mask = read_dataset('../datasets/normalized_dataset_new.pkl')
(train_data, train_mask,
 valid_data, valid_mask,
 test_data, test_mask) = dataset_split_h(dataset, mask)
print(train_data.shape, train_mask.shape, sep="\n")

# Wrap each split so that indexing yields (sample, mask) pairs.
train_dataset = TrainDataset(train_data, train_mask)
vaild_dataset = VaildDataset(valid_data, valid_mask)
test_dataset = TestDataset(test_data, test_mask)

train_iterator = data.DataLoader(train_dataset, **params)
vaild_iterator = data.DataLoader(vaild_dataset, **params)
test_iterator = data.DataLoader(test_dataset, **params)

torch.Size([10736, 74, 49])
torch.Size([10736, 74, 49])


In [224]:
# Freshly initialised single-layer LSTM (hidden size 128, no dropout), moved to the GPU.
model = LSTM(hidden_size=128, num_layers=1, dropout=0).cuda()

In [225]:
# Two separate single-layer LSTMs: one over 47 input features, one over 2.
model_r = LSTM(hidden_size=128, num_layers=1, dropout=0, input_size=47).cuda()
model_t = LSTM(hidden_size=128, num_layers=1, dropout=0, input_size=2).cuda()

In [226]:
def RMSE(Y, Y_pred):  # b t input
    """Aggregate L1 and L2 errors over a list of per-batch tensors.

    Despite the name, no square root is taken: the L2 figures are mean
    squared errors. Errors are reported separately for features [0, 47)
    (suffix `_r`) and features [47, end) (suffix `_t`).

    Args:
        Y: non-empty sequence of ground-truth tensors, each shaped
            (batch, time_step, input_size).
        Y_pred: sequence of predictions matching `Y` entry by entry.

    Returns:
        (l1_r, l1_t, l2_r, l2_t, geometric_mean), where the last value is
        the geometric mean of the four errors. If any error is exactly zero
        the geometric mean is zero (numpy emits a divide-by-zero warning).

    Raises:
        ValueError: if `Y` is empty or `Y` and `Y_pred` differ in length.
    """
    if len(Y) == 0:
        raise ValueError("Y must contain at least one batch")
    if len(Y) != len(Y_pred):
        raise ValueError("Y and Y_pred must contain the same number of batches")

    # Concatenate once along the batch axis instead of growing pairwise.
    ground_truth = torch.cat(list(Y), 0)
    predict = torch.cat(list(Y_pred), 0)

    g_r, g_t = ground_truth[..., :47], ground_truth[..., 47:]
    y_r, y_t = predict[..., :47], predict[..., 47:]

    l2_r = F.mse_loss(g_r, y_r)
    l2_t = F.mse_loss(g_t, y_t)
    l1_r = F.l1_loss(g_r, y_r)
    l1_t = F.l1_loss(g_t, y_t)
    geometric_mean = np.exp(np.log([l1_t.item(), l1_r.item(), l2_t.item(), l2_r.item()]).mean())
    return l1_r.item(), l1_t.item(), l2_r.item(), l2_t.item(),geometric_mean

In [227]:
# Rebind `model` to the pickled LSTM checkpoint stored on disk.
model = torch.load("../model_test/lstm.model")

In [228]:
# Rebind `model_r` / `model_t` to the checkpoints whose file names end in "r.model" and "t.model".
model_r = torch.load(
    "../model_test/LSTM_r_t_[128, 256, 512]_[1, 2]_[0.2, 0.5]_[64, 128, 256]r.model"
)
model_t = torch.load(
    "../model_test/LSTM_r_t_[128, 256, 512]_[1, 2]_[0.2, 0.5]_[64, 128, 256]t.model"
)

In [229]:
def evaluate_rt(model, data_x, data_y, mask, loss_fn):  # x (batch, time_step, input_size)
    """Generate 48 steps from `data_x` and score them against `data_y`.

    The prediction is multiplied by the last two feature channels of the
    first 48 timesteps of `mask`. The loss covers all 48 timesteps, while
    the returned truth / prediction tensors cover timesteps 24..47 only.

    Returns:
        (loss, truth, prediction), the tensors in (batch, time_step, input) layout.
    """
    horizon = 48
    generated, _ = model(data_x, is_train=False, gen_length=horizon)  # b t input
    masked = torch.mul(generated, mask[:, :48, -2:])
    loss = loss_fn(data_y[:, :horizon, :], masked)
    tail = slice(24, horizon)
    return loss, data_y[:, tail, :], masked[:, tail, :]  # b t input

In [230]:
loss_fn = nn.MSELoss()
Y = []
Y_pred = []
TN = TP = FN = FP = 0
pred_t_n = []   # per-sample predicted label: 1 if any treatment value is predicted
t_n = []        # per-sample true label: 1 if any treatment value is non-zero

# Inference only: switch off dropout and skip autograd bookkeeping so the
# tensors kept in Y / Y_pred do not retain computation graphs.
model.eval()
with torch.no_grad():
    for b, batch in enumerate(test_iterator):
        # A distinct name keeps the full-dataset `mask` from being rebound here.
        data_z, batch_mask = batch
        if torch.cuda.is_available():
            data_z = data_z.cuda()
            batch_mask = batch_mask.cuda()
        eval_loss_r, eval_loss_t, y, y_pred = evaluate(model, data_z, data_z, batch_mask, loss_fn)

        # A sample counts as treated when any of the last two features is
        # non-zero in timesteps 24..47; it counts as predicted-treated when
        # any predicted value of those features reaches the 0.05 threshold.
        actual = (data_z[:, 24:48, -2:] != 0).flatten(1).any(dim=1)
        predicted = (y_pred[:, :, -2:] >= 0.05).flatten(1).any(dim=1)
        t_n.extend(actual.int().tolist())
        pred_t_n.extend(predicted.int().tolist())

        TP += int((actual & predicted).sum())
        TN += int((~actual & ~predicted).sum())
        FP += int((~actual & predicted).sum())
        FN += int((actual & ~predicted).sum())

        Y.append(y)
        Y_pred.append(y_pred)
l1_r, l1_t, l2_r, l2_t, geo_mean = RMSE(Y, Y_pred)

  result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,


In [231]:
# Confusion-matrix counts, displayed in the order (TN, TP, FP, FN).
TN,TP,FP,FN

(350, 327, 645, 19)

In [232]:
# True positives: treatment present in the data and predicted.
TP

327

In [233]:
# False positives: treatment predicted but absent from the data.
FP

645

In [234]:
# False negatives: treatment present in the data but not predicted.
FN

19

In [235]:
# Recall = TP / (TP + FN), precision = TP / (TP + FP). Each falls back to NaN
# when its denominator is zero (no positive samples / no positive predictions)
# instead of raising ZeroDivisionError.
Recall = TP / (TP + FN) if (TP + FN) else float("nan")
Precision = TP / (TP + FP) if (TP + FP) else float("nan")
print("recall:",str(Recall),"     precision: ",str(Precision))

recall: 0.9450867052023122      precision:  0.33641975308641975


In [236]:
# ROC AUC of the per-sample treatment labels.
# NOTE(review): pred_t_n holds thresholded 0/1 predictions rather than
# continuous scores, so this AUC reflects a single operating point — confirm
# that is what is intended.
from sklearn import metrics
auc=metrics.roc_auc_score(t_n,pred_t_n)

In [237]:
# Display the ROC AUC computed in the previous cell.
auc

0.6484227495860807