In [1]:
import torch
import torch.nn as nn
import h5py
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from torch.utils.data import Dataset,DataLoader,TensorDataset
import numpy as np
from torch.autograd import Variable
import torch.nn.functional as F
import os
import random

In [2]:
class BiLSTM_Attention(nn.Module):
    def __init__(self):
        super(BiLSTM_Attention, self).__init__()

        self.lstm = nn.LSTM(embedding_dim, n_hidden, bidirectional=True)
        self.out = nn.Linear(n_hidden * 2, num_classes)

    # lstm_output : [batch_size, n_step, n_hidden * num_directions(=2)], F matrix
    def attention_net(self, lstm_output, final_state):
        hidden = final_state.view(-1, n_hidden * 2, 1)   # hidden : [batch_size, n_hidden * num_directions(=2), 1(=n_layer)]
        attn_weights = torch.bmm(lstm_output, hidden).squeeze(2) # attn_weights : [batch_size, n_step]
        soft_attn_weights = F.softmax(attn_weights, 1)
        # [batch_size, n_hidden * num_directions(=2), n_step] * [batch_size, n_step, 1] = [batch_size, n_hidden * num_directions(=2), 1]
        context = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
#         return context, soft_attn_weights.data.numpy() # context : [batch_size, n_hidden * num_directions(=2)]
        return context
    def forward(self, X):
        input = x
#         input = self.embedding(X) # input : [batch_size, len_seq, embedding_dim]
        #len_seq=time embedding_dim=vol
        input = input.permute(1, 0, 2) # input : [len_seq, batch_size, embedding_dim]
        #1层双头
        hidden_state = Variable(torch.zeros(1*2, len(X), n_hidden)).to(device) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        cell_state = Variable(torch.zeros(1*2, len(X), n_hidden)).to(device) # [num_layers(=1) * num_directions(=2), batch_size, n_hidden]

        # final_hidden_state, final_cell_state : [num_layers(=1) * num_directions(=2), batch_size, n_hidden]
        output, (final_hidden_state, final_cell_state) = self.lstm(input, (hidden_state, cell_state))
        output = output.permute(1, 0, 2) # output : [batch_size, len_seq, n_hidden]
        attn_output= self.attention_net(output, final_hidden_state)
        return self.out(attn_output) # model : [batch_size, num_classes], attention : [batch_size, n_step]

In [3]:
def get_file_list(folder):
    filetype = 'hdf5'
    filelist = []  
    for dirpath,dirnames,filenames in os.walk(folder):
        for file in filenames:
            filename = file.split('.')[0][:4]
            file_type = file.split('.')[-1]
            if file_type == filetype and filename in ['2015']:
                file_fullname = os.path.join(dirpath, file) #文件全名
                filelist.append(file_fullname)
    return filelist

def splitdatalist(full_list,shuffle=False,ratio=0.2):
    n_total = len(full_list)
    offset = int(n_total * ratio)
    if n_total==0 or offset<1:
        return [],full_list
    if shuffle:
        random.shuffle(full_list)
    sublist_1 = full_list[:offset]
    sublist_2 = full_list[offset:]
    return sublist_1,sublist_2

In [4]:
def make_loader(file_path):
    temp=h5py.File(file_path,"r")
    x_data = temp["vol"][:,:,:,0] + temp['vol'][:,:,:,1]
    y_data = temp['labels'][:]
    x_data = torch.from_numpy(x_data).float()
    y_data = torch.from_numpy(y_data).float()
    dataset = TensorDataset(x_data,y_data)
    loader = DataLoader(dataset=dataset,batch_size=64,shuffle=True,drop_last=True)
    return loader

In [5]:
if __name__ == '__main__':
    learning_rate = 0.001
    epoch_num = 5

    device = torch.device("cuda:0") if torch.cuda.is_available() else "cpu"
    embedding_dim = 601
    n_hidden = 100
    num_classes = 1

    model = BiLSTM_Attention()
    model.to(device)

    optimizer = torch.optim.RMSprop(model.parameters(),lr=learning_rate,alpha=0.99,eps=1e-08, weight_decay=0, momentum=0, centered=False)
    loss_fn = torch.nn.MSELoss()

    train_loss_list = []
    val_loss_list = []

    filelist = get_file_list(r'F:/shixi')
    trainlist,testlist = splitdatalist(filelist,shuffle=True,ratio=0.7)
    print(trainlist,testlist)
    
    for i in range(epoch_num):
        model.train()
        
        train_loss = 0
        for path in trainlist:
            train_loader = make_loader(path)  
            
            for j,(x,y) in enumerate(train_loader):
                x = x.to(device)
                y = y.to(device)
                output= model(x)
                loss = loss_fn(output,y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                train_loss += loss.cpu().item()
        train_loss_list.append(train_loss/(j+1)/len(trainlist))
        
        
#         train_loss_list.append(loss.item())

        model.eval()
        val_loss = 0
        for path in testlist:
            test_loader = make_loader(path)  
            for k,(x,y) in enumerate(test_loader):
                x = x.to(device)
                y = y.to(device)
                output,= model(x)
                loss = loss_fn(output,y)

#             val_loss_list.append(loss.item())
                val_loss += loss.item()
            
        val_loss_list.append(val_loss/(k+1)/(len(testlist)))
    #     if train_loss_list[i] == min(train_loss_list) or val_loss_list[i] == min(val_loss_list):
    #         value = str(train_loss_list[i]) +','+ str(val_loss_list[i])
    #         torch.save(model.state_dict(),'%s.pth'%value)

        print(train_loss_list[i],val_loss_list[i])
    print('ok')

['F:/shixi\\20150107.hdf5', 'F:/shixi\\20150106.hdf5'] ['F:/shixi\\20150105.hdf5']
0.13434550393786696 0.11503992064131631
0.10151890758424997 0.09776208239297073
0.09582077608340317 0.09262306470837858
0.09257398825138807 0.09219122678041458
0.0919763616596659 0.08885362309714158
ok


In [None]:
def get_file_list(folder):
    filetype = 'hdf5'
    filelist = []  
    for dirpath,dirnames,filenames in os.walk(folder):
        for file in filenames:
            filename = file.split('.')[0][:4]
#             print(filename)
            file_type = file.split('.')[-1]
            if file_type == filetype and filename in ['2015','2016']:
                file_fullname = os.path.join(dirpath, file) #文件全名
                filelist.append(file_fullname)
    return filelist

filelist = get_file_list(r'F:/shixi')
trainlist,testlist = splitdatalist(filelist,shuffle=True,ratio=0.7)
print(trainlist,testlist)