In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import numpy as np
import pandas as pd
import pickle
import gc
import re
import collections
import math
from tqdm.notebook import tqdm
from pytorch_memlab import MemReporter
from torchinfo import summary
# Project-local modules from here on
import model
import result
import mode
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset pairing each input sample with its target.

    Args:
        data: Indexable collection of input samples; must support ``len()``.
        target: Indexable collection of targets, aligned index-by-index with
            ``data``.
        transform: Optional callable applied to the input sample in
            ``__getitem__``. With the default ``None`` samples are returned
            unchanged.
    """

    def __init__(self, data, target, transform=None):
        self.transform = transform
        self.data = data
        self.target = target

    def __len__(self):
        """Return the number of samples, i.e. ``len(data)``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(sample, target)`` pair stored at ``index``."""
        out_data = self.data[index]
        out_target = self.target[index]

        # The transform used to be stored but silently ignored; apply it to
        # the input sample when the caller supplied one.
        if self.transform is not None:
            out_data = self.transform(out_data)

        return out_data, out_target

In [3]:
# print() wrapper that prefixes every message with the current time.
def datePrint(*args, **kwargs):
    """Print like ``print`` but prefixed with ``[YYYY/MM/DD HH:MM:SS] ``.

    Args:
        *args: Objects to print, forwarded to ``print`` unchanged.
        **kwargs: Keyword arguments forwarded to ``print`` (``sep``, ``end``,
            ``file``, ``flush``).
    """
    from datetime import datetime

    stamp = datetime.now().strftime('[%Y/%m/%d %H:%M:%S] ')
    # Write the prefix to the same stream as the message. It used to go to
    # stdout unconditionally, so ``file=...`` split the prefix from the text.
    # ``file=None`` makes print fall back to sys.stdout.
    print(stamp, end="", file=kwargs.get("file"))
    print(*args, **kwargs)

In [4]:
def load_vocab(vocab_file):
    """Read a one-token-per-line vocabulary file into a token -> index map.

    Args:
        vocab_file: Path of a UTF-8 text file holding one token per line.

    Returns:
        ``collections.OrderedDict`` mapping each token (trailing newline
        removed) to its zero-based line number. For a repeated token the
        last line number is kept.
    """
    with open(vocab_file, "r", encoding="utf-8") as reader:
        entries = (
            (line.rstrip("\n"), position)
            for position, line in enumerate(reader)
        )
        # The generator is lazy, so build the dict while the file is open.
        return collections.OrderedDict(entries)

In [5]:
# # Convert the data from the BERT format into our own format
# df = pd.read_table('../data/make/forbert/dev.tsv')
# dev = df['sequence'].str.split(' ', expand=True).replace(vocab).drop(columns=508)
# dev = torch.tensor(dev.values).to(torch.float)
# print(dev.shape)
# pickle.dump(dev, open(f"../data/make/forbert/dev.pkl", "wb"))

# df = pd.read_table('../data/make/forbert/train.tsv')
# train = df['sequence'].str.split(' ', expand=True).replace(vocab)
# print(train)
# train = train.drop(columns=508)
# train = torch.tensor(train.values).to(torch.float)
# print(train.shape)
# pickle.dump(train, open(f"../data/make/forbert/train.pkl", "wb"))

In [6]:
# # Load the k-mer vocabulary
# vocab = load_vocab("../data/make/forbert/vocab.txt")

# dev = pickle.load(open("../data/make/forbert/dev.pkl","rb"))
# print(dev.shape)
# train = pickle.load(open("../data/make/forbert/train.pkl","rb"))
# print(train.shape)

# target_dev = torch.tensor(pickle.load(open("../data/make/forbert/target_dev.pkl","rb")))
# target_dev = torch.flip(target_dev, dims=[1]).to(torch.float)
# print(target_dev.shape)

# target_train = torch.tensor(pickle.load(open("../data/make/forbert/target_train.pkl","rb")))
# target_train = torch.flip(target_train, dims=[1]).to(torch.float)
# print(target_train.shape)

# input_all = torch.cat([train, dev], dim=0)
# target_all = torch.cat([target_train, target_dev], dim=0)
# print(input_all.shape)
# print(target_all.shape)

# dataset = model.Dataset(input_all, target_all)
# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [180000, 20000])

In [7]:
datePrint("loading pickle data")

SHARD_DIR = "../data/r-make/maxspan100"


def _load_shard(shard_id, shard_dir=SHARD_DIR):
    """Load the (input, target) tensor pair of one pickled shard.

    Reads ``seq_<shard_id>.pkl`` and ``target_<shard_id>.pkl`` from
    ``shard_dir``. The target is cast to float and reversed along dim 1.

    NOTE: ``pickle.load`` can execute arbitrary code embedded in the file;
    only use this on trusted data.
    """
    # ``with`` closes each file; the bare ``open(...)`` calls used before
    # left every handle open.
    with open(f"{shard_dir}/seq_{shard_id}.pkl", "rb") as fh:
        inputs = torch.tensor(pickle.load(fh))
    with open(f"{shard_dir}/target_{shard_id}.pkl", "rb") as fh:
        targets = torch.tensor(pickle.load(fh)).float()
    return inputs, torch.flip(targets, dims=[1])


# Shards 4 and 5 (seq_4/target_4, seq_5/target_5) are currently not loaded.
(input1, target1), (input2, target2), (input3, target3) = (
    _load_shard(shard_id) for shard_id in (1, 2, 3)
)

input_all = torch.cat([input1, input2, input3], dim=0)
target_all = torch.cat([target1, target2, target3], dim=0)

dataset = model.Dataset(input_all, target_all)
# 80/20 train/validation split derived from the dataset size. For 1,500,000
# samples this equals the former hard-coded [1200000, 300000], which raised
# for any other dataset length.
n_val = len(dataset) // 5
n_train = len(dataset) - n_val
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [n_train, n_val])

[2022/03/14 08:01:48] loading pickle data


  import sys
  
  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  del sys.path[0]
  


In [9]:
# Custom loss functions
    
class CosineLoss(nn.Module):
    """Cosine-distance loss: the sum of ``1 - cos(outputs, targets)``.

    The similarity is taken along the last dimension and the per-vector
    distances are summed (not averaged) into a scalar.
    """

    def __init__(self):
        super().__init__()

    def forward(self, outputs, targets):
        """Return the summed cosine distance between ``outputs`` and ``targets``."""
        similarity = nn.functional.cosine_similarity(outputs, targets, dim=-1, eps=1e-6)
        distance = 1 - similarity
        return distance.sum()

class HyperbolicLoss(nn.Module):
    """Loss built from the Poincare-ball distance argument of ``u`` and ``v``.

    With ``x = 1 + 2*|u-v|^2 / ((1-|u|^2) * (1-|v|^2)) + epsilon`` taken over
    the last dimension, the loss is ``sum(sqrt(x^2 - 1))``.

    NOTE(review): the Poincare distance itself is ``arcosh(x)``, i.e.
    ``log(x + sqrt(x^2 - 1))``; this class sums only the square-root term.
    Confirm that is intended. The denominator is zero or negative when a
    vector has norm >= 1, which the code does not guard against.
    """

    def __init__(self):
        super().__init__()

    def forward(self, u, v, epsilon=1e-7):
        """Return ``sum(sqrt(x^2 - 1))`` for the ``x`` described on the class."""
        diff_sq = (u - v).pow(2).sum(dim=-1)
        u_norm_sq = u.pow(2).sum(dim=-1)
        v_norm_sq = v.pow(2).sum(dim=-1)
        denom = (1 - u_norm_sq) * (1 - v_norm_sq)
        x = 1 + 2 * diff_sq / denom + epsilon
        return torch.sqrt(x ** 2 - 1).sum()

# from geomstats.geometry.poincare_ball import PoincareBall
# class PoincareBallLoss(nn.Module):
#     def __init__(self):
#         super(PoincareBallLoss, self).__init__()
        
#     def forward(self, output, target):
#         output = output.cpu().detach().numpy()
#         target = target.cpu().detach().numpy()
#         return float(PoincareBall(dim=8).metric.dist(output[0], target[0]))


# Hyper-parameter grid swept by the training cell: every loss class in
# `losses` is trained once per learning rate in `lrs`.
losses = [nn.MSELoss]  # loss classes, not instances; the training loop instantiates them
lrs = [1e-4, 1e-5, 1e-3]
# opts = [optim.Adam, optim.RMSprop, optim.Adamax]

In [11]:
class conv1DBatchNorm(nn.Module):
    """1-D convolution (no bias) followed by batch normalisation.

    The constructor arguments are forwarded to ``nn.Conv1d``. The conv bias
    is disabled; ``BatchNorm1d`` over ``out_channels`` follows it.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1):
        super().__init__()
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=False,
        )
        self.batchnorm = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        """Apply the convolution and then batch normalisation to ``x``."""
        return self.batchnorm(self.conv(x))

class conv1DBatchNormMish(nn.Module):
    """1-D convolution (no bias), batch normalisation, then Mish activation.

    The constructor arguments are forwarded to ``nn.Conv1d``. The Mish
    activation runs in place on the batch-norm output.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1):
        super().__init__()
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=False,
        )
        self.batchnorm = nn.BatchNorm1d(out_channels)
        self.mish = nn.Mish(inplace=True)

    def forward(self, x):
        """Apply convolution, batch normalisation and Mish to ``x`` in that order."""
        normalised = self.batchnorm(self.conv(x))
        return self.mish(normalised)
    
class scSE(nn.Module):
    """Channel plus spatial squeeze-and-excitation for ``(batch, channel, length)`` input.

    Channel branch: global average pool -> bias-free bottleneck MLP
    (``channels // reduction`` hidden units) -> sigmoid gate per channel.
    Spatial branch: 1x1 convolution to a single channel -> sigmoid gate per
    position. The two gated copies of the input are added.
    """

    def __init__(self, channels, reduction=4):
        super().__init__()
        squeezed = channels // reduction
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Linear(channels, squeezed, bias=False)
        self.fc2 = nn.Linear(squeezed, channels, bias=False)

        self.conv = nn.Conv1d(channels, 1, kernel_size=1)

        self.sig = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` re-weighted by the channel gate plus ``x`` re-weighted by the spatial gate."""
        n_batch, n_channel, _ = x.size()

        # Channel excitation: one weight per channel.
        pooled = self.gap(x).view(n_batch, n_channel)
        hidden = F.relu(self.fc1(pooled))
        channel_gate = self.sig(self.fc2(hidden)).view(n_batch, n_channel, 1)
        channel_out = x * channel_gate

        # Spatial excitation: one weight per position.
        spatial_gate = self.sig(self.conv(x))
        spatial_out = x * spatial_gate

        return channel_out + spatial_out


In [12]:
class Variable(nn.Module):
    """Embedding followed by a stack of dilated 1-D convolutions.

    Layer order in ``self.convs``:
      1. conv+BN+Mish (emb_dim -> num_filters), then scSE
      2. ``num_layer`` repetitions of four conv+BN+Mish layers with
         dilations 1, 3, 1, 5 (padding chosen so the length is preserved)
      3. conv+BN+Mish, then scSE
      4. conv+BN down to one channel with a fixed kernel of 5 and no padding,
         so the output is 4 positions shorter than the input.

    Args:
        emb_dim: Embedding size for the 6-entry token vocabulary.
        num_layer: Number of dilation groups (four conv layers each).
        num_filters: Channel width of the convolutional stack.
        kernel_sizes: Kernel size of every conv layer except the last one.
    """

    def __init__(self, emb_dim=128, num_layer=8, num_filters=128, kernel_sizes=5):
        super().__init__()
        self.filter = num_filters
        self.embedding = nn.Embedding(6, emb_dim)

        half_kernel = kernel_sizes // 2

        def same_length_conv(in_channels, dilation=1):
            # padding = half_kernel * dilation keeps the sequence length unchanged
            return conv1DBatchNormMish(in_channels=in_channels, out_channels=num_filters,
                                       kernel_size=kernel_sizes, padding=half_kernel * dilation,
                                       dilation=dilation)

        layers = [same_length_conv(emb_dim), scSE(channels=num_filters)]
        for _ in range(num_layer):
            for dilation in (1, 3, 1, 5):
                layers.append(same_length_conv(num_filters, dilation=dilation))
        layers.append(same_length_conv(num_filters))
        layers.append(scSE(channels=num_filters))
        # NOTE(review): this kernel is the literal 5, not kernel_sizes — confirm intended.
        layers.append(conv1DBatchNorm(in_channels=num_filters, out_channels=1, kernel_size=5))
        self.convs = nn.ModuleList(layers)

    def forward(self, x):
        """Embed token ids ``x`` and return the flattened output of the conv stack."""
        hidden = self.embedding(x.long()).transpose(1, 2)
        for layer in self.convs:
            hidden = layer(hidden)
            # Some layers may hand back a tuple; keep only its first element.
            if type(hidden) is tuple:
                hidden = hidden[0]

        return hidden.view(hidden.shape[0], -1)

class Trans(nn.Module):
    """Embedding -> single-channel conv -> ``nn.Transformer`` over 508-wide vectors.

    The unpadded kernel-5 convolution shortens the sequence by 4, so
    ``d_model=508`` matches inputs of length 512. ``forward`` also reshapes
    the target to width 508 and feeds it to the transformer as the decoder
    input.
    """

    def __init__(self, emb_dim=128, num_filters=128):
        super().__init__()
        self.filter = num_filters
        self.embedding = nn.Embedding(6, emb_dim)
        self.conv = conv1DBatchNormMish(in_channels=emb_dim, out_channels=1, kernel_size=5)
        self.transformer = nn.Transformer(d_model=508, nhead=4, batch_first=True)

    def forward(self, data, target):
        """Run the transformer with ``data`` as source and ``target`` as decoder input.

        Returns the transformer output flattened to ``(batch, -1)``.
        """
        embedded = self.embedding(data.long()).transpose(1, 2)
        source = self.conv(embedded)
        decoder_in = target.reshape(-1, 1, 508)

        decoded = self.transformer(source, decoder_in)

        return decoded.view(decoded.shape[0], -1)
    
class TransEnc(nn.Module):
    """Embedding -> 4-layer ``TransformerEncoder`` -> conv+BN down to one channel.

    NOTE(review): the encoder receives the transposed tensor
    ``(batch, emb_dim, length)``, so with ``batch_first=True`` it attends
    over the ``emb_dim`` axis and uses the 512 positions as features.
    ``d_model=512`` therefore ties the model to inputs of length 512.
    Confirm this is the intended layout.
    """

    def __init__(self, emb_dim=256, num_filters=128):
        super().__init__()
        self.filter = num_filters
        self.embedding = nn.Embedding(6, emb_dim)
        # Kept as an attribute so its parameters stay in the state dict.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=4)
        self.conv = conv1DBatchNorm(in_channels=emb_dim, out_channels=1, kernel_size=5)

    def forward(self, x):
        """Return the flattened single-channel output for token ids ``x``."""
        embedded = self.embedding(x.long())          # (batch, length, emb)
        channels_first = embedded.transpose(1, 2)    # (batch, emb=256, length=512)
        encoded = self.transformer(channels_first)
        reduced = self.conv(encoded)

        return reduced.view(reduced.shape[0], -1)

In [None]:
batch_size = 128
epochs = 10  # identical for every run, so defined once outside the sweep
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=1)
# Validation gains nothing from shuffling; a fixed order keeps runs comparable.
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=1)
dataloaders_dict = {'train': train_dataloader, 'val': val_dataloader}

# Grid search: one freshly initialised TransEnc per (loss class, learning rate).
for loss in losses:
    for lr in lrs:
        net = TransEnc().to(device)
#         net = model.Variable(num_layer=8, num_filters=128, kernel_sizes=5).to(device)
        net.apply(model.weight_init)  # apply the project's weight initialisation
        #         print(summary(net, input_size=([batch_size, 512])))
        #         reporter = MemReporter(net)
        #         reporter.report()
        # Fine-tuning variant with per-module learning rates:
        #         optimizer = optim.Adam([{'params': net.embedding.parameters(), 'lr': 5e-4},
        #                                 {'params': net.convs.parameters(), 'lr': 1e-4},
        #                                 {'params': net.mid.parameters(), 'lr': 5e-4},
        #                                 {'params': net.fc.parameters(), 'lr': 1e-3}], weight_decay=1e-6)
        optimizer = torch.optim.RAdam(net.parameters(), lr=lr)
        print(f'optimizer: {optimizer}')
        criterion = loss().to(device)
        #         # Resume from a mid-training checkpoint
        #         checkpoint = torch.load('max_span100.pth')
        #         net.load_state_dict(checkpoint['model_state_dict'])
        #         optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        #         epochs = checkpoint['epoch']
        #         loss = checkpoint['loss']
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
        # NOTE: this rebinds the global `target_all` (the training targets
        # built by the data-loading cell) to the value returned by mode.train.
        train_loss_list, val_loss_list, data_all, target_all, output_all = mode.train(device, net, dataloaders_dict, criterion, optimizer, epochs, scheduler)
        # 'tmp.pth' is overwritten by every run of the sweep, so also keep a
        # per-configuration checkpoint; 'tmp.pth' still holds the latest run.
        torch.save(net.state_dict(), 'tmp.pth')
        torch.save(net.state_dict(), f'tmp_{loss.__name__}_lr{lr:g}.pth')

        #         print(f'memory report')
        #         reporter.report()

    #     result.learning_curve(train_loss_list, val_loss_list, epochs[1:])
    #     result.plot_result(np.array(target_all, dtype=object).reshape(-1), np.array(output_all, dtype=object).reshape(-1))
        cor_list, loss_list = result.cal_indicators(target_all, output_all)
        result.loss_hist(loss_list)
        result.cor_hist(cor_list)
    #     loss_sort, cor_sort = result.sort_list(loss_list, cor_list)
    #     dic = {'best loss': loss_sort[0],  'worst loss': loss_sort[-1], '10th bad loss': loss_sort[-10],'100th bad loss': loss_sort[-100],
    #               'best cor': cor_sort[-1], 'worst cor': cor_sort[0], '10th bad cor': cor_sort[9],'100th bad cor': cor_sort[99]} 
    #     for k, v in dic.items():
    #         print(k)
    #         result.visible_one(target_all, output_all, data_all, loss_list, cor_list, idx=v)

optimizer: RAdam (
Parameter Group 0
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.0001
    weight_decay: 0
)
Epoch 1/10
val Loss:13.1027 Timer:93.9264
Epoch 2/10
train Loss:9.0973 Timer:1126.0053
val Loss:6.9117 Timer:90.5646
Epoch 3/10


In [None]:
# # Feed the freshly generated data in as input and target
# input_array = []
# target_array = []
# for i in range(10):
#     input_path = f"../data/makedata/val/index/input_{i+1}.csv"
#     target_path = f"../data/makedata/val/accessibility/target_{i+1}.csv"
#     input_array.append(torch.Tensor(np.loadtxt(input_path, delimiter=",", dtype=np.float).astype(np.int)))
#     target_array.append(torch.Tensor(np.loadtxt(target_path, delimiter=",", dtype=np.float)))


In [None]:
# input_test = pickle.load(open("../data/test_sets/input_600_1000.pkl","rb"))
# target_test = pickle.load(open("../data/test_sets/target_600_1000.pkl","rb"))
# input_test = pickle.load(open("../data/RF00156/input_RF00156.pkl","rb"))
# target_test = pickle.load(open("../data/RF00156/target_RF00156.pkl","rb"))
# input_test = torch.stack(input_array)
# target_test = torch.stack(target_array)

# target_test = torch.flip(target_test, dims=[1])

# NOTE: pickle.load can execute arbitrary code from the file; only load trusted data.
# ``with`` closes each file; the bare open(...) calls used before leaked the handles.
with open("../data/max_span100_512/input_val15.pkl", "rb") as fh:
    input_test = pickle.load(fh)
with open("../data/max_span100_512/target_val15.pkl", "rb") as fh:
    target_test = pickle.load(fh)  # length-512 sequences only
target_test = torch.flip(target_test, dims=[1])
# input_test = input_test[0:10000] 
# target_test = target_test[0:10000]

datePrint(input_test.shape)
datePrint(target_test.shape)

test_dataset = model.Dataset(input_test, target_test)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32,shuffle=False, num_workers=1)

net = model.dilation1(num_layer=16, num_filters=128, kernel_sizes=5).to(device)
# map_location lets a checkpoint saved on a GPU load on the CPU fallback
# that `device` allows; without it torch.load fails on CPU-only machines.
net.load_state_dict(torch.load('big_data.pth', map_location=device))
criterion = nn.MSELoss().to(device)

# Only the prediction itself is timed; data and model loading are excluded.
start = time.time()
test_loss, data_all, target_all, output_all = mode.test(device, net, test_dataloader, criterion) 
datePrint('finish prediction loss', test_loss)
    
# np.savetxt('accessibility_output.txt', output_all, fmt='%.3e')
finish = time.time()
datePrint('予測時間', (finish-start))

In [None]:
# Overall scatter of target vs. output, flattened over all samples.
flat_target = np.array(target_all, dtype=object).reshape(-1)
flat_output = np.array(output_all, dtype=object).reshape(-1)
result.plot_result(flat_target, flat_output)

# Per-sample correlation and loss, shown as histograms.
cor_list, loss_list = result.cal_indicators(target_all, output_all)
result.loss_hist(loss_list)
result.cor_hist(cor_list)

loss_sort, cor_sort = result.sort_list(loss_list, cor_list)
# NOTE(review): the 'worst' entries use indices [-100] and [100] rather than
# the extremes [-1] / [0], and the two are not symmetric ([-100] is the 100th
# from the end, [100] the 101st from the start). Confirm this is intended.
# Both also raise IndexError with fewer than 101 samples.
dic = {
    'best loss': loss_sort[0],
    'worst loss': loss_sort[-100],
    'best cor': cor_sort[-1],
    'worst cor': cor_sort[100],
}
for label in dic:
    print(label)
    result.visible_one(target_all, output_all, data_all, loss_list, cor_list, idx=dic[label])

In [None]:
# Pull out two subsets via result.remake_bad (one ordered by loss, one by
# correlation; length=1000) and plot target vs. output for each.
(loss_target, loss_output, loss_data,
 cor_target, cor_output, cor_data) = result.remake_bad(
    target_all, output_all, data_all, loss_sort, cor_sort, length=1000)

for subset_target, subset_output in ((loss_target, loss_output), (cor_target, cor_output)):
    result.plot_result(
        np.array(subset_target, dtype=object).reshape(-1),
        np.array(subset_output, dtype=object).reshape(-1),
    )

In [None]:
def count_diff(data_all):
    """Return, per sequence, how far its G count is from a uniform quarter share.

    Sequences are integer-encoded with 1=A, 2=U, 3=G, 4=C. For each sequence
    the result is ``count(G) - len(sequence) / 4``.

    Args:
        data_all: Iterable of encoded sequences (arrays or plain lists).

    Returns:
        List of floats, one per sequence, in input order.

    NOTE: the cell further down calls ``result.count_diff`` instead of this
    local definition.
    """
    diff_list = []
    for seq in data_all:
        # np.asarray makes `seq == 3` element-wise for plain lists too;
        # comparing a list to 3 gave a single False and a count of 0.
        seq = np.asarray(seq)
        # Only G is counted. The A/U/C counts were computed but never used.
        # An alternative left disabled in the original was the summed
        # absolute deviation of all four bases from len/4.
        count_G = np.count_nonzero(seq == 3)
        diff_list.append(count_G - len(seq) / 4)

    return diff_list

# Scatter G-count skew against loss / correlation for the two subsets
# returned by result.remake_bad.
# The subset metrics get their own names. They used to overwrite the globals
# `loss_list` / `cor_list`, so the following cell paired a full-dataset
# diff_list with subset-sized metric lists.
_, loss_bad = result.cal_indicators(loss_target, loss_output)
diff_list = count_diff(loss_data)
result.heat_scatter(diff_list, loss_bad)

cor_bad, _ = result.cal_indicators(cor_target, cor_output)
diff_list = count_diff(cor_data)
result.heat_scatter(diff_list, cor_bad)

# diff_list = count_diff(data_all)
# result.heat_scatter(diff_list, loss_list)
# result.heat_scatter(diff_list, cor_list)

In [None]:
# Whole evaluation set: composition skew against loss, then against correlation.
diff_list = result.count_diff(data_all)
for metric_values in (loss_list, cor_list):
    result.heat_scatter(diff_list, metric_values)

# Subset selected by loss.
_, loss_bad = result.cal_indicators(loss_target, loss_output)
diff_list = result.count_diff(loss_data)
result.heat_scatter(diff_list, loss_bad)

# Subset selected by correlation.
cor_bad, _ = result.cal_indicators(cor_target, cor_output)
diff_list = result.count_diff(cor_data)
result.heat_scatter(diff_list, cor_bad)

In [None]:
import matplotlib.pyplot as plt

start = time.time()
path = "../data/human_data/seq7.fa"
with open(path, mode = 'r', encoding = 'utf-8') as f:
    seq_file = f.read().splitlines()
name = seq_file[0]
# Encode bases as digits: A->1, T/U->2, G->3, C->4. N->0 matches the
# RF00156 cell; without it an 'N' made int() raise ValueError.
BASE_CODES = str.maketrans({'A': '1', 'T': '2', 'U': '2', 'G': '3', 'C': '4', 'N': '0'})
seq = ''.join(seq_file[1:]).translate(BASE_CODES)
input_seq = torch.Tensor(list(map(int, seq)))
input_seq = torch.flip(input_seq, dims=[0])
input_seq = input_seq.unsqueeze(0)
# The plotted/evaluated span is 4 positions shorter than the input
# (presumably the model's unpadded kernel-5 output layer — TODO confirm).
out_length = len(input_seq[0])-4

# Second leading axis, as passed to mode.predict.
input_seq = input_seq.unsqueeze(0)

net = model.dilation1(num_layer=16, num_filters=128, kernel_sizes=5).to(device)
# map_location lets a GPU-saved checkpoint load on the CPU fallback of `device`.
net.load_state_dict(torch.load('big_data.pth', map_location=device))

data_all, output_all = mode.predict(device, net, input_seq) 

# Reference accessibility: skip the header line, ignore the last line, and
# take the text between the first ',' and the last ';' of every other line.
with open('../data/human_data/out7.txt', 'r') as f:
    next(f)
    acc = f.readlines()
acc_list = [float(re.findall(',(.*);', line)[0]) for line in acc[:-1]]

prediction = output_all[0][:out_length]

plt.figure(figsize=(15, 7))
plt.plot(range(out_length), acc_list, label='target', color='b')
plt.plot(range(out_length), prediction, label='output', color='r')
plt.legend()
plt.xlabel('base position')
plt.ylabel('accessibility')
plt.title('one')
plt.show()


cor = np.corrcoef(acc_list, prediction)
# Explicit array conversion instead of relying on list-minus-array broadcasting.
mse = ((np.asarray(acc_list) - prediction)**2).mean(axis=0)
print('cor', cor[0,1])
print('mse', mse)


np.savetxt('accessibility_output.txt', output_all, fmt='%.3f')
finish = time.time()
datePrint('予測時間', (finish-start))

In [None]:
path = "../data/real_data/RF00156.fa"

start = time.time()
with open(path, mode = 'r', encoding = 'utf-8') as f:
    seq_file = f.read().splitlines()
indexes = [i for i, n in enumerate(seq_file) if n.startswith('>')]
name_list = []
encoded_rows = []

# Encode bases as digits: A->1, T/U->2, G->3, C->4, N->0.
BASE_CODES = str.maketrans({'A': '1', 'T': '2', 'U': '2', 'G': '3', 'C': '4', 'N': '0'})

# Each record runs from its '>' header to the next header; the last one runs
# to the end of the file. The former range(len(indexes)-1) loop dropped it.
record_ends = indexes[1:] + [len(seq_file)]
for head, end in zip(indexes, record_ends):
    name_list.append(seq_file[head])
    seq = ''.join(seq_file[head+1:end]).translate(BASE_CODES)
    seq = torch.Tensor(list(map(int, seq)))
    seq = torch.flip(seq, dims=[0])
    # Right-pad with 0 up to the next multiple of 256.
    if (seq.shape[0]%256 != 0):
        seq = F.pad(seq, (0, 256-seq.shape[0]%256))
    encoded_rows.append(seq.unsqueeze(0))

# Concatenate once instead of growing the tensor inside the loop.
# All rows must have the same padded length for torch.cat to succeed.
seq_list = torch.cat(encoded_rows, dim=0) if encoded_rows else torch.empty(0, 256)


# if (input_seq.shape[1]%256 != 0):
#     input_seq = F.pad(input_seq, (0, 256-input_seq.shape[1]%256))
# if (input_seq.shape[1]>256):
#     transform = True
#     division = (input_seq.shape[1])//128 - 1
#     input_init = input_seq
#     input_seq = input_seq.unfold(1, 256, 128).reshape(-1, 256)
# else:
#     transform = False
# datePrint(input_seq.shape)
# input_seq = input_seq.unsqueeze(0)
# datePrint(input_seq.shape)

seq_list = seq_list.unsqueeze(0)
net = model.Variable(num_layer=16, kernel_sizes=33, flag=False).to(device)
# map_location lets a GPU-saved checkpoint load on the CPU fallback of `device`.
net.load_state_dict(torch.load('max_span20.pth', map_location=device))
    
data_all, output_all = mode.predict(device, net, seq_list) 
# if (transform==True):
#     output_tmp = torch.tensor(output_all)
#     for n in range(division):
#         if (n==0):
#             output_all = output_tmp[n::division, :192]
#         elif (n==division-1):
#             output_all = torch.cat([output_all, output_tmp[n::division, 64:]], dim=1)
#         else:
#             output_all = torch.cat([output_all, output_tmp[n::division, 64:192]], dim=1)
    
#     data_all = input_init.numpy()
#     output_all = output_all.numpy()
    
np.savetxt('accessibility_output.txt', output_all, fmt='%.3f')
finish = time.time()
datePrint('予測時間', (finish-start))

In [None]:
# Create samples: write every FASTA record of RF01210.fa to its own
# sample_<i>.txt (header line, then the sequence joined onto one line).
path = "../data/RF01210/RF01210.fa"

with open(path, mode = 'r', encoding = 'utf-8') as f:
    seq_file = f.read().splitlines()
indexes = [i for i, n in enumerate(seq_file) if n.startswith('>')]

# Each record ends at the next header; the last record ends at EOF. The
# former range(len(indexes)-1) loop never wrote the final record.
record_ends = indexes[1:] + [len(seq_file)]
for i, (head, end) in enumerate(zip(indexes, record_ends)):
    with open(f"../data/RF01210/sample_{i}.txt", mode="w") as f:
        f.write(seq_file[head] + "\n")
        f.write(''.join(seq_file[head+1:end]))

In [None]:
# Cast: convert the per-sample CSV files into stacked tensors and pickle them.
# NOTE: `from tqdm import tqdm` below rebinds the `tqdm` name that the first
# cell imported from tqdm.notebook.
from pathlib import Path
import numpy as np
from tqdm import tqdm
import pickle
import torch
import sys

for cond, cnt in (("train", 500000), ("test", 500000)):
    data_path = Path(f"../data/makedata/{cond}")
    input_array = []
    target_array = []
    print(f"reading {cond} files")
    for i in tqdm(range(cnt)):
        input_path = data_path / f"index/input_{i+1}.csv"
        target_path = data_path / f"accessibility/target_{i+1}.csv"

        # np.float / np.int were aliases of the builtins and were removed in
        # NumPy 1.24; the builtins behave identically and work everywhere.
        input_array.append(torch.Tensor(np.loadtxt(input_path, delimiter=",", dtype=float).astype(int)))
        target_array.append(torch.Tensor(np.loadtxt(target_path, delimiter=",", dtype=float)))
    print(f"saving to input_{cond}.pkl")
    # ``with`` flushes and closes the file; the bare open(...) never closed it.
    with open(f"../data/input_{cond}.pkl", 'wb') as fh:
        pickle.dump(torch.stack(input_array), fh)
        
    print(f"saving to target_{cond}.pkl")
    with open(f"../data/target_{cond}.pkl", 'wb') as fh:
        pickle.dump(torch.stack(target_array), fh)
