In [1]:
from FID_helper import *
import random
import torch.nn.functional as F
import torch
import numpy as np
import torch.nn as nn
from scipy.stats import entropy, wasserstein_distance

In [2]:
def load_data(dataset_name, seq_len, sample_num):
    seq = np.load(dataset_name, allow_pickle=True)
    seq = seq.tolist()
    seq_sample = random.sample(seq, sample_num)

    return  seq2onehot(seq_sample, seq_len)

In [10]:
def get_am_score(syn_seq_name, nat_seq_name, model, seq_len):
    eps = 1e-16
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sample_num = 1000
    syn_seq = load_data(syn_seq_name, seq_len, sample_num)

    syn_seq_2d = syn_seq.reshape(sample_num, seq_len, 4, 1)
    syn_seq_2d = train_dataset(syn_seq_2d)
    syn_seq_2d = DataLoader(syn_seq_2d, batch_size=1024, shuffle=False)

    nat_seq = load_data(nat_seq_name, seq_len, sample_num)
    nat_seq_2d = nat_seq.reshape(sample_num, seq_len, 4, 1)
    nat_seq_2d = train_dataset(nat_seq_2d)
    nat_seq_2d = DataLoader(nat_seq_2d, batch_size=1024, shuffle=False)

    model.to(device)

    with torch.no_grad():
        for batch in syn_seq_2d:
            batch = batch['feature'].to(device)
            syn_preds = model(batch).squeeze(1)
        for batch in nat_seq_2d:
            batch = batch['feature'].to(device)
            nat_preds = model(batch).squeeze(1)
    am_score = entropy(syn_preds.cpu(), nat_preds.cpu())
    # print(nat_preds.shape, syn_preds.shape)
    # am_score = wasserstein_distance(syn_preds.cpu(), nat_preds.cpu())
    # print(syn_preds.cpu()[:50])
    # print(nat_preds.cpu()[:50])

    return am_score


In [7]:
class PREDICT_1(nn.Module):

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(50, 100, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(100, 200, kernel_size=(5, 1), padding=(3, 0)),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(200, 200, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Flatten()
        )
        self.fc = nn.Sequential(
            nn.Linear(400, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.fc(x)
        # x = x.squeeze(-1)
        # x = x.squeeze(-1)
        return x

model = PREDICT_1()
model.load_state_dict(torch.load('fid_model/promoter_1/CNN_train.pth'))
model.eval()
dif_is = []
gan_is = []
print("ep.\t gan\t dif")
for i in range(130):
    if i % 10 == 9:
        gan_epoch_is = get_am_score("syn_seq/promoter_1/new_seq_epoch_{}.npy".format(i), "data/promoter_1.npy", model, 50)
        dif_epoch_is = get_am_score("gan_seq/promoter_1/samples_{}.npy".format(i), "data/promoter_1.npy", model, 50)
        dif_is.append(dif_epoch_is)
        gan_is.append(gan_epoch_is)
        print("{}\t {:.4f}\t {:.4f}".format(i, gan_epoch_is, dif_epoch_is))
        # print(dif_epoch_is, gan_epoch_is)



ep.	 gan	 dif
9	 0.0026	 0.0038
19	 0.0026	 0.0026
29	 0.0025	 0.0026
39	 0.0026	 0.0025
49	 0.0025	 0.0027
59	 0.0026	 0.0025
69	 0.0027	 0.0027
79	 0.0023	 0.0025
89	 0.0026	 0.0026
99	 0.0026	 0.0025
109	 0.0027	 0.0026
119	 0.0025	 0.0026
129	 0.0030	 0.0024


In [11]:
class PREDICT_2(nn.Module):

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(165, 300, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(300, 600, kernel_size=(5, 1), padding=(3, 0)),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(600, 600, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Flatten()
        )
        self.fc = nn.Sequential(
            nn.Linear(1200, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.fc(x)
        # x = x.squeeze(-1)
        # x = x.squeeze(-1)
        return x

model = PREDICT_2()
model.load_state_dict(torch.load('fid_model/promoter_2/CNN_train.pth'))
model.eval()
dif_fid = []
gan_fid = []
print("ep.\t gan\t dif")
for i in range(130):
    if i % 10 == 9:
        dif_fid.append(get_am_score("syn_seq/promoter_2/new_seq_epoch_{}.npy".format(i), "data/promoter_2.npy", model, 165))
        gan_fid.append(get_am_score("gan_seq/promoter_2/samples_{}.npy".format(i), "data/promoter_2.npy", model, 165))
        print("{}\t {:.4f}\t {:.4f}".format(i, gan_fid[-1], dif_fid[-1]))




ep.	 gan	 dif
9	 3.8089	 0.1411
19	 0.2065	 0.2446
29	 0.2710	 0.0652
39	 0.3697	 0.0786
49	 0.3968	 0.0209
59	 0.1272	 0.1736
69	 0.1622	 0.0875
79	 0.1508	 0.0685
89	 0.1202	 0.1340
99	 0.1667	 0.0941
109	 0.1337	 0.0399
119	 0.1355	 0.1380
129	 0.1527	 0.0267


In [9]:
class PREDICT_3(nn.Module):

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(118, 200, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1))
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(200, 400, kernel_size=(5, 1), padding=(3, 0)),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(400, 400, kernel_size=(6, 1), padding=(3, 0)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 1)),
            nn.Flatten()
        )
        self.fc = nn.Sequential(
            nn.Linear(800, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.fc(x)
        # x = x.squeeze(-1)
        # x = x.squeeze(-1)
        return x

model = PREDICT_3()
model.load_state_dict(torch.load('fid_model/promoter_3/CNN_train.pth'))
model.eval()
dif_fid = []
gan_fid = []
print("ep.\t gan\t dif")
for i in range(130):
    if i % 10 == 9:

        dif_fid.append(get_am_score("syn_seq/promoter_3/new_seq_epoch_{}.npy".format(i), "data/promoter_3.npy", model, 118))
        gan_fid.append(get_am_score("gan_seq/promoter_3/samples_{}.npy".format(i), "data/promoter_3.npy", model, 118))
        print("{}\t {:.4f}\t {:.4f}".format(i, gan_fid[-1], dif_fid[-1]))




ep.	 gan	 dif
9	 0.0192	 0.0233
19	 0.0218	 0.0326
29	 0.0220	 0.0305
39	 0.0197	 0.0317
49	 0.0211	 0.0267
59	 0.0219	 0.0288
69	 0.0218	 0.0332
79	 0.0230	 0.0301
89	 0.0210	 0.0360
99	 0.0222	 0.0316
109	 0.0221	 0.0297
119	 0.0205	 0.0307
129	 0.0221	 0.0320


In [14]:
help(DataLoader)

Help on class DataLoader in module torch.utils.data.dataloader:

class DataLoader(typing.Generic)
 |  DataLoader(dataset: torch.utils.data.dataset.Dataset[+T_co], batch_size: Optional[int] = 1, shuffle: Optional[bool] = None, sampler: Union[torch.utils.data.sampler.Sampler, Iterable, NoneType] = None, batch_sampler: Union[torch.utils.data.sampler.Sampler[Sequence], Iterable[Sequence], NoneType] = None, num_workers: int = 0, collate_fn: Optional[Callable[[List[~T]], Any]] = None, pin_memory: bool = False, drop_last: bool = False, timeout: float = 0, worker_init_fn: Optional[Callable[[int], NoneType]] = None, multiprocessing_context=None, generator=None, *, prefetch_factor: int = 2, persistent_workers: bool = False, pin_memory_device: str = '')
 |  
 |  Data loader. Combines a dataset and a sampler, and provides an iterable over
 |  the given dataset.
 |  
 |  The :class:`~torch.utils.data.DataLoader` supports both map-style and
 |  iterable-style datasets with single- or multi-process l