In [25]:
# run this block *first* every time your kernel starts/restarts
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "6"  # either 3 or 6

In [26]:
GENOMES = { "mouse" : "/users/kcochran/genomes/mm10_no_alt_analysis_set_ENCODE.fasta",
            "human" : "/users/kcochran/genomes/GRCh38_no_alt_analysis_set_GCA_000001405.15.fasta" }

ROOT = "/users/kcochran/projects/cs197_cross_species_domain_adaptation/"
DATA_DIR = ROOT + "data/"

EPSILON = 0.00001
SPECIES = ["mouse", "human"]

ACCESSIBILITY_FILE = "/users/angezhao/cs197_covariates/data/human/accessibility/reads.bigWig"
POS_BIND = "/users/angezhao/cs197_covariates/data/human/CTCF/chr3toY_pos_shuf.bed.gz"
NEG_BIND = "/users/angezhao/cs197_covariates/data/human/ctcf/chr3toY_neg_shuf.bed.gz"
#WINDOWS = np.array(mouse_false_negatives_windows)
train_species = "mouse"
tf = "CTCF"

TFS = ["CTCF", "CEBPA", "HNF4A", "RXRA"]

# Data Loaders

In [27]:
import gzip
import random
import numpy as np
from pyfaidx import Fasta
from torch.utils.data import Dataset
import pyBigWig
import torch
torch.device('cuda')
# from torch.utils.data import Dataset
#import pytorch
torch.__version__

'1.8.1'

In [28]:
def expand_window(start, end, target_len):
    midpoint = (start + end) / 2
    if not midpoint.is_integer() and target_len % 2 == 0:
        midpoint = midpoint - 0.5
    if midpoint.is_integer() and target_len % 2 != 0:
        midpoint = midpoint - 0.5
    new_start = midpoint - target_len / 2
    new_end = midpoint + target_len / 2
    
    assert new_start.is_integer(), new_start
    assert new_end.is_integer(), new_end
    assert new_start >= 0
    assert new_end - new_start == target_len, (new_end, new_start, target_len)
    
    return int(new_start), int(new_end)



In [29]:
class TrainGenerator(Dataset):
    # must also load in accessibility here
    # see profile model starter code for train_generator
    # remove the control track, replace the bit w/ not control track with accessibility
    # print out shapes

    letter_dict = {
        'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1],
        'n':[0,0,0,0],'A':[1,0,0,0],'C':[0,1,0,0],'G':[0,0,1,0],
        'T':[0,0,0,1],'N':[0,0,0,0]
    }

    def __init__(self, species, tf):
        self.posfile = DATA_DIR + species + "/" + tf + "/chr3toY_pos_shuf.bed.gz"
        self.negfile = DATA_DIR + species + "/" + tf + "/chr3toY_neg_shuf_run1_1E.bed.gz"
        #set accessibility according to correct species
        ACCESSIBILITY_FILE = "/users/angezhao/cs197_covariates/data/" + species + "/accessibility/reads.bigWig"
        self.accessFile = ACCESSIBILITY_FILE
        self.converter = Fasta(GENOMES[species])
        self.batchsize = 400
        self.halfbatchsize = self.batchsize // 2
        self.current_epoch = 1

        self.get_coords()
        self.on_epoch_end()

        self.profiles, self.logcounts_sum, self.logcounts_mean, self.logcounts_max = [0,0,0,1]#self.get_profiles_and_logcounts(self.coords)

    def __len__(self):
        return self.steps_per_epoch


    def get_coords(self):
        with gzip.open(self.posfile) as posf:
            pos_coords_tmp = [line.decode().split()[:3] for line in posf]  # expecting bed file format
            self.pos_coords = [(coord[0], int(coord[1]), int(coord[2])) for coord in pos_coords_tmp]  # no strand consideration
        with gzip.open(self.negfile) as negf:
            neg_coords_tmp = [line.decode().split()[:3] for line in negf]
            self.neg_coords = [(coord[0], int(coord[1]), int(coord[2])) for coord in neg_coords_tmp]
            
        # Load in all accessibility for positive coords
        self.pos_profiles = []
        self.neg_profiles = []
        with pyBigWig.open(self.accessFile) as accessibility_reader:
            for chrom, start, end in self.pos_coords:
                #print (chrom, " is chrom. ", start, " is start. ", end, " is end.")
                prof_start, prof_end = expand_window(start, end,
                                                 500 + 2 * 0)
                pos_profile = np.array(
                    accessibility_reader.values(chrom, prof_start, prof_end))
                pos_profile[np.isnan(pos_profile)] = EPSILON
                profile = np.array(pos_profile)

                self.pos_profiles.append(profile)
             
            # load in accessibility for negative coords
            for chrom, start, end in self.neg_coords:
                #print (chrom, " is chrom. ", start, " is start. ", end, " is end.")
                prof_start, prof_end = expand_window(start, end,
                                                 500 + 2 * 0)
                neg_profile = np.array(
                    accessibility_reader.values(chrom, prof_start, prof_end))
                neg_profile[np.isnan(neg_profile)] = EPSILON
                profile = np.array(neg_profile)

                self.neg_profiles.append(profile)
        
            
        self.steps_per_epoch = int(len(self.pos_coords) / self.halfbatchsize)
        print(self.steps_per_epoch)
                
    def get_profiles_and_logcounts(self, coords):
        #print("inside profiles")
        profiles = []
        logcounts_sum = []
        logcounts_mean = []
        logcounts_max = []

        with pyBigWig.open(self.accessFile) as accessibility_reader:
            for chrom, start, end in coords:
                #print (chrom, " is chrom. ", start, " is start. ", end, " is end.")
                prof_start, prof_end = expand_window(start, end,
                                                 500 + 2 * 0)
                pos_profile = np.array(
                    accessibility_reader.values(chrom, prof_start, prof_end))
                pos_profile[np.isnan(pos_profile)] = EPSILON
                profile = np.array(pos_profile)

                profiles.append(profile)
                logcounts_sum.append(np.array(np.sum(pos_profile)))
                logcounts_mean.append(np.array(np.mean(pos_profile)))
                logcounts_max.append(np.array(np.max(pos_profile)))
                
        #print("Finished going thorugh it all. ")        
        profiles = np.array(profiles)
        logcounts_sum = np.array(logcounts_sum)
        logcounts_mean = np.array(logcounts_mean)
        logcounts_max = np.array(logcounts_max)

        return profiles, logcounts_sum, logcounts_mean, logcounts_max
    
    def convert(self, coords):
        seqs_onehot = []
        #with pyBigWig.open(self.accessFile) as acc_bw_reader:
        for coord in coords:
            chrom, start, stop = coord
            #prof_start, prof_end = expand_window(start, stop,
             #                                500 + 2 * 0)
            #pos_profile = np.array(
            #    acc_bw_reader.values(chrom, prof_start, prof_end))
            seq = self.converter[chrom][start:stop].seq
            seq_onehot = np.array([self.letter_dict.get(x,[0,0,0,0]) for x in seq])
            #print(seq_onehot, "convert traingenerator");
            seqs_onehot.append(seq_onehot)

        seqs_onehot = np.array(seqs_onehot)
        return seqs_onehot

    def addAccess(self, coords, seqs_onehot):
        with pyBigWig.open(self.accessFile) as acc_bw_reader:
            for coord in coords:
                chrom, start, stop = coord
                seq = self.converter[chrom][start:stop].seq
                
        return seqs_onehot

    def __getitem__(self, batch_index):	
        # First, get chunk of coordinates
        pos_coords_batch = self.pos_coords[batch_index * self.halfbatchsize : (batch_index + 1) * self.halfbatchsize]
        neg_coords_batch = self.neg_coords[batch_index * self.halfbatchsize : (batch_index + 1) * self.halfbatchsize]
        pos_access_batch = self.pos_profiles[batch_index * self.halfbatchsize : (batch_index + 1) * self.halfbatchsize]
        neg_access_batch = self.neg_profiles[batch_index * self.halfbatchsize : (batch_index + 1) * self.halfbatchsize]

        # if train_steps calculation is off, lists of coords may be empty
        assert len(pos_coords_batch) > 0, len(pos_coords_batch)
        assert len(neg_coords_batch) > 0, len(neg_coords_batch)

        # Second, convert the coordinates into one-hot encoded sequences
        pos_onehot = self.convert(pos_coords_batch)
        neg_onehot = self.convert(neg_coords_batch)
        
        #add in access
        
        # seqdataloader returns empty array if coords are empty list or not in genome
        assert pos_onehot.shape[0] > 0, pos_onehot.shape[0]
        assert neg_onehot.shape[0] > 0, neg_onehot.shape[0]

        # Third, combine bound and unbound sites into one large array, and create label vector
        # We don't need to shuffle here because all these examples will correspond
        # to a simultaneous gradient update for the whole batch
        all_seqs = np.concatenate((pos_onehot, neg_onehot))
        labels = np.concatenate((np.ones(pos_onehot.shape[0],), np.zeros(neg_onehot.shape[0],)))
        
        all_seqs = torch.tensor(all_seqs, dtype=torch.float).permute(0, 2, 1)
        labels = torch.tensor(labels, dtype=torch.float)
        assert all_seqs.shape[0] == self.batchsize, all_seqs.shape[0]
        
        #Getting Accessibility for these coordinate
        #profiles1, logcounts_sum1, logcounts_mean1, logcounts_max1 = self.get_profiles_and_logcounts(pos_coords_batch)
        #profiles, logcounts_sum, logcounts_mean, logcounts_max = self.get_profiles_and_logcounts(neg_coords_batch)
        profiles = np.concatenate((pos_access_batch, neg_access_batch))
        profiles = torch.tensor(profiles, dtype=torch.float)
        #self.logcounts_sum = logcounts_sum1 + logcounts_sum
        #self.logcounts_mean = logcounts_max1 + logcounts_max

        return all_seqs, labels, profiles


    def on_epoch_end(self):
        # switch to next set of negative examples
        prev_epoch = self.current_epoch
        next_epoch = prev_epoch + 1

        # update file where we will retrieve unbound site coordinates from
        prev_negfile = self.negfile
        next_negfile = prev_negfile.replace(str(prev_epoch) + "E", str(next_epoch) + "E")
        self.negfile = next_negfile

        # load in new unbound site coordinates
        with gzip.open(self.negfile) as negf:
            neg_coords_tmp = [line.decode().split()[:3] for line in negf]
            self.neg_coords = [(coord[0], int(coord[1]), int(coord[2])) for coord in neg_coords_tmp]

        # then shuffle positive examples
        random.shuffle(self.pos_coords)

In [30]:
class ValGenerator(Dataset):
    letter_dict = {
        'a':[1,0,0,0],'c':[0,1,0,0],'g':[0,0,1,0],'t':[0,0,0,1],
        'n':[0,0,0,0],'A':[1,0,0,0],'C':[0,1,0,0],'G':[0,0,1,0],
        'T':[0,0,0,1],'N':[0,0,0,0]}

    def __init__(self, species, tf, return_labels = True):
        self.valfile = DATA_DIR + species + "/" + tf + "/chr1_random_1m.bed.gz"
        self.converter = Fasta(GENOMES[species])
        self.batchsize = 1000  # arbitrarily large number that will fit into memory
        self.return_labels = return_labels
        ACCESSIBILITY_FILE = "/users/angezhao/cs197_covariates/data/" + species + "/accessibility/reads.bigWig"
        self.accessFile = ACCESSIBILITY_FILE
        self.get_coords_and_labels()
        


    def __len__(self):
        return self.steps_per_epoch


    def get_coords_and_labels(self):
        with gzip.open(self.valfile) as f:
            coords_tmp = [line.decode().split()[:4] for line in f]  # expecting bed file format
        
        self.labels = [int(coord[3]) for coord in coords_tmp]
        self.coords = [(coord[0], int(coord[1]), int(coord[2])) for coord in coords_tmp]  # no strand consideration
        
        self.steps_per_epoch = int(len(self.coords) / self.batchsize)
        # Load in all accessibility for positive coords
        self.profiles = []

        with pyBigWig.open(self.accessFile) as accessibility_reader:
            for chrom, start, end in self.coords:
                #print (chrom, " is chrom. ", start, " is start. ", end, " is end.")
                prof_start, prof_end = expand_window(start, end,
                                                 500 + 2 * 0)
                pos_profile = np.array(
                    accessibility_reader.values(chrom, prof_start, prof_end))
                pos_profile[np.isnan(pos_profile)] = EPSILON
                profile = np.array(pos_profile)

                self.profiles.append(profile)
        

    def convert(self, coords):
        seqs_onehot = []
        
        for coord in coords:
            chrom, start, stop = coord
            seq = self.converter[chrom][start:stop].seq
            seq_onehot = np.array([self.letter_dict.get(x,[0,0,0,0]) for x in seq])
            seqs_onehot.append(seq_onehot)

        seqs_onehot = np.array(seqs_onehot)
        return seqs_onehot

    def get_profiles_and_logcounts(self, coords):
        #print("inside profiles")
        profiles = []
        logcounts_sum = []
        logcounts_mean = []
        logcounts_max = []

        with pyBigWig.open(self.accessFile) as accessibility_reader:
            for chrom, start, end in coords:
                #print (chrom, " is chrom. ", start, " is start. ", end, " is end.")
                prof_start, prof_end = expand_window(start, end,
                                                 500 + 2 * 0)
                pos_profile = np.array(
                    accessibility_reader.values(chrom, prof_start, prof_end))
                pos_profile[np.isnan(pos_profile)] = EPSILON
                profile = np.array(pos_profile)

                profiles.append(profile)
                logcounts_sum.append(np.array(np.sum(pos_profile)))
                logcounts_mean.append(np.array(np.mean(pos_profile)))
                logcounts_max.append(np.array(np.max(pos_profile)))
                
        #print("Finished going thorugh it all. ")        
        profiles = np.array(profiles)
        logcounts_sum = np.array(logcounts_sum)
        logcounts_mean = np.array(logcounts_mean)
        logcounts_max = np.array(logcounts_max)

        return profiles, logcounts_sum, logcounts_mean, logcounts_max

    def __getitem__(self, batch_index):	
        # First, get chunk of coordinates
        batch_start = batch_index * self.batchsize
        batch_end = (batch_index + 1) * self.batchsize
        coords_batch = self.coords[batch_start : batch_end]
        profiles = self.profiles[batch_start : batch_end]
        # if train_steps calculation is off, lists of coords may be empty
        assert len(coords_batch) > 0, len(coords_batch)

        # Second, convert the coordinates into one-hot encoded sequences
        onehot = self.convert(coords_batch)

        # array will be empty if coords are not found in the genome
        assert onehot.shape[0] > 0, onehot.shape[0]

        onehot = torch.tensor(onehot, dtype=torch.float).permute(0, 2, 1)
        profiles = torch.tensor(profiles, dtype=torch.float)
        
        if self.return_labels:
            labels = self.labels[batch_start : batch_end]
            labels = torch.tensor(labels, dtype=torch.float)
            return onehot, labels, profiles
        else:
            return onehot

# Model Training And Evaluation

In [31]:
# Performance metric functions

from sklearn.metrics import average_precision_score, roc_auc_score, confusion_matrix, log_loss


def print_metrics(preds, labels):
    preds = np.array(preds)
    labels = np.array(labels)
    preds = preds.squeeze()

    # this is the binary cross-entropy loss, same as in training
    print("Loss:\t", log_loss(labels, preds))
    print("auROC:\t", roc_auc_score(labels, preds))
    auPRC = average_precision_score(labels, preds)
    print("auPRC:\t", auPRC)
    print_confusion_matrix(preds, labels)
    return auPRC

def print_confusion_matrix(preds, labels):
    npthresh = np.vectorize(lambda t: 1 if t >= 0.5 else 0)
    preds_binarized = npthresh(preds)
    conf_matrix = confusion_matrix(labels, preds_binarized)
    print("Confusion Matrix (at t = 0.5):\n", conf_matrix)

In [32]:
import torch
from torch.utils.data import DataLoader



class BasicModel(torch.nn.Module):
    def __init__(self):
        super(BasicModel, self).__init__()
        self.input_seq_len = 500
        num_conv_filters = 240
        lstm_hidden_units = 32
        fc_layer1_units = 1024
        fc_layer2_units = 512
        
        #How do we know which species of data we should use???
        #Our added on data.
        #self.access_data = Generator(train_species, tf, "val")
        #inits for our Access
        self.convAccess = torch.nn.Conv1d(1, 10, kernel_size=10, padding=0)
        self.lstmAccess = torch.nn.LSTM(input_size=10,
                                  hidden_size=32,
                                  batch_first=True)
        # Defining the layers to go into our model
        # (see the forward function for how they fit together)
        self.conv = torch.nn.Conv1d(4, num_conv_filters, kernel_size=20, padding=0)
        # self.conv = torch.nn.Conv1d(1, num_conv_filters, kernel_size=5, padding=0) -> accessibility
        self.relu = torch.nn.ReLU()
        self.maxpool = torch.nn.MaxPool1d(15, stride=15, padding=0)
        self.lstm = torch.nn.LSTM(input_size=num_conv_filters,
                                  hidden_size=lstm_hidden_units,
                                  batch_first=True)
        self.fc1 = torch.nn.Linear(in_features=lstm_hidden_units,
                                   out_features=fc_layer1_units)
        self.dropout = torch.nn.Dropout(p=0.5)
        self.fc2 = torch.nn.Linear(in_features=fc_layer1_units,
                                   out_features=fc_layer2_units)
        self.fc_final = torch.nn.Linear(in_features=fc_layer2_units,
                                        out_features=1)
        self.sigmoid = torch.nn.Sigmoid()

        # The loss function we'll use -- binary cross-entropy
        # (this is the standard loss to use for binary classification)
        self.loss = torch.nn.BCELoss()

        # We'll store performance metrics during training in these lists
        self.train_loss_by_epoch = []
        self.source_val_loss_by_epoch = []
        self.source_val_auprc_by_epoch = []
        self.target_val_loss_by_epoch = []
        self.target_val_auprc_by_epoch = []

        # We'll record the best model we've seen yet each epoch
        self.best_state_so_far = self.state_dict()
        self.best_auprc_so_far = 1


    def forward(self, X, Y):
        #print("Inside forward")
        #print(X.traingen.profiles)
        #print("Y shape ", Y.shape)
        Y = Y.permute(1,0,2)
        #print("Y shape after ", Y.shape)
        Y_1 = self.relu(self.convAccess(Y))
        #print("Y_1 shape ", Y_1.shape)
        Y_2 = self.maxpool(Y_1).permute(0, 2, 1)
        #print("Y_2 shape ", Y_2.shape)
        Y_3, _ = self.lstmAccess(Y_2)
        #print("Y_3 shape ", Y_3.shape)
        Y_4 = Y_3[:, -1]
        #print("Y_4 shape ", Y_4.shape)
        X_1 = self.relu(self.conv(X))
        # LSTM is expecting input of shape (batches, seq_len, conv_filters)
        X_2 = self.maxpool(X_1).permute(0, 2, 1)
        X_3, _ = self.lstm(X_2)
        X_4 = X_3[:, -1]  # only need final output of LSTM
        #print("X_4 shape ", X_4.shape)
        XY_4 = torch.cat((X_4, Y_4))
        # pass in 2 xs to this function
        # concat the two xs at X_4, and then return 1 y
        
        
        X_5 = self.relu(self.fc1(X_4))
        X_6 = self.dropout(X_5)
        X_7 = self.sigmoid(self.fc2(X_6))
        y = self.sigmoid(self.fc_final(X_7)).squeeze()
        return y
    
    def validation(self, data_loader):
        # only run this within torch.no_grad() context!
        losses = []
        preds = []
        labels = []
        for seqs_onehot_batch, labels_batch, profiles in data_loader:
            # push batch through model, get predictions, calculate loss
            preds_batch = self(seqs_onehot_batch.squeeze().cuda(), profiles.cuda())
            labels_batch = labels_batch.squeeze()
            loss_batch = self.loss(preds_batch, labels_batch.cuda())
            losses.append(loss_batch.item())

            # storing labels + preds for auPRC calculation later
            labels.extend(labels_batch.detach().numpy())  
            preds.extend(preds_batch.cpu().detach().numpy())
            
        return np.array(losses), np.array(preds), np.array(labels)


    def fit(self, train_gen, source_val_data_loader, target_val_data_loader,
            optimizer, epochs=15):
        
        for epoch in range(epochs):
            torch.cuda.empty_cache()  # clear memory to keep stuff from blocking up
            
            print("=== Epoch " + str(epoch + 1) + " ===")
            print("Training...")
            self.train()
            #print (train_gen.profiles, " is profiles in fit")
            # using a batch size of 1 here because the generator returns
            # many examples in each batch
            train_data_loader = DataLoader(train_gen,
                               batch_size = 1, shuffle = True)

            train_losses = []
            train_preds = []
            train_labels = []
            for seqs_onehot_batch, labels_batch, profile_batch in train_data_loader:
                # reset the optimizer; need to do each batch after weight update
                optimizer.zero_grad()

                # push batch through model, get predictions, and calculate loss
                preds = self(seqs_onehot_batch.squeeze().cuda(), profile_batch.cuda())
                labels_batch = labels_batch.squeeze()
                loss_batch = self.loss(preds, labels_batch.cuda())
                
                # brackpropogate the loss and update model weights accordingly
                loss_batch.backward()
                optimizer.step()
                
                train_losses.append(loss_batch.item())
                train_labels.extend(labels_batch)
                train_preds.extend(preds.cpu().detach().numpy())

            self.train_loss_by_epoch.append(np.mean(train_losses))
            print_metrics(train_preds, train_labels)
            
            # load new set of negative examples for next epoch
            train_gen.on_epoch_end()

            
            # Assess model performance on same-species validation set
            print("Evaluating on source validation data...")
            
            # Since we don't use gradients during model evaluation,
            # the following two lines let the model predict for many examples
            # more efficiently (without having to keep track of gradients)
            self.eval()
            with torch.no_grad():
                source_val_losses, source_val_preds, source_val_labels = self.validation(source_val_data_loader)

                print("Validation loss:", np.mean(source_val_losses))
                self.source_val_loss_by_epoch.append(np.mean(source_val_losses))

                # calc auPRC over source validation set
                source_val_auprc = print_metrics(source_val_preds, source_val_labels)
                self.source_val_auprc_by_epoch.append(source_val_auprc)

                # check if this is the best performance we've seen so far
                # if yes, save the model weights -- we'll use the best model overall
                # for later analyses
                if source_val_auprc < self.best_auprc_so_far:
                    self.best_auprc_so_far = source_val_auprc
                    self.best_state_so_far = self.state_dict()
                
                
                # now repeat for target species data 
                print("Evaluating on target validation data...")
                
                target_val_losses, target_val_preds, target_val_labels = self.validation(target_val_data_loader)

                print("Validation loss:", np.mean(target_val_losses))
                self.target_val_loss_by_epoch.append(np.mean(target_val_losses))

                # calc auPRC over source validation set
                target_val_auprc = print_metrics(target_val_preds, target_val_labels)
                self.target_val_auprc_by_epoch.append(target_val_auprc)
                

# Setup + Train

In [36]:
# setup generators / data loaders for training and validation
tfType = "CEBPA"
source = "mouse"
target = "human"
# we'll make the training data loader in the training loop,
# since we need to update some of the examples used each epoch
train_gen = TrainGenerator(source, tfType)

source_val_gen = ValGenerator(source, tfType)
# using a batch size of 1 here because the generator returns
# many examples in each batch
source_val_data_loader = DataLoader(source_val_gen, batch_size = 1, shuffle = False)

target_val_gen = ValGenerator(target, tfType)
target_val_data_loader = DataLoader(target_val_gen, batch_size = 1, shuffle = False)

# consider issues of overfitting using auPRC in trainGen and ValGen -> issue arises from the fact that adding in accessibility
# should improve same species prediction performance
# check for overfitting -> consider reducing the number of parameters
# last resort: binarize accessibility files

2468


In [34]:

from __future__ import print_function
import sys  

In [38]:

#%%capture cap --no-stderr
#with open ('ModelOutput.txt', 'w') as f:
#    f.write(cap.stdout)
#print("Test")
# initialize the model
model = BasicModel()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# train!
model.cuda()
model.fit(train_gen, source_val_data_loader, target_val_data_loader, optimizer, epochs = 5)
model.load_state_dict(model.best_state_so_far)
model.cpu()



=== Epoch 1 ===
Training...
Loss:	 0.4620185729008359
auROC:	 0.8612548547009501
auPRC:	 0.858659238609381
Confusion Matrix (at t = 0.5):
 [[376427 117173]
 [106295 387305]]
Evaluating on source validation data...
Validation loss: 0.2743188961446285
Loss:	 0.274318883414389
auROC:	 0.9108266037438801
auPRC:	 0.22769938496729417
Confusion Matrix (at t = 0.5):
 [[871248 116707]
 [  2796   9249]]
Evaluating on target validation data...
Validation loss: 0.4305892720520496
Loss:	 0.4305892514164263
auROC:	 0.9049039086988327
auPRC:	 0.08892357997183793
Confusion Matrix (at t = 0.5):
 [[795749 197196]
 [  1006   6049]]
=== Epoch 2 ===
Training...
Loss:	 0.35866116750102695
auROC:	 0.9204518523609495
auPRC:	 0.9161829924048634
Confusion Matrix (at t = 0.5):
 [[412651  80949]
 [ 72910 420690]]
Evaluating on source validation data...
Validation loss: 0.4232696333229542
Loss:	 0.4232696132154192
auROC:	 0.9269294662091987
auPRC:	 0.26818732769487375
Confusion Matrix (at t = 0.5):
 [[794353 19360

KeyboardInterrupt: 

In [None]:
with open ('ModelOutput.txt', 'r') as f:
    print(f.read());