In [1]:
import numpy as np

import random
import json

import torch
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler

import datetime
import sys 
import os
sys.path.append(os.path.abspath('/tank/local/ndf3868/GODDS/GAN'))

# file_path = os.path.realpath(__file__)

from utils import ASV_DATASET, Generator, Discriminator, Whispers, train, test_data, set_up_metrics_list

In [2]:
def get_current_timestamp():
    """Return the current local time as a ``DDMMYY-HH:MM:SS`` string."""
    return datetime.datetime.now().strftime("%d%m%y-%H:%M:%S")

def set_seed(seed: int = 42) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (``torch.manual_seed`` and ``torch.cuda.manual_seed``), turns on cuDNN's
    deterministic flag, turns off its benchmark flag, and exports
    ``PYTHONHASHSEED``.

    Args:
        seed: Value applied to all generators.
    """
    # Same seed for every library-level generator, applied in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    # NOTE: hash randomisation is fixed at interpreter start-up, so this only
    # influences Python processes launched after this call.
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")

def get_balanced_indeces(dataset, n_samples_per_class, shuffle):
    """Draw the same number of sample indices from every class of ``dataset``.

    Args:
        dataset: Object exposing ``class_indeces``, a mapping from class label
            to the collection of dataset indices belonging to that class.
        n_samples_per_class: How many indices to draw, without replacement,
            from each class.
        shuffle: If truthy, shuffle the combined list in place before
            returning it; otherwise the indices stay grouped by class in the
            iteration order of ``dataset.class_indeces``.

    Returns:
        A flat list with the chosen indices of all classes.

    Raises:
        ValueError: If a class holds fewer than ``n_samples_per_class``
            indices. The message names the offending class.
    """
    class_indeces = []

    for label, indices in dataset.class_indeces.items():
        available = len(indices)
        # Fail with the class label instead of NumPy's generic
        # "larger sample than population" message.
        if n_samples_per_class > available:
            raise ValueError(
                f"Cannot draw {n_samples_per_class} samples for class {label}: "
                f"only {available} available"
            )

        chosen_indeces = np.random.choice(indices, n_samples_per_class, replace=False)
        class_indeces.extend(chosen_indeces)
        print("For class", label, "chose", len(chosen_indeces), "objects from dataset")

    if shuffle:
        np.random.shuffle(class_indeces)
    return class_indeces

In [3]:

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [4]:
# Fix all RNG seeds before any model or sampler is created.
set_seed(3407)

# --- Labels ---
# presumably the label id of genuine (bona fide) speech — confirm against ASV_DATASET
bonafide_class = 0

# --- Training schedule ---
bootstrap_iterations = 10  # number of balanced re-samplings of the train set
n_epochs = 15

# --- Model dimensions (passed to Generator / Discriminator) ---
input_size, hidden_size, output_size = 190_000, 200, 1

# --- Optimisation ---
# Shared by both Adam optimizers.
# NOTE(review): 1e-8 is far below Adam's default of 1e-3 — confirm this is intentional.
lr = 1e-8
batch_size = 16

Random seed set as 3407


In [5]:
# Networks first (generator, then discriminator), both moved to the target device.
gen = Generator(input_size, hidden_size).to(device)
disc = Discriminator(input_size, hidden_size, output_size).to(device)

# One Adam optimizer per network, both using the shared learning rate.
gen_opt, disc_opt = (torch.optim.Adam(net.parameters(), lr=lr) for net in (gen, disc))

# Mean-squared-error objective handed to train().
criterion = nn.MSELoss()

In [6]:
# Root directory of the ASV data on disk.
asv_directory = '/tank/local/ndf3868/GODDS/datasets/ASV'

# class_balance options noted by the author: oversample, undersample, undersample_all.
print("reading TRAIN dataset")
train_dataset = ASV_DATASET(
    asv_directory,
    'train',
    'LA',
    class_balance=None,
)

# The 'dev' split is what this notebook evaluates on.
print("reading TEST  dataset")
test_dataset = ASV_DATASET(
    asv_directory,
    'dev',
    'LA',
    class_balance=None,
)

reading TRAIN dataset


Reading target:   0%|          | 0/25380 [00:00<?, ?it/s]

Size of dataset 25380
Finished reading
reading TEST  dataset


Reading target:   0%|          | 0/24844 [00:00<?, ?it/s]

Size of dataset 24844
Finished reading


In [7]:
print("sampling TEST dataset")

# Class-balanced evaluation subset: 2500 indices drawn per class, chosen once here.
sampler = SubsetRandomSampler(
    get_balanced_indeces(dataset=test_dataset, n_samples_per_class=2500, shuffle=False)
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    sampler=sampler,
)

sampling TEST dataset
For class 0 chose 2500 objects from dataset
For class 1 chose 2500 objects from dataset


In [8]:
# Re-seed so the sequence of bootstrap samples is reproducible when this cell is re-run.
set_seed(3407)

# The log directory is the same for every iteration; create it up front so a
# finished training run is not lost to a missing directory when writing metrics.
logs_dir = '/tank/local/ndf3868/GODDS/GAN/logs'
os.makedirs(logs_dir, exist_ok=True)

# NOTE(review): gen, disc and their optimizers are created once outside this
# loop, so every iteration keeps training the same weights instead of starting
# fresh — confirm this is the intended bootstrap protocol.
for iteration in range(bootstrap_iterations):

    print("sampling TRAIN dataset for bootstrap iteration", iteration)
    # Draw a new class-balanced subset of the training data for this iteration.
    sampler = SubsetRandomSampler(
        get_balanced_indeces(dataset=train_dataset, n_samples_per_class=2500, shuffle=True)
    )
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)

    # Use the configured epoch count rather than a literal.
    train(train_dataloader, gen, disc,
          criterion, disc_opt, gen_opt,
          n_epochs, device)

    # Evaluate on the fixed test subset and persist the metrics as JSON.
    metrics_list = set_up_metrics_list(train_dataset.bonafide_class)
    metrics = test_data(metrics_list, gen, disc, test_dataloader, device)

    timestamp = get_current_timestamp()
    log_path = os.path.join(logs_dir, f"{timestamp}_sample_iteration_{iteration}.json")
    with open(log_path, "w") as outfile:
        json.dump(metrics, outfile)

Random seed set as 3407
sampling TRAIN dataset for bootstrap iteration 0
For class 0 chose 2500 objects from dataset
For class 1 chose 2500 objects from dataset


description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

Evaluate disc:   0%|          | 0/313 [00:00<?, ?it/s]


-----
SUM using threshold 2500.0 noise 5000 clean 5000
SUM as is 2500.0 noise 2505.131299853325 clean 2511.837711453438
Lengths 5000 noise 5000 clean 5000

----
Examples
 [1. 0. 0. 0. 1. 0. 1. 0. 0. 0.] [0.5010232  0.50102633 0.5010251  0.5010247  0.5010249  0.501025
 0.50102204 0.50102836 0.50102705 0.50102764] [0.5038026  0.5016787  0.5020973  0.50132066 0.50209284 0.5027038
 0.50446016 0.5021147  0.5024534  0.50285375] 
-----

sampling TRAIN dataset for bootstrap iteration 1
For class 0 chose 2500 objects from dataset
For class 1 chose 2500 objects from dataset


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

Evaluate disc:   0%|          | 0/313 [00:00<?, ?it/s]


-----
SUM using threshold 2500.0 noise 0 clean 3503
SUM as is 2500.0 noise 2492.9759769141674 clean 2507.814911067486
Lengths 5000 noise 5000 clean 5000

----
Examples
 [1. 1. 0. 1. 1. 0. 1. 0. 1. 1.] [0.49859372 0.49859384 0.49859664 0.4985945  0.4985927  0.49859527
 0.49859527 0.49859703 0.4985975  0.49859586] [0.5065608  0.5019731  0.49884433 0.50103825 0.5050346  0.49983555
 0.50522405 0.5003727  0.5001227  0.50022036] 
-----

sampling TRAIN dataset for bootstrap iteration 2
For class 0 chose 2500 objects from dataset
For class 1 chose 2500 objects from dataset


description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]

description:   0%|          | 0/313 [00:00<?, ?it/s]