In [133]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader , Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms
import torchvision

import nevergrad as ng

import numpy as np
import pandas as pd
import sklearn
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import copy
import os

import sys
sys.path.append("../")
sys.path.append("nevergrad/")



import models
from models import CNN_Simple as CNN_Simple
from models import All_CNN_C as All_CNN_C
from models import VAE as VAE
from models import LSTM as LSTM


from pytorch_optim_training_manager import train_manager
import torch
import torchvision
from datasets import load_dataset
import datasets
import torchvision.transforms as transforms
import models
import os
import re
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [72]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
os.environ["WANDB_DISABLED"] = "true"
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

P1

In [73]:
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Lambda(lambda x: x.double()),
    transforms.Normalize((0.5,), (0.5,))])
training_set = torchvision.datasets.FashionMNIST("./data", train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST("./data", train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=64, shuffle=False)


In [74]:
def load_params(model, param_tensor):
    current_index = 0
    for param in model.parameters():
        param_length = param.numel()
        #print(param_length, param.size())
        param.data = param_tensor[current_index:current_index + param_length].reshape(param.size())
        current_index += param_length

In [75]:
class TrainManager:
    def __init__(self,model,dataloader_train,dataloader_test,loss,output):
        self.model = model
        self.best_model = None
        self.dataloader_train = dataloader_train
        self.dataloader_test = dataloader_test
        self.nb_batch = len(dataloader_train)
        self.best_score = 1e9
        self.loss = loss
        self.iteration = 0
        self.output = output
        self.best_output = output
        #self.best_accuracy = 0
    
    def weights_updating(self,weights):
        for n, layer in enumerate(self.model.parameters()):
            layer.data = torch.from_numpy(weights[n]).to(dtype=torch.double)
            
    def evaluate(self):
        #correct = 0
        total_loss = 0
        for x , y in self.dataloader_test :
            yhat = self.model(x)
            #print("yhat", yhat,"y", y)
            #correct += (torch.abs(yhat-y)<0.5).type(torch.double).sum().item() # Compute classification error
            loss = self.loss(yhat,y)
            total_loss += loss.item()
        #correct /= len(self.dataloader_test.dataset)
        total_loss /= len(self.dataloader_test)
        return total_loss#,correct

    def cost_function(self, parameters):
        load_params(self.model, torch.tensor(parameters, dtype=torch.double))

        test_loss = self.evaluate() #, accuracy = self.evaluate()
        if self.best_score > test_loss :
            self.best_score = test_loss
            self.best_model = copy.deepcopy(self.model)
        #if self.best_accuracy < accuracy :
        #    self.best_accuracy = accuracy
        #    self.best_model = copy.deepcopy(self.model)

        print(f'test loss function (crossentropyloss) : {test_loss}, best score : {self.best_score}') #precision : {(100*accuracy):>0.1f}% ,
        self.output[self.iteration,:] = np.array([test_loss])
        self.best_output[self.iteration,:] = np.array([self.best_score])
        self.iteration += 1
        return test_loss

In [76]:
model = CNN_Simple().to(device)
loss = nn.CrossEntropyLoss()
model.load_state_dict(torch.load('models/CNN_Simple.pt'))


<All keys matched successfully>

In [77]:
epochs = 20

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [78]:
learned_param = optimizer.minimize(fitness)

test loss function (mse) : 2.358402514401358, best score : 2.358402514401358
test loss function (mse) : 2.358886341232512, best score : 2.358402514401358
test loss function (mse) : 2.302994156365421, best score : 2.302994156365421
test loss function (mse) : 2.322891583824568, best score : 2.302994156365421
test loss function (mse) : 2.2989695839314948, best score : 2.2989695839314948
test loss function (mse) : 2.310800665366812, best score : 2.2989695839314948
test loss function (mse) : 2.3574915454361394, best score : 2.2989695839314948
test loss function (mse) : 2.299661727711368, best score : 2.2989695839314948
test loss function (mse) : 2.3560911883034836, best score : 2.2989695839314948
test loss function (mse) : 2.3245422305659735, best score : 2.2989695839314948
test loss function (mse) : 2.370400631278531, best score : 2.2989695839314948
test loss function (mse) : 2.3322889634747503, best score : 2.2989695839314948
test loss function (mse) : 2.3266613244104177, best score : 2.2

P2

In [79]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Lambda(lambda x: x.double())])

In [80]:
training_set = torchvision.datasets.MNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.MNIST('./data', train=False, transform=transform, download=True)

In [81]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=64, shuffle=False)

In [82]:
model = models.VAE()

In [83]:
torch.save(model.state_dict(), 'models/VAE.pt')

In [84]:
class VAE_loss(torch.nn.Module):
    def __init__(self):
        super(VAE_loss, self).__init__()
        self.loss_fn = torch.nn.CrossEntropyLoss()
    def forward(self, preds, labels):
        x, mean, logvar = preds
        reproduction_loss = self.loss_fn(x, labels)
        return reproduction_loss

In [85]:
loss = VAE_loss()

Restart here for a different optimizer

In [86]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [87]:
optimizer = optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [91]:
epochs = 50

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)



In [92]:
learned_param = optimizer.minimize(fitness)

test loss function (mse) : 6.661176861385032, best score : 6.661176861385032
test loss function (mse) : 6.6712369771543765, best score : 6.661176861385032
test loss function (mse) : 6.66504180332468, best score : 6.661176861385032
test loss function (mse) : 6.667532498200626, best score : 6.661176861385032
test loss function (mse) : 6.66346659006216, best score : 6.661176861385032
test loss function (mse) : 6.665673173967799, best score : 6.661176861385032
test loss function (mse) : 6.659545614241042, best score : 6.659545614241042
test loss function (mse) : 6.679408236837802, best score : 6.659545614241042
test loss function (mse) : 6.669745445928182, best score : 6.659545614241042
test loss function (mse) : 6.647554329206105, best score : 6.647554329206105
test loss function (mse) : 6.664105181089007, best score : 6.647554329206105
test loss function (mse) : 6.664643962723846, best score : 6.647554329206105
test loss function (mse) : 6.657447295130584, best score : 6.647554329206105


P3

In [93]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Lambda(lambda x: x.double()),
    transforms.Normalize((0.5,), (0.5,))])

In [94]:
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=128, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=128, shuffle=False)

In [95]:
model = models.VAE()

In [97]:
loss = VAE_loss()

In [98]:
torch.save(model.state_dict(), 'models/VAE.pt')

Restart here for different optimizers

In [99]:
model.load_state_dict(torch.load('models/VAE.pt'))

<All keys matched successfully>

In [100]:
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [101]:
epochs = 20

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [102]:
learned_param = optimizer.minimize(fitness)

test loss function (mse) : 6.642322217255193, best score : 6.642322217255193
test loss function (mse) : 6.664957735035388, best score : 6.642322217255193
test loss function (mse) : 6.660617371469891, best score : 6.642322217255193
test loss function (mse) : 6.644525445492171, best score : 6.642322217255193
test loss function (mse) : 6.660206301097731, best score : 6.642322217255193
test loss function (mse) : 6.673321846232124, best score : 6.642322217255193
test loss function (mse) : 6.655907959338862, best score : 6.642322217255193
test loss function (mse) : 6.671545413658366, best score : 6.642322217255193
test loss function (mse) : 6.685117846175073, best score : 6.642322217255193
test loss function (mse) : 6.650063830461741, best score : 6.642322217255193
test loss function (mse) : 6.6715362896263155, best score : 6.642322217255193
test loss function (mse) : 6.653201306122378, best score : 6.642322217255193
test loss function (mse) : 6.666598537193816, best score : 6.64232221725519

P3

In [103]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Lambda(lambda x: x.double()),
    transforms.Normalize((0.5,), (0.5,))])

In [104]:
training_set = torchvision.datasets.CIFAR100('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.CIFAR100('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=256, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=256, shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:06<00:00, 26284216.78it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [105]:
model = models.All_CNN_C()

In [106]:
loss = torch.nn.CrossEntropyLoss()

In [107]:
torch.save(model.state_dict(), 'models/All_CNN_C.pt')

restart here if evaluating multiple optimizers

In [108]:
model.load_state_dict(torch.load('models/All_CNN_C.pt'))

<All keys matched successfully>

In [110]:
epochs = 20

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [111]:
learned_param = optimizer.minimize(fitness)

test loss function (mse) : 259.0143385284827, best score : 259.0143385284827
test loss function (mse) : 250.35841448954207, best score : 250.35841448954207
test loss function (mse) : 181.76296058976112, best score : 181.76296058976112
test loss function (mse) : 206.09000366009255, best score : 181.76296058976112


KeyboardInterrupt: 

P4

In [112]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Lambda(lambda x: x.double()),
    transforms.Normalize((0.5074,0.4867,0.4411),(0.2011,0.1987,0.2025))]
)

In [113]:
training_set = torchvision.datasets.CIFAR100('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.CIFAR100('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=256, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=256, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [114]:
model = models.All_CNN_C()

In [115]:
loss = torch.nn.CrossEntropyLoss()

In [116]:
torch.save(model.state_dict(), 'models/All_CNN_C.pt')

restart here if evaluating multiple optimizers

In [117]:
model.load_state_dict(torch.load('models/All_CNN_C.pt'))

<All keys matched successfully>

In [118]:
epochs = 20

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [119]:
learned_param = optimizer.minimize(fitness)

test loss function (mse) : 548.0120398875177, best score : 548.0120398875177
test loss function (mse) : 349.257397669483, best score : 349.257397669483
test loss function (mse) : 275.35710093303993, best score : 275.35710093303993
test loss function (mse) : 207.66149525968973, best score : 207.66149525968973


KeyboardInterrupt: 

P5

In [121]:
from datasets import load_dataset
dataset = load_dataset("wikitext", 'wikitext-2-v1', split='train')

  from .autonotebook import tqdm as notebook_tqdm
Downloading readme: 100%|██████████| 10.5k/10.5k [00:00<00:00, 30.7MB/s]
Downloading data: 100%|██████████| 685k/685k [00:00<00:00, 1.60MB/s]
Downloading data: 100%|██████████| 6.07M/6.07M [00:00<00:00, 13.6MB/s]
Downloading data: 100%|██████████| 618k/618k [00:00<00:00, 2.08MB/s]
Generating test split: 100%|██████████| 4358/4358 [00:00<00:00, 149322.99 examples/s]
Generating train split: 100%|██████████| 36718/36718 [00:00<00:00, 2107541.04 examples/s]
Generating validation split: 100%|██████████| 3760/3760 [00:00<00:00, 1699050.10 examples/s]


In [122]:
def isEnglish(sample):
    try:
        sample.encode(encoding='utf-8').decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True
def lowerCase(sample):
    return {"text": sample["text"].lower()}    

In [124]:
import re

dataset = dataset.filter(lambda x: 100 <= len(x['text'].split()) <= 128)
dataset = dataset.filter(lambda x: not re.match(" = .* = \n", x['text']))
dataset = dataset.filter(lambda x: isEnglish(x['text']))
dataset = dataset.map(lambda x: lowerCase(x))

Filter: 100%|██████████| 2668/2668 [00:00<00:00, 87785.77 examples/s]
Filter: 100%|██████████| 2668/2668 [00:00<00:00, 226852.42 examples/s]
Filter: 100%|██████████| 2668/2668 [00:00<00:00, 215162.82 examples/s]
Map: 100%|██████████| 1972/1972 [00:00<00:00, 47082.48 examples/s]


In [128]:
from collections import Counter
def count_tokens(dataset):
    """Counts the frequency of each token in the dataset.
    return a dict with token as keys, frequency as values."""

    token_freq_dict = Counter(" ".join((x['text'] for x in dataset)).split())
    return token_freq_dict

def replace_rare_tokens(sample, rare_tokens, unk_token):
    text = sample["text"]
    modified_tokens = [(token if token not in rare_tokens else unk_token)
                       for token in text.split()]
    return {"text": " ".join(modified_tokens)}

def is_unknown_sequence(sample, unk_token, unk_threshold=0.1):
    sample_tokens = sample["text"].split()
    if sample_tokens.count(unk_token)/len(sample_tokens) > unk_threshold:
        return True
    else:
        return False


def build_vocabulary(dataset, min_freq=5, unk_token='<unk>'):
    """Builds a vocabulary dict for the given dataset."""
    # Get unique tokens and their frequencies.
    token_freq_dict = count_tokens(dataset)

    # Find a set of rare tokens with frequency lower than `min_freq` and replace them with `unk_token`.
    rare_tokens_set = set()
    low_freq = [x[0] for x in token_freq_dict.items() if x[1] <= min_freq]
    rare_tokens_set.update(low_freq)
    dataset = dataset.map(replace_rare_tokens, fn_kwargs={"rare_tokens": rare_tokens_set,
                                                  "unk_token": unk_token})

    # Filter out sequences with more than 15% rare tokens.
    dataset = dataset.filter(lambda x: not is_unknown_sequence(x, unk_token, unk_threshold=0.15))

    # Recompute the token frequency to get final vocabulary dict.
    token_freq_dict = count_tokens(dataset)
    return dataset, token_freq_dict


In [129]:
wikitext_dataset, token_freq_dict = build_vocabulary(dataset, min_freq=5, unk_token='<unk>')

Map: 100%|██████████| 1972/1972 [00:00<00:00, 43588.67 examples/s]
Filter: 100%|██████████| 1972/1972 [00:00<00:00, 163584.66 examples/s]


In [134]:
class LSTMDataset(Dataset):
    def __init__(self,
                 dataset: datasets.arrow_dataset.Dataset,
                 max_seq_length: int, ):
        self.train_data = self.prepare_dataset(dataset)
        self.max_seq_length = max_seq_length + 2  # as <start> and <stop> will be added
        self.dataset_vocab = self.get_vocabulary(dataset)
        self.token2idx = {element: index for index, element in enumerate(self.dataset_vocab)}
        self.idx2token = dict(enumerate(self.dataset_vocab))
        self.pad_idx = self.token2idx["<pad>"]

    def __len__(self):
        return len(self.train_data)

    def __getitem__(self, idx):
        # Get a list of tokens of the given sequence. Represent each token with its index in `self.token2idx`.
        token_list = self.train_data[idx].split()
        # having a fallback to <unk> token if an unseen word is encoded.
        token_ids = [self.token2idx.get(t, self.token2idx['<unk>']) for t in token_list]

        # Add padding token to the sequence to reach the max_seq_length. 
        token_ids += [self.token2idx['<pad>']] * (self.max_seq_length - len(token_ids))

        return torch.tensor(token_ids)

    def get_vocabulary(self, dataset: datasets.arrow_dataset.Dataset):
        vocab = set()
        print("Getting dataset's vocabulary")
        for sample in tqdm(dataset):
            vocab.update(set(sample["text"].split()))
        vocab.update(set(["<start>", "<stop>", "<pad>"]))
        vocab = sorted(vocab)
        return vocab

    @staticmethod
    def prepare_dataset(target_dataset: datasets.arrow_dataset.Dataset):
        """
        Encapsulate sequences between <start> and <stop>.
        
        :param: target_dataset: the target dataset to extract samples
        return: a list of encapsulated samples.
        """
        prepared_dataset = []
        for sample in target_dataset:
            prepared_dataset.append(f"<start> {sample['text']} <stop>")
        return prepared_dataset


In [135]:
MAX_SEQ_LENGTH = 128
lstm_dataset = LSTMDataset(dataset=wikitext_dataset,
                         max_seq_length=MAX_SEQ_LENGTH)

Getting dataset's vocabulary


100%|██████████| 1158/1158 [00:00<00:00, 33723.57it/s]


In [136]:
def get_dataloader(lstm_dataset, test_ratio=0.1):
    # split train/test dataset.
    lstm_train_dataset, lstm_test_dataset = torch.utils.data.random_split(lstm_dataset, [1-test_ratio, test_ratio])
    # get pytorch DataLoader
    train_dataloader = DataLoader(lstm_train_dataset, batch_size=8, shuffle=True)
    test_dataloader = DataLoader(lstm_test_dataset, batch_size=8, shuffle=False)
    return train_dataloader, test_dataloader

In [137]:
train_loader, val_loader = get_dataloader(lstm_dataset, test_ratio=0.1)

In [138]:
vocab_size = len(lstm_dataset.token2idx)
embedding_dim = 100
hidden_dim = 100
dropout_rate = 0.15

In [139]:
model = models.LSTM(vocab_size=vocab_size,input_dim=embedding_dim,hidden_dim=hidden_dim,dropout_rate=dropout_rate)

In [140]:
loss = torch.nn.CrossEntropyLoss(ignore_index=lstm_dataset.pad_idx)

In [141]:
torch.save(model.state_dict(), 'models/LSTM.pt')

Restart here for evaluating multiple optimizers

In [142]:
model.load_state_dict(torch.load('models/LSTM.pt'))

<All keys matched successfully>

In [143]:
epochs = 20

output = np.empty((epochs,2))

trainer = TrainManager(model,training_loader,validation_loader,loss,output)
fitness = trainer.cost_function

# Compute number of parameters of the model + initialize parametrization
num_params = sum(p.numel() for p in model.parameters())
parametrization = ng.p.Array(shape=(num_params,))
optimizer = ng.optimizers.SPSA(parametrization=parametrization, budget=epochs)


In [144]:
learned_param = optimizer.minimize(fitness)

RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.DoubleTensor instead (while checking arguments for embedding)