In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import math
import cnn
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

rng = np.random.RandomState(1234)
random_state = 42

## Neurogenesisの関数の設定
neurogenesisの手法
* taeget_neurogenesis:正則化を施してノルムの重要度の低いものを消す
1. 入力重み行列の形状を取得　weights.shape
1. 各ニューロンの重要性をL1ノルムを計算して求める　torch.norm
1. ドロップアウトするニューロンの数を指定　roundは小数点を切り落とす。weights_shape[0]で１番目の次元をとってくる。-1をすることでニューロンの数を１減らして0から始めるインデックスを使用する。
1. 2で得たimportanceを3で得たidx番目まで値を残す
1. 理論式でunimportance_maskは閾値未満のものがTrueになる。閾値以上のものがFalseになる
1. dropout_maskは要素が１のものをdropoutして0に対応するものが出力が保持される　np.whereでunimportance_maskの中でTrueのものを取得。それをn_replace個だけランダムに取得。dropout_maskで値を全部0にする。出力に対応するインデックスに1を代入することで、dropputされる
1. dropout_maskが0（ドロップアウトしない対象）に対応する位置の重みのみを保持して、ドロップアウトする対象に対応する位置の重みを削除します。結果として、ドロップアウトしない重みのみを保持した新しいweightsテンソルが得られる。


In [2]:

def flatten(t):
    t = t.reshape(1, -1)
    t = t.squeeze()
    return t


def targeted_neurogenesis(weights, n_replace, targeted_portion, is_training):
    """
    Takes a weight matrix and applied targetted dropout based on weight
    importance (From Gomez et al. 2019; https://for.ai/blog/targeted-dropout/)

    Args:
        weights - the input by ouput matrix of weights
        dropout_rate - float (0,1), the proprotion of targeted neurons to dropout
        targeted_portion - the proportion of neurons/weights to consider 'unimportant'
            from which dropout_rate targets from
        is_training - bool, whether model is training, or being evaluated
    """
    # get the input vs output size
    weights_shape = weights.shape

    # l1-norm of neurons based on input weights to sort by importance
    importance = torch.norm(weights, p=1, dim=1)

    # chose number of indices to remove of the output neurons
    idx = round(targeted_portion * weights_shape[0]) - 1

    # when sorting the abs valued weights ascending order
    # take the index of the targeted portion to get a threshold
    importance_threshold = torch.sort(importance)[0][-idx] # TODO -idx

    # only weights below threshold will be set to None
    unimportance_mask = importance < importance_threshold  #TODO > change < regular

    # during evaluation, only use important weights, without dropout threshold
    if not is_training:
       weights = torch.reshape(weights, weights_shape)
       return weights

    # difference between dropout_rate and unimportance_mask (i.e. threshold)
    idx_drop = np.random.choice(np.where(unimportance_mask)[0], size=n_replace, replace=False)
    dropout_mask = torch.zeros_like(unimportance_mask)
    dropout_mask[idx_drop] = 1
    
    # delete dropped out units
    weights = weights[~dropout_mask]

    return weights, dropout_mask

# メインとなるモデルのNgn_CNNの箇所の説明
## abrateの定義

        if self.ablate:
                if ix == 1:
                    activation_size = x.size()[1]
                    if self.ablation_mode == "random":
                        ablate_size = int(self.ablation_prop * activation_size)
                        indices = np.random.choice(
                            range(activation_size),
                            size=size,
                            replace=False,
                        )
                    if self.ablation_mode == "targetted":
                        indices = self.ablate_indices
                    x[:, indices] = 0
            if extract_layer == ix:
                return x
        x = self.fc3(x)

- if self.abrate でabrate=Trueの時に以下のコマンドを実行することを明示する
- if ix==1　で１番目の全結合層の時にこれを実行することを明示
- x.size()[1]でxのテンソルの中で1次元目のものを取り出す
- randomのモードの場合、alrate_size は予め決めていたabration_propにactivation_sizeをかけたもの
- ablationの対象になるindexをランダムに取り出している。　targetedの場合は予め指定されていたablate_indicesが選ばれる。
- x[:,indices]=0 で選ばれたindicesの要素を全て0にする。（重み行列の中で、indicesの列のものの全ての行を選択して0にする）
- 実行が終わったらfc3(x)に0にしたものを代入する


## add_newで新しいニューロンを追加する方法


    def add_new(
        self,
        p_new=0.01,
        replace=True,
        targeted_portion=None,
        return_idx=False,
        layer=1,
    ):
        
        pnew: float, proportion of hidden layer to add
        replace: float,Lina M. Tran  from 0-1 which is the proportion of new neurons that replace old neurons
        target: bool, neurons that are lost are randomly chosen, or targetted
                based on variance of activity
        
        # get a copy of current parameters
        bias = [ix.bias.detach().clone().cpu() for ix in self.fcs]
        current = [ix.weight.detach().clone().cpu() for ix in self.fcs]
        if layer == 2:
            current_fc3 = self.fc3.weight.detach().clone().cpu()

##### 1. biasとcurrentのweightをコピーして、実際のパラメータに影響しないようにして変数をいじっていく。

        # how many neurons to add?
        if not p_new:
            return
        # if int given, use this as number of neurons to add
        if (p_new % 1) == 0:
            n_new = p_new
        # if float given, use to calculate number of neurons to add
        else:
            n_new = int(self.layer_size * p_new)

        if targeted_portion is not None:
            targ_diff = round(targeted_portion * current[layer].shape[0]) - n_new
            if targ_diff <= 0:
                n_new = n_new + targ_diff - 3

##### 2. ニューロンがなんこ追加されるかをいじっていく。p_newがなかったら操作をしない、整数値なら、その値をののまま使って適応する。小数ならば、整数値に変換して追加する。
##### 3.targeted_portionがNoneの時に実際に減らすニューロンの数を指定する。targ_diffを決定する。roundは小数点四捨五入するメソッド

        self.n_new = n_new
        n_replace = n_new if replace else 0  # number lost
        difference = n_new - n_replace  # net addition or loss
        self.layer_size += difference  # final layer size

##### 4. self.n_new, n_replace,differnce,self.layer_sizeを定義している。　n_replaceはreplaceがtrueの時にn_newの値になる　Falseの時に0

        # reallocate the weights and biases
        if replace:
            # if some neurons are being removed
            if targeted_portion is not None:
                try:
                    weights, mask = targeted_neurogenesis(
                        current[layer], n_replace, targeted_portion, self.training
                    )
                except ValueError:
                    print(
                        "n_replace",
                        n_replace,
                        "targ",
                        targeted_portion * (current[layer].shape[0]),
                    )

                # if neurons are targetted for removal
                idx = np.where(mask)[0]
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
            else:
                # if neurons are randomly chosen for removal
                idx = np.random.choice(
                    range(current[layer].shape[0]), size=n_replace, replace=False
                )

                # delete idx neurons from bias and current weights (middle layer)
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                try:
                    current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
                except IndexError:
                    current_fc3 = np.delete(current_fc3, idx, axis=1)


            self.idx = idx
##### 5.これ以前は改修中
        # create new weight shapes
        w_in = torch.Tensor(
            self.layer_size,
            current[layer].shape[1],
        )
        b_in = torch.Tensor(self.layer_size)
        if layer < 2:
            w_out = torch.Tensor(
                current[layer + 1].shape[0],
                self.layer_size,
            )
        elif layer == 2:
            w_out = torch.Tensor(
                current_fc3.shape[0],
                self.layer_size,
            )
##### 6. toech.Tensorで新しくパラメータを作る。torch.Tensorの引数が新しいパラメータの形状。<br>　　b_inはレイヤー全体で同じパラメータになるので引数はself.layer_sizeになる
        # initialize new weights
        nn.init.kaiming_uniform_(w_in, a=math.sqrt(5))
        nn.init.kaiming_uniform_(w_out, a=math.sqrt(5))

        # in bias (out bias unaffected by neurogenesis)
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(w_in)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(b_in, -bound, bound)

##### 7. Heの初期化でそれぞれのパラメータを初期化しているみたい。これまじでわかりにくいので改善の余地あり

        # put back current bias and weights into newly initiliazed layers
        b_in[:-n_new] = bias[1]
        w_in[:-n_new, :] = current[layer]
        if layer == 2:
            w_out[:, :-n_new] = current_fc3
        else:
            w_out[:, :-n_new] = current[layer + 1]
##### 8.b_in[:-n_new]はb_inのパラメータの中からn_newを取り除いたものであり、これをすることで全てのb_inの中から古いからパラメータをb_inに格納して<br>新しいものは値を変えるように設定できる
        # create the parameters again
        self.fcs[layer].bias = nn.Parameter(b_in)
        self.fcs[layer].weight = nn.Parameter(w_in)
        if layer == 2:
            self.fc3.weight = nn.Parameter(w_out)
        else:
            self.fcs[layer + 1].weight = nn.Parameter(w_out)

        # need to send all the data to GPU again
        self.fcs.to(dev)
        if layer == 2:
            self.fc3.to(dev)

        if return_idx and (n_replace > 0):
            return idx
##### 9.この項は改装中


In [3]:
from ast import Index
import numpy as np
from scipy import stats
import torch
from torch.optim.lr_scheduler import LambdaLR
import math
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision
import torchvision.transforms as transforms
import os
import targeted_neurogenesis

dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def load_data(
    mode,
    data_folder="./data",
    num_workers=16,
    batch_size=50,
    split=0.1,
    seed=23,
    fashion=False,
):
    """
    Helper function to read in image dataset, and split into
    training, validation and test sets.
    ===
    mode: str, ['validation', 'test]. If 'validation', training data
         will be divided based on split parameter.
         If test, .valid = None, and all training data is used for training
    split: float, where 0 < split < 1. Where train = split * num_samples
        and valid = (1 - split) * num_samples
    seed: int, random seed to generate validation/training split
    """
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    )
    assert mode in ["validation", "test"]

    if fashion:
        trainset = torchvision.datasets.MNIST(
            data_folder,
            train=True,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
            ),
        )
        testset = torchvision.datasets.MNIST(
            data_folder,
            train=False,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
            ),
        )
        print("Loaded FMNIST dataset")
    else:
        trainset = torchvision.datasets.CIFAR10(
            root=data_folder, train=True, download=False, transform=transform
        )

        testset = torchvision.datasets.CIFAR10(
            root=data_folder, train=False, download=False, transform=transform
        )

    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=4,
        shuffle=False,
        num_workers=num_workers,
        drop_last=True,
    )

    if mode == "validation":
        from sklearn.model_selection import train_test_split

        num_train = 50000
        indices = list(range(num_train))

        train_idx, valid_idx = train_test_split(
            indices, test_size=split, random_state=seed
        )

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        trainloader = torch.utils.data.DataLoader(
            trainset,
            batch_size=batch_size,
            num_workers=num_workers,
            drop_last=True,
            sampler=train_sampler,
        )

        validloader = torch.utils.data.DataLoader(
            trainset,
            batch_size=batch_size,
            num_workers=num_workers,
            sampler=valid_sampler,
            drop_last=True,
        )
        print("Created data loaders")
        return trainloader, validloader, testloader

    elif mode == "test":
        trainloader = torch.utils.data.DataLoader(
            trainset,
            batch_size=batch_size,
            num_workers=num_workers,
            shuffle=True,
            drop_last=True,
        )
        print("Created data loaders")
        return trainloader, testloader


class Cifar10_data(object):
    def __init__(
        self,
        mode="validation",
        data_folder="./data",
        batch_size=50,
        fashion=False,
        num_workers=16,
        split=0.1,
        seed=23,
    ):
        if mode == "validation":
            self.train, self.valid, self.test = load_data(
                mode=mode,
                data_folder=data_folder,
                batch_size=batch_size,
                num_workers=num_workers,
                split=split,
                fashion=fashion,
                seed=seed,
            )
        elif mode == "test":
            self.train, self.test = load_data(
                mode=mode,
                data_folder=data_folder,
                seed=seed,
                fashion=fashion,
                batch_size=batch_size,
                num_workers=num_workers,
            )
            self.valid = None


def early_stopping(starting, patience, count, best_score, prediction):
    # starting accuracy (in case network is not training at all)
    if starting is None:
        starting = prediction["Accuracy"][0]
    # first epoch
    if best_score is None:
        best_score = prediction["Loss"][0]
    # if score is decreasing, start counter
    elif np.round(prediction["Loss"][0], 4) < best_score:
        count = 0
        best_score = prediction["Loss"][0]
        return count, best_score
    else:
        # if we've reached patience threshold, end training
        count += 1
        if count > patience:
            return
        # network is not training
        elif prediction["Accuracy"][0] < (starting):
            return


def train_model(
    model,
    dataset,
    epochs=15,
    device=dev,
    dtype=torch.float,
    neurogenesis=None,
    optim_fn=optim.Adam,
    optim_args={"lr": 0.0002},
    turnover=True,
    frequency=0,
    excite=False,
    end_neurogenesis=8,
    early_stop=True,
    patience=2,
    checkpoint=False,
    layer=1,
    targeted_portion=None,
    **kwargs,
):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim_fn(model.parameters(), **optim_args)

    log = np.zeros(epochs)
    best_score = None
    starting = None
    count = 0

    # neurogenesis
    epoch_neurogenesis = False
    batch_neurogenesis = False

    if (neurogenesis is not None) and (neurogenesis):
        if frequency:
            batch_neurogenesis = True
        else:
            epoch_neurogenesis = True
    idx_list = set()

    if excite is not None and excite > 0:
        model.excite = excite
    for epoch in range(epochs):  # loop over the dataset multiple times
        model.train()

        if epoch >= end_neurogenesis:
            epoch_neurogenesis = False
            batch_neurogenesis = False
            model.excite = False
        for i, data in enumerate(dataset.train, 0):
            if batch_neurogenesis:
                if (epoch % frequency) == 0:
                    ngn_idx = model.add_new(neurogenesis, turnover, targeted_portion, layer=layer, return_idx=True)
                    idx_list.update(ngn_idx)
            # get the inputs
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # If validation set exists, predict on validation set
        # Otherwise use the test set
        if dataset.valid is not None:
            prediction = predict(model, dataset, True, get_loss=False)
        elif dataset.valid is None:
            prediction = predict(model, dataset, False, get_loss=False)

        log[epoch] = prediction["Accuracy"][0]
        if epoch_neurogenesis:
            idx = model.add_new(neurogenesis, turnover, targeted_portion, layer=layer, return_idx=True)
            idx_list.append(idx)
            if not turnover:  # add new parameters
                optimizer.add_param_group(
                    {
                        "params": model.fc_new_in[-1].parameters(),
                        "lr": optim_args["lr"],
                        "momentum": optim_args["momentum"],
                    }
                )
                optimizer.add_param_group(
                    {
                        "params": model.fc_new_out[-1].parameters(),
                        "lr": optim_args["lr"],
                        "momentum": optim_args["momentum"],
                    }
                )

    return list(log), optimizer


def predict(model, dataset, valid=False, train=False, device=dev, get_loss=False):
    criterion = nn.CrossEntropyLoss()
    correct = []
    total = 0
    losses = []

    model.eval()
    # use the correct dataset
    if valid:
        try:
            loader = dataset.valid
        except AttributeError:
            print("No validation set. You are in test mode.")
            return
    elif train:
        loader = dataset.train
    else:
        loader = dataset.test

    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # calculate loss
            loss = criterion(outputs, labels)
            losses.append(loss.item())

            # calculate accuracy (do not use softmax)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct.append((predicted == labels).sum().item())

    avg_loss = np.array(losses).mean()
    sem_loss = stats.sem(np.array(losses))

    accuracy = 100 * float(np.array(correct).sum()) / total
    sem_accuracy = 0

    #    return accuracy, avg_loss
    return {"Loss": (avg_loss, sem_loss), "Accuracy": (accuracy, sem_accuracy)}

def error_types(model, dataset, device=dev):
    correct = []
    total = 0
    losses = []

    model.eval()
    # use the correct dataset

    loader = dataset.test


    predictions, actual = [], []

    with torch.no_grad():
        for data in loader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)

            # calculate accuracy (do not use softmax)
            _, predicted = torch.max(outputs.data, 1)
            actual.append(labels.cpu().numpy())
            predictions.append(predicted.cpu().numpy())

    return np.array(list(zip(predictions, actual))).T


def ablate_targetted(model, dataset, indices):
    model.ablate = True
    model.ablate_indices = indices
    model.ablation_mode = "targetted"
    acc = predict(model, dataset)
    result = acc["Accuracy"][0]
    return result


def ablation(model, dataset, mode="random", step=0.05):
    """
    layer: layers to remove neurons
    proportion: float, fraction of neurons to ablate
    """
    assert mode in ["random", "targetted"], "mode must be random or targetted"
    model.ablate = True
    proportions = np.arange(0, 1 + step, step)
    results = np.zeros((len(proportions), 2))

    counter = 0
    for prop in proportions:
        model.ablation_prop = prop
        model.ablation_mode = mode
        acc = predict(model, dataset, train=True)
        result[counter] = (prop, acc["Accuracy"][0])
        counter += 1

    model.ablate = False
    return result


class NgnCnn(nn.Module):
    def __init__(
        self,
        layer_size=250,
        channels=3,
        control=False,
        seed=0,
        excite=False,
        neural_noise=None,
    ):
        torch.manual_seed(seed)
        super(NgnCnn, self).__init__()
        # parameters
        self.ablate = False
        self.dropout = 0
        self.channels = channels
        self.excite = excite
        self.n_new = 0
        self.control = False
        if self.conZrol:
            self.idx_control = np.random.choice(
                range(layer_size), size=8, replace=False
            )
        self.neural_noise = neural_noise

        # 3@16x16
        self.conv1 = nn.Conv2d(channels, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv5 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv6 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.pool4 = nn.AvgPool2d(kernel_size=1, stride=1)

        self.layer_size = layer_size

        self.fc_new_in = nn.ModuleList()
        self.fc_new_out = nn.ModuleList()

        if self.channels == 3:
            self.cnn_output = 64 * 4 * 4
        elif self.channels == 1:
            self.cnn_output = 64 * 9
        # three fully connected layers
        self.fcs = nn.ModuleList(
            [
                nn.Linear(self.cnn_output, self.layer_size),  # 0
                nn.Linear(self.layer_size, self.layer_size),  # 1 on dim 2 neurogenesis
                nn.Linear(self.layer_size, self.layer_size),  # 2
            ]
        )
        self.fc3 = nn.Linear(self.layer_size, 10, bias=False)

    def forward(self, x, extract_layer=None):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool3(x)
        x = self.pool4(x)

        x = x.view(-1, self.cnn_output)

        for ix, fc in enumerate(self.fcs):
            x = fc(x)
            if self.neural_noise is not None and ix == 0 and self.training:
                mean, std = self.neural_noise
                noise = torch.zeros_like(x, device=dev)
                noise = noise.log_normal_(mean=mean, std=std)
                x = x * noise
            x = F.relu(x)

            if self.excite and ix == 1 and self.n_new and self.training:
                idx = self.idx_control if self.control else self.idx
                excite_mask = torch.ones_like(x)
                excite_mask[:, idx] = self.excite
                excite_mask.to(dev)
                x = x * excite_mask

            if self.dropout:
                x = F.dropout(x, p=self.dropout, training=self.training)
                x = torch.renorm(x, 1, 1, 3)  # max norm

            # for ablation experiments
            if self.ablate:
                if ix == 1:
                    activation_size = x.size()[1]
                    if self.ablation_mode == "random":
                        ablate_size = int(self.ablation_prop * activation_size)
                        indices = np.random.choice(
                            range(activation_size),
                            size=size,
                            replace=False,
                        )
                    if self.ablation_mode == "targetted":
                        indices = self.ablate_indices
                    x[:, indices] = 0
            if extract_layer == ix:
                return x
        x = self.fc3(x)

        return x

    def add_new(
        self,
        p_new=0.01,
        replace=True,
        targeted_portion=None,
        return_idx=False,
        layer=1,
    ):
        """
        pnew: float, proportion of hidden layer to add
        replace: float,Lina M. Tran  from 0-1 which is the proportion of new neurons that replace old neurons
        target: bool, neurons that are lost are randomly chosen, or targetted
                based on variance of activity
        """
        # get a copy of current parameters
        bias = [ix.bias.detach().clone().cpu() for ix in self.fcs]
        current = [ix.weight.detach().clone().cpu() for ix in self.fcs]
        if layer == 2:
            current_fc3 = self.fc3.weight.detach().clone().cpu()

        # how many neurons to add?
        if not p_new:
            return
        # if int given, use this as number of neurons to add
        if (p_new % 1) == 0:
            n_new = p_new
        # if float given, use to calculate number of neurons to add
        else:
            n_new = int(self.layer_size * p_new)

        if targeted_portion is not None:
            targ_diff = round(targeted_portion * current[layer].shape[0]) - n_new
            if targ_diff <= 0:
                n_new = n_new + targ_diff - 3

        self.n_new = n_new
        n_replace = n_new if replace else 0  # number lost
        difference = n_new - n_replace  # net addition or loss
        self.layer_size += difference  # final layer size

        # reallocate the weights and biases
        if replace:
            # if some neurons are being removed
            if targeted_portion is not None:
                try:
                    weights, mask = targeted_neurogenesis(
                        current[layer], n_replace, targeted_portion, self.training
                    )
                except ValueError:
                    print(
                        "n_replace",
                        n_replace,
                        "targ",
                        targeted_portion * (current[layer].shape[0]),
                    )

                # if neurons are targetted for removal
                idx = np.where(mask)[0]
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
            else:
                # if neurons are randomly chosen for removal
                idx = np.random.choice(
                    range(current[layer].shape[0]), size=n_replace, replace=False
                )

                # delete idx neurons from bias and current weights (middle layer)
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                try:
                    current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
                except IndexError:
                    current_fc3 = np.delete(current_fc3, idx, axis=1)


            self.idx = idx

        # create new weight shapes
        w_in = torch.Tensor(
            self.layer_size,
            current[layer].shape[1],
        )
        b_in = torch.Tensor(self.layer_size)
        if layer < 2:
            w_out = torch.Tensor(
                current[layer + 1].shape[0],
                self.layer_size,
            )
        elif layer == 2:
            w_out = torch.Tensor(
                current_fc3.shape[0],
                self.layer_size,
            )

        # initialize new weights
        nn.init.kaiming_uniform_(w_in, a=math.sqrt(5))
        nn.init.kaiming_uniform_(w_out, a=math.sqrt(5))

        # in bias (out bias unaffected by neurogenesis)
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(w_in)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(b_in, -bound, bound)

        # put back current bias and weights into newly initiliazed layers
        b_in[:-n_new] = bias[1]
        w_in[:-n_new, :] = current[layer]
        if layer == 2:
            w_out[:, :-n_new] = current_fc3
        else:
            w_out[:, :-n_new] = current[layer + 1]

        # create the parameters again
        self.fcs[layer].bias = nn.Parameter(b_in)
        self.fcs[layer].weight = nn.Parameter(w_in)
        if layer == 2:
            self.fc3.weight = nn.Parameter(w_out)
        else:
            self.fcs[layer + 1].weight = nn.Parameter(w_out)

        # need to send all the data to GPU again
        self.fcs.to(dev)
        if layer == 2:
            self.fc3.to(dev)

        if return_idx and (n_replace > 0):
            return idx


In [4]:
def ablate_targetted(model, dataset, indices):
    model.ablate = True
    model.ablate_indices = indices
    model.ablation_mode = "targetted"
    acc = predict(model, dataset)
    result = acc["Accuracy"][0]
    return result


def ablation(model, dataset, mode="random", step=0.05):
    """
    layer: layers to remove neurons
    proportion: float, fraction of neurons to ablate
    """
    assert mode in ["random", "targetted"], "mode must be random or targetted"
    model.ablate = True
    proportions = np.arange(0, 1 + step, step)
    results = np.zeros((len(proportions), 2))

    counter = 0
    for prop in proportions:
        model.ablation_prop = prop
        model.ablation_mode = mode
        acc = predict(model, dataset, train=True)
        results[counter] = (prop, acc["Accuracy"][0])
        counter += 1

    model.ablate = False
    return results


In [5]:
import pytest

@pytest.fixture()
def model(request):
    mdl = cnn.NgnCnn(layer_size=10)
    mdl.to(device)

    def fin():
        print("Teardown model")

    request.addfinalizer(fin)
    return mdl


@pytest.fixture()
def model_ngn(request, model):
    p_new = 0.5
    replace = 0.6
    model.add_new(p_new=p_new, replace=replace)

    def fin():
        print("Teardown model")

    request.addfinalizer(fin)
    return model


@pytest.fixture()
def dataset(request):
    dt = cnn.Cifar10_data()

    def fin():
        print("Teardown dataset")

    request.addfinalizer(fin)
    return dt


def test_load_data_valid_split():
    split = 0.2
    batch_size = 4
    num_samples = 50000
    num_valid = int(50000 * split)
    print(num_valid)
    num_train = num_samples - int(num_samples * split)
    train, valid, test = cnn.load_data("validation", split=split)
    assert len(train) * batch_size == num_train
    assert len(valid) * batch_size == num_valid


def test_model_updates(model, dataset):
    before = list(model.parameters())[0].clone().detach().cpu().numpy()
    cnn.train_model(model, dataset, epochs=1)
    after = list(model.parameters())[0].clone().detach().cpu().numpy()
    for b, a in zip(before, after):
        # Make sure something changed.
        assert (b != a).any()


def test_neurogenesis_turnover(request, model):
    p_new = 5
    replace = 0.6
    added = int(p_new * (1 - replace))
    layer_size = model.layer_size

    before = model.fcs[1].weight.shape[0]
    model.add_new(p_new=p_new, replace=replace)
    after = model.fcs[1].weight.shape[0]

    assert (
        after - before
    ) == added, "Difference between layer sizes before and after neurogenesis does not equal net addition"
    assert after == (
        layer_size + added
    ), "Final size after neurogenesis not original size + net added"


def test_neurogenesis_kept_replacement(model):
    """
    Test whether
    """
    p_new = 5
    replace = 0.6
    #    removed = int(p_new * replace)

    before = model.fcs[1].weight.clone().cpu().data.numpy()
    idx = model.add_new(p_new=p_new, replace=replace, return_idx=True)
    after = model.fcs[1].weight.clone().cpu().data.numpy()
    before = np.delete(before, idx, axis=0)
    assert (before == after[:-p_new]).all()


def test_neurogenesis_kept_no_replacement(model):
    p_new = 5
    replace = 0

    before = model.fcs[1].weight.clone()
    model.add_new(p_new=p_new, replace=replace)
    after = model.fcs[1].weight.clone()

    assert (before == after[:-p_new]).all()


def test_model_updates_post_neurogenesis(model_ngn, dataset):
    before = list(model_ngn.parameters())[0].clone().detach().cpu().numpy()
    cnn.train_model(model_ngn, dataset, epochs=1)
    after = list(model_ngn.parameters())[0].clone().detach().cpu().numpy()
    for b, a in zip(before, after):
        # Make sure something changed.
        assert (b != a).any()


def test_targeted_threshold():
    dropout_rate = 0.5
    threshold = 0.75
    weights = torch.arange(100).reshape((10, 10))
    weights_out, mask = targeted_neurogenesis(
        weights, dropout_rate=dropout_rate, targeted_portion=threshold, is_training=True
    )

    # targeted population must be below the 7th index
    assert torch.all(~mask[7:])


#    cnn.train_model(model, dataset, epochs=1, neurogenesis=5, frequency=None,
#                turnover=0.5)


ModuleNotFoundError: No module named 'pytest'

In [None]:
class BatchNorm(nn.Module):
    def __init__(self, shape, epsilon=np.float32(1e-5)):
        super().__init__()
        self.gamma = nn.Parameter(torch.tensor(np.ones(shape, dtype='float32')))
        self.beta = nn.Parameter(torch.tensor(np.zeros(shape, dtype='float32')))
        self.epsilon = epsilon

    def forward(self, x):
        mean = torch.mean(x, (0, 2, 3), keepdim=True)  
        std = torch.std(x, (0, 2, 3), keepdim=True)  
        x_normalized = (x - mean) / (std**2 + self.epsilon)**0.5  
        return self.gamma * x_normalized + self.beta  

In [None]:
class Dropout(nn.Module):
    """
    http://arxiv.org/abs/1207.0580
    """
    def __init__(self, dropout_ratio=0.5):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x):
        # 学習時はdropout_ratio分だけ出力をシャットアウト
        if self.training:
            self.mask = torch.rand(*x.size()) > self.dropout_ratio
            return x * self.mask.to(x.device)
        # 推論時は出力に`1.0 - self.dropout_ratio`を乗算することで学習時の出力の大きさに合わせる
        else:
            return x * (1.0 - self.dropout_ratio)

In [None]:
class Conv(nn.Module):
    def __init__(self, filter_shape, function=lambda x: x, stride=(1, 1), padding=0):
        super().__init__()
        # Heの初期化
        # filter_shape: (出力チャンネル数)x(入力チャンネル数)x(縦の次元数)x(横の次元数)
        fan_in = filter_shape[1] * filter_shape[2] * filter_shape[3]
        fan_out = filter_shape[0] * filter_shape[2] * filter_shape[3]

        self.W = nn.Parameter(torch.tensor(rng.normal(
                        0,
                        np.sqrt(2/fan_in),
                        size=filter_shape
                    ).astype('float32')))

        # バイアスはフィルタごとなので, 出力フィルタ数と同じ次元数
        self.b = nn.Parameter(torch.tensor(np.zeros((filter_shape[0]), dtype='float32')))

        self.function = function  # 活性化関数
        self.stride = stride  # ストライド幅
        self.padding = padding  # パディング

    def forward(self, x):
        u = F.conv2d(x, self.W, bias=self.b, stride=self.stride, padding=self.padding)
        return self.function(u)

In [None]:
class Pooling(nn.Module):
    def __init__(self, ksize=(2, 2), stride=(2, 2), padding=0):
        super().__init__()
        self.ksize = ksize  # カーネルサイズ
        self.stride = stride  # ストライド幅
        self.padding = padding  # パディング

    def forward(self, x):
        return F.avg_pool2d(x, kernel_size=self.ksize, stride=self.stride, padding=self.padding)

In [None]:
class Flatten(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x.view(x.size()[0], -1)

In [None]:
class Dense(nn.Module):
    def __init__(self, in_dim, out_dim, function=lambda x: x):
        super().__init__()
        # Heの初期化
        # in_dim: 入力の次元数，out_dim: 出力の次元数

        self.W = nn.Parameter(torch.tensor(rng.normal(
                        0,
                        np.sqrt(2/in_dim),
                        size=(in_dim, out_dim)
                    ).astype('float32')))

        self.b = nn.Parameter(torch.tensor(np.zeros([out_dim]).astype('float32')))
        self.function = function

    def forward(self, x):
        return self.function(torch.matmul(x, self.W) + self.b)

In [None]:
class Activation(nn.Module):
    def __init__(self, function=lambda x: x):
        super().__init__()
        self.function = function

    def __call__(self, x):
        return self.function(x)

In [None]:
# torch.log(0)によるnanを防ぐ
def torch_log(x):
    return torch.log(torch.clamp(x, min=1e-10))

この下にネットワークを構築する。ニューロン新生を組み込んだものを入れるNgc_CNNとか

In [None]:
#これをモデルにして書くこと。

conv_net = nn.Sequential(
    Conv((32, 3, 3, 3)),        # 画像の大きさ：32x32x3 -> 30x30x32  # WRITE ME(入出力の画像サイズ）
    BatchNorm((32, 30, 30)),
    Activation(F.relu),
    Pooling((2, 2)),            # 30x30x32 -> 15x15x32  # WRITE ME(入出力の画像サイズ）
    Conv((64, 32, 3, 3)),       # 15x15x32 -> 13x13x64  # WRITE ME(入出力の画像サイズ）
    BatchNorm((64, 13, 13)),
    Activation(F.relu),
    Pooling((2, 2)),            # 13x13x64 -> 6x6x64  # WRITE ME(入出力の画像サイズ）
    Conv((128, 64, 3, 3)),      # 6x6x64 -> 4x4x128  # WRITE ME(入出力の画像サイズ）
    BatchNorm((128, 4, 4)),
    Activation(F.relu),
    Pooling((2, 2)),            # 4x4x128 -> 2x2x128  # WRITE ME(入出力の画像サイズ）
    Flatten(),
    Dense(2*2*128, 256, F.relu),  # WRITE ME(in_features)
    Dense(256, 10)
)


batch_size = 100
n_epochs = 10
lr = 0.01
device = 'cuda'

conv_net.to(device)
optimizer = optim.Adam(conv_net.parameters(), lr=lr)

In [None]:
class NgnCnn(nn.Module):
    def __init__(
        self,
        layer_size=250,
        channels=3,
        control=False,
        seed=0,
        excite=False,
        neural_noise=None,
    ):
        torch.manual_seed(seed)
        super().__init__()
        # parameters
        self.ablate = False
        self.dropout = 0
        self.channels = channels
        self.excite = excite
        self.n_new = 0
        self.control = False
        if self.conZrol:
            self.idx_control = np.random.choice(
                range(layer_size), size=8, replace=False
            )
        self.neural_noise = neural_noise

        # 3@16x16
        self.conv1 = nn.Conv2d(channels, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, 3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv5 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv6 = nn.Conv2d(64, 64, 3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.pool4 = nn.AvgPool2d(kernel_size=1, stride=1)

        self.layer_size = layer_size

        self.fc_new_in = nn.ModuleList()
        self.fc_new_out = nn.ModuleList()

        if self.channels == 3:
            self.cnn_output = 64 * 4 * 4
        elif self.channels == 1:
            self.cnn_output = 64 * 9
        # three fully connected layers
        self.fcs = nn.ModuleList(
            [
                nn.Linear(self.cnn_output, self.layer_size),  # 0
                nn.Linear(self.layer_size, self.layer_size),  # 1 on dim 2 neurogenesis
                nn.Linear(self.layer_size, self.layer_size),  # 2
            ]
        )
        self.fc3 = nn.Linear(self.layer_size, 10, bias=False)

    def forward(self, x, extract_layer=None):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool2(x)
        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool3(x)
        x = self.pool4(x)

        x = x.view(-1, self.cnn_output)

        for ix, fc in enumerate(self.fcs):
            x = fc(x)
            if self.neural_noise is not None and ix == 0 and self.training:
                mean, std = self.neural_noise
                noise = torch.zeros_like(x, device)
                noise = noise.log_normal_(mean=mean, std=std)
                x = x * noise
            x = F.relu(x)

            if self.excite and ix == 1 and self.n_new and self.training:
                idx = self.idx_control if self.control else self.idx
                excite_mask = torch.ones_like(x)
                excite_mask[:, idx] = self.excite
                excite_mask.to(device)
                x = x * excite_mask

            if self.dropout:
                x = F.dropout(x, p=self.dropout, training=self.training)
                x = torch.renorm(x, 1, 1, 3)  # max norm

            # for ablation experiments
            if self.ablate:
                if ix == 1:
                    activation_size = x.size()[1]
                    if self.ablation_mode == "random":
                        ablate_size = int(self.ablation_prop * activation_size)
                        indices = np.random.choice(
                            range(activation_size),
                            size=self.size,
                            replace=False,
                        )
                    if self.ablation_mode == "targetted":
                        indices = self.ablate_indices
                    x[:, indices] = 0
            if extract_layer == ix:
                return x
        x = self.fc3(x)

        return x

    def add_new(
        self,
        p_new=0.01,
        replace=True,
        targeted_portion=None,
        return_idx=False,
        layer=1,
    ):
        """
        pnew: float, proportion of hidden layer to add
        replace: float,Lina M. Tran  from 0-1 which is the proportion of new neurons that replace old neurons
        target: bool, neurons that are lost are randomly chosen, or targetted
                based on variance of activity
        """
        # get a copy of current parameters
        bias = [ix.bias.detach().clone().cpu() for ix in self.fcs]
        current = [ix.weight.detach().clone().cpu() for ix in self.fcs]
        if layer == 2:
            current_fc3 = self.fc3.weight.detach().clone().cpu()

        # how many neurons to add?
        if not p_new:
            return
        # if int given, use this as number of neurons to add
        if (p_new % 1) == 0:
            n_new = p_new
        # if float given, use to calculate number of neurons to add
        else:
            n_new = int(self.layer_size * p_new)

        if targeted_portion is not None:
            targ_diff = round(targeted_portion * current[layer].shape[0]) - n_new
            if targ_diff <= 0:
                n_new = n_new + targ_diff - 3

        self.n_new = n_new
        n_replace = n_new if replace else 0  # number lost
        difference = n_new - n_replace  # net addition or loss
        self.layer_size += difference  # final layer size

        # reallocate the weights and biases
        if replace:
            # if some neurons are being removed
            if targeted_portion is not None:
                try:
                    weights, mask = targeted_neurogenesis(
                        current[layer], n_replace, targeted_portion, self.training
                    )
                except ValueError:
                    print(
                        "n_replace",
                        n_replace,
                        "targ",
                        targeted_portion * (current[layer].shape[0]),
                    )

                # if neurons are targetted for removal
                idx = np.where(mask)[0]
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
            else:
                # if neurons are randomly chosen for removal
                idx = np.random.choice(
                    range(current[layer].shape[0]), size=n_replace, replace=False
                )

                # delete idx neurons from bias and current weights (middle layer)
                bias[1] = np.delete(bias[1], idx)
                current[layer] = np.delete(current[layer], idx, axis=0)
                try:
                    current[layer + 1] = np.delete(current[layer + 1], idx, axis=1)
                except IndexError:
                    current_fc3 = np.delete(current_fc3, idx, axis=1)


            self.idx = idx

        # create new weight shapes
        w_in = torch.Tensor(
            self.layer_size,
            current[layer].shape[1],
        )
        b_in = torch.Tensor(self.layer_size)
        if layer < 2:
            w_out = torch.Tensor(
                current[layer + 1].shape[0],
                self.layer_size,
            )
        elif layer == 2:
            w_out = torch.Tensor(
                current_fc3.shape[0],
                self.layer_size,
            )

        # initialize new weights
        nn.init.kaiming_uniform_(w_in, a=math.sqrt(5))
        nn.init.kaiming_uniform_(w_out, a=math.sqrt(5))

        # in bias (out bias unaffected by neurogenesis)
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(w_in)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(b_in, -bound, bound)

        # put back current bias and weights into newly initiliazed layers
        b_in[:-n_new] = bias[1]
        w_in[:-n_new, :] = current[layer]
        if layer == 2:
            w_out[:, :-n_new] = current_fc3
        else:
            w_out[:, :-n_new] = current[layer + 1]

        # create the parameters again
        self.fcs[layer].bias = nn.Parameter(b_in)
        self.fcs[layer].weight = nn.Parameter(w_in)
        if layer == 2:
            self.fc3.weight = nn.Parameter(w_out)
        else:
            self.fcs[layer + 1].weight = nn.Parameter(w_out)

        # need to send all the data to GPU again
        self.fcs.to(device)
        if layer == 2:
            self.fc3.to(device)

        if return_idx and (n_replace > 0):
            return idx


データロードを定義する

In [None]:
trainval_dataset = datasets.CIFAR10('./data/cifar10', train=True, transform=transforms.ToTensor())


# 前処理を定義
transform = transforms.Compose([transforms.ToTensor()])

trainval_dataset = datasets.CIFAR10('../data/cifar10', train=True, transform=transform)

# trainとvalidに分割
train_dataset, val_dataset = torch.utils.data.random_split(trainval_dataset, [len(trainval_dataset)-10000, 10000])

dataloader_train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True
)

dataloader_valid = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True
)

print("Train data number:{}, Valid data number: {}".format(len(train_dataset), len(val_dataset)))

Train data number:40000, Valid data number: 10000


In [None]:
for epoch in range(n_epochs):
    losses_train = []  # 訓練誤差を格納しておくリスト
    losses_valid = []  # 検証データの誤差を格納しておくリスト

    NgnCnn.train()  # 訓練モードにする
    n_train = 0  # 訓練データ数
    acc_train = 0  # 訓練データに対する精度
    for x, t in dataloader_train:
        n_train += t.size()[0]

        conv_net.zero_grad()  # 勾配の初期化

        x = x.to(device)  # テンソルをGPUに移動

        t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

        t = t.to(device)
        t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

        y = conv_net.forward(x)  # 順伝播

        loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

        loss.backward()  # 誤差の逆伝播

        optimizer.step()  # パラメータの更新

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_train += (pred == t).float().sum().item()
        losses_train.append(loss.tolist())

    conv_net.eval()  # 評価モードにする
    n_val = 0
    acc_val = 0
    for x, t in dataloader_valid:
        n_val += t.size()[0]

        x = x.to(device)  # テンソルをGPUに移動

        t_hot = torch.eye(10)[t]  # 正解ラベルをone-hot vector化

        t = t.to(device)
        t_hot = t_hot.to(device)  # 正解ラベルとone-hot vectorをそれぞれGPUに移動

        y = conv_net.forward(x)  # 順伝播

        loss = -(t_hot*torch.log_softmax(y, dim=-1)).sum(axis=1).mean()  # 誤差(クロスエントロピー誤差関数)の計算

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_val += (pred == t).float().sum().item()
        losses_valid.append(loss.tolist())

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch,
        np.mean(losses_train),
        acc_train/n_train,
        np.mean(losses_valid),
        acc_val/n_val
    ))

TypeError: Module.train() missing 1 required positional argument: 'self'