### modelを作るclassの中に関数を入れて実行するタイプのもの

In [9]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import os
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score



In [10]:
config={
    'BatchSize':128,
    'seed':42,
    'n_epochs' : 50,
    'lr' : 0.01
}


cifar-10のデータをロードするだけの項

In [11]:

trainval_dataset = datasets.CIFAR10('../data/cifar10', train=True,download=True,transform=transforms.ToTensor())

# 前処理を定義
transform = transforms.Compose([transforms.ToTensor()])

trainval_dataset = datasets.CIFAR10('../data/cifar10', train=True, transform=transform)

# trainとvalidに分割
train_dataset, val_dataset = torch.utils.data.random_split(trainval_dataset, [len(trainval_dataset)-10000, 10000],generator=torch.Generator().manual_seed(config['seed']))

dataloader_train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config['BatchSize'],
    shuffle=True
)

dataloader_valid = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['BatchSize'],
    shuffle=True
)

print("Train data number:{}, Valid data number: {}".format(len(train_dataset), len(val_dataset)))

Files already downloaded and verified
Train data number:40000, Valid data number: 10000


In [12]:
class Dropout(nn.Module):
    def __init__(self, dropout_ratio=0.5):
        super().__init__()
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward(self, x):
        if self.training:
            self.mask = torch.rand(*x.size()) > self.dropout_ratio
            return x * self.mask.to(x.device)
        else:
            return x * (1.0 - self.dropout_ratio)

In [13]:

def neurogenesis(self):
        # 重みが閾値未満のニューロンを削除
    neurons_to_remove = []
    for name, param in self.named_parameters():
        if 'weight' in name:
            small_weights = torch.abs(param) < self.threshold
            neurons_to_remove.extend(torch.nonzero(small_weights, as_tuple=False))
                
            return print(f"Removed {len(neurons_to_remove)} neurons.")
        
        # 新しいニューロンを追加
    for _ in range(len(neurons_to_remove)):
        self.fc1.in_features += 1
        new_neuron = torch.randn(1, self.fc1.in_features)
        self.fc1.weight = nn.Parameter(torch.cat((self.fc1.weight, new_neuron), dim=0))
            
        return print(f"Model after neurogenesis: {self}")


nn.Moduleはカスタマイズしたレイヤーを作ることができる。foward関数を定義する必要がある。なお、backwardに関しては自動で実行してくれる。<br>
'__init__' コンストラクタの中でサブモジュールを定義する。forward関数の中で使用できるように定義する。サブモジュールを使用する場合はsuper().__init__()を実行する必要あり。<br>
パターンとして二つのものがある。
1. 学習が収束しかけているときに学習のパラメータの中で値が小さいものを消して新しく重みを持たせるもの<br>
1. 最初は情報が流れないようにして学習の後期で情報を流すようにするもの<br>
2の方はDropOutよろしく、はじめはFalseでニューロンのパラメータを固定しつつも、最終的に全てTrueにして情報を流す。DropOutのrand(*x.size)を書き換えて固定のもので済ませるようにするだけでいい。。。。？←とりあえずの実装はできている。
epochをある程度回してみて、閾値がある程度未満のニューロンの勾配情報を保存して、パラメータを維持、ある程度が経ったら学習に再度加入させるなど。

- しないといけないこと
パラメータを可視化すること。tensorbordで学習率など実験の経過を保存するようにする。


新しいニューロンを追加するとき（ニューロンの重みを変更するとき）にランダムで発生させる値はThreshholdを超えていないと<br>
もう１epoch回したときにまた同じニューロンが指定されてランダムな値を持つことになる。
ネックはアーキテクチャを変えずにどうやってニューロンを追加するのか。今はTropOutよろしくTrueかFalseを掛け合わせることでデータを流すかどうか決めている<br>
この下のコードは、取り除かれたニューロンの数を表示する機能を備えているクラス

DropOutとは違って、消したニューロンを物理的に組み替えているので、ratioを変更する必要はない
- 参考資料　https://qiita.com/kuroitu/items/d1bed8c216950d87be74

In [14]:
class NeurogenesisModel(nn.Module):
    def __init__(self):
        super(NeurogenesisModel, self).__init__()
        self.conv1=nn.Conv2d(3, 32, 3)       # 32x32x3 -> 30x30x32
        self.bn1=nn.BatchNorm2d(32)
        self.av1=nn.ReLU()
        self.pool1=nn.AvgPool2d(2)                  # 30x30x32 -> 15x15x32
        self.conv2=nn.Conv2d(32, 64, 3)             # 15x15x32 -> 13x13x64
        self.bn2=nn.BatchNorm2d(64)
        self.av2=nn.ReLU()
        self.pool2=nn.AvgPool2d(2)                  # 13x13x64 -> 6x6x64
        self.conv3=nn.Conv2d(64, 128, 3)            # 6x6x64 -> 4x4x128
        self.bn3=nn.BatchNorm2d(128)
        self.av3=nn.ReLU()
        self.pool3=nn.AvgPool2d(2)                  # 4x4x128 -> 2x2x128
        self.flatten=nn.Flatten()
        self.fc1=nn.Linear(2*2*128, 256)
        self.relu=nn.ReLU()
        self.fc2=nn.Linear(256, 10)
        #self.fc1 = nn.Linear(input_size, hidden_size)
        #self.relu = nn.ReLU()
        #self.fc2 = nn.Linear(hidden_size, output_size)
        


        # ニューロジェネシスのパラメータ
        self.threshold = 0.0001 # 重みの閾値
        #self.new_neuron_count = 10  # 新しいニューロンの数
        
    def forward(self, x):
        x=self.conv1(x)
        x=self.bn1(x)
        x=self.av1(x)
        x=self.pool1(x)
        x=self.conv2(x)
        x=self.bn2(x)
        x=self.av2(x)
        x=self.pool2(x)     
        x=self.conv3(x)
        x=self.bn3(x)
        x=self.av3(x)
        x=self.pool3(x)  
        x=self.flatten(x)
        x=self.fc1(x)
        x=self.relu(x)
        x=self.fc2(x)
        return x
    
    def neurogenesis(self):
        # 重みが閾値未満のニューロンを削除
        neurons_to_remove = []
        for name, param in self.named_parameters():
            if 'weight' in name:
                small_weights = torch.abs(param) < self.threshold
                neurons_to_remove.extend(torch.nonzero(small_weights, as_tuple=False))
                
                return print(f"Removed {len(neurons_to_remove)} neurons.")
        
        # 新しいニューロンを追加
        for _ in range(len(neurons_to_remove)):
            self.fc1.in_features += 1
            new_neuron = torch.randn(1, self.fc1.in_features)
            self.fc1.weight = nn.Parameter(torch.cat((self.fc1.weight, new_neuron), dim=0))
            
            return print(f"Model after neurogenesis: {self}")
        

# モデルのインスタンス化
model = NeurogenesisModel()
print("Initial model:", model)


Initial model: NeurogenesisModel(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (av1): ReLU()
  (pool1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (av2): ReLU()
  (pool2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (av3): ReLU()
  (pool3): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


In [15]:

def init_weights(m):  # Heの初期化
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.kaiming_normal_(m.weight)
        m.bias.data.fill_(0.0)

model.apply(init_weights)

device='cuda'
model.to(device)
optimizer2 = optim.Adam(model.parameters(), lr=config['lr'])
loss_function = nn.CrossEntropyLoss() 

## issues
1. thresholdを変更してもremoved neuronの数が変わらない。classの定義の仕方に問題かも。<br>neuronの数が14000桁ほどあるが、これは何のどのニューロンなのかを明示する

In [16]:
for epoch in range(config['n_epochs']):
    losses_train = []
    losses_valid = []

    model.train()
    n_train = 0
    acc_train = 0
    for x, t in dataloader_train:
        n_train += t.size()[0]

        model.zero_grad()  # 勾配の初期化

        x = x.to(device)  # テンソルをGPUに移動
        t = t.to(device)

        y = model.forward(x)  # 順伝播


        loss = loss_function(y, t)  # 誤差(クロスエントロピー誤差関数)の計算

        loss.backward()  # 誤差の逆伝播

        optimizer2.step()  # パラメータの更新

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_train += (pred == t).float().sum().item()
        losses_train.append(loss.tolist())

    model.eval()
    n_val = 0
    acc_val = 0
    for x, t in dataloader_valid:
        n_val += t.size()[0]

        x = x.to(device)  # テンソルをGPUに移動
        t = t.to(device)

        y = model.forward(x)  # 順伝播

        loss = loss_function(y, t)  # 誤差(クロスエントロピー誤差関数)の計算

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_val += (pred == t).float().sum().item()
        losses_valid.append(loss.tolist())

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]]'.format(
        epoch+1,
        np.mean(losses_train),
        acc_train/n_train,
        np.mean(losses_valid),
        acc_val/n_val,
    ))

EPOCH: 1, Train [Loss: 1.602, Accuracy: 0.435], Valid [Loss: 1.304, Accuracy: 0.534]]
EPOCH: 2, Train [Loss: 1.117, Accuracy: 0.602], Valid [Loss: 1.209, Accuracy: 0.584]]
EPOCH: 3, Train [Loss: 0.916, Accuracy: 0.676], Valid [Loss: 1.070, Accuracy: 0.629]]
EPOCH: 4, Train [Loss: 0.792, Accuracy: 0.721], Valid [Loss: 0.928, Accuracy: 0.678]]
EPOCH: 5, Train [Loss: 0.698, Accuracy: 0.758], Valid [Loss: 0.989, Accuracy: 0.659]]
EPOCH: 6, Train [Loss: 0.621, Accuracy: 0.783], Valid [Loss: 0.842, Accuracy: 0.719]]
EPOCH: 7, Train [Loss: 0.565, Accuracy: 0.801], Valid [Loss: 0.967, Accuracy: 0.692]]
EPOCH: 8, Train [Loss: 0.512, Accuracy: 0.820], Valid [Loss: 0.882, Accuracy: 0.712]]
EPOCH: 9, Train [Loss: 0.456, Accuracy: 0.840], Valid [Loss: 0.862, Accuracy: 0.716]]
EPOCH: 10, Train [Loss: 0.427, Accuracy: 0.850], Valid [Loss: 0.836, Accuracy: 0.741]]
EPOCH: 11, Train [Loss: 0.384, Accuracy: 0.863], Valid [Loss: 0.991, Accuracy: 0.711]]
EPOCH: 12, Train [Loss: 0.350, Accuracy: 0.875], Val

In [17]:
import numpy as np

def save_weights(model, filename):
    weights = {}

    # モデルの各層の名前とパラメータを取得
    for name, param in model.named_parameters():
        if 'weight' in name:
            weights[name] = param.detach().cpu().numpy()

    # .npz ファイルとして出力
    np.savez(filename, **weights)

save_weights(model, 'weights.npz')
data = np.load('weights.npz')

for key in data.keys():
    print(key, data[key].shape)

conv1.weight (32, 3, 3, 3)
bn1.weight (32,)
conv2.weight (64, 32, 3, 3)
bn2.weight (64,)
conv3.weight (128, 64, 3, 3)
bn3.weight (128,)
fc1.weight (256, 512)
fc2.weight (10, 256)


In [18]:
'''
def print_weights(model):
    # モデルの各層の名前とパラメータを取得
    for name, param in model.named_parameters():
        if 'weight' in name:
            print(f"{name} の重み:")
            print(param.detach().cpu().numpy())
            print()

print_weights(model)
'''
state_dict = model.state_dict()
print(state_dict)

OrderedDict([('conv1.weight', tensor([[[[ 2.9807e-01, -8.0740e-01,  8.1319e-01],
          [ 6.4374e-01, -1.6776e+00,  6.5143e-01],
          [ 9.2795e-01, -1.9483e+00,  1.0461e+00]],

         [[ 5.8032e-01, -1.2153e+00, -1.2455e-02],
          [ 1.1678e+00, -1.7014e+00,  1.0983e+00],
          [ 1.1662e+00, -1.9621e+00,  8.8394e-01]],

         [[ 4.0767e-01, -8.6512e-01,  4.7141e-01],
          [ 6.8499e-01, -1.6017e+00,  1.2108e+00],
          [ 8.7292e-01, -1.6919e+00,  5.4589e-01]]],


        [[[-1.7178e+00, -1.0029e+00, -1.2142e+00],
          [ 6.3381e-03,  1.0226e+00,  7.4499e-01],
          [ 1.2770e+00,  1.9234e+00,  1.1627e+00]],

         [[-4.0283e-01, -2.4492e-01, -8.5522e-01],
          [ 1.1750e+00,  6.3850e-01,  1.8041e-01],
          [ 8.2741e-01,  8.5648e-01, -2.8900e-01]],

         [[ 1.7404e+00,  1.0630e+00,  3.9262e-01],
          [ 1.0721e+00,  8.5329e-01,  4.2826e-01],
          [-1.1695e+00, -1.0425e+00, -1.4219e+00]]],


        [[[ 1.1408e+00,  2.3060e+00,

In [20]:
conv_net2 = nn.Sequential(
    nn.Conv2d(3, 32, 3),              # 32x32x3 -> 30x30x32
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.AvgPool2d(2),                  # 30x30x32 -> 15x15x32
    nn.Conv2d(32, 64, 3),             # 15x15x32 -> 13x13x64
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.AvgPool2d(2),                  # 13x13x64 -> 6x6x64
    nn.Conv2d(64, 128, 3),            # 6x6x64 -> 4x4x128
    nn.BatchNorm2d(128),
    nn.ReLU(),
    nn.AvgPool2d(2),                  # 4x4x128 -> 2x2x128
    nn.Flatten(),
    nn.Linear(2*2*128, 256),
    nn.ReLU(),
    nn.Linear(256, 10)
)


def init_weights(m):  # Heの初期化
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.kaiming_normal_(m.weight)
        m.bias.data.fill_(0.0)


conv_net2.apply(init_weights)

batch_size = 100
n_epochs = 5
lr = 0.01
device = 'cuda'

conv_net2.to(device)
optimizer2 = optim.Adam(conv_net2.parameters(), lr=lr)
loss_function = nn.CrossEntropyLoss()  # nn.ClossEntropyLossは，出力のsoftmax変換と，正解ラベルのone-hot vector化の機能を持っている

In [21]:
for epoch in range(config['n_epochs']):
    losses_train = []
    losses_valid = []

    conv_net2.train()
    n_train = 0
    acc_train = 0
    for x, t in dataloader_train:
        n_train += t.size()[0]

        conv_net2.zero_grad()  # 勾配の初期化

        x = x.to(device)  # テンソルをGPUに移動
        t = t.to(device)

        y = conv_net2.forward(x)  # 順伝播

        loss = loss_function(y, t)  # 誤差(クロスエントロピー誤差関数)の計算

        loss.backward()  # 誤差の逆伝播

        optimizer2.step()  # パラメータの更新

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_train += (pred == t).float().sum().item()
        losses_train.append(loss.tolist())

    conv_net2.eval()
    n_val = 0
    acc_val = 0
    for x, t in dataloader_valid:
        n_val += t.size()[0]

        x = x.to(device)  # テンソルをGPUに移動
        t = t.to(device)

        y = conv_net2.forward(x)  # 順伝播

        loss = loss_function(y, t)  # 誤差(クロスエントロピー誤差関数)の計算

        pred = y.argmax(1)  # 最大値を取るラベルを予測ラベルとする

        acc_val += (pred == t).float().sum().item()
        losses_valid.append(loss.tolist())

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch+1,
        np.mean(losses_train),
        acc_train/n_train,
        np.mean(losses_valid),
        acc_val/n_val
    ))

EPOCH: 1, Train [Loss: 1.540, Accuracy: 0.452], Valid [Loss: 1.735, Accuracy: 0.410]
EPOCH: 2, Train [Loss: 1.049, Accuracy: 0.628], Valid [Loss: 1.374, Accuracy: 0.558]
EPOCH: 3, Train [Loss: 0.852, Accuracy: 0.701], Valid [Loss: 1.014, Accuracy: 0.647]
EPOCH: 4, Train [Loss: 0.732, Accuracy: 0.745], Valid [Loss: 1.085, Accuracy: 0.638]
EPOCH: 5, Train [Loss: 0.643, Accuracy: 0.777], Valid [Loss: 0.775, Accuracy: 0.732]
EPOCH: 6, Train [Loss: 0.579, Accuracy: 0.798], Valid [Loss: 0.871, Accuracy: 0.712]
EPOCH: 7, Train [Loss: 0.512, Accuracy: 0.820], Valid [Loss: 0.955, Accuracy: 0.689]
EPOCH: 8, Train [Loss: 0.467, Accuracy: 0.838], Valid [Loss: 1.220, Accuracy: 0.635]
EPOCH: 9, Train [Loss: 0.422, Accuracy: 0.852], Valid [Loss: 1.023, Accuracy: 0.687]
EPOCH: 10, Train [Loss: 0.369, Accuracy: 0.871], Valid [Loss: 1.043, Accuracy: 0.694]
EPOCH: 11, Train [Loss: 0.348, Accuracy: 0.881], Valid [Loss: 0.887, Accuracy: 0.732]
EPOCH: 12, Train [Loss: 0.326, Accuracy: 0.885], Valid [Loss: 1

In [22]:
import torchvision
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor() )
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=True, num_workers=2)

correct = 0
total = 0
# 勾配を記憶せず（学習せずに）に計算を行う
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device),data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))



Files already downloaded and verified
Accuracy of the network on the 10000 test images: 73 %


In [23]:
import torchvision
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor() )
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=True, num_workers=2)

correct = 0
total = 0
# 勾配を記憶せず（学習せずに）に計算を行う
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = conv_net2(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


Files already downloaded and verified
Accuracy of the network on the 10000 test images: 72 %
