In [15]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import os
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [16]:
# Hyper-parameters shared by every experiment in this notebook.
config = {
    'BatchSize': 128,
    'seed': 42,
    'n_epochs': 50,
    'lr': 0.01,
}

# Single source of truth for the seed: derive it from the config instead of
# repeating the literal, so the two values can never drift apart.
seed = config['seed']
torch.manual_seed(seed)

<torch._C.Generator at 0x7f13f7fc0c30>

In [17]:

# Define the preprocessing: convert each PIL image to a tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Build the CIFAR-10 training split once (downloading it if it is missing).
# The previous version constructed the dataset twice and discarded the first.
trainval_dataset = datasets.CIFAR10('../data/cifar10', train=True, download=True, transform=transform)

# Split into train and validation subsets; the dedicated generator makes the
# split reproducible regardless of the global RNG state.
train_dataset, val_dataset = torch.utils.data.random_split(
    trainval_dataset,
    [len(trainval_dataset) - 10000, 10000],
    generator=torch.Generator().manual_seed(config['seed']),
)

dataloader_train = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config['BatchSize'],
    shuffle=True,
)

# Validation is only evaluated, so there is no reason to shuffle it.
dataloader_valid = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['BatchSize'],
    shuffle=False,
)

print("Train data number:{}, Valid data number: {}".format(len(train_dataset), len(val_dataset)))

Files already downloaded and verified
Train data number:40000, Valid data number: 10000


In [18]:
class Dropout(nn.Module):
    """Hand-written (non-inverted) dropout layer.

    In training mode each element is zeroed with probability
    ``dropout_ratio`` and the surviving elements are passed through unscaled.
    In evaluation mode the input is multiplied by ``1 - dropout_ratio``
    instead, so the expected activation matches the training phase.

    Args:
        dropout_ratio: Probability of dropping an element; must be in [0, 1].

    Raises:
        ValueError: If ``dropout_ratio`` is outside [0, 1].
    """

    def __init__(self, dropout_ratio=0.5):
        super().__init__()
        if not 0.0 <= dropout_ratio <= 1.0:
            raise ValueError(
                "dropout_ratio must be in [0, 1], got {}".format(dropout_ratio)
            )
        self.dropout_ratio = dropout_ratio
        # Boolean keep-mask of the most recent training forward pass.
        self.mask = None

    def forward(self, x):
        """Apply dropout to ``x`` and return a tensor of the same shape."""
        if self.training:
            # Sample the mask directly on the input's device; this avoids a
            # host-to-device copy on every step. Passing the shape as a single
            # argument also supports 0-dim inputs.
            self.mask = torch.rand(x.shape, device=x.device) > self.dropout_ratio
            return x * self.mask
        return x * (1.0 - self.dropout_ratio)

In [19]:

class DropOutModel(nn.Module):
    """Small CNN for 32x32 RGB images with dropout before the classifier.

    Three Conv -> BatchNorm -> ReLU -> AvgPool stages are followed by a
    256-unit fully connected layer, the custom ``Dropout`` layer and a
    10-way linear output. ``forward`` returns raw logits.
    """

    # Names of the registered sub-modules, in the order forward() applies them.
    _PIPELINE = (
        'conv1', 'bn1', 'av1', 'pool1',
        'conv2', 'bn2', 'av2', 'pool2',
        'conv3', 'bn3', 'av3', 'pool3',
        'flatten', 'fc1', 'relu', 'dropout', 'fc2',
    )

    def __init__(self):
        super().__init__()
        # Stage 1: 32x32x3 -> 30x30x32 -> 15x15x32
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.bn1 = nn.BatchNorm2d(32)
        self.av1 = nn.ReLU()
        self.pool1 = nn.AvgPool2d(2)
        # Stage 2: 15x15x32 -> 13x13x64 -> 6x6x64
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.bn2 = nn.BatchNorm2d(64)
        self.av2 = nn.ReLU()
        self.pool2 = nn.AvgPool2d(2)
        # Stage 3: 6x6x64 -> 4x4x128 -> 2x2x128
        self.conv3 = nn.Conv2d(64, 128, 3)
        self.bn3 = nn.BatchNorm2d(128)
        self.av3 = nn.ReLU()
        self.pool3 = nn.AvgPool2d(2)
        # Classifier head
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(2 * 2 * 128, 256)
        self.relu = nn.ReLU()
        self.dropout = Dropout(0.5)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Run ``x`` through every layer in ``_PIPELINE`` and return the logits."""
        for layer_name in self._PIPELINE:
            x = getattr(self, layer_name)(x)
        return x

# Instantiate the dropout CNN that the following cells initialize and train.
model = DropOutModel()


In [20]:

def init_weights(m):  # He initialization
    """Apply He (Kaiming normal) initialization to a Linear/Conv2d module.

    Intended to be passed to ``nn.Module.apply``. Modules of any other type
    are left untouched.

    Args:
        m: The module to (possibly) initialize in place.
    """
    # isinstance also covers subclasses of Linear / Conv2d.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.kaiming_normal_(m.weight)
        # Layers built with bias=False have no bias tensor to reset.
        if m.bias is not None:
            nn.init.zeros_(m.bias)

# Re-initialize every Linear / Conv2d layer of the model.
model.apply(init_weights)

# Prefer the GPU but fall back to the CPU so the notebook still runs on
# machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
optimizer2 = optim.Adam(model.parameters(), lr=config['lr'])
loss_function = nn.CrossEntropyLoss()

In [21]:
# Train `model` for config['n_epochs'] epochs, reporting the mean batch loss
# and the accuracy on the training and validation sets after every epoch.
for epoch in range(config['n_epochs']):
    losses_train = []
    losses_valid = []

    # ---- training phase ----
    model.train()
    n_train = 0
    acc_train = 0
    for x, t in dataloader_train:
        n_train += t.size(0)

        model.zero_grad()  # reset the gradients

        x = x.to(device)  # move the tensors to the compute device
        t = t.to(device)

        # Call the module rather than .forward() so registered hooks run.
        y = model(x)  # forward pass

        loss = loss_function(y, t)  # cross-entropy loss

        loss.backward()  # back-propagate the error

        optimizer2.step()  # update the parameters

        pred = y.argmax(1)  # predicted label = index of the largest logit

        acc_train += (pred == t).float().sum().item()
        losses_train.append(loss.item())

    # ---- validation phase ----
    model.eval()
    n_val = 0
    acc_val = 0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for x, t in dataloader_valid:
            n_val += t.size(0)

            x = x.to(device)
            t = t.to(device)

            y = model(x)  # forward pass

            loss = loss_function(y, t)  # cross-entropy loss

            pred = y.argmax(1)

            acc_val += (pred == t).float().sum().item()
            losses_valid.append(loss.item())

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]]'.format(
        epoch+1,
        np.mean(losses_train),
        acc_train/n_train,
        np.mean(losses_valid),
        acc_val/n_val,
    ))

EPOCH: 1, Train [Loss: 1.573, Accuracy: 0.424], Valid [Loss: 3.219, Accuracy: 0.274]]
EPOCH: 2, Train [Loss: 1.233, Accuracy: 0.567], Valid [Loss: 1.578, Accuracy: 0.486]]
EPOCH: 3, Train [Loss: 1.050, Accuracy: 0.637], Valid [Loss: 1.106, Accuracy: 0.617]]
EPOCH: 4, Train [Loss: 0.918, Accuracy: 0.684], Valid [Loss: 0.968, Accuracy: 0.662]]
EPOCH: 5, Train [Loss: 0.822, Accuracy: 0.720], Valid [Loss: 1.065, Accuracy: 0.653]]
EPOCH: 6, Train [Loss: 0.755, Accuracy: 0.745], Valid [Loss: 0.860, Accuracy: 0.713]]
EPOCH: 7, Train [Loss: 0.698, Accuracy: 0.765], Valid [Loss: 0.825, Accuracy: 0.716]]
EPOCH: 8, Train [Loss: 0.647, Accuracy: 0.781], Valid [Loss: 1.277, Accuracy: 0.643]]
EPOCH: 9, Train [Loss: 0.604, Accuracy: 0.797], Valid [Loss: 0.749, Accuracy: 0.750]]
EPOCH: 10, Train [Loss: 0.567, Accuracy: 0.805], Valid [Loss: 0.795, Accuracy: 0.736]]
EPOCH: 11, Train [Loss: 0.537, Accuracy: 0.816], Valid [Loss: 0.898, Accuracy: 0.724]]
EPOCH: 12, Train [Loss: 0.495, Accuracy: 0.831], Val

In [22]:
import numpy as np

def save_weights(model, filename):
    """Write the model's weight tensors to a ``.npz`` archive.

    Only parameters whose name contains ``'weight'`` are exported; each one
    is detached, moved to the CPU and stored as a NumPy array under its
    parameter name.

    Args:
        model: Module whose ``named_parameters()`` are exported.
        filename: Destination passed to ``np.savez``.
    """
    weight_arrays = {
        param_name: tensor.detach().cpu().numpy()
        for param_name, tensor in model.named_parameters()
        if 'weight' in param_name
    }

    # Store every array in the archive under its parameter name.
    np.savez(filename, **weight_arrays)

# Export the trained weights, then reload the archive and list what it holds.
save_weights(model, 'weights.npz')
data = np.load('weights.npz')

for key in data.files:
    print(key, data[key].shape)

conv1.weight (32, 3, 3, 3)
bn1.weight (32,)
conv2.weight (64, 32, 3, 3)
bn2.weight (64,)
conv3.weight (128, 64, 3, 3)
bn3.weight (128,)
fc1.weight (256, 512)
fc2.weight (10, 256)


In [23]:

# Snapshot the model's state dict and print it.
# NOTE(review): this prints every tensor in full, which produces a very long
# output; consider printing only names and shapes instead.
state_dict = model.state_dict()
print(state_dict)

OrderedDict([('conv1.weight', tensor([[[[ 4.5266e-01, -3.9832e-01, -4.2274e-01],
          [ 1.2405e+00,  4.6666e-01, -1.3328e+00],
          [-1.7021e-02,  1.7839e-01, -1.0648e-01]],

         [[ 8.8445e-01, -3.2774e-01, -1.1558e+00],
          [ 2.1127e+00, -1.0511e-01, -2.1254e+00],
          [ 4.7534e-01,  4.5148e-01, -9.5908e-01]],

         [[ 1.5952e+00,  1.1127e-01, -1.3915e+00],
          [ 2.0461e+00, -4.3525e-01, -2.3586e+00],
          [ 1.1171e+00,  3.9700e-01, -1.0064e+00]]],


        [[[ 8.3696e-01,  9.3401e-01,  1.0043e+00],
          [-1.7085e+00, -1.8642e+00, -1.4535e+00],
          [-1.3987e+00, -1.0372e+00, -1.3609e+00]],

         [[ 1.4679e+00,  1.5641e+00,  1.4289e+00],
          [ 4.9815e-01,  1.7946e-02, -3.5232e-02],
          [ 1.2677e+00,  1.8333e+00,  1.0132e+00]],

         [[-7.3286e-02, -8.7000e-01, -1.9758e+00],
          [-4.6702e-01, -1.3841e+00, -1.4442e+00],
          [ 1.2090e+00,  1.2501e+00,  1.3980e-01]]],


        [[[ 1.4314e+00,  1.9157e+00,

In [24]:
# Baseline CNN without dropout. Three Conv -> BatchNorm -> ReLU -> AvgPool
# stages (32x32x3 -> 15x15x32 -> 6x6x64 -> 2x2x128) feed a two-layer classifier.
conv_net2 = nn.Sequential(
    *[
        layer
        for in_ch, out_ch in ((3, 32), (32, 64), (64, 128))
        for layer in (
            nn.Conv2d(in_ch, out_ch, 3),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.AvgPool2d(2),
        )
    ],
    nn.Flatten(),
    nn.Linear(2 * 2 * 128, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)


def init_weights(m):  # He initialization
    """Apply He (Kaiming normal) initialization to a Linear/Conv2d module.

    Intended to be passed to ``nn.Module.apply``. Modules of any other type
    are left untouched.

    Args:
        m: The module to (possibly) initialize in place.
    """
    # isinstance also covers subclasses of Linear / Conv2d.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.kaiming_normal_(m.weight)
        # Layers built with bias=False have no bias tensor to reset.
        if m.bias is not None:
            nn.init.zeros_(m.bias)


# Re-initialize every Linear / Conv2d layer of the baseline network.
conv_net2.apply(init_weights)

# NOTE(review): batch_size and n_epochs are not read by the training loop
# that follows, which uses the shared data loaders and config['n_epochs'].
batch_size = 100
n_epochs = 5
lr = 0.01
# Prefer the GPU but fall back to the CPU so the notebook still runs on
# machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

conv_net2.to(device)
# NOTE(review): this rebinds `optimizer2`, which previously held the optimizer
# of `model`; re-running the earlier training cell after this one would step
# the wrong parameters.
optimizer2 = optim.Adam(conv_net2.parameters(), lr=lr)
# nn.CrossEntropyLoss applies log-softmax to the logits itself and takes
# integer class labels, so no explicit softmax or one-hot encoding is needed.
loss_function = nn.CrossEntropyLoss()

In [25]:
# Train `conv_net2` for config['n_epochs'] epochs, reporting the mean batch
# loss and the accuracy on the training and validation sets after every epoch.
for epoch in range(config['n_epochs']):
    losses_train = []
    losses_valid = []

    # ---- training phase ----
    conv_net2.train()
    n_train = 0
    acc_train = 0
    for x, t in dataloader_train:
        n_train += t.size(0)

        conv_net2.zero_grad()  # reset the gradients

        x = x.to(device)  # move the tensors to the compute device
        t = t.to(device)

        # Call the module rather than .forward() so registered hooks run.
        y = conv_net2(x)  # forward pass

        loss = loss_function(y, t)  # cross-entropy loss

        loss.backward()  # back-propagate the error

        optimizer2.step()  # update the parameters

        pred = y.argmax(1)  # predicted label = index of the largest logit

        acc_train += (pred == t).float().sum().item()
        losses_train.append(loss.item())

    # ---- validation phase ----
    conv_net2.eval()
    n_val = 0
    acc_val = 0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time.
    with torch.no_grad():
        for x, t in dataloader_valid:
            n_val += t.size(0)

            x = x.to(device)
            t = t.to(device)

            y = conv_net2(x)  # forward pass

            loss = loss_function(y, t)  # cross-entropy loss

            pred = y.argmax(1)

            acc_val += (pred == t).float().sum().item()
            losses_valid.append(loss.item())

    print('EPOCH: {}, Train [Loss: {:.3f}, Accuracy: {:.3f}], Valid [Loss: {:.3f}, Accuracy: {:.3f}]'.format(
        epoch+1,
        np.mean(losses_train),
        acc_train/n_train,
        np.mean(losses_valid),
        acc_val/n_val
    ))

EPOCH: 1, Train [Loss: 1.529, Accuracy: 0.449], Valid [Loss: 1.575, Accuracy: 0.441]
EPOCH: 2, Train [Loss: 1.078, Accuracy: 0.616], Valid [Loss: 1.297, Accuracy: 0.565]
EPOCH: 3, Train [Loss: 0.894, Accuracy: 0.686], Valid [Loss: 1.018, Accuracy: 0.648]
EPOCH: 4, Train [Loss: 0.764, Accuracy: 0.734], Valid [Loss: 1.143, Accuracy: 0.620]
EPOCH: 5, Train [Loss: 0.671, Accuracy: 0.765], Valid [Loss: 1.802, Accuracy: 0.507]
EPOCH: 6, Train [Loss: 0.605, Accuracy: 0.789], Valid [Loss: 0.817, Accuracy: 0.712]
EPOCH: 7, Train [Loss: 0.542, Accuracy: 0.811], Valid [Loss: 0.980, Accuracy: 0.677]
EPOCH: 8, Train [Loss: 0.487, Accuracy: 0.832], Valid [Loss: 0.919, Accuracy: 0.698]
EPOCH: 9, Train [Loss: 0.440, Accuracy: 0.847], Valid [Loss: 0.790, Accuracy: 0.737]
EPOCH: 10, Train [Loss: 0.394, Accuracy: 0.862], Valid [Loss: 0.910, Accuracy: 0.723]
EPOCH: 11, Train [Loss: 0.370, Accuracy: 0.870], Valid [Loss: 1.792, Accuracy: 0.597]
EPOCH: 12, Train [Loss: 0.323, Accuracy: 0.885], Valid [Loss: 1

In [26]:
import torchvision
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor() )
# Accuracy does not depend on sample order or batch size in eval mode, so use
# the configured batch size without shuffling instead of shuffled batches of 4.
testloader = torch.utils.data.DataLoader(testset, batch_size=config['BatchSize'], shuffle=False, num_workers=2)

# Switch to evaluation mode explicitly (BatchNorm running statistics, dropout
# scaling) instead of relying on the mode left behind by an earlier cell.
model.eval()

correct = 0
total = 0
# Run inference without recording gradients (no learning happens here).
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))



Files already downloaded and verified
Accuracy of the network on the 10000 test images: 74 %


In [27]:
import torchvision
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transforms.ToTensor() )
# Accuracy does not depend on sample order or batch size in eval mode, so use
# the configured batch size without shuffling instead of shuffled batches of 4.
testloader = torch.utils.data.DataLoader(testset, batch_size=config['BatchSize'], shuffle=False, num_workers=2)

# Switch to evaluation mode explicitly (BatchNorm running statistics) instead
# of relying on the mode left behind by an earlier cell.
conv_net2.eval()

correct = 0
total = 0
# Run inference without recording gradients (no learning happens here).
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        outputs = conv_net2(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))


Files already downloaded and verified
Accuracy of the network on the 10000 test images: 74 %
