PyTorchでは、テンソルの勾配に対してさらに逆伝播を行うことが可能です。これは、勾配自体が別の計算グラフの一部として扱われ、その勾配を計算する際に役立ちます。このような操作を行うためには、torch.autograd.grad関数を使用し、create_graph=Trueを指定して計算グラフを構築する必要はある

In [None]:
import torch

# 入力テンソルの定義
x = torch.tensor([2.0], requires_grad=True)

# 順伝播の計算
y = x ** 2  # y = x^2

# yに対するxの勾配を計算
grad_y = torch.autograd.grad(y, x, create_graph=True)[0]

# grad_yに対するxの勾配を計算
grad2_y = torch.autograd.grad(grad_y, x)[0]

print(f"x: {x.item()}")
print(f"y: {y.item()}")
print(f"dy/dx: {grad_y.item()}")
print(f"d^2y/dx^2: {grad2_y.item()}")

x: 2.0
y: 4.0
dy/dx: 4.0
d^2y/dx^2: 2.0


# 2次元の簡易的に作成したテンソルで実験


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter

# TensorBoardの設定
writer = SummaryWriter(log_dir="1tensor")

# データの生成
torch.manual_seed(42)  # 再現性のためのシード値
x1 = torch.linspace(0, 1, 100, dtype=torch.float32)  # x1: 線形
x2 = torch.rand(100, dtype=torch.float32)  # x2: ランダム
x = torch.stack([x1, x2], dim=1)  # 入力: [x1, x2]
x.requires_grad_()  # 勾配計算を有効にする
y = x1.clone()  # 出力は x1 のみ依存

# モデルパラメータ [w1, w2]
w = torch.nn.Parameter(torch.tensor([0.0, 0.0], requires_grad=True))

# 損失関数とオプティマイザ
criterion = nn.MSELoss()
optimizer = optim.SGD([w], lr=0.1)

# 学習ループ
for epoch in range(100):
    # モデルの出力
    y_pred = torch.matmul(x, w)  # y_pred = w1 * x1 + w2 * x2
    # 損失計算
    loss = criterion(y_pred, y)
    dlossdx = torch.autograd.grad(loss, x, create_graph=True)[0]  # x に対する勾配を取得
    # print(dlossdx.shape)
    # loss2 = (dlossdx**2).sum()
    loss2 = torch.max(dlossdx.abs())
    # loss2 = torch.max(dlossdx.abs(), dim=1)[0].sum()
    # loss = loss + loss2  # 損失に loss2 を加える

    # 勾配計算と更新
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # TensorBoard に損失を記録
    writer.add_scalar('Loss/Total Loss', loss.item(), epoch)
    # writer.add_scalar('Loss/Loss1 (MSE)', criterion(y_pred, y).item(), epoch)
    # writer.add_scalar('Loss/Loss2 (Grad Penalty)', loss2.item(), epoch)
    # writer.add_scalar('Weights/w1', w[0].item(), epoch)
    # writer.add_scalar('Weights/w2', w[1].item(), epoch)

    # 進捗を出力
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/100], Loss: {loss.item():.4f}, w: {w.detach().numpy()}")

# TensorBoardを閉じる
writer.close()

# 学習結果
print(f"最終的な w: {w.detach().numpy()}")

Epoch [10/100], Loss: 0.0666, w: [0.4382876  0.26491398]
Epoch [20/100], Loss: 0.0269, w: [0.6151691 0.2922606]
Epoch [30/100], Loss: 0.0165, w: [0.70722514 0.25977173]
Epoch [40/100], Loss: 0.0108, w: [0.76744395 0.21765819]
Epoch [50/100], Loss: 0.0072, w: [0.8123824  0.17890564]
Epoch [60/100], Loss: 0.0048, w: [0.84785426 0.14606124]
Epoch [70/100], Loss: 0.0032, w: [0.87643355 0.11893417]
Epoch [80/100], Loss: 0.0021, w: [0.89961696 0.09673017]
Epoch [90/100], Loss: 0.0014, w: [0.9184603  0.07861938]
Epoch [100/100], Loss: 0.0009, w: [0.9337813  0.06387097]
最終的な w: [0.9337813  0.06387097]


In [None]:
%load_ext tensorboard
%tensorboard --logdir ./1tensor

# CIFAR10での実験

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

num_epochs = 10  # 100エポックに設定
use_loss2 = False  # dlossdxを使うかどうかを指定
channel = 1 # coler
batch_size = 1
learning_rate = 0.001  # 初期学習率
# TensorBoardの設定
writer = SummaryWriter()

# 画像の前処理
if channel > 1:
  transform = transforms.Compose([
      transforms.Resize((32, 32)),  # 画像サイズの統一
      transforms.ToTensor(),  # テンソルに変換
      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # RGBの各チャンネルを正規化
  ])
else:
  transform = transforms.Compose([
      transforms.Grayscale(num_output_channels=1),  # 白黒画像に変換（チャンネル数を1に）
      transforms.Resize((32, 32)),  # 画像サイズの統一
      transforms.ToTensor(),  # テンソルに変換
      transforms.Normalize((0.5,), (0.5,))  # 正規化
  ])
# CIFAR-10 データセットのダウンロードとデータローダーの作成
dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# 画像分類モデルの定義
class ImageClassifier(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ImageClassifier, self).__init__()
        self.fc = nn.Linear(input_size, num_classes, bias=False)

    def forward(self, x):
        return self.fc(x)

# モデルのインスタンス化
input_size = 32 * 32 * channel  # RGB画像を1Dにフラット化
num_classes = 10  # CIFAR-10は10クラス分類
model = ImageClassifier(input_size, num_classes)

# オプティマイザとスケジューラの定義
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
# lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)  # 10エポックごとに学習率を0.1倍に減衰

# 中心16×16のマスクを作成
mask = torch.zeros((batch_size, channel, 32, 32))
mask[:, :, 8:24, 8:24] = 1  # 中心領域を1に設定
mask = mask.view(-1, input_size)  # (1, 3072) 形式に変換

# トレーニングループの開始
for epoch in range(num_epochs):

    epoch_loss = 0.0
    for image, label in dataloader:
        image.requires_grad_()
        x = image.view(-1, input_size)  # 画像を1Dに変換
        target = torch.tensor(label)  # クラスラベル

        # 順伝播
        output = model(x)
        loss = criterion(output, target)

        if use_loss2:
            # dloss/dxの計算
            dlossdx = torch.autograd.grad(loss, x, create_graph=True)[0]

            # 中心領域の勾配に注目した二次損失の計算
            focused_dlossdx = dlossdx * mask
            loss2 = (focused_dlossdx**2).sum()

            # 総合的な損失の計算
            alpha = 1.0  # 重み付け係数
            total_loss = loss  + alpha * loss2
        else:
            total_loss = loss

        # 勾配の初期化、バックプロパゲーション、パラメータの更新
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # 損失の蓄積
        epoch_loss += total_loss.item()
    # 学習率スケジューラを更新
    # lr_scheduler.step()
    # TensorBoardに記録
    average_loss = epoch_loss / len(dataloader)
    writer.add_scalar('Loss/With Loss2' if use_loss2 else 'Loss/Without Loss2', average_loss, epoch)

    writer.add_scalar('Learning Rate', optimizer.param_groups[0]['lr'], epoch)

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")

# TensorBoardを閉じる
writer.close()

Files already downloaded and verified


  target = torch.tensor(label)  # クラスラベル


Epoch [1/10], Loss: 2.0861, Learning Rate: 0.001000
Epoch [2/10], Loss: 2.0627, Learning Rate: 0.001000
Epoch [3/10], Loss: 2.0548, Learning Rate: 0.001000
Epoch [4/10], Loss: 2.0488, Learning Rate: 0.001000
Epoch [5/10], Loss: 2.0453, Learning Rate: 0.001000
Epoch [6/10], Loss: 2.0417, Learning Rate: 0.001000
Epoch [7/10], Loss: 2.0390, Learning Rate: 0.001000
Epoch [8/10], Loss: 2.0357, Learning Rate: 0.001000
Epoch [9/10], Loss: 2.0342, Learning Rate: 0.001000
Epoch [10/10], Loss: 2.0322, Learning Rate: 0.001000


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

num_epochs = 20
use_loss2 = True
channel = 1
batch_size = 32
learning_rate = 0.001
alpha = 0.5

writer = SummaryWriter()

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

class CNNClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1_input_size = 64 * 16 * 16
        self.fc1 = nn.Linear(self.fc1_input_size, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = CNNClassifier(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)

def create_mask(batch_size, channel, height, width, mask_size=16):
    mask = torch.zeros((batch_size, channel, height, width)).to(device)
    start = (height - mask_size) // 2
    end = start + mask_size
    mask[:, :, start:end, start:end] = 1
    return mask

val_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

def visualize_weights(model, writer, epoch):
    with torch.no_grad():
        for name, param in model.named_parameters():
            if 'weight' in name and param.dim() == 4:
                weight_np = param.cpu().numpy()
                fig, ax = plt.subplots()
                heatmap = np.mean(weight_np, axis=(0, 1))
                img = ax.imshow(heatmap, cmap='jet', alpha=0.5)
                plt.colorbar(img)
                plt.title(f"{name} Weights Heatmap")
                writer.add_figure(f"Weights/{name}", fig, global_step=epoch)
                plt.close(fig)

def evaluate_model(model, dataloader, writer, epoch):
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    val_loss /= len(dataloader)
    writer.add_scalar('Loss/Validation', val_loss, epoch)
    writer.add_scalar('Accuracy/Validation', accuracy, epoch)
    print(f'Validation Accuracy: {accuracy:.2f}%, Validation Loss: {val_loss:.4f}')

for epoch in range(num_epochs):
    epoch_loss = 0.0
    model.train()
    for image, label in dataloader:
        batch_size = image.size(0)
        mask = create_mask(batch_size, channel, 32, 32)
        image, label = image.to(device), label.to(device)
        image.requires_grad_()
        output = model(image)
        loss = criterion(output, label)
        if use_loss2:
            dlossdx = torch.autograd.grad(outputs=output.mean(), inputs=image, create_graph=True)[0]
            focused_dlossdx = dlossdx * mask
            loss2 = (focused_dlossdx**2).sum() / (mask.sum() + 1e-8)
            total_loss = loss + alpha * loss2
        else:
            total_loss = loss
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        epoch_loss += total_loss.item()
    average_loss = epoch_loss / len(dataloader)
    scheduler.step(average_loss)
    writer.add_scalar('Loss/With Loss2' if use_loss2 else 'Loss/Without Loss2', average_loss, epoch)
    writer.add_scalar('Learning Rate', optimizer.param_groups[0]['lr'], epoch)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
    if (epoch + 1) % 5 == 0:
        evaluate_model(model, val_dataloader, writer, epoch)
        visualize_weights(model, writer, epoch)

writer.close()

KeyboardInterrupt: 

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
import wandb

dataset = CIFAR10(root='./data', train=True, download=True, transform=transform)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
val_dataset = CIFAR10(root='./data', train=False, download=True, transform=transform)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mko4ro[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(
  Expected `list[str]` but got `tuple` - serialized value may not be as expected
  return self.__pydantic_serializer__.to_python(


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:11<00:00, 14761002.57it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [3]:
class CNNClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(CNNClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1_input_size = 64 * 16 * 16
        self.fc1 = nn.Linear(self.fc1_input_size, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

model = CNNClassifier(num_classes=10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5)


In [4]:
def create_center_mask(batch_size, channel, height, width):
    mask = torch.zeros((batch_size, channel, height, width)).to(device)
    grid_size = int(batch_size ** 0.5)  # 3x3 grid for batch size 9
    center_index = batch_size // 2  # Index of the center image in 3x3 grid
    mask[center_index, :, :, :] = 1  # Only the center image gets a mask
    return mask

In [6]:
# 検証データの評価（Val Loss と Accuracy の記録を追加）
def evaluate_model(model, dataloader, criterion, writer, epoch):
    model.eval()
    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    val_loss /= len(dataloader)
    wandb.log({"val_loss": val_loss,  "epoch": epoch})
    wandb.log({"val_acc": accuracy,  "epoch": epoch})
    writer.add_scalar('Loss/Validation', val_loss, epoch)
    writer.add_scalar('Accuracy/Validation', accuracy, epoch)
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%')

In [8]:
wandb.init(project="cnn-cifar10", config={
    "epochs": 20,
    "batch_size": 9,
    "learning_rate": 0.001,
    "alpha": 0.01,
})

writer = SummaryWriter()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_epochs = wandb.config["epochs"]
use_loss2 = True
channel = 1
batch_size = wandb.config["batch_size"]
learning_rate = wandb.config["learning_rate"]
alpha = wandb.config["alpha"]

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.Resize((32, 32)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [9]:
for epoch in range(num_epochs):
    epoch_loss = 0.0
    model.train()
    for image, label in dataloader:
        batch_size = image.size(0)
        mask = create_center_mask(batch_size, channel, 32, 32)
        image, label = image.to(device), label.to(device)
        image.requires_grad_()
        output = model(image)
        loss = criterion(output, label)
        if use_loss2:
            dlossdx = torch.autograd.grad(outputs=output.mean(), inputs=image, create_graph=True)[0]
            focused_dlossdx = dlossdx * mask
            loss2 = (focused_dlossdx**2).sum() / (mask.sum() + 1e-8)
            total_loss = loss + alpha * loss2
            # loss2 = loss2 / loss2.detach()  # 正規化してエネルギーを1に
            # lambda_energy = 0.1  # 制約の強さ
            # constraint_loss = lambda_energy * (loss2 - 1) ** 2
            # total_loss = loss + alpha * loss2 + constraint_loss
        else:
            total_loss = loss
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        epoch_loss += total_loss.item()
    average_loss = epoch_loss / len(dataloader)
    scheduler.step(average_loss)
    wandb.log({"train_loss": average_loss, "lr": optimizer.param_groups[0]['lr'], "epoch": epoch})
    writer.add_scalar("Loss/train", average_loss, epoch)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {average_loss:.4f}, Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
    if (epoch + 1) % 5 == 0:
        evaluate_model(model, val_dataloader, criterion, writer, epoch)
writer.close()
wandb.finish()

Epoch [1/20], Loss: 1.1759, Learning Rate: 0.001000
Epoch [2/20], Loss: 1.1544, Learning Rate: 0.001000
Epoch [3/20], Loss: 1.1361, Learning Rate: 0.001000
Epoch [4/20], Loss: 1.1211, Learning Rate: 0.001000
Epoch [5/20], Loss: 1.1117, Learning Rate: 0.001000
Validation Loss: 1.0464, Validation Accuracy: 63.27%
Epoch [6/20], Loss: 1.1009, Learning Rate: 0.001000
Epoch [7/20], Loss: 1.0822, Learning Rate: 0.001000
Epoch [8/20], Loss: 1.0789, Learning Rate: 0.001000
Epoch [9/20], Loss: 1.0681, Learning Rate: 0.001000
Epoch [10/20], Loss: 1.0625, Learning Rate: 0.001000
Validation Loss: 1.0538, Validation Accuracy: 63.21%
Epoch [11/20], Loss: 1.0595, Learning Rate: 0.001000
Epoch [12/20], Loss: 1.0588, Learning Rate: 0.001000
Epoch [13/20], Loss: 1.0506, Learning Rate: 0.001000
Epoch [14/20], Loss: 1.0430, Learning Rate: 0.001000
Epoch [15/20], Loss: 1.0339, Learning Rate: 0.001000
Validation Loss: 0.9924, Validation Accuracy: 65.56%
Epoch [16/20], Loss: 1.0350, Learning Rate: 0.001000
Ep

0,1
epoch,▁▁▂▂▂▁▁▂▂▂▂▂▃▃▄▄▄▄▄▅▅▅▆▆▆▆▇▇▇████
lr,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_acc,▁▁█▆
val_loss,▇█▁▂

0,1
epoch,19.0
lr,0.001
train_loss,1.02206
val_acc,64.98
val_loss,1.00275
