In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import json
from PIL import Image
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import IPython.display as display
%matplotlib inline

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models,transforms, datasets
from torch.utils.data import DataLoader

In [2]:
import sys

# Make the project's helper modules importable. The path is relative, so it is
# resolved against the kernel's current working directory.
LIB_DIR = "../src/libs/"
# Guard against duplicates: re-running this cell would otherwise append the
# same entry to sys.path again each time.
if LIB_DIR not in sys.path:
    sys.path.append(LIB_DIR)

In [3]:
from transform import BaseTransform
from dataset import CustomDataset
from loss import LossFunction

In [4]:
# Record the library versions this notebook was executed with.
for label, module in (("Pytorch Version: ", torch), ("Torchvision Version:", torchvision)):
    print(label, module.__version__)

Pytorch Version:  1.7.0
Torchvision Version: 0.8.0


# 画像

In [5]:
# img_path: directory of the input images.
# cor_path: directory of the paired "cor" images (presumably the
# corrected / ground-truth counterparts — TODO confirm).
img_path, cor_path = "../data/img/", "../data/cor_img/"

# VAE

In [6]:
# Project-defined preprocessing from ../src/libs/transform.py. The original
# note here read "256*256", presumably the output image size — TODO confirm.
transform = BaseTransform()

In [7]:
# Project-defined dataset built from the two image directories; the training
# loops below unpack each item as a pair of tensors.
dataset = CustomDataset(
    data_dir=img_path,
    cor_dir=cor_path,
    transform=transform,
)
# One sample per batch, visited in a new random order every epoch.
data_loader = DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
)

In [9]:

class Encoder(nn.Module):
    """Convolutional VAE encoder: image batch -> (mu, logvar) of the latent Gaussian.

    Two stride-2 convolutions (kernel 4, padding 1) each halve the spatial
    size, the 128-channel feature map is flattened and fed through a
    1024-unit hidden layer, and two linear heads emit the latent mean and
    log-variance.

    Args:
        z_dim: Dimensionality of the latent space.
        img_size: Height and width of the square 3-channel input images.
            Defaults to 256, which reproduces the previously hard-coded
            flattened size of ``128*64*64``.

    Raises:
        ValueError: If ``img_size`` is not a positive multiple of 4.
    """

    def __init__(self, z_dim, img_size=256):
        super(Encoder, self).__init__()
        if img_size <= 0 or img_size % 4 != 0:
            raise ValueError(
                f"img_size must be a positive multiple of 4, got {img_size}"
            )
        # Each stride-2 convolution halves the side length: img_size -> /2 -> /4.
        feat_size = img_size // 4
        self.conv1 = nn.Conv2d(3, 64, 4, stride=2, padding=1)
        self.conv2 = nn.Conv2d(64, 128, 4, stride=2, padding=1)
        self.fc1 = nn.Linear(128 * feat_size * feat_size, 1024)
        self.fc_mu = nn.Linear(1024, z_dim)
        self.fc_logvar = nn.Linear(1024, z_dim)

    def forward(self, x):
        """Encode ``x`` of shape (batch, 3, img_size, img_size).

        Returns:
            Tuple ``(mu, logvar)``, each of shape (batch, z_dim).
        """
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        mu = self.fc_mu(x)
        logvar = self.fc_logvar(x)
        return mu, logvar
    
# VAE decoder
class Decoder(nn.Module):
    """VAE decoder: latent vectors -> reconstructed 3-channel images in [0, 1].

    Two linear layers expand the latent vector to a 128-channel feature map,
    and two stride-2 transposed convolutions (kernel 4, padding 1) each double
    its spatial size.

    Args:
        z_dim: Dimensionality of the latent space.
        img_size: Height and width of the square output images. Defaults to
            256, which reproduces the previously hard-coded 128x64x64
            intermediate feature map.

    Raises:
        ValueError: If ``img_size`` is not a positive multiple of 4.
    """

    def __init__(self, z_dim, img_size=256):
        super(Decoder, self).__init__()
        if img_size <= 0 or img_size % 4 != 0:
            raise ValueError(
                f"img_size must be a positive multiple of 4, got {img_size}"
            )
        # The two transposed convolutions upsample by 2x each, so the feature
        # map fed to them must be a quarter of the target side length.
        self.feat_size = img_size // 4
        self.fc1 = nn.Linear(z_dim, 1024)
        self.fc2 = nn.Linear(1024, 128 * self.feat_size * self.feat_size)
        self.deconv1 = nn.ConvTranspose2d(128, 64, 4, stride=2, padding=1)
        self.deconv2 = nn.ConvTranspose2d(64, 3, 4, stride=2, padding=1)

    def forward(self, z):
        """Decode ``z`` of shape (batch, z_dim).

        Returns:
            Tensor of shape (batch, 3, img_size, img_size) with values in [0, 1].
        """
        x = F.relu(self.fc1(z))
        x = F.relu(self.fc2(x))
        x = x.view(x.size(0), 128, self.feat_size, self.feat_size)
        x = F.relu(self.deconv1(x))
        # Sigmoid keeps the output in [0, 1], the range of normalised pixel values.
        x = torch.sigmoid(self.deconv2(x))
        return x
    
# VAE model
class VAE(nn.Module):
    """Variational autoencoder wiring an ``Encoder`` to a ``Decoder``.

    Args:
        z_dim: Dimensionality of the latent space, passed to both halves.
    """

    def __init__(self, z_dim):
        super().__init__()
        self.encoder = Encoder(z_dim)
        self.decoder = Decoder(z_dim)

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * sigma`` with ``eps`` sampled from N(0, I).

        ``sigma`` is recovered from the log-variance as ``exp(0.5 * logvar)``.
        """
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def forward(self, x):
        """Encode ``x``, sample a latent code, and decode it.

        Returns:
            Tuple ``(x_recon, mu, logvar)``.
        """
        mu, logvar = self.encoder(x)
        latent = self.reparameterize(mu, logvar)
        return self.decoder(latent), mu, logvar
    
    
def vae_loss(recon_x, x, mu, logvar):
    """Return the VAE training objective as a scalar tensor.

    The objective is the squared reconstruction error plus the KL-divergence
    term, both summed (not averaged) over every element.

    Args:
        recon_x: Reconstruction produced by the decoder.
        x: Target tensor the reconstruction is compared against.
        mu: Latent means from the encoder.
        logvar: Latent log-variances from the encoder.
    """
    # Reconstruction term: squared error summed over all elements.
    recon_term = F.mse_loss(recon_x, x, reduction='sum')

    # KL-divergence term, summed over every latent entry.
    kl_inner = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * kl_inner.sum()

    return recon_term + kl_term

# Instantiate the model
z_dim = 64  # dimensionality of the latent variable
device = torch.device('cpu')
# Keep the model on the same device the batches are moved to, so changing
# `device` above is the only edit needed to train elsewhere.
vae = VAE(z_dim).to(device)

# Optimizer and learning rate
optimizer = optim.Adam(vae.parameters(), lr=0.001)

num_epochs = 10

# Training loop
vae.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_samples = 0
    for batch in data_loader:  # the data loader must be configured appropriately
        optimizer.zero_grad()
        x = batch[0].to(device)
        recon_x, mu, logvar = vae(x)
        loss = vae_loss(recon_x, x, mu, logvar)
        loss.backward()
        optimizer.step()
        # vae_loss is sum-reduced, so accumulate it and the sample count to
        # report a per-sample average for the whole epoch.
        epoch_loss += loss.item()
        num_samples += x.size(0)
    if num_samples == 0:
        # Previously this surfaced as a NameError on `loss` in the print below.
        raise RuntimeError("data_loader yielded no batches; nothing to train on")
    # Report the epoch mean rather than the loss of whichever batch came last,
    # which with shuffling is a noisy single-batch value.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / num_samples}')

# Save the trained model weights
torch.save(vae.state_dict(), 'vae_model.pth')

: 

In [22]:
# Set up the loss function and the optimizer
# VAE.__init__ names its latent-size argument `z_dim`; passing `latent_dim`
# raises a TypeError.
vae = VAE(z_dim=100)
criterion = nn.MSELoss()  # placeholder; replace with your own loss function
optimizer = optim.Adam(vae.parameters(), lr=0.001)

In [23]:
# Train the VAE
num_epochs = 10
for epoch in range(num_epochs):
    for batch in data_loader:
        input_images, _ = batch
        output, mu, logvar = vae(input_images)

        # nn.MSELoss only accepts (input, target); the four-argument VAE
        # objective (reconstruction + KL) is vae_loss, defined in this notebook.
        loss = vae_loss(output, input_images, mu, logvar)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


RuntimeError: mat1 and mat2 shapes cannot be multiplied (64x262144 and 3136x256)

In [11]:
# Train the VAE
num_epochs = 50
for epoch in range(num_epochs):
    for batch in data_loader:
        input_images, target_images = batch

        # Run the input image through the VAE to get the decoded image and
        # the latent distribution parameters.
        output, mu, logvar = vae(input_images)

        # The loss compares the decoded image with the paired target image.
        # The squared error is summed, not averaged: a mean over every pixel
        # would be orders of magnitude smaller than the summed KL term below,
        # so the KL term would dominate the objective.
        reconstruction_loss = F.mse_loss(output, target_images, reduction='sum')
        kl_divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
        total_loss = reconstruction_loss + kl_divergence

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

RuntimeError: Given groups=1, weight of size [32, 1, 4, 4], expected input[1, 3, 224, 224] to have 1 channels, but got 3 channels instead

# GAN

## Generator

In [13]:
class Generator(nn.Module):
    """Fully connected GAN generator: latent vectors -> images in [-1, 1].

    A four-layer MLP (128 -> 256 -> 512 -> number of pixels) with ReLU
    activations and a final Tanh; the flat output is reshaped to ``img_dim``.

    Args:
        z_dim: Dimensionality of the input latent vector.
        img_dim: Iterable giving the shape of one generated image, e.g.
            ``(channels, height, width)``.
    """

    def __init__(self, z_dim, img_dim):
        super(Generator, self).__init__()
        # Store the shape as a tuple of built-in ints so that later mutation
        # of a caller-owned list cannot change the output shape.
        self.img_dim = tuple(int(d) for d in img_dim)
        # np.prod returns a NumPy scalar; nn.Linear stores its size arguments
        # as given, so convert to a plain int first.
        num_pixels = int(np.prod(self.img_dim))
        self.fc = nn.Sequential(
            nn.Linear(z_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.ReLU(),
            nn.Linear(512, num_pixels),
            nn.Tanh()
        )

    def forward(self, z):
        """Generate images from ``z`` of shape (batch, z_dim).

        Returns:
            Tensor of shape (batch, *img_dim) with values in [-1, 1].
        """
        img = self.fc(z)
        return img.view(img.size(0), *self.img_dim)