In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import OxfordIIITPet
import torchvision.transforms.functional as TF
import numpy as np
import matplotlib.pyplot as plt

# MyUNet.py として分割予定

# Mydataset.pyとして分割予定

# メインの実行

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import os
import json
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

def crop_labels_to_match(labels_to_crop, target_tensor):
    """Center-crop ``labels_to_crop`` to the spatial size of ``target_tensor``.

    Both arguments must be 4-dimensional; dimensions 2 and 3 are treated as
    height and width, and the first two dimensions are kept in full.

    Args:
        labels_to_crop: 4-D array/tensor to be cropped.
        target_tensor: 4-D array/tensor whose height and width define the
            size of the crop.

    Returns:
        A slice of ``labels_to_crop`` with the same height and width as
        ``target_tensor``, taken from the center (the extra row/column of an
        odd size difference is dropped from the bottom/right).

    Raises:
        ValueError: if ``target_tensor`` is taller or wider than
            ``labels_to_crop``. Without this check the offsets would be
            negative and the slice would silently return a wrong region.
    """
    target_h, target_w = target_tensor.shape[2:]
    source_h, source_w = labels_to_crop.shape[2:]
    if target_h > source_h or target_w > source_w:
        raise ValueError(
            f"cannot crop labels of size ({source_h}, {source_w}) "
            f"to a larger target of size ({target_h}, {target_w})"
        )
    top = (source_h - target_h) // 2
    left = (source_w - target_w) // 2
    return labels_to_crop[:, :, top:top + target_h, left:left + target_w]
# Image preprocessing: only ToTensor is applied (a 128x128 Resize step is
# currently disabled).
transform = transforms.Compose([transforms.ToTensor()])

# --- Dataset ---
# Document IDs handed to PreTrainDataset for both the train and test datasets.
test_doc_id_list = [
    '100241706', '100249371', '100249376', '100249416', '100249476',
    '100249537', '200003076', '200003803', '200003967', '200004107',
]

# Keyword arguments shared by both datasets; only test_mode differs.
# (An alternative input location, '../kuzushiji_recognition/synthetic_images/input_images/',
# is not used here.)
_dataset_kwargs = dict(
    input_path='../kuzushiji-recognition/synthetic_images_backup/input_images/',
    json_path='../kuzushiji-recognition/synthetic_images_backup/gt_json_backup.json',
    transform=transform,
)
train_dataset = PreTrainDataset(test_doc_id_list, test_mode=False, **_dataset_kwargs)
test_dataset = PreTrainDataset(test_doc_id_list, test_mode=True, **_dataset_kwargs)


# Build the DataLoaders through the create_optimized_dataloader helper.
train_dl = create_optimized_dataloader(train_dataset, batch_size=1, num_workers=1)
# The evaluation loader must read test_dataset (built with test_mode=True).
# It was previously wired to train_dataset, so the reported "Test Loss" was
# just a second pass over the training data.
test_dl = create_optimized_dataloader(test_dataset, batch_size=1, num_workers=1)

# --- データセットとデータローダの準備 ---
# train_dl = DataLoader(train_dataset, batch_size=1, shuffle=True)
# test_dl = DataLoader(test_dataset, batch_size=1, shuffle=True)

# --- Model, loss function, and optimizer ---
# Prefer CUDA when it is available; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# NOTE(review): the arguments (3, 4) are presumably input/output channel
# counts -- confirm against the UNet definition.
model = UNet(3, 4).to(device)
criterion = nn.MSELoss()  # treated as a regression problem, hence MSE loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# --- Training-loop configuration ---
num_epochs = 100  # number of epochs to run

# Per-epoch average losses; the training loop appends one value per epoch.
train_loss_history, test_loss_history = [], []

print("学習を開始します...")
for epoch in range(num_epochs):
    print('start epcoch')
    epoch_tag = f"Epoch {epoch+1}/{num_epochs}"

    # --- Training phase ---
    model.train()  # put the model in training mode
    train_loss_total = 0

    # Wrap the loader in tqdm to show a progress bar.
    train_bar = tqdm(train_dl, desc=f"{epoch_tag} [Train]")
    for imgs, masks in train_bar:
        imgs = imgs.to(device)
        masks = masks.to(device)

        preds = model(imgs)
        # Center-crop the labels to the spatial size of the prediction
        # before computing the loss.
        cropped_masks = crop_labels_to_match(masks, preds)
        loss = criterion(preds, cropped_masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running total and the
        # progress-bar display.
        batch_loss = loss.item()
        train_loss_total += batch_loss
        train_bar.set_postfix(loss=batch_loss)

    avg_train_loss = train_loss_total / len(train_dl)
    train_loss_history.append(avg_train_loss)

    # --- Evaluation phase ---
    model.eval()  # put the model in evaluation mode
    test_loss_total = 0

    # Gradients are not needed for evaluation; disabling them saves memory.
    with torch.no_grad():
        test_bar = tqdm(test_dl, desc=f"{epoch_tag} [Test]")
        for imgs, masks in test_bar:
            imgs = imgs.to(device)
            masks = masks.to(device)

            preds = model(imgs)
            cropped_masks = crop_labels_to_match(masks, preds)
            loss = criterion(preds, cropped_masks)

            batch_loss = loss.item()
            test_loss_total += batch_loss
            test_bar.set_postfix(loss=batch_loss)

    avg_test_loss = test_loss_total / len(test_dl)
    test_loss_history.append(avg_test_loss)

    # Report both average losses at the end of every epoch.
    print(f"{epoch_tag} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f}")

print("学習が完了しました。")

# --- Plot the loss history ---
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_loss_history, label="Train Loss")
ax.plot(test_loss_history, label="Test Loss")
ax.set_title("Loss Trend")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
plt.show()

Using device: cuda
jsonデータを読み込みました。
Pre-computing ground truth data...


KeyboardInterrupt: 

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import os
import json
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

def crop_labels_to_match(labels_to_crop, target_tensor):
    """Center-crop ``labels_to_crop`` to the spatial size of ``target_tensor``.

    Both arguments must be 4-dimensional; dimensions 2 and 3 are treated as
    height and width, and the first two dimensions are kept in full.

    Args:
        labels_to_crop: 4-D array/tensor to be cropped.
        target_tensor: 4-D array/tensor whose height and width define the
            size of the crop.

    Returns:
        A slice of ``labels_to_crop`` with the same height and width as
        ``target_tensor``, taken from the center (the extra row/column of an
        odd size difference is dropped from the bottom/right).

    Raises:
        ValueError: if ``target_tensor`` is taller or wider than
            ``labels_to_crop``. Without this check the offsets would be
            negative and the slice would silently return a wrong region.
    """
    target_h, target_w = target_tensor.shape[2:]
    source_h, source_w = labels_to_crop.shape[2:]
    if target_h > source_h or target_w > source_w:
        raise ValueError(
            f"cannot crop labels of size ({source_h}, {source_w}) "
            f"to a larger target of size ({target_h}, {target_w})"
        )
    top = (source_h - target_h) // 2
    left = (source_w - target_w) // 2
    return labels_to_crop[:, :, top:top + target_h, left:left + target_w]
# Image preprocessing: only ToTensor is applied (a 128x128 Resize step is
# currently disabled). `transform` is not passed to PreTrainDataset below.
transform = transforms.Compose([
    transforms.ToTensor()
])

# --- Dataset ---
# Document IDs handed to PreTrainDataset for both the train and test datasets.
test_doc_id_list = ['100241706', '100249371', '100249376', '100249416', '100249476', '100249537', '200003076', '200003803', '200003967', '200004107']

# Select the device once, with a CPU fallback, so the datasets use the same
# device rule as the model instead of a hard-coded torch.device('cuda').
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# No input_path / json_path / transform are passed here, so whatever defaults
# PreTrainDataset defines for them apply.
train_dataset = PreTrainDataset(
    test_doc_id_list=test_doc_id_list,
    test_mode=False,
    device=device,
    precompute_gt=False,  # ground-truth pre-computation is disabled
)
test_dataset = PreTrainDataset(
    test_doc_id_list=test_doc_id_list,
    test_mode=True,
    device=device,
    precompute_gt=False,  # ground-truth pre-computation is disabled
)

# Build the DataLoaders through the create_optimized_dataloader helper.
train_dl = create_optimized_dataloader(train_dataset, batch_size=1, num_workers=1)
# The evaluation loader must read test_dataset (built with test_mode=True).
# It was previously wired to train_dataset, so the reported "Test Loss" was
# just a second pass over the training data.
test_dl = create_optimized_dataloader(test_dataset, batch_size=1, num_workers=1)

# --- データセットとデータローダの準備 ---
# train_dl = DataLoader(train_dataset, batch_size=1, shuffle=True)
# test_dl = DataLoader(test_dataset, batch_size=1, shuffle=True)

# --- Model, loss function, and optimizer ---
# Prefer CUDA when it is available; otherwise run on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# NOTE(review): the arguments (3, 4) are presumably input/output channel
# counts -- confirm against the UNet definition.
model = UNet(3, 4).to(device)
criterion = nn.MSELoss()  # treated as a regression problem, hence MSE loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# --- Training-loop configuration ---
num_epochs = 100  # number of epochs to run

# Per-epoch average losses; the training loop appends one value per epoch.
train_loss_history, test_loss_history = [], []

print("学習を開始します...")
for epoch in range(num_epochs):
    print('start epcoch')
    epoch_tag = f"Epoch {epoch+1}/{num_epochs}"

    # --- Training phase ---
    model.train()  # put the model in training mode
    train_loss_total = 0

    # Wrap the loader in tqdm to show a progress bar.
    train_bar = tqdm(train_dl, desc=f"{epoch_tag} [Train]")
    for imgs, masks in train_bar:
        imgs = imgs.to(device)
        masks = masks.to(device)

        preds = model(imgs)
        # Center-crop the labels to the spatial size of the prediction
        # before computing the loss.
        cropped_masks = crop_labels_to_match(masks, preds)
        loss = criterion(preds, cropped_masks)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running total and the
        # progress-bar display.
        batch_loss = loss.item()
        train_loss_total += batch_loss
        train_bar.set_postfix(loss=batch_loss)

    avg_train_loss = train_loss_total / len(train_dl)
    train_loss_history.append(avg_train_loss)

    # --- Evaluation phase ---
    model.eval()  # put the model in evaluation mode
    test_loss_total = 0

    # Gradients are not needed for evaluation; disabling them saves memory.
    with torch.no_grad():
        test_bar = tqdm(test_dl, desc=f"{epoch_tag} [Test]")
        for imgs, masks in test_bar:
            imgs = imgs.to(device)
            masks = masks.to(device)

            preds = model(imgs)
            cropped_masks = crop_labels_to_match(masks, preds)
            loss = criterion(preds, cropped_masks)

            batch_loss = loss.item()
            test_loss_total += batch_loss
            test_bar.set_postfix(loss=batch_loss)

    avg_test_loss = test_loss_total / len(test_dl)
    test_loss_history.append(avg_test_loss)

    # Report both average losses at the end of every epoch.
    print(f"{epoch_tag} | Train Loss: {avg_train_loss:.4f} | Test Loss: {avg_test_loss:.4f}")

print("学習が完了しました。")

# --- Plot the loss history ---
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_loss_history, label="Train Loss")
ax.plot(test_loss_history, label="Test Loss")
ax.set_title("Loss Trend")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.grid(True)
plt.show()

Using device: cuda
jsonデータを読み込みました。
Using device: cuda
jsonデータを読み込みました。
学習を開始します...
start epcoch


Epoch 1/100 [Train]:   0%|          | 0/4965 [00:00<?, ?it/s]