<a href="https://colab.research.google.com/github/kodenshacho/sigma/blob/master/upscale_pk.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
import torch.onnx
import cv2
import numpy as np
from basicsr.archs.rrdbnet_arch import RRDBNet

# --- レイヤー定義（Flatten → FC → Reshape） ---
class IdentityFC(nn.Module):
    """Bias-free fully connected layer applied to a flattened feature map.

    The input ``(B, C, H, W)`` tensor is flattened to ``(B, C*H*W)``, passed
    through a square ``nn.Linear`` and reshaped back to ``(B, C, H, W)``.
    With identity initialisation the layer reproduces its input.

    Args:
        shape: ``(C, H, W)`` of the feature map the layer will receive.
        random_init: If True, initialise the weight with Kaiming-normal
            values; otherwise initialise it to the identity matrix.
    """

    def __init__(self, shape, random_init=False):
        super().__init__()
        self.shape = shape  # (C, H, W)
        flat_dim = shape[0] * shape[1] * shape[2]
        self.fc = nn.Linear(flat_dim, flat_dim, bias=False)

        if random_init:
            self.init_random()
        else:
            self.init_identity(flat_dim)

    def init_identity(self, dim):
        """Overwrite the FC weight with a ``dim x dim`` identity matrix."""
        with torch.no_grad():
            # torch.eye builds the matrix in one call instead of writing the
            # diagonal element by element from Python.
            self.fc.weight.copy_(torch.eye(dim))

    def init_random(self):
        """Initialise the FC weight with Kaiming-normal values (a=0.01)."""
        nn.init.kaiming_normal_(self.fc.weight, a=0.01)

    def forward(self, x):
        """Apply the FC layer to ``x`` and return a tensor of the same shape."""
        b, c, h, w = x.shape
        # reshape (rather than view) also accepts non-contiguous inputs,
        # e.g. permuted tensors.
        x_flat = x.reshape(b, -1)
        x_fc = self.fc(x_flat)
        return x_fc.reshape(b, c, h, w)

# --- Real-ESRGANモデルにFCレイヤーを挿入 ---
def insert_fc_into_pretrained(model: nn.Module, random_init=False, verbose=True,
                              input_shape=(1, 3, 1200, 1600)):
    """Insert an ``IdentityFC`` layer at the smallest feature map of ``model``.

    A random probe tensor is pushed through ``model.conv_first`` and then
    through each layer of ``model.body``. The FC layer is inserted right
    after the body layer whose output has the smallest spatial area (H * W).
    If no body layer produces a smaller map than ``conv_first``, the FC layer
    is inserted before the first body layer and sized from the ``conv_first``
    output.

    Args:
        model: Network exposing ``conv_first`` and an iterable ``body``.
        random_init: Forwarded to ``IdentityFC`` (random vs. identity weights).
        verbose: If True, print where the layer is inserted and its input size.
        input_shape: Shape of the probe tensor used to measure feature maps.
            NOTE(review): the channel count must match what
            ``model.conv_first`` expects - confirm for the model in use.

    Returns:
        The same ``model`` object, with ``model.body`` replaced by a new
        ``nn.Sequential`` that contains the inserted layer.
    """
    probe = torch.randn(*input_shape)
    with torch.no_grad():
        x = model.conv_first(probe)
        min_area = x.shape[2] * x.shape[3]
        # -1 stands for "output of conv_first", i.e. insert before body[0].
        min_idx = -1
        # Only the shape is kept; holding on to every intermediate feature
        # map would keep all of them in memory just to read one shape later.
        min_shape = x.shape

        for i, layer in enumerate(model.body):
            x = layer(x)
            area = x.shape[2] * x.shape[3]
            if area < min_area:
                min_area = area
                min_idx = i
                min_shape = x.shape

    if verbose:
        location = f"model.body[{min_idx}]" if min_idx >= 0 else "model.conv_first"
        print(f"🔍 最小特徴マップ位置: {location}、サイズ: {min_shape}")

    body_layers = list(model.body.children())
    before = body_layers[:min_idx + 1]
    after = body_layers[min_idx + 1:]

    # Drop the batch dimension: IdentityFC expects (C, H, W).
    fc_layer = IdentityFC(shape=min_shape[1:], random_init=random_init)
    model.body = nn.Sequential(*before, fc_layer, *after)
    return model

# --- ONNXファイルに変換 ---
def export_to_onnx(model, input_tensor, onnx_path="exported_model.onnx"):
    """Write ``model`` to ``onnx_path`` in ONNX format.

    The model is switched to eval mode and exported with
    ``torch.onnx.export`` (opset 11) using ``input_tensor`` as the example
    input. The graph input is named ``input`` and the output ``output``;
    axis 0 of both is declared dynamic under the name ``batch``.

    Args:
        model: Module to export.
        input_tensor: Example input passed to the exporter.
        onnx_path: Destination file path.
    """
    # Axis 0 is the only dimension allowed to vary in the exported graph.
    dynamic_batch_axes = {
        'input': {0: 'batch'},
        'output': {0: 'batch'},
    }

    model.eval()
    torch.onnx.export(
        model,
        input_tensor,
        onnx_path,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes=dynamic_batch_axes,
        opset_version=11,
    )
    print(f"✅ ONNXとして保存されました: {onnx_path}")

# --- モデルを学習（random_init=True の場合） ---
def train_model(model, target_model, epochs=1, lr=1e-4):
    """Fit ``model`` to reproduce ``target_model`` on one random input.

    A single random ``(1, 3, 1200, 1600)`` tensor is generated, the output of
    ``target_model`` on it is computed once without gradients, and ``model``
    is then optimised with Adam on the MSE between its output and that
    reference. The loss is printed after every epoch.

    Args:
        model: Module to train; it is put into training mode and all of its
            parameters are handed to the optimiser.
        target_model: Module providing the reference output.
        epochs: Number of optimisation steps.
        lr: Adam learning rate.
    """
    model.train()
    adam = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()
    sample = torch.randn(1, 3, 1200, 1600)

    # The reference is fixed, so it is computed once and without autograd.
    with torch.no_grad():
        reference = target_model(sample)

    for epoch in range(epochs):
        adam.zero_grad()
        loss = criterion(model(sample), reference)
        loss.backward()
        adam.step()
        print(f"🧪 Epoch {epoch+1}/{epochs}, Loss: {loss.item():.6f}")

# --- GUIで結果を比較（OpenCV） ---
def visualize_output(output1, output2):
    """Show two model outputs side by side in an OpenCV window.

    Each tensor is converted to an 8-bit BGR image, the two images are
    stacked horizontally (``output1`` on the left) and displayed until a key
    is pressed.

    Args:
        output1: Image tensor shown on the left.
        output2: Image tensor shown on the right.
            Both are presumably ``(1, 3, H, W)`` RGB tensors with values in
            [0, 1] - values outside that range are clamped.
    """
    def tensor_to_cv(img):
        # detach() lets tensors that still track gradients be converted;
        # squeeze(0) removes only the batch axis, so a size-1 height or
        # width is not dropped by accident.
        img = img.detach().squeeze(0).permute(1, 2, 0).clamp(0, 1).cpu().numpy()
        img = (img * 255).astype(np.uint8)
        # OpenCV displays images in BGR channel order.
        return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    img1 = tensor_to_cv(output1)
    img2 = tensor_to_cv(output2)
    combined = np.hstack((img1, img2))
    cv2.imshow('左: Pretrained, 右: FC付き', combined)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# --- メイン関数 ---
def main():
    """Build the FC-augmented model, compare it with the original and save it.

    Steps: load the pretrained RRDBNet weights, create a second copy with an
    ``IdentityFC`` layer inserted, optionally fine-tune that copy against the
    original, compare both outputs on a random input, then save the modified
    model as ``.pth`` and ONNX and display both outputs side by side.
    """
    # Load the pretrained weights
    model_path = 'pretrained/RealESRGAN_x1_fixed_1600x1200.pth'
    # Read the checkpoint once and map it to CPU: both models stay on the
    # CPU here, and this also works for checkpoints saved from a GPU.
    state_dict = torch.load(model_path, map_location="cpu")

    model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
                    num_block=23, num_grow_ch=32, scale=1)
    model.load_state_dict(state_dict, strict=True)
    model.eval()

    # Create the model with the FC layer (random or identity initialisation)
    use_random_init = True  # when True, the model is also fine-tuned below
    model_fc = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
                       num_block=23, num_grow_ch=32, scale=1)
    model_fc.load_state_dict(state_dict, strict=True)
    model_fc = insert_fc_into_pretrained(model_fc, random_init=use_random_init, verbose=True)

    # Generate the input data
    input_img = torch.randn(1, 3, 1200, 1600)

    # Fine-tune (only when needed)
    if use_random_init:
        print("⚙️ FCレイヤーのランダム初期化に対して微調整を行います...")
        train_model(model_fc, model, epochs=3)

    # train_model puts the model into training mode; switch back so both
    # models are compared (and saved) in eval mode.
    model_fc.eval()

    # Compute and compare the outputs
    with torch.no_grad():
        out1 = model(input_img)
        out2 = model_fc(input_img)
        is_same = torch.allclose(out1, out2, atol=1e-6)
        print(f"✅ 出力一致: {is_same}")

    # Save as a .pth file
    torch.save(model_fc.state_dict(), "modified_model_fc.pth")
    print("✅ FC付きモデルを保存しました: modified_model_fc.pth")

    # Save in ONNX format
    export_to_onnx(model_fc, input_img, onnx_path="modified_model_fc.onnx")

    # Compare the output images in a GUI window
    visualize_output(out1, out2)

# Run the full pipeline only when this code is executed as the main module.
if __name__ == "__main__":
    main()

In [None]:
import torch
from basicsr.archs.rrdbnet_arch import RRDBNet
from insert_fc import insert_fc_into_pretrained

def test_model_with_fc():
    """Check that an identity-initialised FC layer leaves the output unchanged.

    Two RRDBNet instances are loaded from the same checkpoint; one of them
    gets an ``IdentityFC`` layer inserted with ``random_init=False``. Both are
    run in eval mode on the same random input and the outputs are compared
    with ``torch.allclose`` (``atol=1e-6``). The result is printed.
    """
    model_path = 'pretrained/RealESRGAN_x1_fixed_1600x1200.pth'

    def _load_pretrained():
        # Build a fresh network and fill it with the pretrained weights.
        net = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
                      num_block=23, num_grow_ch=32, scale=1)
        net.load_state_dict(torch.load(model_path), strict=True)
        return net

    # Reference model
    model = _load_pretrained()
    model.eval()

    # Second copy with the FC layer inserted
    model_fc = insert_fc_into_pretrained(_load_pretrained(), random_init=False, verbose=True)
    model_fc.eval()

    # Random test image
    input_img = torch.randn(1, 3, 1200, 1600)

    # Compare the two outputs
    with torch.no_grad():
        reference_out = model(input_img)
        fc_out = model_fc(input_img)
        is_same = torch.allclose(reference_out, fc_out, atol=1e-6)

    print(f"✅ 出力が一致するか: {is_same}")

# Run the equivalence check only when this code is executed as the main module.
if __name__ == "__main__":
    test_model_with_fc()