In [1]:
%matplotlib inline
# Hooks matplotlib up to the Jupyter notebook (figures are rendered inline in the cell output).

In [2]:
import os
os.getcwd()  # result is discarded: only the last expression of a cell is displayed
# Switch the working directory to the project checkout; presumably this is what lets
# the relative './data/...' paths used by later cells resolve.
# NOTE(review): hard-coded absolute Windows path -- this cell only works on a machine
# that has this exact directory; consider a configurable project-root variable.
os.chdir(r'G:\Image_Decomposition\nir-main-Ran')
os.getcwd()  # last expression: the cell displays the new working directory

'G:\\Image_Decomposition\\nir-main-Ran'

In [3]:

import numpy as np
from itertools import chain

import torch
from torch.utils.data import DataLoader

from model import Siren
from util import get_mgrid, jacobian, VideoFitting

In [4]:
def train_reflection(path, total_steps, lambda_interf=0.02, lambda_flow=0.02, lambda_excl=0.0005, verbose=True, steps_til_summary=100, batch_size=None):
    """Fit three SIREN networks that split a video into a scene layer plus an additive interference layer.

    Networks (all created on the GPU):
      * ``g``  : (x, y, t) -> 2-D offset ``h`` that is added to (x, y).
      * ``f1`` : warped (x, y) -> 3-channel scene colour (after sigmoid).
      * ``f2`` : (x, y, t) -> 3-channel interference colour (after sigmoid).
    The reconstruction is ``scene + interference``.

    Args:
        path: Passed straight to ``VideoFitting`` (presumably the location of the
            input frames -- confirm against ``util.VideoFitting``).
        total_steps: Number of optimisation steps.
        lambda_interf: Weight of the mean-absolute-value penalty on the interference layer.
        lambda_flow: Weight of the mean-absolute-value penalty on ``jacobian(h, xyt)``.
        lambda_excl: Weight of the exclusion loss between the two layers' gradients.
        verbose: When False, the periodic progress line is suppressed.
        steps_til_summary: Print a progress line every this many steps.
        batch_size: Number of coordinate rows used per step. ``None`` (default)
            keeps the original choice of ``(v.H * v.W) // 8`` (but never less
            than one row). Pass a smaller value to lower per-step memory use.

    Returns:
        Tuple ``(g, f1, f2, v.video)``.

    Raises:
        ValueError: If an explicit ``batch_size`` is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    g = Siren(in_features=3, out_features=2, hidden_features=256,
              hidden_layers=4, outermost_linear=True)
    g.cuda()
    f1 = Siren(in_features=2, out_features=3, hidden_features=256,
               hidden_layers=4, outermost_linear=True)
    f1.cuda()
    f2 = Siren(in_features=3, out_features=3, hidden_features=256,
               hidden_layers=4, outermost_linear=True)
    f2.cuda()

    # A single optimiser updates all three networks jointly.
    optim = torch.optim.Adam(lr=1e-4, params=chain(g.parameters(), f1.parameters(), f2.parameters()))

    # The dataset is read through a batch_size=1 loader; its single item holds every
    # coordinate row and the matching ground-truth values, which are moved to the GPU once.
    v = VideoFitting(path)
    videoloader = DataLoader(v, batch_size=1, pin_memory=True, num_workers=0)
    model_input, ground_truth = next(iter(videoloader))
    model_input, ground_truth = model_input[0].cuda(), ground_truth[0].cuda()

    if batch_size is None:
        # Original default; the floor of 1 avoids an empty batch (NaN loss) on tiny frames.
        batch_size = max(1, (v.H * v.W) // 8)

    for step in range(total_steps):
        # Walk through the coordinate rows in consecutive windows, wrapping around
        # at the end; the last window before the wrap may be shorter.
        start = (step * batch_size) % len(model_input)
        end = min(start + batch_size, len(model_input))

        xyt = model_input[start:end].requires_grad_()
        xy, t = xyt[:, :-1], xyt[:, [-1]]
        h = g(xyt)
        xy_ = xy + h
        o_scene = torch.sigmoid(f1(xy_))
        # f2 is fed the re-concatenated (xy, t) rather than xyt so that `xy` itself is
        # part of the graph of o_obst, which jacobian(o_obst, xy) below relies on.
        o_obst = torch.sigmoid(f2(torch.cat((xy, t), -1)))
        o = o_scene + o_obst
        loss_recon = ((o - ground_truth[start:end]) ** 2).mean()
        loss_interf = o_obst.abs().mean()
        loss_flow = jacobian(h, xyt).abs().mean()

        # Exclusion loss: penalise locations where both layers have gradients.
        # n_scene / n_obst rescale the two gradient fields by the square root of the
        # ratio of their norms before the tanh.
        # NOTE(review): a zero gradient norm would make these ratios inf/NaN -- consider an epsilon.
        g_scene = jacobian(o_scene, xy_)
        g_obst = jacobian(o_obst, xy)
        n_scene = (g_obst.norm(dim=0, keepdim=True) / g_scene.norm(dim=0, keepdim=True)).sqrt()
        n_obst = (g_scene.norm(dim=0, keepdim=True) / g_obst.norm(dim=0, keepdim=True)).sqrt()
        loss_excl = (torch.tanh(n_scene * g_scene) * torch.tanh(n_obst * g_obst)).pow(2).mean()

        loss = loss_recon + lambda_interf * loss_interf + lambda_flow * loss_flow + lambda_excl * loss_excl

        # `verbose` used to be ignored; it now gates the progress output.
        if verbose and not step % steps_til_summary:
            print("Step [%04d/%04d]: recon=%0.8f, interf=%0.4f, flow=%0.4f, excl=%0.4f" % (step, total_steps, loss_recon, loss_interf, loss_flow, loss_excl))

        optim.zero_grad()
        loss.backward()
        optim.step()

    return g, f1, f2, v.video

In [5]:
# Train the reflection-separation networks for 3000 steps on './data/reflection'.
# NOTE: the recorded run of this cell ended in a CUDA out-of-memory error on a 12 GiB GPU.
g, f1, f2, orig = train_reflection('./data/reflection', 3000)

RuntimeError: CUDA out of memory. Tried to allocate 96.00 MiB (GPU 0; 12.00 GiB total capacity; 11.27 GiB already allocated; 0 bytes free; 11.28 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Evaluate the trained networks on the full grid and turn the scene layer, the
# interference layer and the input video into lists of uint8 (H, W, 3) frames.
with torch.no_grad():
    if torch.is_tensor(orig):
        # First run: `orig` is still the (N, C, H, W) tensor returned by training.
        N, _, H, W = orig.size()
        # .cpu() keeps the conversion working even if the video tensor lives on the GPU.
        orig_np = orig.permute(0, 2, 3, 1).detach().cpu().numpy()
        orig_np = (orig_np * 255).astype(np.uint8)
        orig = [orig_np[i] for i in range(len(orig_np))]
    else:
        # Re-run: `orig` was already converted to a list of (H, W, 3) frames by an
        # earlier execution of this cell, so recover the sizes from the frames.
        N = len(orig)
        H, W = orig[0].shape[:2]

    xyt = get_mgrid([H, W, N]).cuda()
    h = g(xyt)
    o_scene = torch.sigmoid(f1(xyt[:, :-1] + h))
    o_obst = torch.sigmoid(f2(xyt))

    # Rows are laid out as (H, W, N); reorder to frame-major (N, H, W, 3).
    # No .detach() is needed: nothing computed under no_grad requires grad.
    o_scene = o_scene.view(H, W, N, 3).permute(2, 0, 1, 3).cpu().numpy()
    o_obst = o_obst.view(H, W, N, 3).permute(2, 0, 1, 3).cpu().numpy()
    o_scene = (o_scene * 255).astype(np.uint8)
    o_obst = (o_obst * 255).astype(np.uint8)
    o_scene = [o_scene[i] for i in range(len(o_scene))]
    o_obst = [o_obst[i] for i in range(len(o_obst))]

In [None]:
# Save out video
# ! pip install --user imageio imageio-ffmpeg
import imageio
from base64 import b64encode
from pathlib import Path
from IPython.display import HTML

# Output files (the original name of the first one was misspelled 'reflecrtion_orig.mp4').
fn_orig = './data/reflection_orig.mp4'
fn_scene = './data/reflection_scene.mp4'
fn_obst = './data/reflection_interf.mp4'
imageio.mimwrite(fn_orig, orig, fps=1)
imageio.mimwrite(fn_scene, o_scene, fps=1)
imageio.mimwrite(fn_obst, o_obst, fps=1)

# Display video inline
# Each file is embedded as a base64 data URL; Path.read_bytes() opens, reads and
# closes the file, so no handle is left open.
data_url_orig = "data:video/mp4;base64," + b64encode(Path(fn_orig).read_bytes()).decode()
data_url_scene = "data:video/mp4;base64," + b64encode(Path(fn_scene).read_bytes()).decode()
data_url_obst = "data:video/mp4;base64," + b64encode(Path(fn_obst).read_bytes()).decode()
HTML(f'''
<video width=384 controls autoplay loop>
      <source src="{data_url_orig}" type="video/mp4">
</video>
<video width=384 controls autoplay loop>
      <source src="{data_url_scene}" type="video/mp4">
</video>
<video width=384 controls autoplay loop>
      <source src="{data_url_obst}" type="video/mp4">
</video>
''')

In [4]:
def train_fence(path, total_steps, lambda_interf=0.001, lambda_flow=0.02, verbose=True, steps_til_summary=100, batch_size=None):
    """Fit three SIREN networks that split a video into a scene layer and an alpha-blended obstruction layer.

    Networks (all created on the GPU):
      * ``g``  : (x, y, t) -> 2-D offset ``h`` that is added to (x, y).
      * ``f1`` : warped (x, y) -> 3-channel scene colour (after sigmoid); built
        with ``first_omega_0=90.``.
      * ``f2`` : (x, y, t) -> 4 channels (after sigmoid): the first three are the
        obstruction colour, the last one is the blending weight ``alpha``.
    The reconstruction is ``(1 - alpha) * scene + alpha * obstruction``.

    Args:
        path: Passed straight to ``VideoFitting`` (presumably the location of the
            input frames -- confirm against ``util.VideoFitting``).
        total_steps: Number of optimisation steps.
        lambda_interf: Weight of the mean-absolute-value penalty on ``alpha``.
        lambda_flow: Weight of the mean-absolute-value penalty on ``jacobian(h, xyt)``.
        verbose: When False, the periodic progress line is suppressed.
        steps_til_summary: Print a progress line every this many steps.
        batch_size: Number of coordinate rows used per step. ``None`` (default)
            keeps the original choice of ``(v.H * v.W) // 8`` (but never less
            than one row). Pass a smaller value to lower per-step memory use.

    Returns:
        Tuple ``(g, f1, f2, v.video)``.

    Raises:
        ValueError: If an explicit ``batch_size`` is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    g = Siren(in_features=3, out_features=2, hidden_features=256,
              hidden_layers=4, outermost_linear=True)
    g.cuda()
    f1 = Siren(in_features=2, out_features=3, hidden_features=256,
               hidden_layers=4, outermost_linear=True, first_omega_0=90.)
    f1.cuda()
    f2 = Siren(in_features=3, out_features=4, hidden_features=256,
               hidden_layers=4, outermost_linear=True)
    f2.cuda()

    # A single optimiser updates all three networks jointly.
    optim = torch.optim.Adam(lr=1e-4, params=chain(g.parameters(), f1.parameters(), f2.parameters()))

    # The dataset is read through a batch_size=1 loader; its single item holds every
    # coordinate row and the matching ground-truth values, which are moved to the GPU once.
    v = VideoFitting(path)
    videoloader = DataLoader(v, batch_size=1, pin_memory=True, num_workers=0)
    model_input, ground_truth = next(iter(videoloader))
    model_input, ground_truth = model_input[0].cuda(), ground_truth[0].cuda()

    if batch_size is None:
        # Original default; the floor of 1 avoids an empty batch (NaN loss) on tiny frames.
        batch_size = max(1, (v.H * v.W) // 8)

    for step in range(total_steps):
        # Walk through the coordinate rows in consecutive windows, wrapping around
        # at the end; the last window before the wrap may be shorter.
        start = (step * batch_size) % len(model_input)
        end = min(start + batch_size, len(model_input))

        xyt = model_input[start:end].requires_grad_()
        xy, t = xyt[:, :-1], xyt[:, [-1]]
        h = g(xyt)
        xy_ = xy + h
        o_scene = torch.sigmoid(f1(xy_))
        o_obst = torch.sigmoid(f2(xyt))
        # Split the 4-channel output into colour (first 3) and alpha (last).
        o_obst, alpha = o_obst[:, :-1], o_obst[:, [-1]]
        o = (1 - alpha) * o_scene + alpha * o_obst
        loss_recon = ((o - ground_truth[start:end]) ** 2).mean()
        loss_interf = alpha.abs().mean()
        loss_flow = jacobian(h, xyt).abs().mean()
        loss = loss_recon + lambda_interf * loss_interf + lambda_flow * loss_flow

        # `verbose` used to be ignored; it now gates the progress output.
        if verbose and not step % steps_til_summary:
            print("Step [%04d/%04d]: recon=%0.8f, interf=%0.4f, flow=%0.4f" % (step, total_steps, loss_recon, loss_interf, loss_flow))

        optim.zero_grad()
        loss.backward()
        optim.step()

    return g, f1, f2, v.video

In [5]:
# Train the fence-removal networks for 3000 steps on './data/fence'.
# This rebinds g, f1, f2 and orig, replacing whatever an earlier training cell left in them.
g, f1, f2, orig = train_fence('./data/fence', 3000)

Step [0000/3000]: recon=0.04113999, interf=0.5004, flow=0.1452
Step [0100/3000]: recon=0.01423756, interf=0.3495, flow=0.0199
Step [0200/3000]: recon=0.01251351, interf=0.2399, flow=0.0134
Step [0300/3000]: recon=0.01279197, interf=0.1896, flow=0.0182
Step [0400/3000]: recon=0.01254740, interf=0.1663, flow=0.0168
Step [0500/3000]: recon=0.00977877, interf=0.0982, flow=0.0085
Step [0600/3000]: recon=0.01007900, interf=0.0801, flow=0.0074
Step [0700/3000]: recon=0.00897714, interf=0.0726, flow=0.0087
Step [0800/3000]: recon=0.00855899, interf=0.0631, flow=0.0064
Step [0900/3000]: recon=0.01825585, interf=0.0927, flow=0.0278
Step [1000/3000]: recon=0.00818218, interf=0.0575, flow=0.0072
Step [1100/3000]: recon=0.00769778, interf=0.0533, flow=0.0057
Step [1200/3000]: recon=0.00782783, interf=0.0522, flow=0.0053
Step [1300/3000]: recon=0.00716166, interf=0.0515, flow=0.0049
Step [1400/3000]: recon=0.00665164, interf=0.0511, flow=0.0046
Step [1500/3000]: recon=0.00655094, interf=0.0519, flow

In [6]:
# Evaluate the trained networks on the full grid and turn the scene layer, the
# alpha-weighted obstruction layer and the input video into lists of uint8
# (H, W, 3) frames.
with torch.no_grad():
    if torch.is_tensor(orig):
        # First run: `orig` is still the (N, C, H, W) tensor returned by training.
        N, _, H, W = orig.size()
        # .cpu() keeps the conversion working even if the video tensor lives on the GPU.
        orig_np = orig.permute(0, 2, 3, 1).detach().cpu().numpy()
        orig_np = (orig_np * 255).astype(np.uint8)
        orig = [orig_np[i] for i in range(len(orig_np))]
    else:
        # Re-run: `orig` was already converted to a list of (H, W, 3) frames by an
        # earlier execution of this cell, so recover the sizes from the frames.
        N = len(orig)
        H, W = orig[0].shape[:2]

    xyt = get_mgrid([H, W, N]).cuda()
    h = g(xyt)
    o_scene = torch.sigmoid(f1(xyt[:, :-1] + h))
    o_obst = torch.sigmoid(f2(xyt))
    # Weight the obstruction colour (first 3 channels) by its alpha (last channel).
    o_obst = o_obst[:, :-1] * o_obst[:, [-1]]

    # Rows are laid out as (H, W, N); reorder to frame-major (N, H, W, 3).
    # No .detach() is needed: nothing computed under no_grad requires grad.
    o_scene = o_scene.view(H, W, N, 3).permute(2, 0, 1, 3).cpu().numpy()
    o_obst = o_obst.view(H, W, N, 3).permute(2, 0, 1, 3).cpu().numpy()
    o_scene = (o_scene * 255).astype(np.uint8)
    o_obst = (o_obst * 255).astype(np.uint8)
    o_scene = [o_scene[i] for i in range(len(o_scene))]
    o_obst = [o_obst[i] for i in range(len(o_obst))]

In [7]:
# Save out video
# ! pip install --user imageio imageio-ffmpeg
import imageio
from base64 import b64encode
from pathlib import Path
from IPython.display import HTML

fn_orig = './data/fence_orig.mp4'
fn_scene = './data/fence_scene.mp4'
fn_obst = './data/fence_interf.mp4'
imageio.mimwrite(fn_orig, orig, fps=1)
imageio.mimwrite(fn_scene, o_scene, fps=1)
imageio.mimwrite(fn_obst, o_obst, fps=1)

# Display video inline
# Each file is embedded as a base64 data URL; Path.read_bytes() opens, reads and
# closes the file, so no handle is left open.
data_url_orig = "data:video/mp4;base64," + b64encode(Path(fn_orig).read_bytes()).decode()
data_url_scene = "data:video/mp4;base64," + b64encode(Path(fn_scene).read_bytes()).decode()
data_url_obst = "data:video/mp4;base64," + b64encode(Path(fn_obst).read_bytes()).decode()
HTML(f'''
<video width=512 controls autoplay loop>
      <source src="{data_url_orig}" type="video/mp4">
</video>
<video width=512 controls autoplay loop>
      <source src="{data_url_scene}" type="video/mp4">
</video>
<video width=512 controls autoplay loop>
      <source src="{data_url_obst}" type="video/mp4">
</video>
''')

