In [4]:
import argparse
import os

import numpy as np
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import *

from gmflow.gmflow import GMFlow
from gmflow.loss import flow_loss_func
from gmflow.liteflownet import LiteFlowNet
from evaluate import inference_on_dir
from main import get_args_parser

from utils.logger import Logger
from utils import misc
from data.datasets import build_train_dataset

In [4]:
# Build the project's argument parser and parse an empty argument list,
# so every option takes its default value.
parser = get_args_parser()
args = parser.parse_args([])

In [4]:
# Display the value of the `supervise` option on the parsed arguments.
args.supervise

False

In [5]:
# Record the run configuration, but only for a training run (no eval,
# submission or directory inference requested) and only on rank 0.
if not (args.eval or args.submission) and args.inference_dir is None and args.local_rank == 0:
    print('pytorch version:', torch.__version__)
    print(args)
    misc.save_args(args)
    misc.check_path(args.checkpoint_dir)
    misc.save_command(args.checkpoint_dir)

# Seed numpy and torch from the same configured value.
seed = args.seed
np.random.seed(seed)
torch.manual_seed(seed)

# Let cuDNN benchmark and pick convolution algorithms.
torch.backends.cudnn.benchmark = True

# The device is pinned to CUDA here; there is no CPU fallback in this cell.
device = 'cuda'

# Build the LiteFlowNet model from the parsed options and move it to the device.
model = LiteFlowNet(args).to(device)

pytorch version: 1.13.1+cu117
Namespace(attention_type='swin', attn_splits_list=[2], batch_size=2, checkpoint_dir='tmp', corr_radius_list=[-1], count_time=False, dir_paired_data=False, distributed=False, eval=False, evaluate_matched_unmatched=False, feature_channels=128, ffn_dim_expansion=4, fwd_bwd_consistency_check=False, gamma=0.9, gpu_ids=0, grad_clip=1.0, image_size=[320, 1200], inference_dir=None, inference_size=None, lambda_biflow=1, lambda_photowarp=2, lambda_smooth=1, launcher='none', local_rank=0, lr=0.0001, max_flow=400, no_resume_optimizer=False, no_save_flo=False, num_head=1, num_scales=1, num_steps=100000, num_transformer_layers=6, num_workers=20, output_path='output', padding_factor=16, pred_bidir_flow=False, prop_radius_list=[-1], resume=None, save_ckpt_freq=10000, save_eval_to_file=False, save_flo_flow=False, save_latest_ckpt_freq=1000, save_vis_flow=False, seed=326, sequence_length=6, strict_resume=False, submission=False, summary_freq=100, supervise=False, train_dir=

In [6]:
# Print the model and wrap it in DataParallel for a training run.
# `model_without_ddp` always ends up bound to the bare (unwrapped) module,
# because later cells build the optimizer and load checkpoints through it.
if not args.eval and not args.submission and not args.inference_dir:
    print('Model definition:')
    print(model)

    print('Use %d GPUs' % torch.cuda.device_count())
    # Guard against wrapping twice when this cell is re-run in the same kernel.
    if not isinstance(model, torch.nn.DataParallel):
        model = torch.nn.DataParallel(model)
    model_without_ddp = model.module
else:
    # Eval / submission / inference: no wrapping, but the name must still exist
    # for the cells below (optimizer construction, checkpoint loading).
    model_without_ddp = model.module if isinstance(model, torch.nn.DataParallel) else model

Model definition:
LiteFlowNet(
  (moduleFeatures): Features(
    (moduleOne): Sequential(
      (0): Conv2d(3, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
      (1): LeakyReLU(negative_slope=0.1)
    )
    (moduleTwo): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.1)
      (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): LeakyReLU(negative_slope=0.1)
      (4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): LeakyReLU(negative_slope=0.1)
    )
    (moduleThr): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): LeakyReLU(negative_slope=0.1)
      (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): LeakyReLU(negative_slope=0.1)
    )
    (moduleFou): Sequential(
      (0): Conv2d(64, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (1): LeakyReLU(

In [7]:
# Report the total number of parameters in the model.
num_params = sum(param.numel() for param in model.parameters())
print('Number of params:', num_params)

# For a training run, touch an empty marker file in the checkpoint directory
# whose name records the parameter count.
if not (args.eval or args.submission) and args.inference_dir is None:
    save_name = '%d_parameters' % num_params
    with open(os.path.join(args.checkpoint_dir, save_name), 'a'):
        pass

# AdamW over the unwrapped module's parameters.
optimizer = torch.optim.AdamW(
    model_without_ddp.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)

# Resume from a checkpoint when one is given.
start_step = 0
if args.resume:
    print('Load checkpoint: %s' % args.resume)

    loc = 'cuda:{}'.format(args.local_rank)
    checkpoint = torch.load(args.resume, map_location=loc)

    # A checkpoint is either a dict with a 'model' entry or the state dict itself.
    weights = checkpoint.get('model', checkpoint)
    model_without_ddp.load_state_dict(weights, strict=args.strict_resume)

    # Optimizer state and step counter are restored only when the checkpoint
    # carries all three bookkeeping keys and restoring is not disabled.
    if not args.no_resume_optimizer and all(
            key in checkpoint for key in ('optimizer', 'step', 'epoch')):
        print('Load optimizer')
        optimizer.load_state_dict(checkpoint['optimizer'])
        start_step = checkpoint['step']

    print(' start_step: %d' % (start_step))

# Inference on a directory, when one is given.
if args.inference_dir is not None:
    inference_on_dir(
        model_without_ddp,
        inference_dir=args.inference_dir,
        output_path=args.output_path,
        padding_factor=args.padding_factor,
        inference_size=args.inference_size,
        paired_data=args.dir_paired_data,
        save_flo_flow=args.save_flo_flow,
        attn_splits_list=args.attn_splits_list,
        corr_radius_list=args.corr_radius_list,
        prop_radius_list=args.prop_radius_list,
        pred_bidir_flow=args.pred_bidir_flow,
        fwd_bwd_consistency_check=args.fwd_bwd_consistency_check,
    )

Number of params: 5381969


In [8]:
# Build the training dataset from the parsed options.
train_dataset = build_train_dataset(args)
print('Number of training images:', len(train_dataset))

# Shuffled multi-worker loader; an incomplete final batch is dropped.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers,
    pin_memory=True,
    drop_last=True,
)

# When resuming past step 0, the scheduler continues from the restored step;
# otherwise it starts fresh (-1).
if args.resume and start_step > 0:
    last_epoch = start_step
else:
    last_epoch = -1

# One-cycle schedule with cosine annealing and a 5% warm-up.
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=args.lr,
    total_steps=args.num_steps + 10,
    pct_start=0.05,
    cycle_momentum=False,
    anneal_strategy='cos',
    last_epoch=last_epoch,
)

# The TensorBoard writer and logger are only created on rank 0.
if args.local_rank == 0:
    summary_writer = SummaryWriter(args.checkpoint_dir)
    logger = Logger(lr_scheduler, summary_writer, args.summary_freq,
                    start_step=start_step)

Number of training images: 1925


In [9]:
# Create an iterator over the training loader so batches can be pulled
# one at a time with next() in the following cells.
fetch = iter(train_loader)
# Switch the model to training mode.
model.train()
print('Start training')

Start training


In [10]:
# Pull the next batch from the loader iterator.
inputs = next(fetch)
# Move both image stacks of the batch to the compute device.
img1, img2 = inputs['img1'].to(device), inputs['img2'].to(device)
# The 'name' entry of the batch (presumably sample identifiers - confirm against the dataset).
flow_name = inputs['name']

In [11]:
# Shape of the slice at index 0 along dim 1 of img2; the training cell
# iterates over that same dim with `seq`.
img2[:, 0, ::].shape

torch.Size([2, 3, 1440, 327])

In [12]:
# Run the model on every frame pair of the sequence and reduce the
# per-step losses to one scalar `loss`.
#
# NOTE(review): this cell ran out of GPU memory in the recorded run. Every
# step's output and loss stay referenced until the end of the loop, so with
# gradients enabled all steps' activations are presumably alive at once.
# Calling backward per step (or lowering batch/sequence size) would reduce
# the peak - confirm against the training loop in the project.
out = []
step_losses = []
for seq in range(args.sequence_length):
    sout, sloss = model(img1[:, seq, ::], img2[:, seq, ::])
    out.append(sout)
    # Reduce each step's loss to a 0-dim tensor immediately, so the final
    # reduction works whether the model returns a scalar or a per-replica vector.
    step_losses.append(sloss.sum())
# torch.stack accepts 0-dim tensors, whereas torch.cat rejects them.
loss = torch.stack(step_losses).sum()



OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 47.54 GiB total capacity; 46.93 GiB already allocated; 5.56 MiB free; 47.14 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# test

In [1]:
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import argparse
import numpy as np
import os
from tqdm import *
import torchvision
torchvision.disable_beta_transforms_warning()

from gmflow.GMflow import GMFlow
from gmflow.loss import flow_loss_func
from gmflow.liteflownet import LiteFlowNet
from evaluate import inference_on_dir

from utils.logger import Logger
from utils import misc
from data.datasets import build_train_dataset
from glob import glob
from utils.flow_viz import flow_to_image
from torchvision import transforms
from main import get_args_parser
from utils.flow_viz import save_vis_flow_tofile
from PIL import Image
import cv2 as cv

ModuleNotFoundError: No module named 'cupy'

In [None]:
# Build the project's argument parser and parse an empty argument list,
# so every option takes its default value.
parser = get_args_parser()
args = parser.parse_args([])

In [None]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
# Build a GMFlow model from the parsed options and move it to `device`.
model = GMFlow(feature_channels=args.feature_channels,
                   num_scales=args.num_scales,
                   upsample_factor=args.upsample_factor,
                   num_head=args.num_head,
                   attention_type=args.attention_type,
                   ffn_dim_expansion=args.ffn_dim_expansion,
                   num_transformer_layers=args.num_transformer_layers,
                   ).to(device)

In [5]:
# Build a LiteFlowNet model from the parsed options and move it to `device`.
# NOTE(review): this rebinds `model`, the same name the GMFlow cell assigns;
# whichever of the two cells ran last decides which inference cell below works.
model = LiteFlowNet(args).to(device)

In [8]:
# Load the latest checkpoint onto the active device. `map_location` lets a
# checkpoint saved from CUDA tensors load when `device` is the CPU.
checkpoint = torch.load('tmp-1/checkpoint_latest.pth', map_location=device)
# A checkpoint is either a dict with a 'model' entry or the state dict itself.
weights = checkpoint['model'] if 'model' in checkpoint else checkpoint
# Left as the last expression so the key-matching report is displayed.
model.load_state_dict(weights)

<All keys matched successfully>

In [9]:
def channel_norm(img, datamin=(980,), datamax=(7979,)):
    """Min-max rescale slices of ``img`` along its leading axis, in place.

    For each index ``i`` covered by ``datamin``/``datamax``, ``img[i]`` is
    replaced by ``(img[i] - datamin[i]) / (datamax[i] - datamin[i])``.
    Slices beyond ``len(datamin)`` are left untouched.

    Args:
        img: Indexable array/tensor; it is modified in place.
        datamin: Per-slice minimum values (default: ``(980,)``).
        datamax: Per-slice maximum values (default: ``(7979,)``).

    Returns:
        The same ``img`` object that was passed in.

    Raises:
        ValueError: If ``datamin`` and ``datamax`` differ in length.

    NOTE(review): indexing is on the leading axis, so with the defaults only
    ``img[0]`` is rescaled. For a 2-D (H, W) input that is the first row only,
    not the whole image - confirm the expected input layout.
    """
    if len(datamin) != len(datamax):
        raise ValueError('datamin and datamax must have the same length')
    for i, (lo, hi) in enumerate(zip(datamin, datamax)):
        img[i] = (img[i] - lo) / (hi - lo)
    return img

# Preprocessing applied to each frame before inference: resize to the
# configured image size, then normalize with mean 0.5 and std 0.5.
img_trans = transforms.Compose([
        # rand_crop,  (disabled)
        transforms.Resize(args.image_size, antialias=True),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ])

In [10]:
# Sorted list of the .pt frame files of one sequence; the inference cells
# pair consecutive entries as (img1, img2).
a = sorted(glob('halo/halo55/22584696/*.pt'))

In [11]:
# LiteFlowNet inference: predict flow between consecutive frames and save a
# color visualization per pair to img_flow/<index>.png.
# Expects `model` to be the LiteFlowNet instance (called with two images only).
os.makedirs('img_flow', exist_ok=True)  # img.save does not create directories
model.eval()
with torch.no_grad():
    for i in range(len(a) - 1):
        img1_path = a[i]
        img2_path = a[i + 1]
        s1 = torch.FloatTensor(torch.load(img1_path))
        s2 = torch.FloatTensor(torch.load(img2_path))
        s1 = channel_norm(s1)
        s2 = channel_norm(s2)
        # Drop the first 7 entries along dim 1, then add a leading axis.
        s1 = torch.unsqueeze(s1[:, 7:], dim=0)
        s2 = torch.unsqueeze(s2[:, 7:], dim=0)
        img1 = img_trans(s1)
        img2 = img_trans(s2)
        # Add the batch axis.
        img1 = img1.unsqueeze(0)
        img2 = img2.unsqueeze(0)
        # Send inputs to the device the model was moved to.
        outs = model(img1.to(device), img2.to(device))
        outs = outs[0].cpu()
        # Convert to channel-last numpy and colorize with the already-imported
        # `flow_to_image`, the same way the GMFlow cell does.
        # assumes outs is a (2, H, W) flow tensor - TODO confirm
        flow = outs.permute(1, 2, 0).numpy()
        color = flow_to_image(flow)
        img = Image.fromarray(color)
        img.save('img_flow/{}.png'.format(i))

TypeError: object of type 'NoneType' has no len()

# gmflow

In [12]:
#gmflow
model.eval()
with torch.no_grad():
    for i in range(len(a)-1):
        img1_path = a[i]
        img2_path = a[i+1]
        s1 = torch.FloatTensor(torch.load(img1_path))
        s2 = torch.FloatTensor(torch.load(img2_path))
        s1 = channel_norm(s1)
        s2 = channel_norm(s2)
        s1 = torch.unsqueeze(s1[:, 7:], dim=0)
        s2 = torch.unsqueeze(s2[:, 7:], dim=0)
        img1 = img_trans(s1)
        img2 = img_trans(s2)
        img1 = img1.unsqueeze(0)
        img2 = img2.unsqueeze(0)
        a1 = model(img1.to('cuda'), img2.to('cuda'),
               attn_splits_list=args.attn_splits_list,
               corr_radius_list=args.corr_radius_list,
               prop_radius_list=args.prop_radius_list, )
        outs = a1['flow_preds']
        flow = outs[0][0].cpu()
        flow = flow.permute(1, 2, 0)
        flow = flow.detach().cpu().numpy()
        flow = np.maximum(flow, np.percentile(flow, 0.01))
        flow = np.minimum(flow,np.percentile(flow, 99.9))
        flow = cv.GaussianBlur(flow,(5,5),0)
        color = flow_to_image(flow)
        img = Image.fromarray(color)
        img.save('img_flow/{}.png'.format(i))

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


In [None]:
# Scan one full pass of the training loader for NaN values in either image stack.
# The loop is driven by the loader iterator itself: the loader yields batches
# (batch_size samples each, last incomplete batch dropped), so counting up to
# len(train_dataset) with next() would run past the end and raise StopIteration.
fetch = iter(train_loader)
for i, inputs in enumerate(fetch):
    # Swap the first two axes of each stack (as the original cell did).
    img1 = inputs['img1'].permute(1, 0, 2, 3, 4).contiguous()
    img2 = inputs['img2'].permute(1, 0, 2, 3, 4).contiguous()
    if torch.isnan(img1).any() or torch.isnan(img2).any():
        print('have nan in batch %d' % i)