# Realistic Cross-View Image Geo-Localization
_adjusted to GPU_


### Setup

In [1]:
from google.colab import drive
import os

ROOT = '/content/gdrive'
drive.mount(ROOT) # mount drive

%cd "/content/gdrive/MyDrive/adl4cv/Realistic-Cross-View-Image-Geo-Localization/"

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/adl4cv/Realistic-Cross-View-Image-Geo-Localization


### Imports

In [2]:
# General packages
import time
import matplotlib.pyplot as plt
import numpy as np
from argparse import Namespace

# Modeling packages
import torch
import torch.nn as nn
import torch.nn.functional as F

%matplotlib inline
plt.rcParams['figure.figsize'] = (4.0, 3.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'
plt.rc('xtick', labelsize=12)    # fontsize of the tick labels
plt.rc('ytick', labelsize=12)    # fontsize of the tick labels
plt.rc('legend', fontsize=10)    # legend fontsize

# for auto-reloading external modules see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [None]:
# inspect data
# PATH_TO_DATA_DIR = "/content/gdrive/MyDrive/Cross_view_geo_localization/CVUSA"
# sys.path.append(PATH_TO_DATA_DIR)

### Prepare data

In [None]:
# Based on data/convert_polar.py
############################ Apply Polar Transform to Aerial Images in CVUSA Dataset ############################
from data.convert_polar import polar_transform_CVUSA

# If os.listdir times out (Colab reportedly does that after about 2 minutes), simply retry;
# it can take 8-10 attempts before it succeeds. Background:
# https://stackoverflow.com/questions/54973331/input-output-error-while-using-google-colab-with-google-drive
# As suggested in that thread, you can first check whether a single file is visible with
# !ls '../CVUSA/bingmap/19/' | grep "0044417"
# If that lookup fails, os.listdir will probably fail as well.

# One-off preprocessing step that takes approx. 4 hours; uncomment the call to run it.
# NOTE(review): the paths below are relative to the parent directory ('../CVUSA/...'), while the
# next cell lists 'data/CVUSA/...' - confirm which location is current before running.
# polar_transform_CVUSA(input_dir = '../CVUSA/bingmap/19/', output_dir = '../CVUSA/polarmap/')



In [None]:
# Print the number of entries in the polar-transformed and the original aerial image folders.
for image_dir in ('data/CVUSA/polarmap/', 'data/CVUSA/bingmap/19/'):
    print(len(os.listdir(image_dir)))

### Train model 

In [3]:
# Release GPU memory that is still held from earlier runs.
# After an attempt to allocate too much, the memory can stay occupied ("stuck");
# if this cell does not free it, a factory reset of the runtime is needed.
import gc
gc.collect()
torch.cuda.empty_cache()

# Show the currently allocated GPU memory
!nvidia-smi

Sat Dec 11 15:18:21 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   72C    P8    36W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
# Project-local imports live in this cell (not in the top import cell) so that they are
# re-executed every time the cell runs; these modules are modified frequently.
# NOTE(review): the star import is presumably what provides `transforms`,
# `RandomHorizontalFlip` and `ToTensor` used below - confirm and import them explicitly.
from data.custom_transforms import *
from data.cvusa_utils import CVUSA
from networks.c_gan import define_G, define_D, define_R
from utils import rgan_wrapper, base_wrapper, parser
from utils.setup_helper import make_deterministic, get_sys_mem

# Running training metrics are logged every this many batches and on the last batch of an epoch.
LOG_EVERY_N_BATCHES = 40

parse = parser.Parser()
opt, log_file = parse.parse()
# NOTE(review): the options dump printed by the parser lists `isTrain`, not `is_Train` -
# confirm which attribute the wrapper actually reads.
opt.is_Train = True
make_deterministic(opt.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in opt.gpu_ids)

# The context manager guarantees the log file is flushed and closed even when training is
# interrupted (exception, manual stop), so already written log lines are not lost.
with open(log_file, 'a') as log:
    log_print = lambda ms: parse.log(ms, log)

    # define networks
    # Generator and discriminator are disabled in this cell; only the retrieval network is trained.

    # generator = define_G(netG=opt.g_model, gpu_ids=opt.gpu_ids)
    # log_print('Init {} as generator model'.format(opt.g_model))

    # discriminator = define_D(input_c=opt.input_c, output_c=opt.realout_c, ndf=opt.feature_c, netD=opt.d_model,
    #                             condition=opt.condition, n_layers_D=opt.n_layers, gpu_ids=opt.gpu_ids)
    # log_print('Init {} as discriminator model'.format(opt.d_model))

    retrieval = define_R(ret_method=opt.r_model, polar=opt.polar, gpu_ids=opt.gpu_ids)
    log_print('Init {} as retrieval model'.format(opt.r_model))

    # Bind the wrapper instance to its own name so the imported `rgan_wrapper` module is not
    # shadowed; later cells can then still call `rgan_wrapper.RGANWrapper(...)`.
    rgan = rgan_wrapper.RGANWrapper(opt, log_file, retrieval)

    # Configure data loader
    composed_transforms = transforms.Compose([RandomHorizontalFlip(),
                                              ToTensor()])
    train_dataset = CVUSA(root=opt.data_root, csv_file=opt.train_csv, use_polar=opt.polar, name=opt.name,
                          transform_op=composed_transforms, load_pickle=True)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=0)

    val_dataset = CVUSA(root=opt.data_root, csv_file=opt.val_csv, use_polar=opt.polar, name=opt.name,
                        transform_op=ToTensor(), load_pickle=True)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=0)
    log_print('Load datasets from {}: train_set={} val_set={}'.format(opt.data_root, len(train_dataset), len(val_dataset)))

    ret_best_acc = rgan.ret_best_acc
    log_print('Start training from epoch {} to {}, best acc: {}'.format(opt.start_epoch, opt.n_epochs, ret_best_acc))
    for epoch in range(opt.start_epoch, opt.n_epochs):
        start_time = time.time()
        # Embeddings collected since the last metrics log (training) / over the whole epoch (validation).
        street_batches_t = []
        fake_street_batches_t = []
        street_batches_v = []
        fake_street_batches_v = []
        # Retrieval loss of every batch in this epoch; its running mean is what gets logged.
        epoch_retrieval_loss = []
        log_print('>>> RGAN Epoch {}'.format(epoch))
        # rgan.generator.train()
        # rgan.discriminator.train()
        rgan.retrieval.train()
        for i, data in enumerate(train_loader):  # inner loop within one epoch

            rgan.set_input(data)
            rgan.optimize_parameters(epoch)

            # Keep detached CPU copies so the stored embeddings hold neither GPU memory nor the graph.
            fake_street_batches_t.append(rgan.fake_street_out.detach().cpu())
            street_batches_t.append(rgan.street_out.detach().cpu())
            epoch_retrieval_loss.append(rgan.r_loss.item())

            if (i + 1) % LOG_EVERY_N_BATCHES == 0 or (i + 1) == len(train_loader):
                fake_street_vec = torch.cat(fake_street_batches_t, dim=0)
                street_vec = torch.cat(street_batches_t, dim=0)
                # Pairwise 2 - 2 * <a, b>; this equals the squared Euclidean distance if the
                # embeddings are L2-normalised - TODO confirm.
                dists = 2 - 2 * torch.matmul(fake_street_vec, street_vec.permute(1, 0))
                tp1 = rgan.mutual_topk_acc(dists, topk=1)
                tp5 = rgan.mutual_topk_acc(dists, topk=5)
                log_print('Batch:{} loss={:.3f} samples:{} tp1={tp1[0]:.2f}/{tp1[1]:.2f} ' \
                          'tp5={tp5[0]:.2f}/{tp5[1]:.2f}'.format(i + 1, np.mean(epoch_retrieval_loss),
                                                                 len(dists), tp1=tp1, tp5=tp5))
                # Start a fresh window of samples for the next metrics log.
                street_batches_t.clear()
                fake_street_batches_t.clear()

        rgan.save_networks(epoch, os.path.dirname(log_file), best_acc=ret_best_acc,
                           last_ckpt=True)  # Always save last ckpt

        # Save model periodically
        if (epoch + 1) % opt.save_step == 0:
            rgan.save_networks(epoch, os.path.dirname(log_file), best_acc=ret_best_acc)

        # rgan.generator.eval()
        rgan.retrieval.eval()
        # No gradients are needed for validation; disabling them avoids building autograd graphs.
        with torch.no_grad():
            for i, data in enumerate(val_loader):
                rgan.set_input(data)
                rgan.eval_model()
                fake_street_batches_v.append(rgan.fake_street_out_val.detach().cpu())
                street_batches_v.append(rgan.street_out_val.detach().cpu())

        fake_street_vec = torch.cat(fake_street_batches_v, dim=0)
        street_vec = torch.cat(street_batches_v, dim=0)
        dists = 2 - 2 * torch.matmul(fake_street_vec, street_vec.permute(1, 0))
        tp1 = rgan.mutual_topk_acc(dists, topk=1)
        tp5 = rgan.mutual_topk_acc(dists, topk=5)
        tp10 = rgan.mutual_topk_acc(dists, topk=10)

        num = len(dists)
        # Top-1% recall: k is 1% of the number of validation samples.
        tp1p = rgan.mutual_topk_acc(dists, topk=0.01 * num)
        acc = Namespace(num=len(dists), tp1=tp1, tp5=tp5, tp10=tp10, tp1p=tp1p)

        log_print('\nEvaluate Samples:{num:d}\nRecall(p2s/s2p) tp1:{tp1[0]:.2f}/{tp1[1]:.2f} ' \
                  'tp5:{tp5[0]:.2f}/{tp5[1]:.2f} tp10:{tp10[0]:.2f}/{tp10[1]:.2f} ' \
                  'tp1%:{tp1p[0]:.2f}/{tp1p[1]:.2f}'.format(num=acc.num, tp1=acc.tp1,
                                                            tp5=acc.tp5, tp10=acc.tp10, tp1p=acc.tp1p))

        # Save the best model (selected by the first component of the top-1 accuracy)
        tp1_p2s_acc = acc.tp1[0]
        if tp1_p2s_acc > ret_best_acc:
            ret_best_acc = tp1_p2s_acc
            rgan.save_networks(epoch, os.path.dirname(log_file), best_acc=ret_best_acc, is_best=True)
            log_print('>>Save best model: epoch={} best_acc(tp1_p2s):{:.2f}'.format(epoch + 1, tp1_p2s_acc))

        # Program statistics
        rss, vms = get_sys_mem()
        log_print('Memory usage: rss={:.2f}GB vms={:.2f}GB Time:{:.2f}s'.format(rss, vms, time.time() - start_time))

----------------- 
 Namespace(b1=0.5, b2=0.999, batch_size=16, condition=1, d_model='basic', data_root='./data/CVUSA', feature_c=64, g_model='unet-skip', gan_loss='vanilla', gpu_ids='0', hard_decay1_topk_ratio=0.1, hard_decay2_topk_ratio=0.05, hard_decay3_topk_ratio=0.01, hard_topk_ratio=1.0, input_c=3, isTrain=True, lambda=10, lambda_gp=10, lambda_l1=100, lambda_ret1=1000, lambda_sm=10, lr_d=0.0001, lr_g=0.0001, lr_r=0.0001, n_critic=1, n_epochs=10, n_layers=3, name='', phase='train', polar=True, r_model='SAFA', realout_c=3, results_dir='./output', resume=True, rgan_checkpoint=None, save_step=10, seed=10, segout_c=3, start_epoch=0, train_csv='train-19zl-10.csv', val_csv='val-19zl-3.csv')
----------------- Options ---------------
                       b1: 0.5                           
                       b2: 0.999                         
               batch_size: 16                            
                condition: 1                             
                  d_model: b

### First run: ran for about 2 h 20 min, then got banned between batch 320 (5,120 images) and batch 360 (5,760 images)

RGAN Epoch 0
Batch:40 loss=0.227 samples:640 tp1=26.41/26.56 tp5=47.34/49.69

Batch:80 loss=0.138 samples:640 tp1=48.91/49.53 tp5=76.88/77.66

Batch:120 loss=0.105 samples:640 tp1=59.38/62.34 tp5=84.53/85.16

Batch:160 loss=0.084 samples:640 tp1=57.19/54.84 tp5=81.41/83.91

Batch:200 loss=0.071 samples:640 tp1=64.22/65.31 tp5=86.09/86.72

Batch:240 loss=0.062 samples:640 tp1=67.50/67.19 tp5=88.75/89.84

Batch:280 loss=0.056 samples:640 tp1=70.31/70.16 tp5=90.16/89.22

Batch:320 loss=0.051 samples:640 tp1=68.59/68.75 tp5=90.47/91.41

In [4]:
# Check whether a given image can be found in our data folder.
# If this times out (about 2 min), then the data loader probably won't work either;
# re-run this cell until the lookup succeeds.
!ls 'data/CVUSA/polarmap/' | grep "0041073"

0041073.jpg


### Test model

In [None]:
# Re-import the project modules in this cell so it does not depend on the training cell
# having been run first, and so `rgan_wrapper` refers to the module again even if an
# earlier cell rebound that name to a wrapper instance.
# NOTE(review): the star import is presumably what provides `ToTensor` - confirm.
from data.custom_transforms import *
from data.cvusa_utils import CVUSA
from networks.c_gan import define_G, define_D, define_R
from utils import rgan_wrapper, parser
from utils.setup_helper import make_deterministic

parse = parser.Parser()
opt, log_file = parse.parse()
# NOTE(review): this evaluation cell sets `is_Train = True` exactly like the training cell,
# and the parser's options dump lists `isTrain` instead - confirm the intended flag and value.
opt.is_Train = True
make_deterministic(opt.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in opt.gpu_ids)

# The context manager guarantees the log file is flushed and closed even if evaluation fails.
with open(log_file, 'a') as log:
    log_print = lambda ms: parse.log(ms, log)

    # define networks
    generator = define_G(netG=opt.g_model, gpu_ids=opt.gpu_ids)
    print('Init {} as generator model'.format(opt.g_model))

    discriminator = define_D(input_c=opt.input_c, output_c=opt.realout_c, ndf=opt.feature_c, netD=opt.d_model,
                             condition=opt.condition, n_layers_D=opt.n_layers, gpu_ids=opt.gpu_ids)
    print('Init {} as discriminator model'.format(opt.d_model))

    retrieval = define_R(ret_method=opt.r_model, polar=opt.polar, gpu_ids=opt.gpu_ids)
    print('Init {} as retrieval model'.format(opt.r_model))

    # Initialize network wrapper
    if opt.resume:
        # Placeholder path: point this at the directory that holds the checkpoint to evaluate.
        opt.rgan_checkpoint = os.path.join('./placeholder_checkpoint_path', 'rgan_best_ckpt.pth')

    # NOTE(review): the training cell constructs RGANWrapper(opt, log_file, retrieval) without
    # generator/discriminator - confirm that this 5-argument form matches the current class.
    # The instance gets its own name so the `rgan_wrapper` module is not shadowed.
    rgan = rgan_wrapper.RGANWrapper(opt, log_file, generator, discriminator, retrieval)
    # Configure data loader
    val_dataset = CVUSA(root=opt.data_root, csv_file=opt.val_csv, use_polar=opt.polar, name=opt.name,
                        transform_op=ToTensor())
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=0)

    log_print('Load test dataset from {}: val_set={}'.format(opt.data_root, len(val_dataset)))
    log_print('length of val loader: {:d}'.format(len(val_loader)))

    rgan.generator.eval()
    rgan.retrieval.eval()
    fake_street_batches_v = []
    street_batches_v = []

    # No gradients are needed for evaluation; disabling them avoids building autograd graphs.
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            print(i)  # progress: index of the batch being processed
            rgan.set_input(data)
            rgan.eval_model()
            # Keep detached CPU copies so the collected embeddings do not occupy GPU memory.
            fake_street_batches_v.append(rgan.fake_street_out_val.detach().cpu())
            street_batches_v.append(rgan.street_out_val.detach().cpu())

    fake_street_vec = torch.cat(fake_street_batches_v, dim=0)
    street_vec = torch.cat(street_batches_v, dim=0)
    # Pairwise 2 - 2 * <a, b>; this equals the squared Euclidean distance if the embeddings
    # are L2-normalised - TODO confirm.
    dists = 2 - 2 * torch.matmul(fake_street_vec, street_vec.permute(1, 0))

    tp1 = rgan.mutual_topk_acc(dists, topk=1)
    tp5 = rgan.mutual_topk_acc(dists, topk=5)
    tp10 = rgan.mutual_topk_acc(dists, topk=10)

    num = len(dists)
    # Top-1% recall: k is 1% of the number of evaluated samples.
    tp1p = rgan.mutual_topk_acc(dists, topk=0.01 * num)
    acc = Namespace(num=len(dists), tp1=tp1, tp5=tp5, tp10=tp10, tp1p=tp1p)

    log_print('\nEvaluate Samples:{num:d}\nRecall(p2s/s2p) tp1:{tp1[0]:.2f}/{tp1[1]:.2f} ' \
              'tp5:{tp5[0]:.2f}/{tp5[1]:.2f} tp10:{tp10[0]:.2f}/{tp10[1]:.2f} ' \
              'tp1%:{tp1p[0]:.2f}/{tp1p[1]:.2f}'.format(num=acc.num, tp1=acc.tp1,
                                                        tp5=acc.tp5, tp10=acc.tp10, tp1p=acc.tp1p))