In [1]:
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
import json 
from torch.utils.data import Dataset, DataLoader
from PIL import Image, ImageDraw

from torchvision import transforms
from tqdm import tqdm
import cv2
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage.filters import gaussian_filter
import sys 
sys.path.append('../..')
import src.utils as utils
import src.clip as clip 
import yaml
import math 
from tqdm import tqdm  
from src.clip_led.dataset import LEDDataset

import src.fusion as fusion
from src.blocks import Up, ConvBlock, IdentityBlock
%matplotlib inline 

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Experiment configuration shared by the dataset, the model and the metrics.
config = dict(
    # Data paths
    train_path='../../data/way_splits/train_data.json',
    valid_seen_path='../../data/way_splits/valSeen_data.json',
    valid_unseen_path='../../data/way_splits/valUnseen_data.json',
    mesh2meters='../../data/floorplans/pix2meshDistance.json',
    image_dir='../../data/floorplans/',
    geodistance_file='../../data/geodistance_nodes.json',

    device='cpu',

    # Hyper parameters
    max_floors=5,

    # Image parameters (sizes are presumably [channels, height, width] -- TODO confirm)
    image_size=[3, 448, 448],
    original_image_size=[3, 700, 1200],
    cropped_image_size=[3, 700, 800],
    scaled_image_size=[3, 448, 448],

    # Crop offset and resize factors between the cropped and the scaled image
    crop_translate_x=200,
    crop_translate_y=0,
    resize_scale_x=448/800,
    resize_scale_y=448/700,
    conversion_scale=448/800,

    # Model parameters
    lang_fusion_type='mult',
    num_post_clip_channels=2048,
    bilinear=True,
    batch_norm=True,
    num_output_channels=1,

    # Optimisation
    lr=0.001,
)

In [3]:
# NOTE(review): the variable is named train_dataset but it is built from
# config['valid_seen_path'], not config['train_path'] -- confirm this is an
# intentional shortcut (e.g. a smaller split for quick experiments).
train_dataset = LEDDataset(config['valid_seen_path'], config['image_dir'], config)

In [5]:
# NOTE(review): LEDModel is neither imported nor defined in any cell shown here
# (the execution counter jumps from In [3] to In [5]) -- presumably its defining
# cell was removed; restore or import it so the notebook runs on a fresh kernel.
led_clip = LEDModel(config)

In [6]:
# Training Parameters 

# Loss, optimiser, LR schedule and gradient scaler used by the training loop.
loss_fn = nn.KLDivLoss(reduction="batchmean")
optimizer = torch.optim.AdamW(
    led_clip.parameters(),
    lr=config['lr'],
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=0.01,
    amsgrad=False,
)
# 'min' mode: the monitored value passed to scheduler.step() is expected to decrease.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
# Gradient scaling is a CUDA mixed-precision tool; an enabled scaler rejects
# non-CUDA tensors, so only enable it when the configured device is a CUDA one.
scaler = torch.cuda.amp.GradScaler(enabled=str(config['device']).startswith('cuda'))




In [7]:

def snap_to_grid(geodistance_nodes, node2pix, sn, pred_coord, conversion, level):
    """Return the id of the candidate node closest to ``pred_coord``.

    A node of scan ``sn`` is a candidate when entry 2 of its ``node2pix``
    record equals ``int(level)`` and the node id is contained in
    ``geodistance_nodes``. The node position is read from entry 0 of the
    record with its two components swapped before being compared to
    ``pred_coord``; the Euclidean pixel distance is divided by ``conversion``.

    Returns the id of the first node with the strictly smallest distance, or
    an empty string when no node qualifies.
    """
    closest_node, closest_dist = "", math.inf
    for node_id, record in node2pix[sn].items():
        on_floor = record[2] == int(level)
        if not (on_floor and node_id in geodistance_nodes):
            continue
        node_pos = record[0]
        # Stored position components are swapped to line up with pred_coord.
        delta_first = node_pos[1] - pred_coord[0]
        delta_second = node_pos[0] - pred_coord[1]
        gap = (np.sqrt(delta_first ** 2 + delta_second ** 2) / (conversion)).item()
        if gap < closest_dist:
            closest_node, closest_dist = node_id, gap
    return closest_node


def distance_from_pixels(config, preds, mesh_conversions, scan_names, true_viewpoints, episode_ids, mode):
    """Calculate distances between model predictions and targets within a batch.

    Takes the probability map over the pixels of every episode, picks its
    highest-scoring (floor, row, col) location, snaps it to a node with
    ``snap_to_grid`` and looks up the geodesic distance to the true viewpoint.

    Args:
        config: dict; the keys 'image_dir', 'geodistance_file' and
            'max_floors' are read.
        preds: per-episode prediction tensors whose first dimension indexes
            the floor (each is resized per floor to 700 x 1200).
        mesh_conversions: per-episode tensors viewable as (max_floors, 1, 1);
            the entry of the predicted floor is passed to ``snap_to_grid``.
        scan_names: per-episode scan ids used as keys into both JSON files.
        true_viewpoints: per-episode ground-truth node ids.
        episode_ids: per-episode identifiers echoed back in the predictions.
        mode: when equal to "test" no distances are computed.

    Returns:
        (distances, episode_predictions): the list of geodesic distances
        (empty in "test" mode) and a list of [episode_id, predicted_node].
    """
    # Context managers close the files; the originals were left open.
    with open(config['image_dir'] + "allScans_Node2pix.json") as node2pix_file:
        node2pix = json.load(node2pix_file)
    with open(config['geodistance_file']) as geodistance_file:
        geodistance_nodes = json.load(geodistance_file)

    distances, episode_predictions = [], []
    for pred, conversion, sn, tv, episode_id in zip(
        preds, mesh_conversions, scan_names, true_viewpoints, episode_ids
    ):
        # Number of distinct floor indices recorded for this scan.
        total_floors = len({record[2] for record in node2pix[sn].values()})
        # Resize every floor map and drop floors the scan does not have.
        pred = nn.functional.interpolate(
            pred.unsqueeze(1), (700, 1200), mode="bilinear"
        ).squeeze(1)[:total_floors]
        # .item() hands NumPy a plain int, so this also works for non-CPU tensors.
        floor, row, col = (
            int(index)
            for index in np.unravel_index(pred.argmax().item(), tuple(pred.size()))
        )
        convers = conversion.view(config['max_floors'], 1, 1)[floor]
        pred_viewpoint = snap_to_grid(
            geodistance_nodes[sn],
            node2pix,
            sn,
            [row, col],
            convers,
            floor,
        )
        if mode != "test":
            # NOTE(review): snap_to_grid returns "" when no node qualifies, which
            # raises KeyError here -- confirm that cannot happen for valid data.
            dist = geodistance_nodes[sn][tv][pred_viewpoint]
            distances.append(dist)
        episode_predictions.append([episode_id, pred_viewpoint])
    return distances, episode_predictions

def accuracy(dists, threshold=3):
    """Fraction of distances that are at most ``threshold`` (3 meters by default)."""
    within_threshold = torch.tensor(dists).le(threshold)
    return np.mean(within_threshold.int().numpy())

In [8]:
# Batches of two samples drawn from train_dataset; no shuffle argument is passed.
train_loader = DataLoader(train_dataset, batch_size=2)

In [8]:
# Run the model on the first batch only, keeping `data` and `preds` for inspection.
for data in train_loader:
    maps, target_maps, conversions = (
        data['maps'],
        data['target_maps'],
        data['conversions'],
    )
    # Drop the singleton dimension at index 1 of the dialog tensor.
    dialogs = data['dialogs'].squeeze(1)

    preds = led_clip(maps, dialogs)
    break
    
    

11.478775426276762
16.93066725972494
torch.Size([2, 77])


In [10]:
# Distances and snapped node predictions for the batch held in `data` / `preds`.
le, ep = distance_from_pixels(
    config=config,
    preds=preds,
    mesh_conversions=data['conversions'],
    scan_names=data['scan_names'],
    true_viewpoints=data['true_viewpoints'],
    episode_ids=data['episode_ids'],
    mode=train_dataset.mode,
)



In [15]:
# Every model parameter must be stored in single precision.
assert all(param.dtype == torch.float32 for param in led_clip.parameters())

In [2]:
# Scratch dictionary used to check in-place mutation through a function call.
a = dict(a=0, b=1)

In [3]:
def assign_a(a):
    """Overwrite the 'a' and 'b' entries of ``a`` in place (with 6 and 11)."""
    for key, value in (('a', 6), ('b', 11)):
        a[key] = value

In [16]:
# Training Loop 


def _print_split_metrics(label, split_metrics):
    """Print the loss and the 0m/3m/5m accuracies of one data split."""
    print(f'{label} Loss: {split_metrics["loss"]}')
    print(f'{label} Acc0m: {split_metrics["acc0m"]}')
    print(f'{label} Acc3m: {split_metrics["acc3m"]}')
    print(f'{label} Acc5m: {split_metrics["acc5m"]}')


def training_loop(train_loader, valid_seen_loader, valid_unseen_loader, epochs, model, loss_fn, optimizer, scaler, scheduler, config):
    """Train ``model`` for ``epochs`` epochs, evaluating both validation splits each epoch.

    Per epoch: switch the model to train mode and call ``train_model``, switch
    to eval mode and call ``eval_model`` on the seen and the unseen validation
    loader, print each split's metrics, merge them into a running ``metrics``
    dict through ``assign_metrics`` and step ``scheduler`` on
    ``metrics['valid_unseen_loss']``.

    Each per-split metrics dict is expected to provide the keys 'loss',
    'acc0m', 'acc3m' and 'acc5m' (they are printed).
    """
    # Latest value of every tracked metric, one group per split.
    metrics = {
        'train_loss': 0,
        'valid_seen_loss': 0,
        'valid_unseen_loss': 0,
        'train_acc_5m': 0,
        'train_acc_3m': 0,
        'train_acc_0m': 0,
        'valid_seen_acc_5m': 0,
        'valid_seen_acc_3m': 0,
        'valid_seen_acc_0m': 0,
        'valid_unseen_acc_5m': 0,
        'valid_unseen_acc_3m': 0,
        'valid_unseen_acc_0m': 0,
    }

    for e in range(epochs):

        model.train()
        # NOTE(review): train_loader is never forwarded to train_model -- confirm
        # train_model's signature; it presumably should receive the loader.
        train_metrics = train_model(model, loss_fn, optimizer, scaler, config)
        _print_split_metrics('Train', train_metrics)
        assign_metrics(metrics, train_metrics, 'train')

        model.eval()

        valid_seen_metrics = eval_model(model, valid_seen_loader, loss_fn, config, 'valid_seen')
        _print_split_metrics('Valid Seen', valid_seen_metrics)
        assign_metrics(metrics, valid_seen_metrics, 'valid_seen')

        # The unseen split is evaluated on its own loader and reports its own metrics.
        valid_unseen_metrics = eval_model(model, valid_unseen_loader, loss_fn, config, 'valid_unseen')
        _print_split_metrics('Valid Unseen', valid_unseen_metrics)
        assign_metrics(metrics, valid_unseen_metrics, 'valid_unseen')

        print(metrics)

        # The plateau scheduler monitors the unseen validation loss.
        scheduler.step(metrics['valid_unseen_loss'])

        