In [1]:
from modules.dataloader import XView3Data
import numpy as np
from torch.utils.data import DataLoader
import torch
import torchvision
from torch.utils.tensorboard import SummaryWriter

In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 TITAN Xp
1 TITAN X (Pascal)
2 Quadro K420


    Found GPU2 Quadro K420 which is of cuda capability 3.0.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability that we support is 3.5.
    


In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
# device = torch.device("cpu")

## Model Preparation

In [4]:
from torchvision.models import resnet50
from torchvision import io, transforms as T

transform = T.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# img = io.read_image("./lion.jpg")/255
# img = transform(img).reshape(1,3,224,224)
# img = img.to(device)

In [5]:
backbone = resnet50(pretrained=True)
for param in backbone.parameters():
    param.requires_grad = False
backbone = backbone.eval()

In [6]:
from modules.featextract import FeatureExtractor
from modules.model import RPN
from modules.config import load_model_config
from modules.utils import evaluate, save_fig, convert_prob_to_image

In [7]:
config = load_model_config(backbone)

In [8]:
feat = FeatureExtractor(config)
rpn = RPN(128)
# model.to(device)
# model.eval()
print()




In [9]:
model = torch.nn.Sequential(*[feat, rpn])

In [10]:
print("Total trainable params:", torch.nn.utils.parameters_to_vector([p for p in model.parameters() if p.requires_grad]).numel())

Total trainable params: 4968321


## Dataloading

In [11]:
exp_no = 5

In [12]:
def preprocess_label(df):
    df = df.dropna(subset=["is_vessel"])
    return df

train_data_path = "/media/xview/xview3_challenge/dataset/data/train"
train_label_path = "/media/xview/xview3_challenge/dataset/labels/train1.csv"

val_data_path = "/media/xview/xview3_challenge/dataset/data/validation"
val_label_path = "/media/xview/xview3_challenge/dataset/labels/validation1.csv"

# data_path = [train_data_path, val_data_path]
# label_path = [train_label_path, val_label_path]

train_data = XView3Data(background_chip_ratio=0.1, obj_size=3, threshold=0.25, overwrite=True,
                        labels_path=train_label_path, data_path=train_data_path, preprocess_label=preprocess_label, shore=True)


val_data = XView3Data(background_chip_ratio=1.1, obj_size=3, threshold=0.25, overwrite=True,
                        labels_path=val_label_path, data_path=val_data_path, preprocess_label=preprocess_label, shore=True)

# data = XView3Data(background_chip_ratio=0.1, obj_size=5, threshold=0.25, overwrite=False,
#                         labels_path=label_path, data_path=data_path, preprocess_label=preprocess_label)


Total scenes detected: 558
	Processing scene: 8325eea6c95aa16at
	Saving in labels.json
	Total chips extracted: 52
		Background chips: 5
		Foreground chips: 47
	Processing scene: ced739d78afc3286t
	Saving in labels.json
	Total chips extracted: 34
		Background chips: 4
		Foreground chips: 31
	Processing scene: b95ab45640844f93t
	Saving in labels.json
	Total chips extracted: 53
		Background chips: 5
		Foreground chips: 48
	Processing scene: d3cf076c5f02d411t
	Saving in labels.json
	Total chips extracted: 35
		Background chips: 4
		Foreground chips: 32
	Processing scene: e9b02e333e21456ft
	Saving in labels.json
	Total chips extracted: 54
		Background chips: 5
		Foreground chips: 49
	Processing scene: 3225958292ca7026t
	Saving in labels.json
	Total chips extracted: 47
		Background chips: 5
		Foreground chips: 42
	Processing scene: d1e05813c04918a7t
	Saving in labels.json
	Total chips extracted: 39
		Background chips: 4
		Foreground chips: 35
	Processing scene: ef43fc465a606f43t
	Saving in l

	Saving in labels.json
	Total chips extracted: 80
		Background chips: 14
		Foreground chips: 138
	Processing scene: ba737e36cc3bbbd6t
	Saving in labels.json
	Total chips extracted: 52
		Background chips: 5
		Foreground chips: 47
	Processing scene: 1185066a6428bc12t
	Saving in labels.json
	Total chips extracted: 48
		Background chips: 5
		Foreground chips: 43
	Processing scene: d89c0f2ec5c4f159t
	Saving in labels.json
	Total chips extracted: 39
		Background chips: 4
		Foreground chips: 36
	Processing scene: e5e4bf0c579362bdt
	Saving in labels.json
	Total chips extracted: 64
		Background chips: 6
		Foreground chips: 58
	Processing scene: f16fe7fee9a1af85t
	Saving in labels.json
	Total chips extracted: 69
		Background chips: 7
		Foreground chips: 62
	Processing scene: 00a035722196ee86t
	Saving in labels.json
	Total chips extracted: 31
		Background chips: 3
		Foreground chips: 29
	Processing scene: 425980ed6b6bcd2dt
	Saving in labels.json
	Total chips extracted: 65
		Background chips: 6
		

	Saving in labels.json
	Total chips extracted: 52
		Background chips: 5
		Foreground chips: 47
	Processing scene: eceb1884e5d82f4dt
	Saving in labels.json
	Total chips extracted: 30
		Background chips: 3
		Foreground chips: 28
	Processing scene: b0d9965dde8d5276t
	Saving in labels.json
	Total chips extracted: 51
		Background chips: 5
		Foreground chips: 46
	Processing scene: 9667cb466b141140t
	Saving in labels.json
	Total chips extracted: 107
		Background chips: 10
		Foreground chips: 97
	Processing scene: d9d700a11647e8e1t
	Saving in labels.json
	Total chips extracted: 51
		Background chips: 5
		Foreground chips: 46
	Processing scene: 36ca6b0a73c90d7ft
	Saving in labels.json
	Total chips extracted: 54
		Background chips: 5
		Foreground chips: 49
	Processing scene: 25285dd4a014196et
	Saving in labels.json
	Total chips extracted: 113
		Background chips: 11
		Foreground chips: 102
	Processing scene: 39a6368f2a05dad4t
	Saving in labels.json
	Total chips extracted: 117
		Background chips: 

	Saving in labels.json
	Total chips extracted: 34
		Background chips: 4
		Foreground chips: 32
	Processing scene: f71770d9201b1f1ft
	Saving in labels.json
	Total chips extracted: 61
		Background chips: 6
		Foreground chips: 55
	Processing scene: a92edc51bc96a82ft
	Saving in labels.json
	Total chips extracted: 54
		Background chips: 5
		Foreground chips: 49
	Processing scene: 95ee747a5f5537b6t
	Saving in labels.json
	Total chips extracted: 60
		Background chips: 6
		Foreground chips: 54
	Processing scene: da781384de903b8dt
	Saving in labels.json
	Total chips extracted: 55
		Background chips: 5
		Foreground chips: 50
	Processing scene: 08f46b67e01aa8bft
	Saving in labels.json
	Total chips extracted: 62
		Background chips: 6
		Foreground chips: 56
	Processing scene: b1de75699207dc07t
	Saving in labels.json
	Total chips extracted: 78
		Background chips: 8
		Foreground chips: 72
	Processing scene: a736a57b5c252312t
	Saving in labels.json
	Total chips extracted: 34
		Background chips: 4
		Fo

	Saving in labels.json
	Total chips extracted: 70
		Background chips: 7
		Foreground chips: 63
	Processing scene: 9f57aace246e3852t
	Saving in labels.json
	Total chips extracted: 75
		Background chips: 7
		Foreground chips: 68
	Processing scene: 8ca84c319893f38bt
	Saving in labels.json
	Total chips extracted: 126
		Background chips: 12
		Foreground chips: 114
	Processing scene: 9089b8058edd2077t
	Saving in labels.json
	Total chips extracted: 72
		Background chips: 7
		Foreground chips: 65
	Processing scene: 1630d57d3094f376t
	Saving in labels.json
	Total chips extracted: 57
		Background chips: 6
		Foreground chips: 51
	Processing scene: ca1019141c8d9c5ft
	Saving in labels.json
	Total chips extracted: 48
		Background chips: 5
		Foreground chips: 43
	Processing scene: 1f1ef57de714f5c2t
	Saving in labels.json
	Total chips extracted: 62
		Background chips: 6
		Foreground chips: 56
	Processing scene: 0dd94a7b8484235dt
	Saving in labels.json
	Total chips extracted: 52
		Background chips: 5
	

	Saving in labels.json
	Total chips extracted: 64
		Background chips: 6
		Foreground chips: 58
	Processing scene: 70c22e83de0db11dt
	Saving in labels.json
	Total chips extracted: 60
		Background chips: 6
		Foreground chips: 54
	Processing scene: 2a96e1e4747841a5t
	Saving in labels.json
	Total chips extracted: 108
		Background chips: 10
		Foreground chips: 99
	Processing scene: 876ba61d558b3219t
	Saving in labels.json
	Total chips extracted: 48
		Background chips: 5
		Foreground chips: 43
	Processing scene: 9d8690a60069c75bt
	Saving in labels.json
	Total chips extracted: 60
		Background chips: 6
		Foreground chips: 54
	Processing scene: e0798a8e99c2568ft
	Saving in labels.json
	Total chips extracted: 17
		Background chips: 2
		Foreground chips: 15
	Processing scene: 24ffe6419e6a555ft
	Saving in labels.json
	Total chips extracted: 50
		Background chips: 5
		Foreground chips: 45
	Processing scene: f5ce2dc15a39b593t
	Saving in labels.json
	Total chips extracted: 54
		Background chips: 5
		

	Saving in labels.json
	Total chips extracted: 47
		Background chips: 5
		Foreground chips: 44
	Processing scene: 3c895133d30df79ft
	Saving in labels.json
	Total chips extracted: 45
		Background chips: 5
		Foreground chips: 42
	Processing scene: bbb1871646993b03t
	Saving in labels.json
	Total chips extracted: 81
		Background chips: 8
		Foreground chips: 73
	Processing scene: 5520b576b9ec01cbt
	Saving in labels.json
	Total chips extracted: 72
		Background chips: 7
		Foreground chips: 65
	Processing scene: e6ed69d28d60111bt
	Saving in labels.json
	Total chips extracted: 91
		Background chips: 9
		Foreground chips: 82
	Processing scene: 25b647117570170et
	Saving in labels.json
	Total chips extracted: 73
		Background chips: 7
		Foreground chips: 66
	Processing scene: 801619a1ec9fdc55t
	Saving in labels.json
	Total chips extracted: 75
		Background chips: 7
		Foreground chips: 68
	Processing scene: cb63265c445495d3t
	Saving in labels.json
	Total chips extracted: 64
		Background chips: 6
		Fo

	Saving in labels.json
	Total chips extracted: 68
		Background chips: 7
		Foreground chips: 61
	Processing scene: 623fc1923a45f720t
	Saving in labels.json
	Total chips extracted: 43
		Background chips: 4
		Foreground chips: 39
	Processing scene: 8a6781613364d055t
	Saving in labels.json
	Total chips extracted: 108
		Background chips: 10
		Foreground chips: 98
	Processing scene: 791fe574b453aec3t
	Saving in labels.json
	Total chips extracted: 107
		Background chips: 10
		Foreground chips: 99
	Processing scene: 5fda68bfd1c4a4b9t
	Saving in labels.json
	Total chips extracted: 76
		Background chips: 8
		Foreground chips: 71
	Processing scene: 64e674072952e056t
	Saving in labels.json
	Total chips extracted: 82
		Background chips: 8
		Foreground chips: 74
	Processing scene: 7e7d516d2a585038t
	Saving in labels.json
	Total chips extracted: 57
		Background chips: 6
		Foreground chips: 51
	Processing scene: 7a6d0fa89709579ft
	Saving in labels.json
	Total chips extracted: 82
		Background chips: 8


	Saving in labels.json
	Total chips extracted: 38
		Background chips: 4
		Foreground chips: 35
	Processing scene: 066e06233b18a637t
	Saving in labels.json
	Total chips extracted: 40
		Background chips: 4
		Foreground chips: 36
	Processing scene: a19667d54d0b4623t
	Saving in labels.json
	Total chips extracted: 71
		Background chips: 7
		Foreground chips: 66
	Processing scene: 7060a820c566d5d3t
	Saving in labels.json
	Total chips extracted: 44
		Background chips: 4
		Foreground chips: 40
	Processing scene: 81205e3e86fdc03ct
	Saving in labels.json
	Total chips extracted: 50
		Background chips: 5
		Foreground chips: 45
	Processing scene: 02a23f3b5627213bt
	Saving in labels.json
	Total chips extracted: 114
		Background chips: 11
		Foreground chips: 103
	Processing scene: b0ae857ccb3bf7cat
	Saving in labels.json
	Total chips extracted: 84
		Background chips: 8
		Foreground chips: 76
	Processing scene: 61b732b793693dddt
	Saving in labels.json
	Total chips extracted: 25
		Background chips: 3
	

	Saving in labels.json
	Total chips extracted: 375
		Background chips: 224
		Foreground chips: 203
	Processing scene: 758991708403f218v
	Saving in labels.json
	Total chips extracted: 203
		Background chips: 108
		Foreground chips: 98
	Processing scene: 7b7e837a7ac5a880v
	Saving in labels.json
	Total chips extracted: 244
		Background chips: 134
		Foreground chips: 121
	Processing scene: 3fe00bf7beab8812v
	Saving in labels.json
	Total chips extracted: 354
		Background chips: 214
		Foreground chips: 194
	Processing scene: 4a97701b4bd81bf7v
	Saving in labels.json
	Total chips extracted: 230
		Background chips: 140
		Foreground chips: 127
	Processing scene: 335f9a411884e9cbv
	Saving in labels.json
	Total chips extracted: 202
		Background chips: 124
		Foreground chips: 112
	Processing scene: 75c03770095c6d9ev
	Saving in labels.json
	Total chips extracted: 74
		Background chips: 48
		Foreground chips: 43
	Processing scene: 5e9a2c1bcf179e9bv
	Saving in labels.json
	Total chips extracted: 379
	

In [13]:
batch_size = 4
n_epochs = 5

In [14]:
train_sampler = torch.utils.data.RandomSampler(train_data)
val_sampler = torch.utils.data.RandomSampler(val_data)
# data_sampler = torch.utils.data.RandomSampler(data)


data_loader_train = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=0, pin_memory=True)
data_loader_val = torch.utils.data.DataLoader(val_data, batch_size=batch_size, sampler=val_sampler, num_workers=0, pin_memory=True)

In [15]:
writer = SummaryWriter("/media/xview/xview3_challenge/xView3-Challenge/exps/runs/factseg_experiment_RPN"+str(exp_no))

In [16]:
# model.train()
model.to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

# class_weight = torch.Tensor([1, 1, 1]).to(device)

In [17]:
def loss_fn(binary_pred, cls_true):
    
    losses = torch.nn.functional.binary_cross_entropy(binary_pred, cls_true, reduction='none')
    label_arr = cls_true.squeeze(1).detach().cpu().numpy()
    
    b, bg_i, bg_j = np.where(label_arr == 0)
    b, fg_i, fg_j = np.where(label_arr == 1)
    
    fg_l = len(fg_i)
    bg_l = len(bg_i)
    ixs = np.random.choice(np.arange(bg_l), fg_l)
    
    fg_loss = losses[b, :, fg_i, fg_j].mean()
    bg_loss = losses[b, :, bg_i[ixs], bg_j[ixs]].mean()
    
    return fg_loss + bg_loss

In [18]:
running_loss = 0

for epoch in range(n_epochs):
    num_nans = 0
    print(f"\nEpoch: {epoch+1}")
    for i, data in enumerate(data_loader_train, 1):
        
        un_img, class_labels, inst_weight = [torch.cat(d, dim=0) for d in data]
        img = transform(un_img)
        img = img.to(device)
        
        class_labels = class_labels.cpu().numpy()
        class_labels[class_labels != 0] = 1
        class_labels = torch.from_numpy(class_labels.astype("float32")).unsqueeze(1).to(device)
        
        
        optimizer.zero_grad()
        
        pred = model(img)

        loss = loss_fn(pred, class_labels)
        
        print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {None} Nans: {num_nans}", end="")
        
        if torch.isnan(loss):
            if num_nans > 10:
                raise RuntimeError(f"Model Error: Encountered {num_nans} nan loss")
            num_nans += 1
            continue
        loss.backward()
        
        optimizer.step()
        
        running_loss += loss.detach().item()
        
        num_nans = 0
        if i%15 == 0:
            
            writer.add_scalar('training_loss', running_loss/15, epoch*len(data_loader_train)+i)
            
            step = None
            
            if i%60 == 0:
                step = epoch*len(data_loader_train)+i
                save_fig(writer, "train", step, un_img[0], pred[0], class_labels[0])
                
            val_loss = evaluate(lambda x: model(x), data_loader_val, loss_fn, step, 5, device, writer, transform=transform)
            
            writer.add_scalar('validation_loss', val_loss, epoch*len(data_loader_train)+i)
            
            print(f"\rTraining Loss: {loss.item()} Validation Loss: {val_loss}")
            
            running_loss = 0.0
    
    scheduler.step()
    
    checkpoint_pth = f'/media/xview/xview3_challenge/xView3-Challenge/exps/ckpts/RPN_{exp_no}_trained_model_{epoch+1}_epochs.pth'
    torch.save(model.state_dict(), checkpoint_pth)

writer.close()


Epoch: 1
Training Loss: 0.32580500841140747 Validation Loss: 1.0567463397979737: 0
Training Loss: 0.45757126808166504 Validation Loss: 0.6502853274345398: 0
Training Loss: 0.2215203493833542 Validation Loss: 0.4627175390720367: 00
Training Loss: 0.15237939357757568 Validation Loss: 0.4593044638633728: 0
Training Loss: 0.19194404780864716 Validation Loss: 0.4667647898197174: 0
Training Loss: 0.27751943469047546 Validation Loss: 0.5312729477882385: 0
Training Loss: 0.1723109781742096 Validation Loss: 0.5742243945598602s: 00
Training Loss: 0.05275581032037735 Validation Loss: 0.5937548696994781s: 0
Training Loss: 0.45514896512031555 Validation Loss: 0.5800192922353744s: 0
Training Loss: 0.3195115327835083 Validation Loss: 0.6000227808952332s: 00
Training Loss: 0.1565473973751068 Validation Loss: 0.7055763125419616s: 00
Training Loss: 0.5874180793762207 Validation Loss: 0.9216587662696838s: 00
Training Loss: 0.3127478063106537 Validation Loss: 0.2774430960416794s: 00
Training Loss: 0.3223

Training Loss: 0.0035519697703421116 Validation Loss: 1.2922251105308533ns: 0
Training Loss: 0.021902363747358322 Validation Loss: 1.1162401897832752ns: 0
Training Loss: 0.05304589867591858 Validation Loss: 1.574496901035309ans: 000
Training Loss: 0.006089158356189728 Validation Loss: 1.2051068812608718ns: 0
Training Loss: 0.03768972307443619 Validation Loss: 0.4281695693731308ns: 00
Training Loss: 0.01540943793952465 Validation Loss: 1.0959940016269685ns: 00
Training Loss: 0.008999152109026909 Validation Loss: 0.5877543598413467ns: 0
Training Loss: 0.01432369276881218 Validation Loss: 1.468529936671257ans: 00
Training Loss: 0.03852013126015663 Validation Loss: 0.8629313945770264ns: 00
Training Loss: 0.007604371756315231 Validation Loss: 0.8576794326305389ns: 0
Training Loss: 0.019686101004481316 Validation Loss: 1.0104198396205901ns: 0
Training Loss: 0.030232883989810944 Validation Loss: 0.5945604115724563ns: 0
Training Loss: 0.014696826227009296 Validation Loss: 1.1841897368431091ns:

Training Loss: 0.01645355299115181 Validation Loss: 2.3105618476867678ns: 0 0
Training Loss: 0.027184896171092987 Validation Loss: 2.2302568554878235ns: 0
Training Loss: 0.007898799143731594 Validation Loss: 0.9961224436759949ns: 0
Training Loss: 0.006091230548918247 Validation Loss: 1.3882474243640899ns: 00
Training Loss: 0.03133772313594818 Validation Loss: 0.8060767889022827ns: 0 0
Training Loss: 0.04928075894713402 Validation Loss: 1.3893573582172394ns: 000
Training Loss: 0.020982032641768456 Validation Loss: 3.1040004223585127ns: 00
Training Loss: 0.02657371386885643 Validation Loss: 1.3447428476065397ns: 000
Training Loss: 0.01593656837940216 Validation Loss: 0.5948540925979614ns: 000
Training Loss: 0.01583503559231758 Validation Loss: 2.127028560638428ans: 00
Training Loss: 0.02048681303858757 Validation Loss: 1.63030663728714Nans: 00
Training Loss: 0.01831197738647461 Validation Loss: 1.2234989285469056ns: 000
Training Loss: 0.014558404684066772 Validation Loss: 1.9177098989486

KeyboardInterrupt: 