In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms


import pandas as pd
import numpy as np
import spacy

from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt
import time, math, random

from datasets import build_vocab, get_loaders
from model2 import Img2Seq, EncoderCNN, DecoderRNN
from utils import train, evaluate, epoch_time

%matplotlib inline

In [3]:
# making our results reproducable
SEED = 42
random.seed=(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# speedup training
torch.backends.cudnn.benchmark = True

In [4]:
spacy_en = spacy.load('en')
MODEL_PATH = 'models/splits.pth'
IMAGES_PATH = 'Images/'
DF_PATH = 'data.csv'
TEST_DF_PATH = 'test.csv'
TEST_EXAMPLES_PATH = 'test_examples/'

In [5]:
vocab = build_vocab(DF_PATH)

In [6]:
pad_idx = vocab.stoi['<pad>']

In [7]:
HID_DIM = 256
EMB_DIM = 256
DROPOUT = .5
VOCAB_LENGTH = len(vocab)
TRAIN_CNN = False
bs = 256
lr = 3e-3

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

encoder = EncoderCNN(HID_DIM, DROPOUT)
decoder = DecoderRNN(EMB_DIM, HID_DIM, VOCAB_LENGTH, DROPOUT)

model = Img2Seq(encoder, decoder, device).to(device)

In [8]:
# transforms 
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [9]:
criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [10]:
train_loader, valid_loader = get_loaders(bs, IMAGES_PATH, DF_PATH, transform, vocab)

Dataset split: train
Unique Image: 6000
Size: 30000
Dataset split: val
Unique Image: 1000
Size: 5000


In [11]:
N_EPOCHS = 1
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_loader, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_loader, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'splits.pth')
        
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

                                                 

RuntimeError: CUDA out of memory. Tried to allocate 176.00 MiB (GPU 0; 7.94 GiB total capacity; 7.19 GiB already allocated; 40.00 MiB free; 7.38 GiB reserved in total by PyTorch)