In [None]:
import time 
import os 
import numpy as np 
import tensorflow as tf

# You'll generate plots of attention in order to see which parts of an image
# our model focuses on during captioning
import matplotlib.pyplot as plt
import pandas as pd
import collections
import random
import numpy as np
import os
import time
import json
from PIL import Image
from tqdm import tqdm

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand for every visible GPU instead of
# reserving it up front. NOTE(review): presumably needed so the PyTorch model
# trained later in this notebook can share the same device - confirm.
# This cell must run before TensorFlow initialises the GPUs.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
path_caption = "../input/flickr30k/captions.txt"
path_folder_image = "../input/flickr30k/images/flickr30k_images"

# Maps each full image path to the list of its captions, every caption wrapped
# in "<start> ... <end>" markers.
image_path_to_caption = collections.defaultdict(list)

# The context manager closes the captions file even if a line fails to parse.
with open(path_caption, 'r') as file_caption:
    next(file_caption, None)  # skip the header row
    for line in tqdm(file_caption):
        # Strip only the line terminator; slicing off the last character would
        # truncate the caption when the final line has no trailing newline.
        line = line.rstrip("\n")
        if not line:
            continue  # tolerate blank lines

        # Format: name|caption number|text. maxsplit=2 keeps any "|" that
        # appears inside the caption text itself.
        nameimage, number_caption, text = line.split("|", 2)
        caption = f"<start> {text} <end>"
        image_path = path_folder_image + "/" + nameimage
        image_path_to_caption[image_path].append(caption)
    

In [None]:
# image_paths = list(image_path_to_caption.keys())
# # random.shuffle(image_paths)
# train_image_paths = image_paths[:6000]
# # val_image_paths = image_paths[24000:]
# print(len(train_image_paths))

In [None]:
# Shuffle with a fixed seed so the same images are selected in every session.
# This notebook resumes training from a saved checkpoint, so an unseeded
# shuffle would move previously held-out images into the training set.
SPLIT_SEED = 42
NUM_TRAIN_IMAGES = 24000

image_paths = list(image_path_to_caption.keys())
random.Random(SPLIT_SEED).shuffle(image_paths)
train_image_paths = image_paths[:NUM_TRAIN_IMAGES]
val_image_paths = image_paths[NUM_TRAIN_IMAGES:]
print(len(train_image_paths))

In [None]:
# Flatten the per-image caption lists into two parallel lists: one caption per
# entry in train_captions, and the owning image path at the same position in
# img_name_vector.
train_captions = [
    caption
    for path_of_image in train_image_paths
    for caption in image_path_to_caption[path_of_image]
]
img_name_vector = [
    path_of_image
    for path_of_image in train_image_paths
    for _ in image_path_to_caption[path_of_image]
]

In [None]:
def load_image(image_path):
    """Read a JPEG file and prepare it for the EfficientNet feature extractor.

    Args:
        image_path: path of the JPEG file to read.

    Returns:
        A tuple ``(img, image_path)``: the 3-channel image resized to 299x299
        and passed through ``efficientnet.preprocess_input``, plus the
        unchanged path so callers can tell which file each tensor came from.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, (299, 299))
    return tf.keras.applications.efficientnet.preprocess_input(resized), image_path

In [None]:
# Build the feature extractor: an ImageNet-pretrained EfficientNetB7 without
# its classification head.
image_model = tf.keras.applications.EfficientNetB7(include_top=False,
                                                weights='imagenet')
new_input = image_model.input
# Output of the last remaining layer, i.e. the final feature map.
hidden_layer = image_model.layers[-1].output

# Model mapping an input image batch to that feature map.
image_features_extract_model = tf.keras.Model(new_input, hidden_layer)

In [None]:
# Directory that caches one extracted feature array (.npy) per image.
Dataset_npy = "./Dataset_npy"
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(Dataset_npy, exist_ok=True)

In [None]:
# Each image is encoded once, no matter how many captions reference it.
encode_train = sorted(set(img_name_vector))

# Feel free to change batch_size according to your system configuration
image_dataset = (
    tf.data.Dataset.from_tensor_slices(encode_train)
    .map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(16)
)

for img, path in tqdm(image_dataset):
    batch_features = image_features_extract_model(img)
    # Flatten the spatial grid: (batch, h, w, channels) -> (batch, h*w, channels)
    n_images, n_channels = batch_features.shape[0], batch_features.shape[3]
    batch_features = tf.reshape(batch_features, (n_images, -1, n_channels))

    for bf, p in zip(batch_features, path):
        nameimage = p.numpy().decode("utf-8").split("/")[-1]
        # np.save appends ".npy", so the array is written to
        # Dataset_npy/<image file name>.npy - the name map_func reads back.
        feature_path = Dataset_npy + "/" + nameimage
        np.save(feature_path, bf.numpy())

In [None]:
# Find the maximum length of any caption in our dataset
def calc_max_length(tensor):
    """Return the length of the longest element of ``tensor``.

    ``tensor`` is any iterable of sized items (for example tokenised
    captions). Raises ``ValueError`` when it is empty.
    """
    return max(map(len, tensor))

In [None]:

folder_save = './Save_Checkpoint'
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(folder_save, exist_ok=True)

In [None]:
# Choose the top 5000 words from the vocabulary
top_k = 5000
# The filter set deliberately omits "<" and ">" so the "<start>" / "<end>"
# markers survive tokenisation. The backslash is written as "\\" because "\]"
# is an invalid escape sequence in a normal string literal; the resulting
# filter characters are unchanged.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=top_k,
                                                  oov_token="<unk>",
                                                  filters='!"#$%&()*+.,-/:;=?@[\\]^_`{|}~ ')
tokenizer.fit_on_texts(train_captions)
# Register index 0 as the padding token in both lookup directions.
tokenizer.word_index['<pad>'] = 0
tokenizer.index_word[0] = '<pad>'

In [None]:
# Show the vocabulary index assigned to "<start>". Encoder2Decoder.sampler
# seeds generation with a hard-coded index of 1 - compare against this value.
print(tokenizer.word_index['<start>'])

In [None]:
# Convert every caption into a list of vocabulary indices.
train_seqs = tokenizer.texts_to_sequences(train_captions)
# Pad every sequence at the end (padding='post') to a common length.
# No maximum length is passed here, so pad_sequences derives it from the data.
cap_vector = tf.keras.preprocessing.sequence.pad_sequences(train_seqs, padding='post')

In [None]:
# Group the padded caption vectors by image.
img_to_cap_vector = collections.defaultdict(list)
for img, cap in zip(img_name_vector, cap_vector):
    img_to_cap_vector[img].append(cap)

# Create training, validation and test sets using an 80-10-10 split over
# images, so all captions of one image land in the same subset.
img_keys = list(img_to_cap_vector.keys())
# Fixed seed: training is resumed from a saved checkpoint, so the subsets must
# be identical across sessions or held-out images leak into training.
random.Random(42).shuffle(img_keys)

slice_index = int(len(img_keys) * 0.8)
val_end_index = slice_index + int(len(img_keys) * 0.1)
img_name_train_keys = img_keys[:slice_index]
img_name_val_keys = img_keys[slice_index:val_end_index]
img_name_test_keys = img_keys[val_end_index:]


def _expand_captions(image_keys):
    """Flatten per-image caption lists into parallel (image name, caption) lists."""
    names, caps = [], []
    for key in image_keys:
        image_caps = img_to_cap_vector[key]
        names.extend([key] * len(image_caps))
        caps.extend(image_caps)
    return names, caps


img_name_train, cap_train = _expand_captions(img_name_train_keys)
img_name_val, cap_val = _expand_captions(img_name_val_keys)
# Bug fix: the test set was previously built from img_name_val_keys, which
# made it a copy of the validation set and left the test images unused.
img_name_test, cap_test = _expand_captions(img_name_test_keys)

In [None]:
# Feel free to change these parameters according to your system's configuration

BATCH_SIZE = 64
BUFFER_SIZE = 1000
embedding_dim = 256
units = 512
# One extra slot on top of the top_k vocabulary entries.
vocab_size = top_k + 1
num_steps = len(img_name_train) // BATCH_SIZE
val_num_steps = len(img_name_val) // BATCH_SIZE
# NOTE(review): the previous comment described InceptionV3 features of shape
# (64, 2048), but this notebook extracts features with EfficientNetB7.
# features_shape matches the 2560 input width hard-coded in CNN_Encoder;
# attention_features_shape is presumably the number of spatial locations per
# image - TODO confirm against the shape of a saved .npy file.
features_shape = 2560
attention_features_shape = 81

In [None]:
# Load the numpy files
def map_func(img_name):
    """Load the cached feature array that belongs to an image path.

    Only the file-name part of ``img_name`` (the text after the last "/") is
    used; the array is read from ``Dataset_npy/<file name>.npy``.
    """
    file_name = img_name.rsplit("/", 1)[-1]
    return np.load(f"{Dataset_npy}/{file_name}.npy")

In [None]:
# Number of (image, caption) pairs in the train / validation / test subsets.
len(img_name_train), len(img_name_val), len(img_name_test)

In [None]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [None]:
class Flickr30kDataset(Dataset):
    """PyTorch dataset pairing image features with one caption each.

    ``img_name_list`` and ``caption_list`` are parallel sequences: entry ``i``
    of one belongs to entry ``i`` of the other. ``map_func`` converts an image
    name into the value returned as the first element of each sample.
    """

    def __init__(self, img_name_list, caption_list, map_func):
        self.img_name_list = img_name_list
        self.caption_list = caption_list
        self.map_func = map_func

    def __len__(self):
        # One sample per (image name, caption) pair.
        return len(self.img_name_list)

    def __getitem__(self, i):
        # Index both parallel lists first, then resolve the image name through
        # the user-supplied loader.
        name, caption = self.img_name_list[i], self.caption_list[i]
        return self.map_func(name), caption

In [None]:
# Wrap each subset in a Flickr30kDataset; map_func loads the cached feature
# array for an image name when a sample is requested.
torch_train_dataset = Flickr30kDataset(img_name_train, cap_train, map_func)
torch_val_dataset = Flickr30kDataset(img_name_val, cap_val, map_func)
torch_test_dataset = Flickr30kDataset(img_name_test, cap_test, map_func)

In [None]:
# Sanity check: shapes of the feature array and caption vector of one sample.
torch_train_dataset[1][0].shape, torch_train_dataset[1][1].shape

### Dataloader

In [None]:
# Batch each subset with the same BATCH_SIZE.
# NOTE(review): the validation and test loaders are shuffled as well; shuffling
# is only needed for training, and shuffle=False would make evaluation batches
# deterministic - consider changing.
train_dataloader = DataLoader(dataset = torch_train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset = torch_val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset = torch_test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torch.nn.utils.rnn import pack_padded_sequence
from torch.autograd import Variable
import torch.nn.functional as F
from torch.nn import init
from torch.utils.data import Dataset

In [None]:
class CNN_Encoder(nn.Module):
    """Project pre-extracted image features into the decoder's hidden space.

    The image features are extracted and cached beforehand, so this encoder is
    only a fully connected layer followed by a ReLU.

    Args:
        hidden_size: output width of the projection.
        feature_dim: width of the incoming feature vectors. Defaults to 2560,
            the value that was previously hard-coded.
    """

    def __init__(self, hidden_size, feature_dim=2560):
        super(CNN_Encoder, self).__init__()
        # shape after fc == (batch_size, num_locations, hidden_size)
        self.fc = nn.Linear(feature_dim, hidden_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the linear projection and ReLU to the last dimension of ``x``."""
        return self.relu(self.fc(x))

In [None]:
# linear = nn.Linear(512, 81, bias=False)
# V = torch.rand(1, 81, 512)
# h_t = torch.rand(5,10, 512)
# out_V = linear(V)
# out_h = linear(h_t)
# out_V.shape, out_h.shape

In [None]:
#spatial attention 
class Attention(nn.Module):
    """Spatial attention over image regions extended with a sentinel vector.

    Args:
        hidden_size: width of the region features, LSTM hidden states and
            sentinel vectors.
        attention_dim: width of the internal attention projection. Defaults to
            81, the value that was previously hard-coded.
    """

    def __init__(self, hidden_size, attention_dim=81):
        super(Attention, self).__init__()
        self.affine_v = nn.Linear(hidden_size, attention_dim, bias=False)  # W_v
        self.affine_g = nn.Linear(hidden_size, attention_dim, bias=False)  # W_g
        self.affine_s = nn.Linear(hidden_size, attention_dim, bias=False)  # W_s
        self.affine_h = nn.Linear(attention_dim, 1, bias=False)  # w_h

        self.dropout = nn.Dropout(0.5)
        self.init_weights()

    def init_weights(self):
        """Initialize the weights."""
        init.xavier_uniform_(self.affine_v.weight)
        init.xavier_uniform_(self.affine_g.weight)
        init.xavier_uniform_(self.affine_h.weight)
        init.xavier_uniform_(self.affine_s.weight)

    def forward(self, V, h_t, s_t):
        '''
        Input:
            V:   B x k x hidden_size, the k region features [v_1, ..., v_k]
            h_t: B x seq x hidden_size, LSTM hidden states
            s_t: B x seq x hidden_size, sentinel vectors
        Output:
            c_hat_t: B x seq x hidden_size, mix of sentinel and visual context
            alpha_t: B x seq x k, attention weights over the regions
            beta_t:  B x seq x 1, weight given to the sentinel
        '''
        # W_v * V + W_g * h_t * 1^T  ->  B x seq x k x attention_dim
        content_v = self.affine_v(self.dropout(V)).unsqueeze(1) \
                    + self.affine_g(self.dropout(h_t)).unsqueeze(2)

        # z_t = w_h * tanh( content_v )  ->  B x seq x k
        # torch.tanh replaces F.tanh, which is deprecated in several PyTorch releases.
        z_t = self.affine_h(self.dropout(torch.tanh(content_v))).squeeze(3)
        # Softmax over the region axis; equivalent to the former
        # flatten -> softmax -> reshape round-trip.
        alpha_t = F.softmax(z_t, dim=-1)

        # Visual context c_t: B x seq x hidden_size. The former .squeeze(2) was
        # a no-op for hidden_size > 1 and broke the shape for hidden_size == 1.
        c_t = torch.bmm(alpha_t, V)

        # W_s * s_t + W_g * h_t
        content_s = self.affine_s(self.dropout(s_t)) + self.affine_g(self.dropout(h_t))
        # w_h * tanh( content_s )  ->  B x seq x 1
        z_t_extended = self.affine_h(self.dropout(torch.tanh(content_s)))

        # Attention over the k regions plus the sentinel (last position).
        extended = torch.cat((z_t, z_t_extended), dim=2)
        alpha_hat_t = F.softmax(extended, dim=-1)
        beta_t = alpha_hat_t[:, :, -1].unsqueeze(2)

        # c_hat_t = beta * s_t + ( 1 - beta ) * c_t
        c_hat_t = beta_t * s_t + (1 - beta_t) * c_t
        return c_hat_t, alpha_t, beta_t


In [None]:
# Sentinel BLock    
class Sentinel( nn.Module ):
    """Compute the sentinel vector s_t = sigmoid(W_x x_t + W_h h_(t-1)) * tanh(cell_t).

    Args:
        input_size: width of the word embeddings x_t.
        hidden_size: width of the LSTM hidden and cell states.
    """

    def __init__( self, input_size, hidden_size ):
        super( Sentinel, self ).__init__()

        self.affine_x = nn.Linear( input_size, hidden_size, bias=False )
        self.affine_h = nn.Linear( hidden_size, hidden_size, bias=False )

        # Dropout applied before affine transformation
        self.dropout = nn.Dropout( 0.5 )

        self.init_weights()

    def init_weights( self ):
        """Xavier-initialise both projection matrices."""
        init.xavier_uniform_( self.affine_x.weight )
        init.xavier_uniform_( self.affine_h.weight )

    def forward( self, x_t, h_t_1, cell_t ):
        """Return the sentinel vectors.

        ``x_t`` has a last dimension of input_size; ``h_t_1`` and ``cell_t``
        have a last dimension of hidden_size. The result has the shape of
        ``cell_t``.
        """
        # g_t = sigmoid( W_x * x_t + W_h * h_(t-1) )
        # torch.sigmoid / torch.tanh replace the F.* variants, which are
        # deprecated in several PyTorch releases.
        gate_t = torch.sigmoid(
            self.affine_x( self.dropout( x_t ) ) + self.affine_h( self.dropout( h_t_1 ) )
        )

        # Sentinel embedding
        return gate_t * torch.tanh( cell_t )

In [None]:
# Adaptive Attention Block: C_t, Spatial Attention Weights, Sentinel embedding    
class AdaptiveBlock( nn.Module ):
    """Adaptive attention head: sentinel, spatial attention and word classifier.

    Args:
        embed_size: width of the word embeddings.
        hidden_size: width of the LSTM states and of the image features V.
        vocab_size: number of output classes of the final classifier.
    """

    def __init__( self, embed_size, hidden_size, vocab_size ):
        super( AdaptiveBlock, self ).__init__()

        # Sentinel block
        self.sentinel = Sentinel( embed_size, hidden_size )

        # Image Spatial Attention Block
        self.atten = Attention( hidden_size )

        # Final Caption generator
        self.mlp = nn.Linear( hidden_size, vocab_size )

        # Dropout layer inside Affine Transformation
        self.dropout = nn.Dropout( 0.5 )

        self.hidden_size = hidden_size
        self.init_weights()

    def init_weights( self ):
        '''
        Initialize final classifier weights
        '''
        init.kaiming_normal_( self.mlp.weight, mode='fan_in' )
        self.mlp.bias.data.fill_( 0 )

    def forward( self, x, hiddens, cells, V ):
        """Score the vocabulary for every time step.

        Args:
            x: B x seq x embed_size word embeddings.
            hiddens: B x seq x hidden_size LSTM hidden states h_1..h_t.
            cells: B x seq x hidden_size LSTM cell states.
            V: B x k x hidden_size image region features.

        Returns:
            (scores, atten_weights, beta): vocabulary scores
            (B x seq x vocab_size), spatial attention weights and the
            sentinel weight, the latter two as returned by ``Attention``.
        """
        # hidden for sentinel should be h0-ht-1
        h0 = self.init_hidden( x.size(0) )[0].transpose( 0,1 )

        # h_(t-1): B x seq x hidden_size ( 0 - t-1 )
        if hiddens.size( 1 ) > 1:
            hiddens_t_1 = torch.cat( ( h0, hiddens[ :, :-1, : ] ), dim=1 )
        else:
            hiddens_t_1 = h0

        # Sentinel embedding, computed for all time steps at once
        sentinel = self.sentinel( x, hiddens_t_1, cells )

        # Get C_t, Spatial attention, sentinel score
        c_hat, atten_weights, beta = self.atten( V, hiddens, sentinel )

        # Final score along vocabulary
        scores = self.mlp( self.dropout( c_hat + hiddens ) )

        return scores, atten_weights, beta

    def init_hidden( self, bsz ):
        '''
        Hidden_0 & Cell_0 initialization: two zero tensors of shape
        1 x bsz x hidden_size.
        '''
        weight = next( self.parameters() )

        # new_zeros inherits dtype and device from the module's own weights.
        # The previous code moved the tensors to CUDA whenever a GPU was
        # available, which failed for a model that still lived on the CPU.
        return ( weight.new_zeros( 1, bsz, self.hidden_size ),
                 weight.new_zeros( 1, bsz, self.hidden_size ) )

In [None]:
# Caption Decoder
class Decoder( nn.Module ):
    """Caption decoder: word embedding, single-layer LSTM and adaptive attention.

    Args:
        embed_size: width of the word embeddings (also the LSTM input width).
        vocab_size: number of tokens in the vocabulary.
        hidden_size: width of the LSTM hidden and cell states.
    """

    def __init__( self, embed_size, vocab_size, hidden_size ):
        super( Decoder, self ).__init__()

        # word embedding
        self.embed = nn.Embedding( vocab_size, embed_size )

        # LSTM decoder: the input at each step is the word embedding only
        self.LSTM = nn.LSTM( embed_size, hidden_size, 1, batch_first=True )

        # Save hidden_size for hidden and cell variable
        self.hidden_size = hidden_size

        # Adaptive Attention Block: Sentinel + C_hat + Final scores for caption sampling
        self.adaptive = AdaptiveBlock( embed_size, hidden_size, vocab_size )

    def forward( self, V , captions, states=None ):
        """Run the LSTM over ``captions`` and score every time step.

        Args:
            V: B x k x hidden_size image region features.
            captions: B x seq tensor of token indices.
            states: optional LSTM state passed to the first step; ``None``
                lets the LSTM start from its default initial state.

        Returns:
            (scores, states, atten_weights, beta), where ``states`` is the
            LSTM state after the last step (used to continue sampling).
        """
        # Word Embedding
        x = self.embed( captions )
        batch_size, seq_len = x.size( 0 ), x.size( 1 )

        # Hiddens: Batch x seq_len x hidden_size
        # Cells: seq_len x Batch x hidden_size
        # new_zeros allocates on the same device and dtype as the embeddings.
        # The previous code chose CUDA whenever a GPU was available, which
        # failed for a model that still lived on the CPU.
        hiddens = x.new_zeros( batch_size, seq_len, self.hidden_size )
        cells = x.new_zeros( seq_len, batch_size, self.hidden_size )

        # Recurrent Block
        # Feed x_t one step at a time so the cell state of every step can be
        # recorded for the sentinel.
        for time_step in range( seq_len ):
            x_t = x[ :, time_step, : ].unsqueeze( 1 )

            h_t, states = self.LSTM( x_t, states )

            # h_t is B x 1 x hidden_size (batch_first)
            hiddens[ :, time_step, : ] = torch.squeeze( h_t, 1 )
            # states[1] is the cell state, 1 x B x hidden_size
            cells[ time_step, :, : ] = states[1][0]

        # cell: Batch x seq_len x hidden_size
        cells = cells.transpose( 0, 1 )

        scores, atten_weights, beta = self.adaptive( x, hiddens, cells, V )

        # Return states for Caption Sampling purpose
        return scores, states, atten_weights, beta

In [None]:
# decoder = Decoder(256, 5000, 512)
# V = torch.rand(16, 81, 512)
# captions = torch.randint(high=5000, size=(16, 2))
# out = decoder(V, captions)

In [None]:
# Whole Architecture with Image Encoder and Caption decoder        
class Encoder2Decoder( nn.Module ):
    """Full captioning model: feature projection plus adaptive attention decoder.

    Args:
        embed_size: width of the word embeddings.
        vocab_size: number of tokens in the vocabulary.
        hidden_size: width of the projected image features and LSTM states.
    """

    def __init__( self, embed_size, vocab_size, hidden_size ):
        super( Encoder2Decoder, self ).__init__()

        # Image CNN encoder and Adaptive Attention Decoder
        self.encoder = CNN_Encoder(hidden_size )
        self.decoder = Decoder( embed_size, vocab_size, hidden_size )

    def forward( self, images, captions):
        """Return vocabulary scores (B x seq x vocab_size) for ``captions``.

        ``images`` holds the pre-extracted image features that are fed to the
        encoder; ``captions`` is a B x seq tensor of token indices.
        """
        # V=[ v_1, ..., v_k ] in the original paper
        V = self.encoder( images )

        # Language Modeling on word prediction
        scores, _, _, _ = self.decoder( V, captions )
        return scores

    # Caption generator
    def sampler( self, images, max_len=20, start_token=1 ):
        """
        Samples captions for given image features (Greedy search).

        Args:
            images: pre-extracted image features, batch first.
            max_len: number of tokens to generate per image.
            start_token: vocabulary index fed as the first input token.
                Defaults to 1, the value that was previously hard-coded.
                NOTE(review): the tokenizer in this notebook is built with an
                out-of-vocabulary token, so index 1 is presumably not
                "<start>"; pass tokenizer.word_index['<start>'] - TODO confirm.

        Returns:
            (sampled_ids, attention, Beta): generated token indices
            (B x max_len), spatial attention weights (B x max_len x k) and
            sentinel weights (B x max_len x 1).
        """
        V = self.encoder( images )

        # Starting token: B x 1, created on the same device as the features.
        # The previous code chose CUDA whenever a GPU was available, which
        # failed for a model that still lived on the CPU.
        captions = torch.full( ( images.size( 0 ), 1 ), start_token,
                               dtype=torch.long, device=V.device )

        # Get generated caption idx list, attention weights and sentinel score
        sampled_ids = []
        attention = []
        Beta = []

        # Initial hidden states
        states = None

        for i in range( max_len ):

            scores, states, atten_weights, beta = self.decoder( V, captions, states )
            predicted = scores.max( 2 )[ 1 ] # argmax
            # The predicted word is the input of the next step
            captions = predicted

            # Save sampled word, attention map and sentinel at each timestep
            sampled_ids.append( captions )
            attention.append( atten_weights )
            Beta.append( beta )

        # Concatenate the per-step results along the time axis
        sampled_ids = torch.cat( sampled_ids, dim=1 )
        attention = torch.cat( attention, dim=1 )
        Beta = torch.cat( Beta, dim=1 )

        return sampled_ids, attention, Beta

In [None]:
os.getcwd()

In [None]:
# Switch to the directory holding the saved results so the next cell can open
# the checkpoint by bare file name. The path is relative, so running this cell
# twice in a row changes to a different (probably missing) directory.
os.chdir('../input/savedresults')

In [None]:
adaptive = Encoder2Decoder(embedding_dim, vocab_size, units)
# Resume from the saved weights. map_location='cpu' lets a checkpoint written
# in a GPU session load on a machine without CUDA; the model is moved to the
# GPU later in the notebook when one is available.
adaptive.load_state_dict(torch.load('adaptive-13.pkl', map_location='cpu'))

In [None]:
# Return to the working directory (undoes the relative chdir made before
# loading the checkpoint).
os.chdir('../../working')

In [None]:
# Optimisation setup for the model loaded above.
learning_rate = 0.001
# NOTE(review): captions are padded with index 0, and this loss also counts
# the padded positions. ignore_index=0 would exclude them, but it would make
# new loss values incomparable with the loaded loss history - decide explicitly.
loss_func = nn.CrossEntropyLoss()
params = list(adaptive.encoder.parameters()) + list( adaptive.decoder.parameters())
# Only the model weights are restored from the checkpoint; the Adam state
# starts fresh in every session.
optimizer = torch.optim.Adam(params, lr=learning_rate)
if torch.cuda.is_available():
    adaptive.cuda()
    loss_func.cuda()

# Directory that receives one checkpoint per epoch.
checkpoint_path = "./checkpoints/train"
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(checkpoint_path, exist_ok=True)

In [None]:
# Variable wrapper
def to_var(x, volatile=False):
    '''
    Move a tensor to the GPU when CUDA is available and detach it from any
    autograd graph.

    Args:
        x: the tensor to prepare.
        volatile: ignored; kept only for backward compatibility. The
            ``volatile`` flag no longer has any effect in PyTorch - wrap
            inference code in ``torch.no_grad()`` instead.

    Returns:
        A tensor sharing data with ``x`` that does not require gradients,
        which is what the legacy ``Variable(x)`` wrapper returned.
    '''
    if torch.cuda.is_available():
        x = x.cuda()
    return x.detach()

In [None]:
# Switch to the saved-results directory so the loss history can be read by
# bare file name. The path is relative: do not run this cell twice in a row.
os.chdir('../input/savedresults')

In [None]:
# Loss history of the earlier training sessions (columns train_loss, val_loss
# are read from it below).
df_result = pd.read_csv('loss_result.csv')

In [None]:
df_result

In [None]:
# Continue the per-epoch loss curves from the loaded history; the training
# loop appends one value per epoch to each list.
loss_plot = df_result['train_loss'].values.tolist()
val_plot = df_result['val_loss'].values.tolist() 

In [None]:
loss_plot

In [None]:
# Return to the working directory (undoes the relative chdir made before
# reading the loss history).
os.chdir('../../working')

In [None]:
import time

In [None]:
# Train the Models
total_step = len(train_dataloader)
num_epochs = 7
best_epoch = 0

# Start Training
for epoch in range(1, num_epochs + 1):
    start = time.time()

    # ---- training phase ----
    print('------------------Training for Epoch %d----------------'%( epoch ))
    adaptive.train()
    total_loss = 0.0
    for i, (images, captions) in enumerate( train_dataloader ):
        # Set mini-batch dataset
        images = to_var( images )
        captions = to_var( captions ).to(torch.int64)

        # Forward, Backward and Optimize
        adaptive.zero_grad()
        scores = adaptive(images, captions)

        # The score at step t predicts the caption token at step t+1.
        # CrossEntropyLoss expects (batch, classes, seq), so the class axis
        # must be moved with transpose. The previous .view(batch, vocab, -1)
        # only reinterpreted memory and paired logits with the wrong
        # class/time positions.
        loss = loss_func(scores[:, :-1, :].transpose(1, 2), captions[:, 1:])
        # .item() accumulates a plain float instead of keeping a tensor (and
        # its autograd references) alive for the whole epoch.
        total_loss += loss.item()
        loss.backward()
        optimizer.step()

        # Print log info
        if i%100 == 0:
            print(f'Epoch [{epoch}/{num_epochs}], Batch [{i}/{total_step}], Train Loss: {loss.item()}')

    loss_plot.append(total_loss/total_step)
    print(f'Epoch {epoch} loss: {total_loss/total_step}')

    # ---- validation phase ----
    print('Validation for Epoch %d'%( epoch ))
    adaptive.eval()
    total_loss = 0.0
    # No gradients are needed for the forward pass or the loss.
    with torch.no_grad():
        for i, (images, captions) in enumerate( val_dataloader ):
            # Set mini-batch dataset
            images = to_var( images )
            captions = to_var( captions ).to(torch.int64)

            scores = adaptive(images, captions)
            loss = loss_func(scores[:, :-1, :].transpose(1, 2), captions[:, 1:])
            total_loss += loss.item()

            # Print log info
            if i%100 == 0:
                print(f'Epoch [{epoch}/{num_epochs}], Batch [{i}/{len(val_dataloader)}], Val Loss: {loss.item()}')

    val_plot.append(total_loss/len(val_dataloader))
    print(f'Epoch {epoch} val loss: {total_loss/len(val_dataloader)}')
    print(f'Time taken for epoch: {time.time() - start}s')

    # Save the Adaptive Attention model after every epoch
    torch.save( adaptive.state_dict(),
                os.path.join( checkpoint_path,
                              'adaptive-%d.pkl'%( epoch ) ) )
      
        
#         # Evaluation on validation set        
#         cider = coco_eval( adaptive, args, epoch )
#         cider_scores.append( cider )        
        
#         if cider > best_cider:
#             best_cider = cider
#             best_epoch = epoch
       
#         if len( cider_scores ) > 5:
            
#             last_6 = cider_scores[-6:]
#             last_6_max = max( last_6 )
            
#             # Test if there is improvement, if not do early stopping
#             if last_6_max != best_cider:
                
#                 print 'No improvement with CIDEr in the last 6 epochs...Early stopping triggered.'
#                 print 'Model of best epoch #: %d with CIDEr score %.2f'%( best_epoch, best_cider )
#                 break

In [None]:
from IPython.display import FileLink


In [None]:
# Download link for the checkpoint of epoch 7, which is the last epoch when
# num_epochs is 7 - keep this number in sync with the training cell.
FileLink(os.path.join( checkpoint_path, 
                    'adaptive-%d.pkl'%(7) ))

In [None]:
# The history mixes plain floats (loaded from CSV) with values appended during
# training, which may be tensors; convert everything to Python numbers.
# isinstance also covers tensor subclasses, unlike an exact type comparison.
loss_plot_num = [val.item() if isinstance(val, torch.Tensor) else val for val in loss_plot]
val_plot_num = [val.item() if isinstance(val, torch.Tensor) else val for val in val_plot]

In [None]:
# Plot training and validation loss per epoch on one set of axes.
fig, ax = plt.subplots()
ax.plot(loss_plot_num, label='Train Loss')
ax.plot(val_plot_num, label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.set_title('Loss Plot')
ax.legend()
plt.show()

In [None]:
loss_plot_num

In [None]:
val_plot_num

In [None]:
# Rebuild the loss table from the full history (loaded values plus the epochs
# trained in this session). Both lists must have the same length.
df_result = pd.DataFrame({'train_loss': loss_plot_num, 'val_loss': val_plot_num})

In [None]:
df_result

In [None]:
# Write the updated loss history to the current directory; index=False keeps
# only the train_loss and val_loss columns.
df_result.to_csv('loss_result.csv', index=False)

In [None]:
FileLink('loss_result.csv')

In [None]:
# Serialise the whole DataLoader object, including its dataset's name and
# caption lists and its reference to map_func.
# NOTE(review): torch.save pickles the object, so loading it later presumably
# requires Flickr30kDataset and map_func to be defined first - confirm.
torch.save(train_dataloader, 'train_dataloader.pth')

In [None]:
# Serialise the validation and test DataLoader objects the same way as the
# training loader.
torch.save(val_dataloader, 'val_dataloader.pth')
torch.save(test_dataloader, 'test_dataloader.pth')

In [None]:
FileLink('train_dataloader.pth')

In [None]:
FileLink('val_dataloader.pth')

In [None]:
FileLink('test_dataloader.pth')