In [None]:
import sys

In [None]:
# Make the modules that live in the ResNetCRNN folder importable from this notebook.
# The membership test keeps the cell idempotent: re-running it no longer appends
# the same entry to sys.path again.
resnet_crnn_dir = '../video-classification/ResNetCRNN/'
if resnet_crnn_dir not in sys.path:
    sys.path.append(resnet_crnn_dir)

In [None]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torchvision.transforms as transforms
import torch.utils.data as data
import torchvision
from torch.autograd import Variable
import matplotlib.pyplot as plt
from functions_new import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import accuracy_score
import pickle
from torchsummary1 import summary

from dataloader import *

In [None]:
# IPython configuration: load the autoreload extension in mode 2 (re-import
# edited modules before each cell executes) and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Display the installed PyTorch version.
torch.__version__

In [None]:
# Display the installed torchvision version.
torchvision.__version__

# Loading the training data

In [None]:
# Root folder of the extracted video frames; the indexing loop in the next cells
# treats every sub-directory of it as one class.
root_dir = '/media/hdd/astamoulakatos/nsea_video_jpegs/'
# is_dir has to be *called*: the bare bound method is always truthy, so the
# original filter never excluded regular files.
# NOTE(review): os.scandir yields entries in arbitrary order, yet labels are later
# assigned by position (one_hot_classes[c]) - confirm the directory order really
# matches class_names.
class_paths = [d.path for d in os.scandir(root_dir) if d.is_dir()]

# Per-frame preprocessing: resize to 576x704, convert to a tensor and normalise
# each channel with the mean/std values commonly used for ImageNet-pretrained models.
transform = transforms.Compose([
    transforms.Resize((576, 704)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Class folder names and, in the same order, their 5-element multi-label targets.
# Judging by the names, the label positions are presumably
# [exposure, burial, anode, field joint, free span] - TODO confirm.
class_names = ['exp_and','exp_fs','exp','exp_fj','bur']
one_hot_classes = [[1,0,1,0,0],[1,0,0,0,1],[1,0,0,0,0],[1,0,0,1,0],[0,1,0,0,0]]

# Table of the videos belonging to the training split (its `videos` column is
# matched against directory paths when the frame index is built).
df = pd.read_csv('./train-valid-splits-video/train.csv')


In [None]:
# Batch size handed to the training DataLoader (and copied into `batch_size` later).
bs = 64

In [None]:
# Build a flat list of (frame_path, multi_label_target) tuples for every video of
# the current split, plus the cumulative frame offsets at which each video ends.
class_image_paths = []
end_idx = []
# Hoisted into a set so the CSV column is not rescanned for every directory entry.
wanted_videos = set(df.videos.values)
for c, class_path in enumerate(class_paths):
    for d in os.scandir(class_path):
        # is_dir must be called; the bare method object is always truthy.
        if d.is_dir() and d.path in wanted_videos:
            frame_paths = sorted(glob.glob(os.path.join(d.path, '*.png')))
            # Attach the target vector of the enclosing class folder to every frame.
            class_image_paths.extend((p, one_hot_classes[c]) for p in frame_paths)
            end_idx.append(len(frame_paths))

# Prepend 0 and accumulate: end_idx[i] / end_idx[i + 1] delimit video i inside
# class_image_paths.
end_idx = torch.cumsum(torch.tensor([0, *end_idx]), 0)

In [None]:
# Sequence length passed to MySampler and MyDataset
# (presumably the number of frames per clip - TODO confirm).
seq_length = 16

In [None]:
# Sampler built from the cumulative per-video frame offsets and the sequence length.
# MySampler is a project class (not defined in this notebook); how it picks start
# indices is not visible here.
sampler = MySampler(end_idx, seq_length)

In [None]:
# Dataset over the (frame_path, target) list. Its reported length is tied to the
# number of items the sampler yields rather than to the number of frames.
dataset = MyDataset(
    image_paths=class_image_paths,
    seq_length=seq_length,
    transform=transform,
    length=len(sampler))

In [None]:
# Training loader: batches of `bs` items drawn by the custom sampler, loaded in
# the main process (num_workers=0); the last incomplete batch is dropped.
loader = DataLoader(
    dataset,
    batch_size=bs,
    sampler=sampler,
    num_workers=0,
    drop_last = True
)

## Show a batch

In [None]:
# #Get a batch of training data
# inputs, classes = next(iter(loader))
# inputs = inputs.squeeze(dim = 0)

# for j in range(bs):
#     # Make a grid from batch
#     out = torchvision.utils.make_grid(inputs[j])


#     for i, f in enumerate(one_hot_classes):
#         if np.array_equal(classes[j][0].numpy(), np.asarray(f)):
#             title = class_names[i]


#     imshow(out, title=title)

# Loading the model

In [None]:
# Folder that receives the saved checkpoints
save_model_path = "./save-model-lstm/"

# ---- CNN encoder ----
CNN_fc_hidden1 = 1024        # width of the first fully connected layer
CNN_fc_hidden2 = 768         # width of the second fully connected layer
CNN_embed_dim = 512          # size of the per-frame embedding produced by the 2D CNN
res_size = (576, 704)        # image size fed to the ResNet
dropout_p = 0.4              # dropout probability

# ---- RNN decoder ----
RNN_hidden_layers = 3
RNN_hidden_nodes = 512
RNN_FC_dim = 256

# ---- optimisation ----
k = 5                        # number of target categories
epochs = 2                   # number of training epochs
batch_size = bs
learning_rate = 1e-3
log_interval = 10            # how often (in batches) training info is displayed

In [None]:
# Pick the compute device: the GPU when CUDA is available, the CPU otherwise.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Keyword arguments for a DataLoader; they are only populated when a GPU is present.
if use_cuda:
    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 0, 'pin_memory': True}
else:
    params = {}

In [None]:
# Instantiate both halves of the CRNN and move them to the selected device.
# The encoder's output size (CNN_embed_dim) is also the decoder's input size.
cnn_encoder = ResCNNEncoder(
    fc_hidden1=CNN_fc_hidden1,
    fc_hidden2=CNN_fc_hidden2,
    drop_p=dropout_p,
    CNN_embed_dim=CNN_embed_dim,
)
cnn_encoder = cnn_encoder.to(device)

rnn_decoder = DecoderRNN(
    CNN_embed_dim=CNN_embed_dim,
    h_RNN_layers=RNN_hidden_layers,
    h_RNN=RNN_hidden_nodes,
    h_FC_dim=RNN_FC_dim,
    drop_p=dropout_p,
    num_classes=k,
)
rnn_decoder = rnn_decoder.to(device)


In [None]:
# Display the encoder's module structure.
cnn_encoder

In [None]:
# Layer summary of the encoder for an input of size (16, 3, 576, 704)
# (presumably seq_length x channels x height x width, batch dimension excluded - TODO confirm).
summary(cnn_encoder, (16,3,576,704))

In [None]:
# Layer summary of the decoder for an input of size (16, 512)
# (presumably seq_length x CNN_embed_dim - TODO confirm).
summary(rnn_decoder, (16,512))

In [None]:
# Display the installed PyTorch version (same check as near the top of the notebook).
torch.__version__

In [None]:
# Wrap the models for multi-GPU training when possible, then build the optimizer.
n_gpus = torch.cuda.device_count()
if n_gpus > 1:
    print("Using", n_gpus, "GPUs!")
    # The isinstance guards keep the cell re-runnable without double wrapping.
    if not isinstance(cnn_encoder, nn.DataParallel):
        cnn_encoder = nn.DataParallel(cnn_encoder)
    if not isinstance(rnn_decoder, nn.DataParallel):
        rnn_decoder = nn.DataParallel(rnn_decoder)
elif n_gpus == 1:
    print("Using", n_gpus, "GPU!")
else:
    # The original left crnn_params undefined in this case, so the optimizer
    # line below failed with a NameError on CPU-only machines.
    print("No GPU detected, training on CPU")

# DataParallel exposes the wrapped network through .module.
encoder_core = cnn_encoder.module if isinstance(cnn_encoder, nn.DataParallel) else cnn_encoder

# Only the encoder's fc1/bn1/fc2/bn2/fc3 layers plus the whole decoder are handed
# to the optimizer; any other encoder parameters are therefore not updated by it.
crnn_params = list(encoder_core.fc1.parameters()) + list(encoder_core.bn1.parameters()) + \
              list(encoder_core.fc2.parameters()) + list(encoder_core.bn2.parameters()) + \
              list(encoder_core.fc3.parameters()) + list(rnn_decoder.parameters())

optimizer = torch.optim.Adam(crnn_params, lr=learning_rate)

# Training

In [None]:
# Train encoder + decoder end to end with a multi-label (per-class sigmoid) loss.
# NOTE: this overrides the `epochs` value set in the hyper-parameter cell.
epochs = 3
for epoch in range(epochs):
    cnn_encoder.train()
    rnn_decoder.train()

    # Re-created every epoch: after the loop `losses` holds the last epoch only.
    losses = []
    scores = []
    N_count = 0   # number of samples seen so far in this epoch
    for batch_idx, (X, y) in enumerate(loader):
        # Move the batch to the training device.
        X, y = X.to(device), y.to(device)

        # Drop the singleton dimension 1 of the targets (assumes y arrives as
        # (batch, 1, num_classes) - TODO confirm) and cast to float, as required
        # by binary_cross_entropy_with_logits.
        y = y.squeeze(dim=1).float()
        N_count += X.size(0)

        optimizer.zero_grad()
        output = rnn_decoder(cnn_encoder(X))   # raw logits, (batch, number of classes)

        loss = F.binary_cross_entropy_with_logits(output, y)
        losses.append(loss.item())

        loss.backward()
        optimizer.step()

        # Progress report, driven by the configured interval instead of a
        # hard-coded literal.
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(X), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))



In [None]:
from sklearn.metrics import precision_score,f1_score

In [None]:
def pred_acc(original, predicted):
    """Return the number of correctly rounded predictions divided by ``len(original)``.

    Args:
        original: tensor of ground-truth values (0/1).
        predicted: tensor of probabilities with the same shape; values are
            rounded to the nearest integer before the comparison.

    Returns:
        A NumPy scalar. For 1-D inputs this is the accuracy. For 2-D inputs the
        denominator is the size of the first dimension only, so the value is the
        average number of correct labels per row.
    """
    n_correct = torch.round(predicted).eq(original).sum()
    # .cpu() is a no-op for CPU tensors and makes the NumPy conversion work for
    # CUDA tensors, where calling .numpy() directly raises.
    return n_correct.cpu().numpy() / len(original)

In [None]:
# Save the encoder, decoder and optimizer state after training. `epoch` is the
# loop variable left over from the training cell, so the files are tagged with the
# number of the last completed epoch.
# torch.save does not create missing folders, so make sure the target exists first.
os.makedirs(save_model_path, exist_ok=True)
# NOTE: if the models were wrapped in nn.DataParallel, the state-dict keys carry
# a "module." prefix.
torch.save(cnn_encoder.state_dict(),
           os.path.join(save_model_path, 'cnn_encoder_epoch{}.pth'.format(epoch + 1)))  # spatial encoder
torch.save(rnn_decoder.state_dict(),
           os.path.join(save_model_path, 'rnn_decoder_epoch{}.pth'.format(epoch + 1)))  # sequence decoder
torch.save(optimizer.state_dict(),
           os.path.join(save_model_path, 'optimizer_epoch{}.pth'.format(epoch + 1)))  # optimizer

In [None]:
# Plot the per-batch training loss. `losses` is re-created at the start of every
# epoch in the training loop, so only the final epoch's batches are drawn.
plt.figure(figsize=(14,7))
plt.plot(losses)
plt.show()

# Load Validation Data

In [None]:
# Root folder of the extracted video frames for validation.
# NOTE(review): this path differs from the one used for training
# ('/media/hdd/...'). Directory paths are matched verbatim against the `videos`
# column of the CSV, so confirm the CSV entries use this prefix.
root_dir = '/media/scratch/astamoulakatos/nsea_video_jpegs/'
# is_dir has to be *called*: the bare bound method is always truthy, so the
# original filter never excluded regular files.
# NOTE(review): os.scandir yields entries in arbitrary order, yet labels are later
# assigned by position (one_hot_classes[c]) - confirm the directory order really
# matches class_names.
class_paths = [d.path for d in os.scandir(root_dir) if d.is_dir()]

# Same per-frame preprocessing as for training: resize, tensor conversion and
# per-channel normalisation.
transform = transforms.Compose([
    transforms.Resize((576, 704)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Class folder names and, in the same order, their 5-element multi-label targets.
class_names = ['exp_and','exp_fs','exp','exp_fj','bur']
one_hot_classes = [[1,0,1,0,0],[1,0,0,0,1],[1,0,0,0,0],[1,0,0,1,0],[0,1,0,0,0]]

# Table of the videos belonging to the validation split; this rebinds `df`.
df = pd.read_csv('./train-valid-splits-video/valid.csv')


In [None]:
# Batch size for the validation DataLoader (rebinds the training value of `bs`).
bs = 16

In [None]:
# Build the flat (frame_path, multi_label_target) list and the cumulative
# per-video frame offsets for the validation split. This rebinds the variables
# that previously described the training split.
class_image_paths = []
end_idx = []
# Hoisted into a set so the CSV column is not rescanned for every directory entry.
wanted_videos = set(df.videos.values)
for c, class_path in enumerate(class_paths):
    for d in os.scandir(class_path):
        # is_dir must be called; the bare method object is always truthy.
        if d.is_dir() and d.path in wanted_videos:
            frame_paths = sorted(glob.glob(os.path.join(d.path, '*.png')))
            # Attach the target vector of the enclosing class folder to every frame.
            class_image_paths.extend((p, one_hot_classes[c]) for p in frame_paths)
            end_idx.append(len(frame_paths))

# Prepend 0 and accumulate: end_idx[i] / end_idx[i + 1] delimit video i inside
# class_image_paths.
end_idx = torch.cumsum(torch.tensor([0, *end_idx]), 0)

In [None]:
# Sequence length for validation; same value as used for training.
seq_length = 16

In [None]:
# Sampler over the validation offsets; rebinds `sampler` (the training loader
# keeps its own reference to the earlier sampler object).
sampler = MySampler(end_idx, seq_length)

In [None]:
# Validation dataset; rebinds `dataset`. Its reported length is tied to the
# number of items the sampler yields.
dataset = MyDataset(
    image_paths=class_image_paths,
    seq_length=seq_length,
    transform=transform,
    length=len(sampler))

In [None]:
# Validation loader: batches of `bs` items drawn by the custom sampler, loaded in
# the main process; the last incomplete batch is dropped, so those samples are
# not evaluated.
valid_loader = DataLoader(
    dataset,
    batch_size=bs,
    sampler=sampler,
    num_workers=0,
    drop_last = True
)

In [None]:
# Number of validation batches.
len(valid_loader)

# Validation

In [None]:
# Release cached, currently unused GPU memory held by PyTorch's allocator before validating.
torch.cuda.empty_cache()

In [None]:
# Run the model over the validation loader, accumulating the per-batch loss and
# collecting every target row and predicted probability row.
cnn_encoder.eval()
rnn_decoder.eval()

test_loss = 0      # running sum of the per-batch mean losses
all_y = []         # one target tensor per sample
all_y_pred = []    # one probability tensor per sample

with torch.no_grad():
    for X, y in valid_loader:
        # Move the batch to the evaluation device.
        X, y = X.to(device), y.to(device)

        # Drop the singleton dimension 1 of the targets (assumes y arrives as
        # (batch, 1, num_classes) - TODO confirm) and cast to float for the loss.
        y = y.squeeze(dim=1).float()

        output = rnn_decoder(cnn_encoder(X))

        loss = F.binary_cross_entropy_with_logits(output, y)
        test_loss += loss.item()

        # The network emits logits; the sigmoid turns them into per-class probabilities.
        y_pred = output.sigmoid()

        # Store the rows on the CPU so the collected results do not pin GPU
        # memory for the whole validation pass.
        all_y.extend(y.cpu())
        all_y_pred.extend(y_pred.cpu())


In [None]:
# binary_cross_entropy_with_logits already averages within a batch, so test_loss
# is a sum of per-batch means: divide by the number of batches, not by the number
# of samples.
# NOTE: this rescales test_loss in place - re-run the validation loop before
# running this cell a second time.
test_loss /= len(valid_loader)

# all_y / all_y_pred are Python lists of per-sample tensors; they must be stacked
# into (num_samples, num_classes) tensors before any element-wise comparison
# (comparing the lists themselves with 0.5 raises a TypeError).
y_true_all = torch.stack(all_y, dim=0)
y_prob_all = torch.stack(all_y_pred, dim=0)

# Label-wise accuracy: fraction of (sample, class) entries whose thresholded
# probability matches the target.
accuracy = ((y_prob_all > 0.5).byte() == y_true_all.byte()).float().mean()

In [None]:
# Display the validation accuracy computed in the previous cell.
accuracy

In [None]:
# Debug view: sigmoid probabilities of `output`, i.e. the logits left over from
# the most recently processed batch.
output.sigmoid()

In [None]:
# Debug view: shape of the target tensor of the most recently processed batch.
y.shape

In [None]:
# Collect the predicted probability of each of the five outputs, one list per
# output position.
cnn_encoder.eval()
rnn_decoder.eval()

cl1 = []
cl2 = []
cl3 = []
cl4 = []
cl5 = []

with torch.no_grad():
    # NOTE(review): this iterates `loader`, which still refers to the *training*
    # loader; if validation predictions are wanted, use `valid_loader` - confirm intent.
    for X, y in loader:
        # Move the batch to the evaluation device.
        X, y = X.to(device), y.to(device)
        # Cast explicitly instead of y.type_as(output): `output` is only assigned
        # further down, so the old line depended on a value leaked from an
        # earlier cell.
        y = y.squeeze(dim=1).float()

        output = rnn_decoder(cnn_encoder(X))

        # Logits -> probabilities, moved to the CPU as a NumPy array of rows.
        pred = torch.sigmoid(output).cpu().numpy()
        for p in pred:
            cl1.append(p[0])
            cl2.append(p[1])
            cl3.append(p[2])
            cl4.append(p[3])
            cl5.append(p[4])

In [None]:
# Column names follow the layout of one_hot_classes: position 0 is set for every
# 'exp*' class (exposure), position 1 only for 'bur' (burial), position 2 only for
# 'exp_and' (anode), position 3 for 'exp_fj' (field joint) and position 4 for
# 'exp_fs' (free span). The previous mapping had 'anode' and 'exposure' swapped.
# The dict is no longer called `data`, which shadowed the `torch.utils.data as data` import.
pred_columns = {'exposure':cl1, 'burial':cl2, 'anode':cl3, 'field joint':cl4, 'free span':cl5}

df_pred = pd.DataFrame(pred_columns)

In [None]:
# Show the first 20 rows of predicted probabilities.
df_pred.head(20)

In [None]:
# Debug view: raw logits of the most recently processed batch.
output