In [1]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os, shutil

from config import args
from models.video_net import VideoNet
from data.lrs3_dataset import LRS3Main
from data.utils import collate_fn
from utils.general import num_params, train, evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Non-interactive matplotlib backend.
matplotlib.use("Agg")

# Seed NumPy and PyTorch from the shared configuration value.
np.random.seed(args["SEED"])
torch.manual_seed(args["SEED"])

# Pick the device; the DataLoader worker/pinning options are only set when CUDA is present.
gpuAvailable = torch.cuda.is_available()
if gpuAvailable:
    device = torch.device("cuda")
    kwargs = {"num_workers": args["NUM_WORKERS"], "pin_memory": True}
else:
    device = torch.device("cpu")
    kwargs = {}

# cuDNN flags: deterministic mode on, benchmark mode off.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [7]:
# Video parameters (frame rate taken from the config); displayed as the cell output.
videoParams = dict(videoFPS=args["VIDEO_FPS"])
videoParams

{'videoFPS': 25}

In [16]:
# class LRS2Main(Dataset):

#     """
#     A custom dataset class for the LRS2 main (includes train, val, test) dataset
#     """

#     def __init__(self, dataset, datadir, reqInpLen, charToIx, stepSize, videoParams):
#         super(LRS2Main, self).__init__()
#         with open(datadir + "/" + dataset + ".txt", "r") as f:
#             lines = f.readlines()
#         self.datalist = [datadir + "/main/" + line.strip().split(" ")[0] for line in lines]
#         self.reqInpLen = reqInpLen
#         self.charToIx = charToIx
#         self.dataset = dataset
#         self.stepSize = stepSize
#         self.videoParams = videoParams
#         return


#     def __getitem__(self, index):
#         #using the same procedure as in pretrain dataset class only for the train dataset
#         if self.dataset == "train":
#             base = self.stepSize * np.arange(int(len(self.datalist)/self.stepSize)+1)
#             ixs = base + index
#             ixs = ixs[ixs < len(self.datalist)]
#             index = np.random.choice(ixs)

#         #passing the visual features file and the target file paths to the prepare function to obtain the input tensors
#         visualFeaturesFile = self.datalist[index] + ".npy"
#         targetFile = self.datalist[index] + ".txt"
#         inp, trgt, inpLen, trgtLen = prepare_main_input(visualFeaturesFile, targetFile, self.reqInpLen, self.charToIx, self.videoParams)
#         return inp, trgt, inpLen, trgtLen


#     def __len__(self):
#         #using step size only for train dataset and not for val and test datasets because
#         #the size of val and test datasets is smaller than step size and we generally want to validate and test
#         #on the complete dataset
#         if self.dataset == "train":
#             return self.stepSize
#         else:
#             return len(self.datalist)


In [3]:
# Show the training directory configured in args.
print(args["TRAIN_DIRECTORY"])

../lrs3/train_mini/


In [4]:
# Print whether the configured training directory exists on disk (True/False).
print(os.path.isdir(args["TRAIN_DIRECTORY"]))

True


In [5]:
# Arguments for the training split: the split name plus four values read from the config.
dataset = "train"
datadir, reqInpLen, charToIx, stepSize = (
    args[key]
    for key in ("DATA_DIRECTORY", "MAIN_REQ_INPUT_LENGTH", "CHAR_TO_INDEX", "STEP_SIZE")
)

In [11]:
# Build the training dataset from the arguments prepared above.
trainData = LRS3Main(
    dataset,
    datadir,
    reqInpLen,
    charToIx,
    stepSize,
    videoParams,
)
# Inspect the sample list the dataset collected.
trainData.datalist

['../lrs3/train_mini/00j9bKdiOjk/50001.txt',
 '../lrs3/train_mini/00j9bKdiOjk/50002.txt',
 '../lrs3/train_mini/00j9bKdiOjk/50003.txt',
 '../lrs3/train_mini/0af00UcTOSc/50001.txt',
 '../lrs3/train_mini/0af00UcTOSc/50002.txt',
 '../lrs3/train_mini/0af00UcTOSc/50003.txt',
 '../lrs3/train_mini/0af00UcTOSc/50004.txt',
 '../lrs3/train_mini/0af00UcTOSc/50005.txt',
 '../lrs3/train_mini/0af00UcTOSc/50007.txt',
 '../lrs3/train_mini/0af00UcTOSc/50008.txt',
 '../lrs3/train_mini/0af00UcTOSc/50009.txt',
 '../lrs3/train_mini/0af00UcTOSc/50010.txt',
 '../lrs3/train_mini/0af00UcTOSc/50011.txt',
 '../lrs3/train_mini/0af00UcTOSc/50012.txt',
 '../lrs3/train_mini/0af00UcTOSc/50013.txt',
 '../lrs3/train_mini/0af00UcTOSc/50014.txt',
 '../lrs3/train_mini/0akiEFwtkyA/50001.txt',
 '../lrs3/train_mini/0akiEFwtkyA/50002.txt',
 '../lrs3/train_mini/0Amg53UuRqE/50001.txt',
 '../lrs3/train_mini/0Bhk65bYSI0/50001.txt',
 '../lrs3/train_mini/0Bhk65bYSI0/50002.txt',
 '../lrs3/train_mini/0Bhk65bYSI0/50003.txt',
 '../lrs3/

In [10]:
# Training batches: shuffled, collated with the project's collate_fn,
# plus the worker/pinning options chosen during device setup.
trainLoader = DataLoader(
    dataset=trainData,
    batch_size=args["BATCH_SIZE"],
    shuffle=True,
    collate_fn=collate_fn,
    **kwargs,
)

In [13]:
# Validation dataset: same constructor arguments as the training set, but for the "val" split.
valData = LRS3Main(
    "val",
    args["DATA_DIRECTORY"],
    args["MAIN_REQ_INPUT_LENGTH"],
    args["CHAR_TO_INDEX"],
    args["STEP_SIZE"],
    videoParams,
)

In [14]:
# Validation batches are served in a fixed order (shuffle=False): evaluation does not need
# a randomised order, and not shuffling keeps validation passes repeatable and avoids
# drawing from the global RNG that also drives the training shuffle.
valLoader = DataLoader(valData, batch_size=args["BATCH_SIZE"], collate_fn=collate_fn, shuffle=False, **kwargs)

In [16]:
# Build the video network from the hyperparameters in the config (passed positionally,
# in this order) and move it to the selected device. The last line displays the model.
model = VideoNet(
    *(
        args[key]
        for key in (
            "TX_NUM_FEATURES",
            "TX_ATTENTION_HEADS",
            "TX_NUM_LAYERS",
            "PE_MAX_LENGTH",
            "TX_FEEDFORWARD_DIM",
            "TX_DROPOUT",
            "NUM_CLASSES",
        )
    )
)
model.to(device)

VideoNet(
  (positionalEncoding): PositionalEncoding()
  (videoEncoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=512, bias=True)
        (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, 

In [18]:
# Adam over all model parameters; learning rate and both beta coefficients come from the config.
optimizer = optim.Adam(
    params=model.parameters(),
    lr=args["INIT_LR"],
    betas=(args["MOMENTUM1"], args["MOMENTUM2"]),
)

In [19]:
# Reduce the learning rate when the monitored value (lower is better) stops improving.
# Improvement is judged with an absolute threshold; the rate never drops below FINAL_LR.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode="min",
    factor=args["LR_SCHEDULER_FACTOR"],
    patience=args["LR_SCHEDULER_WAIT"],
    threshold=args["LR_SCHEDULER_THRESH"],
    threshold_mode="abs",
    min_lr=args["FINAL_LR"],
    verbose=True,
)

In [20]:
# CTC loss with index 0 as the blank label; infinite losses are not zeroed out.
loss_function = nn.CTCLoss(
    zero_infinity=False,
    blank=0,
)

In [21]:
# Remove an existing 'checkpoints' directory, after explicit confirmation, so it can be remade.
# Answering "n" aborts by raising SystemExit rather than calling exit(): inside a Jupyter
# kernel exit() requests a kernel shutdown instead of simply stopping this cell, whereas
# SystemExit stops execution here in both a notebook and a plain script.
checkpointsDir = args["CODE_DIRECTORY"] + "/checkpoints"
if os.path.exists(checkpointsDir):
    while True:
        ch = input("Continue and remove the 'checkpoints' directory? y/n: ")
        if ch == "y":
            break
        elif ch == "n":
            raise SystemExit
        else:
            print("Invalid input")
    shutil.rmtree(checkpointsDir)

In [22]:
# Create checkpoints/models and checkpoints/plots under the code directory.
# os.makedirs also creates the parent 'checkpoints' directory, and exist_ok=True lets the
# cell be re-run without raising FileExistsError.
for subdir in ("models", "plots"):
    os.makedirs(args["CODE_DIRECTORY"] + "/checkpoints/" + subdir, exist_ok=True)

In [23]:
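#loading the pretrained weights (currently disabled; the output kept below is from an earlier run that failed with FileNotFoundError because the pretrained model file was not found)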
# if args["PRETRAINED_MODEL_FILE"] is not None:
#     print("\n\nPre-trained Model File: %s" %(args["PRETRAINED_MODEL_FILE"]))
#     print("\nLoading the pre-trained model .... \n")
#     model.load_state_dict(torch.load(args["CODE_DIRECTORY"] + args["PRETRAINED_MODEL_FILE"], map_location=device))
#     model.to(device)
#     print("Loading Done.\n")



Pre-trained Model File: /final/models/pretrained_model.pt

Loading the pre-trained model .... 



FileNotFoundError: [Errno 2] No such file or directory: '../../avsr_lr3//final/models/pretrained_model.pt'