In [20]:
import torch
import torch.nn as nn
import altair as alt
import pandas as pd
import numpy as np
import warnings
import tokenizer
from pathlib import Path
from torch.utils.data import Dataset, DataLoader, random_split
# Install a blanket "ignore" filter: no message/category restriction is given,
# so every Python warning raised after this line is suppressed.
warnings.filterwarnings("ignore")

In [2]:
# Select the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")
# Hand cached, currently unused GPU memory back to the driver.
torch.cuda.empty_cache()


Using device: cuda


In [16]:
from train import DataSetLoader
from config import get_config
from transformer import Transformer
from transformer import TransformerBuilder

In [31]:
def load_model(config):
    """Build the transformer described by ``config`` and restore the newest checkpoint, if any.

    Keys read from ``config``:
        MAX_SUPPORTED_SENTENCE_TOKEN_LENGTH, LEARNING_RATE, EPS, SEED, BATCH_SIZE,
        TRAIN_DIRECTORY, datasource, CHECKPOINT_DIRECTORY, MODEL_DIMENSIONS,
        NUM_ENCODER_BLOCKS, NUM_HEADS, DROPOUT.

    Side effects:
        * creates ``TRAIN_DIRECTORY/datasource`` and its checkpoint sub-directory
          when they do not exist yet,
        * seeds the global torch RNG with ``SEED``,
        * prints progress information to stdout.

    Args:
        config: mapping-like configuration object (indexed with the keys above).

    Returns:
        The model produced by ``TransformerBuilder.build_transformer`` moved to the
        selected device. When the checkpoint directory contains at least one file,
        the weights stored under ``'model_states'`` in the lexicographically last
        file are loaded into it.
    """
    # print some nice looking message
    print("=== SUMMIT Training Process ===\n")

    max_tokens = int(config['MAX_SUPPORTED_SENTENCE_TOKEN_LENGTH'])
    learning_rate = float(config['LEARNING_RATE'])
    eps = float(config['EPS'])
    seed = int(config['SEED'])
    batch_size = int(config['BATCH_SIZE'])

    # folders (exist_ok avoids the separate exists() check and its race window)
    dataset_folder = Path(config["TRAIN_DIRECTORY"]) / Path(config["datasource"])
    dataset_folder.mkdir(parents=True, exist_ok=True)
    print(f"Base directory for model-related data: {str(dataset_folder)}")
    checkpoint_folder = dataset_folder / Path(config["CHECKPOINT_DIRECTORY"])
    checkpoint_folder.mkdir(parents=True, exist_ok=True)
    print(f"Checkpoint directory: {str(checkpoint_folder)}")

    # get device
    print("Checking devices...")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device for training: {device}")

    # fix seed
    print(f"Random seed: {seed}")
    torch.manual_seed(seed)

    # get dataset
    print("Loading dataset...")
    train_ds, validation_ds, test_ds, tokenizer_source, tokenizer_target = DataSetLoader.get_dataset(config)

    # this is the configured limit, not a value measured on the dataset
    print(f"Maximum token length found: {max_tokens}")

    # data points printed are the amount of sentence pairs
    print(f"Train dataset size: {len(train_ds)}")
    print(f"Validation dataset size: {len(validation_ds)}")
    print(f"Test dataset size: {len(test_ds)}\n")

    # print an example entry; clamp the index so a dataset with fewer than
    # 622 entries does not raise IndexError, and skip it for an empty dataset
    if len(train_ds) > 0:
        example_index = min(621, len(train_ds) - 1)
        print(f"Example data entry: {train_ds[example_index]}\n")

    # dataloader
    # NOTE(review): the loaders and the optimizer below are not returned; callers
    # only receive the model. Confirm whether they are still needed here.
    print("Creating dataloaders...")
    train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    validation_dataloader = DataLoader(validation_ds, batch_size=1, shuffle=True)
    test_dataloader = DataLoader(test_ds, batch_size=1, shuffle=True)

    print("Loading model")
    # TODO: make use of different configurations ?????
    model = TransformerBuilder.build_transformer(
        tokenizer_source.get_vocab_size(),
        tokenizer_target.get_vocab_size(),
        max_tokens,
        max_tokens,
        False,
        True,
        config["MODEL_DIMENSIONS"],
        config["NUM_ENCODER_BLOCKS"],
        config["NUM_HEADS"],
        config["DROPOUT"],
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), learning_rate, eps=eps)

    # Only regular files can be checkpoints; a stray sub-directory must not be
    # picked as the "latest" one. Names are compared lexicographically.
    checkpoint_files = sorted(
        (entry for entry in checkpoint_folder.glob('*') if entry.is_file()),
        reverse=True,
    )
    if checkpoint_files:
        latest_checkpoint = checkpoint_files[0]
        print(f"Found latest model at: {latest_checkpoint}")

        # map_location remaps the stored tensors onto the device chosen above, so
        # a checkpoint written on a GPU still loads on a CPU-only machine
        state = torch.load(latest_checkpoint, map_location=device)
        model.load_state_dict(state['model_states'])
        optimizer.load_state_dict(state['optimizer_state'])
        epoch = state['epoch']

        print(f"Successfully loaded existing state, at epoch {epoch}")

    return model

In [None]:
# Read the project configuration, then build the model through load_model(),
# which also restores the newest checkpoint when one is present.
config = get_config()
model = load_model(config)


In [29]:
# Assemble the absolute checkpoint location from the configuration entries,
# anchored at the resolved current working directory.
config = get_config()
file_path = str(
    Path('.').resolve().joinpath(
        config["TRAIN_DIRECTORY"],
        config["datasource"],
        config["CHECKPOINT_DIRECTORY"],
        config["model_name"],
    )
)
print(file_path)


D:\Github\SUMMIT\src\train\opus_books\checkpoints\00


In [15]:
# Load the pretrained checkpoint from disk.
# map_location remaps the stored tensors onto the notebook's `device`, so a
# checkpoint saved on a GPU can still be opened on a CPU-only machine.
state = torch.load(file_path, map_location=device)

In [None]:
# Copy the checkpoint weights into the model.
# NOTE(review): load_model() reads the weights from the 'model_states' key, so
# accept either spelling here — confirm against the code that writes checkpoints.
weights_key = 'model_state_dict' if 'model_state_dict' in state else 'model_states'
model.load_state_dict(state[weights_key])