%env CUDA_LAUNCH_BLOCKING=1

In [0]:
import os
# Set CUDA_LAUNCH_BLOCKING=1 for this process (same value as the %env line
# at the top of the notebook). The setting is meant to make CUDA kernel
# launches synchronous so that a device-side error is reported at the call
# that caused it.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [0]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only helper).
drive.mount('/content/drive')

In [0]:
from torch import cuda
# CUDA diagnostics: current device index, number of devices, device name,
# and whether CUDA is available at all.
# NOTE(review): availability is printed last; the earlier calls presumably
# fail on a machine without a GPU — confirm before running on CPU.
print(cuda.current_device())
print(cuda.device_count())
print(cuda.get_device_name())
print(cuda.is_available())

In [0]:
import torch
def get_device(gpu_no):
    """Return the torch device to compute on.

    When CUDA is available, GPU ``gpu_no`` is made the current CUDA device
    and ``cuda:<gpu_no>`` is returned; otherwise the CPU device is returned.
    """
    if not torch.cuda.is_available():
        return torch.device('cpu')

    torch.cuda.set_device(gpu_no)
    return torch.device('cuda:{}'.format(gpu_no))
  
def to_device(data, device):
    """Move ``data`` to ``device``.

    A list or tuple is processed element by element (recursively) and comes
    back as a list; anything else is moved with
    ``data.to(device, non_blocking=True)``.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [0]:
# Module-level device shared by the cells below: GPU 0 when CUDA is
# available, otherwise the CPU (see get_device).
device = get_device(0)

In [0]:
class DeviceDataLoader:
    """Wrap a data loader so every batch is moved to a device when iterated."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        # Same number of batches as the wrapped loader.
        return len(self.dl)

    def __iter__(self):
        # Batches are transferred one at a time, as the consumer asks for them.
        yield from (to_device(batch, self.device) for batch in self.dl)

In [0]:
# Model definition
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import pdb
from torch import cuda



__all__ = ['bilstm']

class BiLSTM(nn.Module):
    """Sentence-pair classifier: shared embedding + LSTM encoder and an MLP head.

    Premise and hypothesis are encoded by the same embedding, projection and
    3-layer LSTM; the per-token LSTM outputs are summed over the sequence,
    the two sentence vectors are concatenated and fed to the classifier.
    Despite the class name, the LSTM is unidirectional.

    Args:
        options: dict with the keys ``vocab_size``, ``embed_dim``,
            ``dp_ratio``, ``d_hidden`` and ``out_dim``.
    """

    def __init__(self, options):
        super(BiLSTM, self).__init__()
        self.embedding = nn.Embedding(options['vocab_size'], options['embed_dim'])
        self.projection = nn.Linear(options['embed_dim'], 300)
        self.dropout = nn.Dropout(p=options['dp_ratio'])
        # forward() treats dim 1 as the time axis (it sums over it), so the
        # LSTM has to read dim 0 as the batch. Without batch_first=True the
        # recurrence would run across the examples of a batch instead of
        # across the tokens of each sentence.
        self.lstm = nn.LSTM(300, options['d_hidden'], 3, batch_first=True)
        self.relu = nn.ReLU()
        # The classifier input is premise ++ hypothesis, i.e. 2 * d_hidden
        # features (1024 for the default d_hidden of 512).
        combined_dim = 2 * options['d_hidden']
        self.out = nn.Sequential(
            nn.Linear(combined_dim, 1024),
            self.relu,
            self.dropout,
            nn.Linear(1024, 1024),
            self.relu,
            self.dropout,
            nn.Linear(1024, 1024),
            self.relu,
            self.dropout,
            nn.Linear(1024, options['out_dim'])
        )

    def _encode(self, tokens):
        """Turn a (batch, seq) tensor of token ids into a (batch, d_hidden) tensor."""
        embedded = self.embedding(tokens)
        projected = self.relu(self.projection(embedded))
        encoded, _ = self.lstm(projected)
        # Sum the LSTM outputs over the time axis to get one vector per sentence.
        return encoded.sum(dim=1)

    def forward(self, batch):
        """Return class scores of shape (batch, out_dim).

        Args:
            batch: object exposing ``premise`` and ``hypothesis`` tensors of
                token ids, both laid out as (batch, seq).
        """
        premise = self._encode(batch.premise)
        hypothesis = self._encode(batch.hypothesis)
        combined = torch.cat((premise, hypothesis), 1)
        return self.out(combined)

def bilstm(options):
    """Factory: build a ``BiLSTM`` model configured by the ``options`` dict."""
    model = BiLSTM(options)
    return model

In [0]:
import sys
from torchtext import data
from torchtext import datasets

import dill
import pdb

__all__ = ['snli']		


class SNLI():
    """SNLI dataset wrapper: fields, vocabularies and batch iterators.

    Only ``options['batch_size']`` is read from ``options``; the iterators are
    placed on the module-level ``device``.
    """

    def __init__(self, options):
        # One field for the text inputs, one for the (non-sequential) labels.
        self.inputs = data.Field(lower=True, tokenize='spacy', batch_first=True)
        self.answers = data.Field(sequential=False, unk_token=None, is_target=True)

        # Load the train / dev / test splits.
        print("Считываем данные...")
        splits = datasets.SNLI.splits(self.inputs, self.answers)
        self.train, self.dev, self.test = splits
        print("Считали! Продолжаем.")

        # Build the input vocabulary from train + dev and the label
        # vocabulary from train only.
        self.inputs.build_vocab(self.train, self.dev)
        self.answers.build_vocab(self.train)

        # One batch iterator per split.
        iterators = data.Iterator.splits(
            (self.train, self.dev, self.test),
            batch_size=options['batch_size'],
            device=device,
        )
        self.train_iter, self.dev_iter, self.test_iter = iterators

    def vocab_size(self):
        """Number of entries in the input vocabulary."""
        input_vocab = self.inputs.vocab
        return len(input_vocab)

    def out_dim(self):
        """Number of entries in the label vocabulary."""
        label_vocab = self.answers.vocab
        return len(label_vocab)

    def labels(self):
        """The label vocabulary's ``stoi`` mapping."""
        label_vocab = self.answers.vocab
        return label_vocab.stoi

def snli(options):
    """Factory: build the ``SNLI`` dataset wrapper configured by ``options``."""
    wrapper = SNLI(options)
    return wrapper

In [0]:
import torch
from argparse import ArgumentParser

def training_params():
    """Parse the command-line options used for training.

    Options are read from ``sys.argv``.

    Returns:
        argparse.Namespace with ``dataset``, ``model``, ``gpu``,
        ``batch_size``, ``embed_dim``, ``d_hidden``, ``dp_ratio``,
        ``epochs``, ``lr``, ``combine`` and ``save_model``.
    """
    parser = ArgumentParser(description='Параметры для обучения')
    parser.add_argument('--dataset', type=str, default='snli')
    parser.add_argument('--model', type=str, default='bilstm')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--embed_dim', type=int, default=300)
    parser.add_argument('--d_hidden', type=int, default=512)
    # The dropout probability is fractional: with type=int a value such as
    # "0.3" given on the command line was rejected by argparse.
    parser.add_argument('--dp_ratio', type=float, default=0.2)
    parser.add_argument('--epochs', type=int, default=50)
    parser.add_argument('--lr', type=float, default=.001)
    parser.add_argument('--combine', type=str, default='cat')
    # NOTE: store_false with default=True means that passing --save_model
    # turns saving OFF. Kept as is so existing invocations keep their meaning.
    parser.add_argument('--save_model', action='store_false', default=True)
    return parser.parse_args()

def evaluate_params():
    """Parse the command-line options used for evaluation.

    Options are read from ``sys.argv``; the result is an argparse.Namespace
    with ``dataset``, ``model``, ``gpu``, ``batch_size`` and ``save_path``.
    """
    parser = ArgumentParser(description='Подсчёт качества на валидационной выборке')
    # (flag, type, default) for every supported option, in registration order.
    option_specs = (
        ('--dataset', str, 'snli'),
        ('--model', str, 'bilstm'),
        ('--gpu', int, 0),
        ('--batch_size', int, 128),
        ('--save_path', str, "save/bilstm-snli-model.pt"),
    )
    for flag, kind, fallback in option_specs:
        parser.add_argument(flag, type=kind, default=fallback)
    return parser.parse_args()

def get_args(mode):
    """Return the parsed options for ``mode``.

    ``"train"`` yields the training options, ``"evaluate"`` the evaluation
    options; any other value yields ``None``.
    """
    if mode == "train":
        return training_params()
    if mode == "evaluate":
        return evaluate_params()
    return None

In [0]:
# Build the SNLI dataset once at module level; Train picks up this object.
# NOTE(review): SNLI.__init__ only reads options['batch_size'] — the 'device'
# entry here is not consulted, the module-level `device` is used instead.
dataset_options = {'batch_size': 128, 'device': 0}
dataset = snli(dataset_options)

In [0]:
import os
import sys

import torch
import torch.optim as O
import torch.nn as nn


import datetime
import pdb
import torch.nn.functional as F
from tqdm import tqdm

from prettytable import PrettyTable

import time

class Train():
    """Train and validate the model, keeping the best checkpoint on disk.

    Relies on names defined elsewhere in this file: ``get_args``, ``bilstm``
    and the module-level ``device`` and ``dataset`` objects.
    """

    def __init__(self):
        print("Началось выполнение обучения: {}".format(datetime.datetime.now()))
        self.args = get_args("train")
        # The module-level device is used; args.gpu (the GPU index) is not
        # consulted here.
        self.device = device

        self.dataset_options = {
            'batch_size': self.args.batch_size,  # 128 by default
            'device': self.device,
        }
        self.dataset = dataset

        # The embedding table must cover every token index the dataset can
        # produce; a smaller hard-coded size makes the embedding lookup go
        # out of range. The remaining hyperparameters come from the parsed
        # arguments, whose defaults are 300 / 0.2 / 512.
        self.model_options = {
            'vocab_size': self.dataset.vocab_size(),
            'embed_dim': self.args.embed_dim,
            'out_dim': self.dataset.out_dim(),
            'dp_ratio': self.args.dp_ratio,
            'd_hidden': self.args.d_hidden,
        }
        self.model = bilstm(self.model_options)
        print(self.device)
        self.model.to(self.device)
        # reduction='sum' so that dividing by the number of examples below
        # gives a per-example loss.
        self.criterion = nn.CrossEntropyLoss(reduction='sum')
        self.opt = O.Adam(self.model.parameters(), lr=self.args.lr)
        self.best_accuracy = -1
        print("resource preparation done: {}".format(datetime.datetime.now()))

    def save_model(self, current_accuracy):
        """Write a checkpoint when ``current_accuracy`` beats the best so far.

        The checkpoint goes to ``save/<model>-<dataset>-model.pt`` and holds
        the accuracy, the model options and the model state dict.
        """
        if current_accuracy > self.best_accuracy:
            self.best_accuracy = current_accuracy
            # torch.save does not create missing directories.
            os.makedirs('save', exist_ok=True)
            torch.save({
                'accuracy': self.best_accuracy,
                'options': self.model_options,
                'model_dict': self.model.state_dict(),
            }, 'save/' + "{}-{}-model.pt".format(self.args.model, self.args.dataset))

    def _score(self, batch):
        """Run the model on one batch; return (summed loss tensor, number of correct predictions)."""
        answer = self.model(batch)
        loss = self.criterion(answer, batch.label)
        predicted = torch.max(answer, 1)[1].view(batch.label.size())
        return loss, (predicted == batch.label).sum().item()

    def train(self):
        """Run one training epoch; return (mean loss per example, accuracy in percent)."""
        self.model.train()
        train_iter = self.dataset.train_iter
        train_iter.init_epoch()
        n_batches = len(train_iter)
        n_correct, n_total, n_loss = 0, 0, 0
        for batch_idx, batch in enumerate(train_iter):
            print("Номер батча:", batch_idx, "Всего батчей:", n_batches)
            self.opt.zero_grad()
            loss, correct = self._score(batch)

            n_correct += correct
            n_total += batch.batch_size
            n_loss += loss.item()

            loss.backward()
            self.opt.step()
        train_loss = n_loss / n_total
        train_acc = 100. * n_correct / n_total
        return train_loss, train_acc

    def validate(self):
        """Evaluate on the dev split; return (mean loss per example, accuracy in percent)."""
        self.model.eval()
        self.dataset.dev_iter.init_epoch()
        n_correct, n_total, n_loss = 0, 0, 0
        with torch.no_grad():
            for batch in self.dataset.dev_iter:
                loss, correct = self._score(batch)

                n_correct += correct
                n_total += batch.batch_size
                n_loss += loss.item()

        val_loss = n_loss / n_total
        val_acc = 100. * n_correct / n_total
        return val_loss, val_acc

    def execute(self):
        """Train for ``args.epochs`` epochs, validating and reporting after each one."""
        for epoch in range(self.args.epochs):
            start = time.time()
            train_loss, train_acc = self.train()
            val_loss, val_acc = self.validate()
            if self.args.save_model:
                self.save_model(val_acc)
            print("time taken: {}   epoch: {}   Training loss: {}   Training Accuracy: {}   Validation loss: {}   Validation Accuracy: {}".format(
                round(time.time()-start, 2), epoch, round(train_loss, 3), round(train_acc, 3), round(val_loss, 3), round(val_acc, 3)
            ))

# argparse reads sys.argv, so give it exactly the options this run needs.
# Replacing the whole argument list (instead of assigning to indices 1 and 2)
# works when sys.argv is shorter than three entries and drops any unrelated
# trailing entries that argparse would reject.
sys.argv = sys.argv[:1] + ["--dataset=snli", "--model=bilstm"]

task = Train()
task.execute()

In [0]:
!nvidia-smi

In [0]:
import os
# CUDA_VISIBLE_DEVICES may be unset; .get() prints None in that case instead
# of raising KeyError.
print(os.environ.get('CUDA_VISIBLE_DEVICES'))

In [0]:
import os
# dict.has_key() does not exist in Python 3; `in` is the membership test and
# works on os.environ in both Python 2 and 3.
print('CUDA_VISIBLE_DEVICES' in os.environ)