In [1]:
from imports import *
from Preprocessing_PyTorch import *
from model import *
from utils import *
import warnings
warnings.filterwarnings('ignore')


In [2]:
class Classifier(nn.Module):
    def __init__(self, input_size):
        """
        Initializes the classifier's parameters..
        """
        super().__init__()
        self.input_size = input_size #vocab_size
        self.hidden_dim = Config.HIDDEN_SIZE
        self.output_size = Config.OUTPUT_SIZE
        self.LSTM_layers = Config.LSTM_LAYERS
        self.dropout = nn.Dropout(0.5)
        self.embedding = nn.Embedding(self.input_size, self.hidden_dim, padding_idx=0)
        self.lstm = nn.LSTM(input_size=self.hidden_dim, hidden_size=self.hidden_dim, num_layers=self.LSTM_layers, batch_first=True)
        self.fc1 = nn.Linear(in_features=self.hidden_dim, out_features=128)
        self.fc2 = nn.Linear(128, self.output_size)

    def forward(self, x):
        """
        Forward pass.
        """
        h0 = Variable(torch.zeros(self.LSTM_layers, x.size(0), self.hidden_dim, device=x.device).float())
        c0 = Variable(torch.zeros(self.LSTM_layers, x.size(0), self.hidden_dim, device=x.device).float())
        # h0 = torch.zeros(self.LSTM_layers, x.size(0), self.hidden_dim).float()
        # c0 = torch.zeros(self.LSTM_layers, x.size(0), self.hidden_dim).float()
        out = self.embedding(x)
        out, _ = self.lstm(out, (h0,c0))
        out = torch.relu_(self.fc1(out[:,-1,:]))
        out = torch.sigmoid(self.fc2(out))       
        return out

In [11]:
class Executing:
    """
    The Execution Class
    """
    def __init__(self):
        """
        Initializes the executing class.
        """
        self.batch_size = Config.BATCH_SIZE
        self.epochs = Config.NUM_EPOCHS
        self.lr = Config.LR
        self.metric = Config.METRIC

    def prepare_batches(self):
        """
        Prepares the batches.
        """
        self.X_train, self.X_test, self.y_train, self.y_test = self.df.get_data()
        self.X_train = Variable(torch.tensor(self.X_train, dtype=torch.long))
        self.y_train = Variable(torch.tensor(self.y_train, dtype=torch.float32))
        self.X_test = Variable(torch.tensor(self.X_test, dtype=torch.long))
        self.y_test = Variable(torch.tensor(self.y_test, dtype=torch.float32))
        self.train_dataset = torch.utils.data.TensorDataset(self.X_train, self.y_train)
        self.test_dataset = torch.utils.data.TensorDataset(self.X_test, self.y_test)
        self.train_loader = torch.utils.data.DataLoader(self.train_dataset, batch_size=self.batch_size)
        self.test_loader = torch.utils.data.DataLoader(self.test_dataset, shuffle=True)

    def prepare_data(self):
        """
        Prepares the data.
        """
        start = time.time()
        self.df = Preprocessing()
        self.df.text2seq()
        tokenizer = self.df.get_tokenizer()
        self.input_size = len(tokenizer.word_index) + 1
        self.model = Classifier(self.input_size)
        self.prepare_batches()
        print("Done preparing data, done in {:.2f} seconds".format(time.time() - start))
    
    def get_loaders(self):
        return self.train_loader, self.test_loader, self.model


        
        
    # def get_history(self):
    #     """
    #     Returns the history.
    #     """
    #     return self.history

    # def get_model(self):
    #     """
    #     Returns the model.
    #     """
    #     return self.model

# if __name__ == "__main__":
#     print("Start training")
#     start = time.time()
#     execute = Executing()
#     execute.train()
#     end = time.time()
#     print(f'Time taken: {end-start}')
#     print("Finished training")

In [12]:
print("Start training")
execute = Executing()
execute.prepare_data()
train_loader, test_loader, model = execute.get_loaders()

Start training
Done preprocessing.
Done text2seq.
Done preparing data, done in 307.30 seconds


In [18]:
def on_epoch_start(epoch):
    print(f'Epoch {epoch+1}/{Config.NUM_EPOCHS}')

def METRIC(outputs, labels):
    outputs = outputs > 0.5
    return (outputs.argmax(1) == labels).type(torch.float).sum().item()

def fit(model, train_loader, test_loader, num_epochs=Config.NUM_EPOCHS):
        """
        Trains the model.
        """
        model.to(Config.DEVICE)
        criterion = nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=Config.LR)
        trainSteps = len(train_loader.dataset) // Config.BATCH_SIZE
        testSteps = len(test_loader.dataset) // Config.BATCH_SIZE
        history = []
        for epoch in range(0, num_epochs):
            model.train()
            start = time.time()    
            on_epoch_start(epoch)
            train_loss, train_acc, = 0, 0
            val_loss, val_acc = 0, 0
            for x, y in train_loader:
                x = x.to(Config.DEVICE)
                y = y.to(Config.DEVICE)
                optimizer.zero_grad()
                y_pred = model(x)
                loss = criterion(y_pred, y)
                model.train()
                loss.backward()
                optimizer.step()
                train_loss += loss
                train_acc += METRIC(y_pred, y)
                with torch.no_grad():
                    #model.rnn_layer.train()
                    for x_test, y_test in test_loader:
                        x_test = x_test.to(Config.DEVICE)
                        y_test = y_test.to(Config.DEVICE)
                        model.eval()

                        pred_test = model(x_test)
                        val_acc += METRIC(pred_test, y_test)
                        val_loss += criterion(pred_test, y_test)
            valCorrect = val_acc / len(test_loader.dataset)
            avgValLoss = val_loss / testSteps
            avgTrainLoss = train_loss / trainSteps
            trainCorrect = train_acc / len(train_loader.dataset)
            his = {'train_loss': avgTrainLoss, 'train_accuracy': trainCorrect, 'val_loss': avgValLoss, 'val_accuracy': valCorrect}
            history.append(his)
            on_epoch_end(his)
            print(f'Epoch {epoch+1} done in {time.time() - start:.2f} seconds')
            print("-"*10)

def on_epoch_end(logs):
    """
    Prints the logs.
    """
    print(f'train_loss: {logs["train_loss"]:.2f}, train_accuracy: {logs["train_accuracy"]:.2f}')
    print(f'val_loss: {logs["val_loss"]:.2f}, val_accuracy: {logs["val_accuracy"]:.2f}')

In [19]:
fit(model, train_loader, test_loader)

Epoch 1/10


RuntimeError: cudnn RNN backward can only be called in training mode

: 