In [1]:
from imports import *
from Preprocessing_PyTorch import *
from model import *
from utils import *
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class Classifier2(nn.Module):
    """Embedding -> LSTM -> two-layer head producing sigmoid probabilities.

    Layer sizes are read from ``Config`` (``HIDDEN_SIZE``, ``OUTPUT_SIZE``,
    ``LSTM_LAYERS``). Index 0 of the vocabulary is treated as padding
    (``padding_idx=0`` on the embedding).

    Args:
        input_size: Number of rows in the embedding table (vocabulary size).
    """

    def __init__(self, input_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_dim = Config.HIDDEN_SIZE
        self.output_size = Config.OUTPUT_SIZE
        self.LSTM_layers = Config.LSTM_LAYERS
        self.dropout = nn.Dropout(0.5)
        self.embedding = nn.Embedding(self.input_size, self.hidden_dim, padding_idx=0)
        self.lstm = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.LSTM_layers,
            batch_first=True,
        )
        self.fc1 = nn.Linear(in_features=self.hidden_dim, out_features=16)
        self.fc2 = nn.Linear(16, self.output_size)

    def forward(self, x):
        """Return sigmoid probabilities for a batch of token-id sequences.

        Args:
            x: Integer tensor of shape ``(batch, seq_len)``; entries equal to
                the embedding's ``padding_idx`` are treated as padding.

        Returns:
            Tensor of shape ``(batch, output_size)`` with values in ``[0, 1]``.
        """
        embedded = self.embedding(x)
        # nn.LSTM initialises (h0, c0) to zeros when no state is passed, which
        # is exactly what the explicit zero tensors used to provide.
        seq_out, _ = self.lstm(embedded)

        # Pick the LSTM output at the last *real* token of every row. Reading
        # the final time step instead would, for right-padded inputs, feed the
        # classifier a state produced after a long run of padding tokens,
        # which is almost identical for every sample.
        pad = self.embedding.padding_idx
        steps = torch.arange(x.size(1), device=x.device)
        # Index of the last non-padding position per row (0 for all-pad rows).
        last_step = ((x != pad) * steps).max(dim=1).values
        rows = torch.arange(x.size(0), device=x.device)
        final = seq_out[rows, last_step]

        final = self.dropout(final)
        hidden = torch.relu(self.fc1(final))
        hidden = self.dropout(hidden)
        return torch.sigmoid(self.fc2(hidden))

In [6]:
def METRIC(outputs, labels):
    """Fraction of predictions that match the labels at a 0.5 threshold.

    Args:
        outputs: Tensor of predicted probabilities.
        labels: Tensor of 0/1 targets holding the same number of elements as
            ``outputs`` (e.g. ``(B,)`` labels with ``(B, 1)`` outputs).

    Returns:
        Python float: matching elements divided by the number of labels.
    """
    predictions = outputs > 0.5
    # A (B, 1) prediction compared against (B,) labels would broadcast to a
    # (B, B) matrix and produce a meaningless count, so align shapes first.
    if predictions.shape != labels.shape and predictions.numel() == labels.numel():
        predictions = predictions.reshape(labels.shape)
    return (labels == predictions).sum().item() / labels.numel()

class Executing:
    """Training / evaluation driver for the text classifier.

    Hyper-parameters come from ``Config`` (``BATCH_SIZE``, ``NUM_EPOCHS``,
    ``LR``, ``DEVICE``).

    Args:
        preprocessor: Object exposing ``get_data()``, ``get_tokenizer()`` and
            ``text2seq(...)``. When omitted, the notebook-level ``df`` is
            looked up the first time it is needed, which is the object the
            previous implementation read directly.
        model_cls: Callable ``model_cls(input_size)`` building the model.
            Defaults to ``Classifier``.
    """

    def __init__(self, preprocessor=None, model_cls=None):
        self.batch_size = Config.BATCH_SIZE
        self.epochs = Config.NUM_EPOCHS
        self.lr = Config.LR
        self.df = preprocessor
        self.model_cls = model_cls
        self.model = None
        self.history = []

    def _get_preprocessor(self):
        """Return the preprocessing object, resolving the global ``df`` lazily."""
        if getattr(self, "df", None) is None:
            # Deferred lookup: raises NameError only if no preprocessor was
            # passed in AND the notebook never defined `df`.
            self.df = df
        return self.df

    @staticmethod
    def _match_shape(target, prediction):
        """Reshape ``target`` to ``prediction``'s shape when only the layout differs."""
        if target.shape != prediction.shape and target.numel() == prediction.numel():
            return target.reshape(prediction.shape)
        return target

    def on_epoch_start(self, epoch):
        """Print the 1-based epoch banner."""
        print(f'Epoch {epoch+1}/{self.epochs}')

    def prepare_batches(self):
        """Convert the train/test split to tensors and build the data loaders."""
        X_train, X_test, y_train, y_test = self._get_preprocessor().get_data()

        self.X_train = torch.tensor(X_train, dtype=torch.long)
        self.y_train = torch.tensor(y_train, dtype=torch.float32)
        self.X_test = torch.tensor(X_test, dtype=torch.long)
        self.y_test = torch.tensor(y_test, dtype=torch.float32)
        self.train_dataset = torch.utils.data.TensorDataset(self.X_train, self.y_train)
        self.test_dataset = torch.utils.data.TensorDataset(self.X_test, self.y_test)
        self.train_loader = torch.utils.data.DataLoader(
            self.train_dataset, batch_size=self.batch_size, shuffle=True
        )
        # Evaluation is batched (DataLoader defaults to batch_size=1, i.e. one
        # forward pass per sample) and unshuffled, since order is irrelevant
        # for averaged metrics.
        self.test_loader = torch.utils.data.DataLoader(
            self.test_dataset, batch_size=self.batch_size, shuffle=False
        )

    def prepare_data(self):
        """Build the model from the tokenizer's vocabulary size, then the loaders."""
        tokenizer = self._get_preprocessor().get_tokenizer()
        # +1 because the embedding table needs one more row than there are
        # entries in word_index.
        self.input_size = len(tokenizer.word_index) + 1
        model_cls = getattr(self, "model_cls", None)
        if model_cls is None:
            model_cls = Classifier
        self.model = model_cls(self.input_size)
        self.prepare_batches()
        print("Done preparing data")

    def fit(self):
        """Train for ``self.epochs`` epochs, evaluating after each one.

        Per-epoch mean train/validation loss and accuracy are appended to
        ``self.history`` and printed through ``on_epoch_end``.
        """
        self.prepare_data()
        self.model.to(Config.DEVICE)
        self.criterion = nn.BCELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        self.history = []
        for epoch in range(self.epochs):
            self.on_epoch_start(epoch)
            self.model.train()
            train_loss, train_acc = [], []
            for x, y in self.train_loader:
                x = x.to(Config.DEVICE)
                y = y.to(Config.DEVICE)
                self.optimizer.zero_grad()
                y_pred = self.model(x)
                # BCELoss requires identically shaped input and target.
                y = self._match_shape(y, y_pred)
                loss = self.criterion(y_pred, y)
                loss.backward()
                self.optimizer.step()
                train_loss.append(loss.item())
                train_acc.append(METRIC(y_pred.detach(), y))

            val_acc, val_loss = self.eval()
            his = {
                'train_loss': np.mean(train_loss),
                'train_accuracy': np.mean(train_acc),
                'val_loss': np.mean(val_loss),
                'val_accuracy': np.mean(val_acc),
            }
            self.history.append(his)
            self.on_epoch_end(his)

    def eval(self):
        """Evaluate on the test loader.

        Returns:
            ``(val_acc, val_loss)``: two lists with one entry per batch.
        """
        self.model.eval()
        criterion = getattr(self, "criterion", None)
        if criterion is None:
            # Allows calling eval() on a model that was not trained via fit().
            criterion = nn.BCELoss()
        val_loss, val_acc = [], []
        with torch.no_grad():
            for x, y in self.test_loader:
                x = x.to(Config.DEVICE)
                y = y.to(Config.DEVICE)
                y_pred = self.model(x)
                y = self._match_shape(y, y_pred)
                val_acc.append(METRIC(y_pred, y))
                val_loss.append(criterion(y_pred, y).item())
        return val_acc, val_loss

    def predict(self, text):
        """Return the model's output for a single text as a Python float.

        NOTE(review): relies on the preprocessor's ``text2seq`` accepting a
        text argument and returning token ids for it; elsewhere in the
        notebook it is only called without arguments, so confirm.
        """
        self.model.eval()
        sequence = self._get_preprocessor().text2seq(text)
        x = torch.as_tensor(sequence, dtype=torch.long)
        if x.dim() == 1:
            # The model consumes (batch, seq_len); add the batch axis.
            x = x.unsqueeze(0)
        x = x.to(Config.DEVICE)
        with torch.no_grad():
            y_pred = self.model(x)
        return y_pred.item()

    def on_epoch_end(self, logs):
        """Print the epoch's train and validation metrics."""
        print(f'train_loss: {logs["train_loss"]:.2f}, train_accuracy: {logs["train_accuracy"]:.2f}')
        print(f'val_loss: {logs["val_loss"]:.2f}, val_accuracy: {logs["val_accuracy"]:.2f}')
        print("-"*10)

    def get_history(self):
        """Return the list of per-epoch metric dicts (empty before ``fit``)."""
        return self.history

    def get_model(self):
        """Return the model (``None`` until ``prepare_data``/``fit`` has run)."""
        return self.model

# if __name__ == "__main__":
#     print("Start training")
#     start = time.time()
#     execute = Executing()
#     execute.fit()
#     end = time.time()
#     print(f'Time taken: {end-start}')
#     print("Finished training")

In [4]:
# Notebook-level preprocessing object. Executing.prepare_data() and
# Executing.prepare_batches() read this global `df`, so this cell must run
# before training is started.
df = Preprocessing()
# Run the text-to-sequence step on the preprocessor (called with no arguments).
df.text2seq()

Done preprocessing.
Done text2seq.


In [8]:
# Unpack the four-way train/test split from the preprocessor for inspection.
X_train, X_test, y_train, y_test = df.get_data()

In [10]:
# Display the training matrix; rows in the output end in zeros, which looks
# like right-padding with index 0 — TODO confirm against Preprocessing.
X_train

array([[  184,     9,     7, ...,     0,     0,     0],
       [  785,  2907,   455, ...,     0,     0,     0],
       [13985,   616,     7, ...,     0,     0,     0],
       ...,
       [  652,  3469,     1, ...,     0,     0,     0],
       [ 9153,     1,  2496, ...,     0,     0,     0],
       [   36,   254,   557, ...,     0,     0,     0]])

In [7]:
# Train the model end to end and report the wall-clock duration.
# Requires the notebook-level `df` to exist (Executing reads it).
print("Start training")
start = time.time()
execute = Executing()
# fit() prepares data, builds the model, and runs the training epochs.
execute.fit()
end = time.time()
# Elapsed time in seconds (difference of two time.time() readings).
print(f'Time taken: {end-start}')
print("Finished training")

Start training
Done preparing data
256
Epoch 1/3
train_loss: 0.69, train_accuracy: 0.50
val_loss: 0.69, val_accuracy: 0.50
----------
Epoch 2/3
train_loss: 0.69, train_accuracy: 0.50
val_loss: 0.69, val_accuracy: 0.50
----------
Epoch 3/3


KeyboardInterrupt: 