In [None]:
import pandas as pd
import torch
import torch.nn.functional as F
import torchtext
# !pip install torchtext==0.10.0
import torchtext.legacy
import random

In [None]:
# Raw CSV as a DataFrame (not referenced by the other cells visible here).
df = pd.read_csv("filename.csv")

# Field for the free-text column: tokenized with spaCy using the
# `en_core_web_sm` English pipeline.
TEXT = torchtext.legacy.data.Field(
    tokenize='spacy',
    tokenizer_language='en_core_web_sm',
)

# Field for the target column; labels are numericalized as int64 tensors.
LABEL = torchtext.legacy.data.LabelField(
    dtype=torch.long,
)

In [None]:
# (attribute name, Field) pairs used to parse each CSV row.
# NOTE(review): with a list of fields torchtext appears to map them to the CSV
# columns positionally -- confirm the file's column order is
# sentiment, lemma_str.
fields = [
    ('sentiment', LABEL),
    ('lemma_str', TEXT),
]

# Parse the CSV (header row skipped) into a torchtext dataset.
dataset = torchtext.legacy.data.TabularDataset(
    path='filename.csv',
    format='csv',
    skip_header=True,
    fields=fields,
)

In [None]:
RANDOM_SEED = 123

# `random.seed()` returns None, so passing its result as `random_state` never
# handed an RNG state to `split`. Seed the global generator first and pass the
# resulting state explicitly, which is what `random_state` is documented to
# accept (a `random.getstate()` tuple).
random.seed(RANDOM_SEED)
split_state = random.getstate()

# 80% train+validation / 20% test.
train_data, test_data = dataset.split(
    split_ratio=[0.8, 0.2],
    random_state=split_state,
)

# Carve the validation set out of the training portion (85% / 15%),
# starting from the same seeded state as the first split.
train_data, valid_data = train_data.split(
    split_ratio=[0.85, 0.15],
    random_state=split_state,
)

In [None]:
VOCABULARY_SIZE = 5000

# Both vocabularies are built from the training split only.
# Tokens must occur at least `min_freq` times, and the vocabulary is capped
# via `max_size`.
TEXT.build_vocab(
    train_data,
    min_freq=4,
    max_size=VOCABULARY_SIZE,
)
LABEL.build_vocab(train_data)

In [None]:
# Optimisation settings.
LEARNING_RATE = 0.005
BATCH_SIZE = 16
NUM_EPOCHS = 15

# Model dimensions.
EMBEDDING_DIM = 300
HIDDEN_DIM = 128
# NOTE(review): hard-coded; presumably should equal the number of distinct
# labels in the dataset -- confirm against the label vocabulary.
NUM_CLASSES = 13

# Keep preferring the second GPU ('cuda:1') as before, but only when it
# actually exists. Previously any CUDA machine selected 'cuda:1', which is an
# invalid device on single-GPU hosts. Fall back to the first GPU, then to CPU.
if torch.cuda.device_count() > 1:
    DEVICE = torch.device('cuda:1')
elif torch.cuda.is_available():
    DEVICE = torch.device('cuda:0')
else:
    DEVICE = torch.device('cpu')

In [None]:
# One iterator per split, all sharing the same batch size and target device.
# `sort_key` gives the iterator the token count of each example's `lemma_str`;
# examples are not re-sorted inside a batch.
train_loader, valid_loader, test_loader = torchtext.legacy.data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    sort_key=lambda example: len(example.lemma_str),
    sort_within_batch=False,
    device=DEVICE,
)

In [None]:
#Not the actual model, just a simple model for testing
class RNN(torch.nn.Module):
    """Minimal embedding -> LSTM -> linear classifier.

    Args:
        input_dim: Number of rows in the embedding table.
        embedding_dim: Size of each embedding vector (LSTM input size).
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of scores produced by the final linear layer.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        nn = torch.nn
        self.embedding = nn.Embedding(
            num_embeddings=input_dim,
            embedding_dim=embedding_dim,
        )
        self.rnn = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text):
        """Return one score vector per sequence in ``text``.

        Args:
            text: Integer tensor of token indices.
                NOTE(review): presumably shaped (seq_len, batch), since the
                LSTM is built without ``batch_first`` -- confirm with caller.

        Returns:
            The linear layer applied to the LSTM's final hidden state, with
            the leading size-1 dimension of that state removed.
        """
        token_vectors = self.embedding(text)
        # Per-step outputs and the cell state are discarded; only the final
        # hidden state feeds the classifier.
        _, (final_hidden, _) = self.rnn(token_vectors)
        return self.fc(final_hidden.squeeze(0))

In [None]:
# Seed torch before constructing the model so that weight initialisation is
# reproducible.
torch.manual_seed(RANDOM_SEED)
model = RNN(
    input_dim=len(TEXT.vocab),
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=NUM_CLASSES,
)

model = model.to(DEVICE)

# Use the shared LEARNING_RATE constant instead of repeating the literal
# 0.005, so that changing the hyperparameter actually affects the optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Training
for epoch in range(NUM_EPOCHS):
    # Set training mode explicitly at the start of every epoch. Without this,
    # re-running the cell after any `model.eval()` call elsewhere in the
    # notebook would silently train in evaluation mode.
    model.train()

    for batch_idx, batch in enumerate(train_loader):
        # Token indices and integer class labels for this mini-batch.
        data = batch.lemma_str.to(device=DEVICE)
        targets = batch.sentiment.to(device=DEVICE)

        # Forward pass: raw class scores, then cross-entropy against targets.
        scores = model(data)
        loss = F.cross_entropy(scores, targets)

        # Clear stale gradients, backpropagate, and update the parameters.
        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

    print(f"Epoch {epoch} is done")