In [45]:
from torch import nn
from transformers import AutoModel

class EssayDisectionModel(nn.Module):
    def __init__(self, num_labels=7, hidden_size=768):
        super(EssayDisectionModel, self).__init__()
        
        self.backbone = AutoModel.from_pretrained(
        'allenai/longformer-base-4096',
        num_labels=num_labels
        )
        
        self.dense1 = nn.Linear(hidden_size, 256)
        self.relu = nn.ReLU()
        self.dense2 = nn.Linear(256, num_labels)

    def forward(self, input_ids, attention_mask):
        backbone_output = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        
        x = self.dense1(backbone_output[0])
        x = self.relu(x)
        x = self.dense2(x)
        
        return x


In [46]:
import torch
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096')

labels = ['Claim', 'Evidence', 'Concluding Statement', 'Rebuttal', 'Position','Counterclaim', 'Lead']

model = torch.load('../data/model.pth')
model.to('mps')
model.eval()

def pad_sequence(sequence, max_len, padding_value=0):
    if len(sequence) < max_len:
        return sequence + [padding_value] * (max_len - len(sequence))
    else:
        return sequence[:max_len]


def predict(text):

    encoded = tokenizer(text, max_length=205, padding='max_length')
    padded_input = encoded['input_ids']
    mask = encoded['attention_mask']

    padded_input_tensor = torch.tensor(padded_input, dtype=torch.long).to('mps')
    mask_tensor = torch.tensor(mask, dtype=torch.long).to('mps')

    padded_input_batch = padded_input_tensor.unsqueeze(0)
    mask_batch = mask_tensor.unsqueeze(0)

    with torch.no_grad():
        logits = model(padded_input_batch, mask_batch)

    predictions = torch.argmax(logits, dim=-1)

    relevant_predictions = predictions[0][mask_tensor.bool()].cpu().numpy()

    token_to_word = encoded.words()[:len(relevant_predictions)]

    word_predictions = []

    for idx, label in zip(token_to_word, relevant_predictions):
        if idx and idx >= len(word_predictions):
            word_predictions.append(label)

    return word_predictions


def visualize(text, predictions):
    words = text.split()
    tags = [labels[i] for i in predictions]

    tag_dict = {}
    current_tag = None
    sequence_number = {}

    for word, tag in zip(words, tags):
        # If the tag changes, reset the current tag and increment sequence number
        if tag != current_tag:
            current_tag = tag
            sequence_number[tag] = sequence_number.get(tag, 0) + 1
            key = f"{tag}_{sequence_number[tag]}"
            tag_dict[key] = []

        # Add the word to the current sequence
        key = f"{tag}_{sequence_number[tag]}"
        tag_dict[key].append(word)

    return tag_dict

def inference(text):
    predictions = predict(text)
    tag_dict = visualize(text, predictions)
    return tag_dict

text = "Sample text from an essay..."
prediction = predict(text)
print("Predicted label:", prediction)
visualize(text, prediction)


Predicted label: [6, 6, 6, 6, 6]


{'Lead_1': ['Sample', 'text', 'from', 'an', 'essay...']}

In [47]:
def read_text_from_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()
    return text

file_path = '../data/feedback-prize-2021/test/0FB0700DAF44.txt'

text = read_text_from_file(file_path)

result = inference(text)

for key, value in result.items():
    print(key, ':')
    print(' '.join(value))
    print()

Lead_1 :
During a group project, have you ever asked a group member about adding or replacing something? Or, when you were studying for a math test, did you ever ask your parents or sibling about different ways to tackle a certain

Position_1 :
problem? Asking for other's opinions is especially

Claim_1 :
beneficial as it allows for an individual to receive a variety of different views towards a given topic. Likewise, being diverse and asking many people for their opinions allows one to understand how most people percieve something. This is especially important as knowing multiple opinions can allow

Evidence_1 :
someone to take those views into account and sway themseleves to the general audience. Knowing different people's

Claim_2 :
opinion can

Position_2 :
be beneficial in a variety of situations. First and foremost, a great example about how knowing other's

Concluding Statement_1 :
opinions is helpful is

Evidence_2 :
when someone is making the choice between smoking or

Conclud



In [33]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('allenai/longformer-base-4096', use_fast=True)

tokenized_input = tokenizer("Sample text from an essay...", max_length=205, padding='max_length')

print(tokenized_input)

{'input_ids': [0, 47241, 2788, 31, 41, 14700, 734, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,