In [1]:
from transformers import pipeline
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding
from transformers import AutoModelForSequenceClassification, TrainingArguments
from transformers import PreTrainedModel
from transformers.pipelines.pt_utils import KeyDataset

from tqdm.auto import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import cohen_kappa_score

import pandas as pd
import numpy as np
import logging
from glob import glob
from os import path

from IPython.display import HTML, display

import torch

In [2]:
# Pick the first CUDA GPU when torch can see one; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [44]:
# Integer class id -> human-readable label for the sentence classifier.
category_codes = {
    0: 'Claim',
    1: 'Concluding Statement',
    2: 'Counterclaim',
    3: 'Evidence',
    4: 'Lead',
    5: 'Position',
    6: 'Rebuttal',
}
# Label names ordered by class id, so labels[i] is the name of class i no matter
# in which order the dict entries above happen to be written.
labels = tuple(category_codes[class_id] for class_id in sorted(category_codes))
labels

('Claim',
 'Concluding Statement',
 'Counterclaim',
 'Evidence',
 'Lead',
 'Position',
 'Rebuttal')

In [4]:
# Name of the checkpoint whose tokenizer is loaded.
model_checkpoint = "distilbert-base-uncased"
# use_fast=True asks for the fast tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(
    model_checkpoint,
    use_fast=True,
)

In [5]:
# Location of the fine-tuned checkpoint directory, relative to the working directory.
model_path = r"models_gitignored/distilbert-base-uncased-finetuned-sentence-classification/checkpoint-12626"
# Load the classifier and attach the id -> label-name mapping via id2label.
model = AutoModelForSequenceClassification.from_pretrained(
    model_path,
    id2label=category_codes,
)

In [45]:
# Sample sentence that is tokenized and fed to the classifier.
input_text = "What the dog doing?"

In [46]:
# Tokenize the sample text into PyTorch tensors (return_tensors="pt") and run a
# single forward pass through the classifier.
# NOTE(review): the call is not wrapped in torch.no_grad(), so the resulting
# logits carry a grad_fn; consider no_grad for inference-only use.
encodings = tokenizer(input_text, return_tensors="pt")
outputs = model(**encodings)
outputs

SequenceClassifierOutput(loss=None, logits=tensor([[ 2.6127, -1.7622, -1.3208,  0.4333,  1.0773, -1.0897, -1.2661]],
       grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)

In [57]:
# Raw (unnormalised) class scores; for this single input there is one row of 7 values.
outputs.logits

tensor([[ 2.6127, -1.7622, -1.3208,  0.4333,  1.0773, -1.0897, -1.2661]],
       grad_fn=<AddmmBackward0>)

The following syntax may be confusing. The logits tensor has a gradient attached to it (note the `grad_fn` in the output above), so to get just the values we have to `.detach()` it from the gradient, then index `[0]` because the batch size is 1, and finally look up the index with the highest confidence.

In [56]:
# Scores for the single input sentence: detach from the autograd graph and drop
# the batch dimension (batch size is 1).
sentence_logits = outputs.logits.detach()[0]
# Take the argmax along the class axis of that row. Without `dim`, torch.argmax
# works on the flattened tensor, which is only a valid class id for batch size 1.
highest_conf = torch.argmax(sentence_logits, dim=-1)
# NOTE(review): the value printed as "confidence" is the raw logit, not a
# probability; apply softmax over the class axis if a probability is wanted.
print(f"predicted {labels[highest_conf]} with confidence of {sentence_logits[highest_conf]}")

predicted Claim with confidence of 2.6126956939697266
