In [1]:
import torch
from torch.optim import AdamW
from transformers import (AutoTokenizer,
                          AutoConfig,
                          AutoModelForSequenceClassification, 
                          get_linear_schedule_with_warmup,)
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

In [2]:
def one_hot_binary(label):
    a = [0, 0]
    a[label] = 1
    return torch.tensor(a).to(torch.float)

def one_hot_multiclass(label):
    a = [0, 0, 0, 0]
    a[label] = 1
    return torch.tensor(a).to(torch.float)

In [3]:
def load_hope_data(data_file, label_map, classification_type):
    df = pd.read_csv(data_file)
    texts = df['text'].tolist()
    labels = [label_map[sentiment] for sentiment in df[classification_type].tolist()]
    return texts, labels

In [4]:
polyhope_binary_labels = {'Hope':1, 'Not Hope':0}
polyhope_inv_binary_labels = {v: k for k, v in polyhope_binary_labels.items()}

binary_polyhope = 'binary'
multi_polyhope = 'multiclass'

binary_classes = 2
multi_classes = 4

polyhope_multi_labels = {'Not Hope':0, 'Generalized Hope':1, 'Realistic Hope':2, 'Unrealistic Hope':3}
polyhope_inv_multi_labels = {v: k for k, v in polyhope_multi_labels.items()}

train_file = "../data/train/train_polyhope_english.csv"
val_file = "../data/val/val_polyhope_english.csv"

train_texts, train_labels = load_hope_data(train_file, polyhope_multi_labels, multi_polyhope)
val_texts, val_labels = load_hope_data(val_file, polyhope_multi_labels, multi_polyhope)

In [5]:
class TextClassificationDataset(Dataset):
	def __init__(self, texts, labels,tokenizer, max_length):
		self.texts = texts
		self.labels = labels
		self.tokenizer = tokenizer
		self.max_length = max_length

	def __len__(self):
		return len(self.texts)

	def __getitem__(self, idx):
		text = self.texts[idx]
		label = one_hot_multiclass(self.labels[idx])
		encoding = self.tokenizer(text, return_tensors='pt', max_length=self.max_length, padding='max_length', truncation=True)
		return {'input_ids': encoding['input_ids'].flatten(), 'attention_mask': encoding['attention_mask'].flatten(), 'label': label}

In [6]:
def train(model, data_loader, optimizer, scheduler, device):
    model.train()
    for batch in tqdm(data_loader):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = nn.BCEWithLogitsLoss()(outputs.logits, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

In [7]:
def evaluate(model, data_loader, device):
    a = []
    for i in range(16):
        a.append([0, 1, 2, 3])
    cats = torch.tensor(a).to(device)
    b= []
    for i in range(8):
        b.append([0, 1, 2, 3])
    cats8 = torch.tensor(b).to(device)
    model.eval()
    predictions = []
    actual_labels = []
    with torch.no_grad():
        for batch in tqdm(data_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            pred = torch.sigmoid(outputs.logits)
            pred = torch.round(pred)
            if pred.shape[0] == 8:
                pred = torch.sum(cats8 * pred, dim=1)
                actual = torch.sum(cats8 * labels, dim=1)
            else:
                pred = torch.sum(cats * pred, dim=1)
                actual = torch.sum(cats * labels, dim=1)
            predictions.extend(pred.cpu().tolist())
            actual_labels.extend(actual.cpu().tolist())
    return accuracy_score(actual_labels, predictions), classification_report(actual_labels, predictions), predictions

In [8]:
# a = []
# for i in range(16):
#     a.append([0, 1, 2, 3])
# b = []
# for i in range(16):
#     b.append([0, 1, 2, 3])
# a = torch.tensor(a)
# b = torch.tensor(b)
# torch.sum(a*b, dim=1).tolist()

In [9]:
def predict_sentiment(label_map, text, model, tokenizer, device, max_length=128):
    model.eval()
    encoding = tokenizer(text, return_tensors='pt', max_length=max_length, padding='max_length', truncation=True)
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        pred = torch.sigmoid(outputs.logits)
        pred = torch.round(pred)
        pred = torch.sum(torch.tensor([0, 1, 2, 3]).to(device) * pred).item()
    return label_map[pred]

In [10]:
# model_name = 'bert-base-cased'
model_name = 'google/flan-t5-small'
tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4,problem_type="multi_label_classification")

print('Num labels: ',model.config.num_labels)

max_length = 128
batch_size = 8
num_epochs = 3
learning_rate = 2e-5

train_dataset = TextClassificationDataset(train_texts, train_labels, tokenizer, max_length)
val_dataset = TextClassificationDataset(val_texts, val_labels, tokenizer, max_length)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

optimizer = AdamW(model.parameters(), lr=learning_rate)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * num_epochs)

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

Some weights of T5ForSequenceClassification were not initialized from the model checkpoint at google/flan-t5-small and are newly initialized: ['classification_head.dense.bias', 'classification_head.dense.weight', 'classification_head.out_proj.bias', 'classification_head.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Num labels:  4


In [11]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")
    train(model, train_loader, optimizer, scheduler, device)
    print(f"Evaluating...")
    accuracy, report, predictions = evaluate(model, val_loader, device)
    print(f"Validation Accuracy: {accuracy}")
    print(f"Report: {report}")

Epoch 1/3


100%|█████████████████████████████████████████| 774/774 [05:53<00:00,  2.19it/s]


Evaluating...


100%|█████████████████████████████████████████| 129/129 [00:16<00:00,  7.81it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Accuracy: 0.5784883720930233
Report:               precision    recall  f1-score   support

         0.0       0.58      0.97      0.72       502
         1.0       0.57      0.37      0.45       300
         2.0       0.00      0.00      0.00       128
         3.0       0.00      0.00      0.00       102

    accuracy                           0.58      1032
   macro avg       0.29      0.33      0.29      1032
weighted avg       0.45      0.58      0.48      1032

Epoch 2/3


100%|█████████████████████████████████████████| 774/774 [05:55<00:00,  2.18it/s]


Evaluating...


100%|█████████████████████████████████████████| 129/129 [00:16<00:00,  7.75it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Validation Accuracy: 0.6405038759689923
Report:               precision    recall  f1-score   support

         0.0       0.68      0.94      0.79       502
         1.0       0.56      0.63      0.59       300
         2.0       0.00      0.00      0.00       128
         3.0       0.00      0.00      0.00       102

    accuracy                           0.64      1032
   macro avg       0.31      0.39      0.35      1032
weighted avg       0.49      0.64      0.56      1032

Epoch 3/3


100%|█████████████████████████████████████████| 774/774 [05:56<00:00,  2.17it/s]


Evaluating...


100%|█████████████████████████████████████████| 129/129 [00:16<00:00,  7.79it/s]

Validation Accuracy: 0.6463178294573644
Report:               precision    recall  f1-score   support

         0.0       0.68      0.95      0.79       502
         1.0       0.57      0.64      0.60       300
         2.0       0.00      0.00      0.00       128
         3.0       0.00      0.00      0.00       102

    accuracy                           0.65      1032
   macro avg       0.31      0.40      0.35      1032
weighted avg       0.50      0.65      0.56      1032




  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [12]:
accuracy, report, predictions = evaluate(model, val_loader, device)

100%|█████████████████████████████████████████| 129/129 [00:16<00:00,  7.82it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [13]:
print(report)

              precision    recall  f1-score   support

         0.0       0.68      0.95      0.79       502
         1.0       0.57      0.64      0.60       300
         2.0       0.00      0.00      0.00       128
         3.0       0.00      0.00      0.00       102

    accuracy                           0.65      1032
   macro avg       0.31      0.40      0.35      1032
weighted avg       0.50      0.65      0.56      1032



In [14]:
torch.save(model, 'polyhope_multi_english_flant5.pt')

In [15]:
test_text = "I do not really think that this model does not works fine."
predict_sentiment(polyhope_inv_multi_labels, test_text, model, tokenizer, device)

'Not Hope'