In [1]:
import requests

remote_url = "https://raw.githubusercontent.com/hhhuang/nlp2019fall/master/classification/"

r = requests.get(remote_url + "train.csv", allow_redirects=True)
open('stance-detection-train.csv', 'wb').write(r.content)

r = requests.get(remote_url + "test.csv", allow_redirects=True)
open('stance-detection-test.csv', 'wb').write(r.content)


500660

In [2]:
import csv
from collections import Counter

# Training data

fin = open("stance-detection-train.csv")
# Test data
fin = open("stance-detection-test.csv")


reader = csv.DictReader(fin)
labels = Counter()
targets = Counter()
for line in reader:
    labels[line['Stance']] += 1
    targets[line['Target']] += 1
print(labels)
print(targets)

Counter({'AGAINST': 1014, 'NONE': 490, 'FAVOR': 452})
Counter({'Donald Trump': 707, 'Hillary Clinton': 295, 'Feminist Movement': 285, 'Legalization of Abortion': 280, 'Atheism': 220, 'Climate Change is a Real Concern': 169})


In [3]:
! pip install transformers==4.28.0
! pip install datasets
! pip install torch
! pip install tqdm
! pip install evaluate
! pip install pandas
! pip install --upgrade accelerate



In [4]:
import csv

from collections import defaultdict, Counter

import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModel, AutoConfig
from transformers import TrainingArguments, Trainer
from transformers import get_scheduler

import torch
from torch.utils.data import DataLoader
from torch.optim import AdamW
from torch import nn

from datasets import Dataset
import evaluate
import pandas as pd
import numpy as np

In [5]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Assignment III 修改區
請修改下方的程式碼，使得 F1 越高越好

In [33]:
from transformers import TextClassificationPipeline
from transformers import AdamW, get_linear_schedule_with_warmup


class StanceDetectionModel:
    def __init__(self):
        self.test_data = "stance-detection-test.csv"
        self.training_data = "stance-detection-train.csv"
        self.model_path = "trained_model"
        self.stance_label_mapping = {'AGAINST': 0, 'NONE': 1, 'FAVOR': 2}
        self.inverted_stance_label_mapping = {v: k for k, v in self.stance_label_mapping.items()}

    def create_model(self, load_trained=False):
        pretrained_model = "vinai/bertweet-base"
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_model)
        self.model = AutoModelForSequenceClassification.from_pretrained(pretrained_model, num_labels=3)        
            
    def load_tweet_data(self, filename):
        """ Data source: https://alt.qcri.org/semeval2016/task6/index.php?id=data-and-tools """
        import csv
        reader = csv.DictReader(open(filename))    # The original released dataset was encoded in ISO-8859-1. The attached version has been changed to utf8 for universality. 
        cnt = 0
        for row in reader:
            cnt += 1
            labels = [0.0] * 3
            labels[self.stance_label_mapping[row['Stance']]] = 1.0
            yield {'labels': labels, 'text': row['Tweet']}            
        
    def tokenize_dataset(self, data):
        return self.tokenizer(data['text'], return_tensors='pt', truncation=True, padding='max_length', max_length=280)
    
    def compute_metrics(self, eval_pred):
        logits, labels = eval_pred
        predictions, references = [], []
        for i in range(len(logits)):
            predictions.append(np.argmax(logits[i][:3]))
            references.append(np.argmax(labels[i][:3]))
        #predictions = np.argmax(logits, axis=-1)
        # Following the evaluation of SemEval-2016, only the F-scores of Favour (2) and Against (0) are measured. 
        return self.metric.compute(predictions=predictions, references=references, labels=[0, 2], average='macro')

    def train(self):
        #stances = ['Hillary Clinton', 'Feminist Movement', 'Legalization of Abortion', 'Atheism', 'Climate Change is a Real Concern', 'Donald Trump']
        self.create_model()
        load_training_data = lambda: self.load_tweet_data(self.training_data)
        load_test_data = lambda: self.load_tweet_data(self.test_data)
        training_data = Dataset.from_generator(load_training_data).shuffle(seed=42).map(self.tokenize_dataset, batched=True)
        test_data = Dataset.from_generator(load_test_data).map(self.tokenize_dataset, batched=True)
                   
  
        training_args = TrainingArguments(
            output_dir="test_trainer", 
            evaluation_strategy="epoch",
            num_train_epochs=10,
            per_device_train_batch_size=24,
            per_device_eval_batch_size=24,
            load_best_model_at_end=True,
            save_strategy='epoch',            
            metric_for_best_model='f1'                  
            # scheduler = get_linear_schedule_with_warmup(AdamW(self.model.parameters(), lr=1e-5), num_warmup_steps=0, num_training_steps=len(training_data) * training_args.num_train_epochs)
            # learning_rate=2e-5,
            # weight_decay=0.01,
            # adam_epsilon=1e-8,
        )

        optimizer = AdamW(self.model.parameters(), lr=1e-5)
        total_steps = len(training_data) * training_args.num_train_epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=0,
            num_training_steps=total_steps
        )
        training_args.optimizers = (optimizer, scheduler)
        
        self.metric = evaluate.load("f1")
        self.trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=training_data,
            eval_dataset=test_data,
            compute_metrics=self.compute_metrics,
        )
        self.trainer.train()
        self.trainer.save_model()

    def evaluate(self):
        return self.trainer.evaluate()
        
    def predict(self, instances):
        device = self.model.device
        self.model.to(device)
        self.model.eval()
        predictions = []
        for instance in instances:
            inputs = self.tokenizer.encode_plus(
                instance["text"],
                add_special_tokens=True,
                return_tensors="pt",
                padding="longest",
                truncation=True,
                max_length=512,  
            )
            inputs = inputs.to(device)
            with torch.no_grad():
                outputs = self.model(**inputs)
                logits = outputs.logits
            predicted_label = torch.argmax(logits, dim=1).item()
            predicted_stance = self.inverted_stance_label_mapping[predicted_label]
            predictions.append(predicted_stance)
        return predictions


In [34]:
model = StanceDetectionModel()
model.train()

emoji is not installed, thus not converting emoticons or emojis into text. Install emoji: pip3 install emoji==0.6.0
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at vinai/bertweet-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initiali

Downloading and preparing dataset generator/default to /home/ttsai/.cache/huggingface/datasets/generator/default-b745610a6d34cae1/0.0.0...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset generator downloaded and prepared to /home/ttsai/.cache/huggingface/datasets/generator/default-b745610a6d34cae1/0.0.0. Subsequent calls will reuse this data.


Map:   0%|          | 0/2914 [00:00<?, ? examples/s]

Downloading and preparing dataset generator/default to /home/ttsai/.cache/huggingface/datasets/generator/default-822166143c45abef/0.0.0...


Generating train split: 0 examples [00:00, ? examples/s]

Dataset generator downloaded and prepared to /home/ttsai/.cache/huggingface/datasets/generator/default-822166143c45abef/0.0.0. Subsequent calls will reuse this data.


Map:   0%|          | 0/1956 [00:00<?, ? examples/s]



  0%|          | 0/610 [00:00<?, ?it/s]



  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.5769648551940918, 'eval_f1': 0.4585494287133631, 'eval_runtime': 13.8243, 'eval_samples_per_second': 141.49, 'eval_steps_per_second': 2.966, 'epoch': 1.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.5602312088012695, 'eval_f1': 0.5528873847926268, 'eval_runtime': 13.8471, 'eval_samples_per_second': 141.257, 'eval_steps_per_second': 2.961, 'epoch': 2.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.558013379573822, 'eval_f1': 0.612446064808484, 'eval_runtime': 13.8496, 'eval_samples_per_second': 141.231, 'eval_steps_per_second': 2.96, 'epoch': 3.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.6262676119804382, 'eval_f1': 0.5853710911974908, 'eval_runtime': 13.8602, 'eval_samples_per_second': 141.123, 'eval_steps_per_second': 2.958, 'epoch': 4.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.647953450679779, 'eval_f1': 0.6035047382982694, 'eval_runtime': 13.8784, 'eval_samples_per_second': 140.938, 'eval_steps_per_second': 2.954, 'epoch': 5.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.7702760100364685, 'eval_f1': 0.593139798732155, 'eval_runtime': 13.878, 'eval_samples_per_second': 140.943, 'eval_steps_per_second': 2.954, 'epoch': 6.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.8030319809913635, 'eval_f1': 0.6013234845036501, 'eval_runtime': 13.859, 'eval_samples_per_second': 141.135, 'eval_steps_per_second': 2.958, 'epoch': 7.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.9013725519180298, 'eval_f1': 0.5871367203520016, 'eval_runtime': 13.8563, 'eval_samples_per_second': 141.164, 'eval_steps_per_second': 2.959, 'epoch': 8.0}




{'loss': 0.283, 'learning_rate': 9.016393442622952e-06, 'epoch': 8.2}


  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.9366181492805481, 'eval_f1': 0.590286082340727, 'eval_runtime': 13.8592, 'eval_samples_per_second': 141.134, 'eval_steps_per_second': 2.958, 'epoch': 9.0}




  0%|          | 0/41 [00:00<?, ?it/s]

{'eval_loss': 0.9402152299880981, 'eval_f1': 0.5997320376259201, 'eval_runtime': 13.8728, 'eval_samples_per_second': 140.996, 'eval_steps_per_second': 2.955, 'epoch': 10.0}
{'train_runtime': 680.4075, 'train_samples_per_second': 42.827, 'train_steps_per_second': 0.897, 'train_loss': 0.24369667084490668, 'epoch': 10.0}


# Assignment III: 目標1

請修改 `Assignment III 修改區` 那格，使得下方回報之 F1 越高越好

In [35]:
print(model.evaluate()['eval_f1'])



  0%|          | 0/41 [00:00<?, ?it/s]

0.612446064808484


# Assignment III: 目標2
請修改 `Assignment III 修改區` 那格，使得下方程式碼能順利執行

In [36]:
model.predict([
    {"text": "I don't trust Hillary"}, 
    {"text": "I don't like Trump"},
    {"text": "I like Hillary Clinton. We are personal friends. My support for her candidacy is based on the fact that she is brilliantly qualified for the job."}
    ])

['AGAINST', 'AGAINST', 'AGAINST']

In [37]:
model.predict([{"text":"I'm man"}])

['FAVOR']