# Main imports and code

In [1]:
# check which gpu we're using
!nvidia-smi

Sat May  7 23:28:20 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd /content/drive/MyDrive/nlp

/content/drive/MyDrive/nlp


In [4]:
!pip install simpletransformers
!pip install tensorboardx

Collecting simpletransformers
  Downloading simpletransformers-0.63.6-py3-none-any.whl (249 kB)
[K     |████████████████████████████████| 249 kB 12.6 MB/s 
[?25hCollecting tokenizers
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 17.6 MB/s 
[?25hCollecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 2.4 MB/s 
Collecting sentencepiece
  Downloading sentencepiece-0.1.96-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[K     |████████████████████████████████| 1.2 MB 42.1 MB/s 
Collecting streamlit
  Downloading streamlit-1.9.0-py2.py3-none-any.whl (10.1 MB)
[K     |████████████████████████████████| 10.1 MB 56.4 MB/s 
Collecting datasets
  Downloading datasets-2.1.0-py3-none-any.whl (325 kB)
[K     |████████████████████████████████| 325 kB 53.0 MB/s 
Collecting wandb>=0.10.32
  Downloading wandb-0.12.16-py2.py

Collecting tensorboardx
  Downloading tensorboardX-2.5-py2.py3-none-any.whl (125 kB)
[?25l[K     |██▋                             | 10 kB 29.3 MB/s eta 0:00:01[K     |█████▎                          | 20 kB 34.6 MB/s eta 0:00:01[K     |███████▉                        | 30 kB 26.7 MB/s eta 0:00:01[K     |██████████▌                     | 40 kB 19.8 MB/s eta 0:00:01[K     |█████████████                   | 51 kB 16.4 MB/s eta 0:00:01[K     |███████████████▊                | 61 kB 18.8 MB/s eta 0:00:01[K     |██████████████████▎             | 71 kB 20.5 MB/s eta 0:00:01[K     |█████████████████████           | 81 kB 21.9 MB/s eta 0:00:01[K     |███████████████████████▌        | 92 kB 23.8 MB/s eta 0:00:01[K     |██████████████████████████▏     | 102 kB 24.1 MB/s eta 0:00:01[K     |████████████████████████████▊   | 112 kB 24.1 MB/s eta 0:00:01[K     |███████████████████████████████▍| 122 kB 24.1 MB/s eta 0:00:01[K     |████████████████████████████████| 125 kB 24.

In [5]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs, MultiLabelClassificationModel, MultiLabelClassificationArgs
from urllib import request
import pandas as pd
import logging
import torch
from collections import Counter
from ast import literal_eval

In [6]:
# prepare logger
logging.basicConfig(level=logging.INFO)

transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# check gpu
cuda_available = torch.cuda.is_available()

print('Cuda available? ',cuda_available)

Cuda available?  True


In [7]:
if cuda_available:
  import tensorflow as tf
  # Get the GPU device name.
  device_name = tf.test.gpu_device_name()
  # The device name should look like the following:
  if device_name == '/device:GPU:0':
      print('Found GPU at: {}'.format(device_name))
  else:
      raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# Fetch Don't Patronize Me! data manager module

In [8]:
module_url = f"https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py"
module_name = module_url.split('/')[-1]
print(f'Fetching {module_url}')
#with open("file_1.txt") as f1, open("file_2.txt") as f2
with request.urlopen(module_url) as f, open(module_name,'w') as outf:
  a = f.read()
  outf.write(a.decode('utf-8'))

Fetching https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py


In [9]:
# helper function to save predictions to an output file
def labels2file(p, outf_path):
	with open(outf_path,'w') as outf:
		for pi in p:
			outf.write(','.join([str(k) for k in pi])+'\n')

In [10]:
from dont_patronize_me import DontPatronizeMe

In [11]:
dpm = DontPatronizeMe('.', '.')

In [12]:
dpm.load_task1()
dpm.load_task2(return_one_hot=True)

Map of label to numerical label:
{'Unbalanced_power_relations': 0, 'Shallow_solution': 1, 'Presupposition': 2, 'Authority_voice': 3, 'Metaphors': 4, 'Compassion': 5, 'The_poorer_the_merrier': 6}


# Load paragraph IDs

In [13]:
trids = pd.read_csv('train_semeval_parids-labels.csv')
teids = pd.read_csv('dev_semeval_parids-labels.csv')

In [14]:
trids.head()

Unnamed: 0,par_id,label
0,4341,"[1, 0, 0, 1, 0, 0, 0]"
1,4136,"[0, 1, 0, 0, 0, 0, 0]"
2,10352,"[1, 0, 0, 0, 0, 1, 0]"
3,8279,"[0, 0, 0, 1, 0, 0, 0]"
4,1164,"[1, 0, 0, 1, 1, 1, 0]"


In [15]:
trids.par_id = trids.par_id.astype(str)
teids.par_id = teids.par_id.astype(str)



# Rebuild training set (Task 1)

In [16]:
rows = [] # will contain par_id, label and text
for idx in range(len(trids)):  
  parid = trids.par_id[idx]
  #print(parid)
  # select row from original dataset to retrieve `text` and binary label
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  label = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'text':text,
      'label':label
  })
  

In [17]:
trdf1 = pd.DataFrame(rows)

*italicized text*# Rebuild test set (Task 1)

In [18]:
rows = [] # will contain par_id, label and text
for idx in range(len(teids)):  
  parid = teids.par_id[idx]
  #print(parid)
  # select row from original dataset
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  label = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'text':text,
      'label':label
  })
  

In [19]:
len(rows)

2094

In [20]:
tedf1 = pd.DataFrame(rows)

# RoBERTa Baseline for Task 1

In [21]:
# downsample negative instances
pcldf = trdf1[trdf1.label==1]
npos = len(pcldf)

training_set1 = pd.concat([pcldf,trdf1[trdf1.label==0][:npos*2]])

In [22]:
training_set1

Unnamed: 0,par_id,text,label
0,4341,"The scheme saw an estimated 150,000 children f...",1
1,4136,Durban 's homeless communities reconciliation ...,1
2,10352,The next immediate problem that cropped up was...,1
3,8279,Far more important than the implications for t...,1
4,1164,To strengthen child-sensitive social protectio...,1
...,...,...,...
2377,1775,Last but not the least element of culpability ...,0
2378,1776,"Then , taking the art of counter-intuitive non...",0
2379,1777,Kagunga village was reported to lack necessary...,0
2380,1778,"""After her parents high-profile divorce after ...",0


In [32]:

task1_model_args = ClassificationArgs(num_train_epochs=10, 
                                      train_batch_size=32,
                                      no_save=True, 
                                      no_cache=True, 
                                      overwrite_output_dir=True, 
                                      learning_rate=1e-5,
                                      )
task1_model = ClassificationModel("roberta", 
                                  'roberta-base', 
                                  args = task1_model_args, 
                                  num_labels=2, 
                                  use_cuda=cuda_available)
# train model
task1_model.train_model(training_set1[['text', 'label']])
# run predictions
preds_task1, _ = task1_model.predict(tedf1.text.tolist())

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

  0%|          | 0/2382 [00:00<?, ?it/s]



Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

Running Epoch 0 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 1 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 2 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 3 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 4 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 5 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 6 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 7 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 8 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

Running Epoch 9 of 10:   0%|          | 0/75 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2094 [00:00<?, ?it/s]

  0%|          | 0/262 [00:00<?, ?it/s]

In [33]:
Counter(preds_task1)

Counter({0: 1712, 1: 382})

In [34]:
Counter(tedf1['label'])

Counter({0: 1895, 1: 199})

In [35]:
labels2file([[k] for k in preds_task1], 'task1.txt')

# Rebuild training set (Task 2)

In [None]:
rows2 = [] # will contain par_id, label and text
for idx in range(len(trids)):  
  parid = trids.par_id[idx]
  label = trids.label[idx]
  # select row from original dataset to retrieve the `text` value
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  rows2.append({
      'par_id':parid,
      'text':text,
      'label':label
  })
  

In [None]:
trdf2 = pd.DataFrame(rows2)

In [None]:
trdf2

Unnamed: 0,par_id,text,label
0,4341,"The scheme saw an estimated 150,000 children f...","[1, 0, 0, 1, 0, 0, 0]"
1,4136,Durban 's homeless communities reconciliation ...,"[0, 1, 0, 0, 0, 0, 0]"
2,10352,The next immediate problem that cropped up was...,"[1, 0, 0, 0, 0, 1, 0]"
3,8279,Far more important than the implications for t...,"[0, 0, 0, 1, 0, 0, 0]"
4,1164,To strengthen child-sensitive social protectio...,"[1, 0, 0, 1, 1, 1, 0]"
...,...,...,...
8370,8380,Rescue teams search for survivors on the rubbl...,"[0, 0, 0, 0, 0, 0, 0]"
8371,8381,The launch of ' Happy Birthday ' took place la...,"[0, 0, 0, 0, 0, 0, 0]"
8372,8382,"The unrest has left at least 20,000 people dea...","[0, 0, 0, 0, 0, 0, 0]"
8373,8383,You have to see it from my perspective . I may...,"[0, 0, 0, 0, 0, 0, 0]"


In [None]:
trdf2.label = trdf2.label.apply(literal_eval)

# Rebuild test set (Task 2)

In [None]:
rows2 = [] # will contain par_id, label and text
for idx in range(len(teids)):  
  parid = teids.par_id[idx]
  label = teids.label[idx]
  #print(parid)
  # select row from original dataset to access the `text` value
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  rows2.append({
      'par_id':parid,
      'text':text,
      'label':label
  })
  

In [None]:
tedf2 = pd.DataFrame(rows2)

In [None]:
tedf2

Unnamed: 0,par_id,text,label
0,4046,We also know that they can benefit by receivin...,"[1, 0, 0, 1, 0, 0, 0]"
1,1279,Pope Francis washed and kissed the feet of Mus...,"[0, 1, 0, 0, 0, 0, 0]"
2,8330,Many refugees do n't want to be resettled anyw...,"[0, 0, 1, 0, 0, 0, 0]"
3,4063,"""Budding chefs , like """" Fred """" , """" Winston ...","[1, 0, 0, 1, 1, 1, 0]"
4,4089,"""In a 90-degree view of his constituency , one...","[1, 0, 0, 0, 0, 0, 0]"
...,...,...,...
2089,10462,"The sad spectacle , which occurred on Saturday...","[0, 0, 0, 0, 0, 0, 0]"
2090,10463,""""""" The Pakistani police came to our house and...","[0, 0, 0, 0, 0, 0, 0]"
2091,10464,"""When Marie O'Donoghue went looking for a spec...","[0, 0, 0, 0, 0, 0, 0]"
2092,10465,"""Sri Lankan norms and culture inhibit women fr...","[0, 0, 0, 0, 0, 0, 0]"


In [None]:
tedf2.label = tedf2.label.apply(literal_eval)

# RoBERTa baseline for Task 2

In [None]:
all_negs = trdf2[trdf2.label.apply(lambda x:sum(x) == 0)]
all_pos = trdf2[trdf2.label.apply(lambda x:sum(x) > 0)]

training_set2 = pd.concat([all_pos,all_negs[:round(len(all_pos)*0.5)]])

In [None]:
training_set2

Unnamed: 0,par_id,text,label
0,4341,"The scheme saw an estimated 150,000 children f...","[1, 0, 0, 1, 0, 0, 0]"
1,4136,Durban 's homeless communities reconciliation ...,"[0, 1, 0, 0, 0, 0, 0]"
2,10352,The next immediate problem that cropped up was...,"[1, 0, 0, 0, 0, 1, 0]"
3,8279,Far more important than the implications for t...,"[0, 0, 0, 1, 0, 0, 0]"
4,1164,To strengthen child-sensitive social protectio...,"[1, 0, 0, 1, 1, 1, 0]"
...,...,...,...
1186,434,""""""" I was absolutely useless at school , hopel...","[0, 0, 0, 0, 0, 0, 0]"
1187,435,I also noticed the change in socio-economic le...,"[0, 0, 0, 0, 0, 0, 0]"
1188,436,"Can Donald Trump win ? It 's possible , but ce...","[0, 0, 0, 0, 0, 0, 0]"
1189,437,He added that any introduction of new law must...,"[0, 0, 0, 0, 0, 0, 0]"


In [None]:
task2_model_args = MultiLabelClassificationArgs(num_train_epochs=100,
                                                train_batch_size=32,
                                                no_save=True, 
                                                no_cache=True, 
                                                overwrite_output_dir=True
                                                )
task2_model = MultiLabelClassificationModel("roberta", 
                                            'roberta-base', 
                                            num_labels=7,
                                            args = task2_model_args, 
                                            use_cuda=cuda_available)
# train model
task2_model.train_model(training_set2[['text', 'label']])
# run predictions
preds_task2, _ = task2_model.predict(tedf2.text.tolist())

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForMultiLabelSequenceClassification: ['lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForMultiLabelSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.out_proj.bias', 'cla

  0%|          | 0/1191 [00:00<?, ?it/s]



Epoch:   0%|          | 0/100 [00:00<?, ?it/s]

Running Epoch 0 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 1 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 2 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 3 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 4 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 5 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 6 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 7 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 8 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 9 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 10 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 11 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 12 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 13 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 14 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 15 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 16 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 17 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 18 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 19 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 20 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 21 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 22 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 23 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 24 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 25 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 26 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 27 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 28 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 29 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 30 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 31 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 32 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 33 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 34 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 35 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 36 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 37 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 38 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 39 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 40 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 41 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 42 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 43 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 44 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 45 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 46 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 47 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 48 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 49 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 50 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 51 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 52 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 53 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 54 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 55 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 56 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 57 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 58 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 59 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 60 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 61 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 62 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 63 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 64 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 65 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 66 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 67 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 68 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 69 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 70 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 71 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 72 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 73 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 74 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 75 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 76 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 77 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 78 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 79 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 80 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 81 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 82 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 83 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 84 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 85 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 86 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 87 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 88 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 89 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 90 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 91 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 92 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 93 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 94 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 95 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 96 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 97 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 98 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

Running Epoch 99 of 100:   0%|          | 0/38 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to outputs/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/2094 [00:00<?, ?it/s]

  0%|          | 0/262 [00:00<?, ?it/s]

In [None]:
labels2file(preds_task2, 'task2.txt')

## Prepare submission

In [None]:
!cat task1.txt | head -n 10

0
1
0
1
0
1
1
1
0
1


In [None]:
!cat task2.txt | head -n 10

0,0,1,1,0,0,0
1,1,0,0,0,0,0
0,0,0,1,0,0,0
1,0,0,0,1,1,0
1,0,0,0,0,1,0
0,0,0,0,0,1,0
1,0,0,0,0,1,0
1,1,0,0,0,0,0
0,0,1,0,0,1,0
1,0,1,1,0,1,0


In [None]:
!zip submission.zip task1.txt task2.txt

updating: task1.txt (deflated 92%)
updating: task2.txt (deflated 94%)


In [28]:
import os

In [27]:
module_url = f"https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/evaluation.py"
module_name = module_url.split('/')[-1]
print(f'Fetching {module_url}')
#with open("file_1.txt") as f1, open("file_2.txt") as f2
with request.urlopen(module_url) as f, open(module_name,'w') as outf:
  a = f.read()
  outf.write(a.decode('utf-8'))

Fetching https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/evaluation.py


In [None]:
!mkdir ref res

mkdir: cannot create directory ‘ref’: File exists
mkdir: cannot create directory ‘res’: File exists


In [36]:
labels2file([[k] for k in preds_task1], os.path.join('res/', 'task1.txt'))

In [None]:
labels2file(preds_task2, os.path.join('res/', 'task2.txt'))

In [None]:
labels2file([[k] for k in tedf1['label']], os.path.join('ref/', 'task1.txt'))

In [None]:
labels2file(tedf2['label'], os.path.join('ref/', 'task2.txt'))

In [37]:
!python3 evaluation.py . .

In [None]:
!cat scores.txt

task1_precision:0.39267015706806285
task1_recall:0.7537688442211056
task1_f1:0.5163511187607575
task2_unb:0.3854660347551343
task2_sha:0.28099173553719003
task2_pre:0.28436018957345977
task2_aut:0.19487179487179487
task2_met:0.3023255813953488
task2_com:0.310838445807771
task2_the:0.2105263157894737
task2_avg:0.28134001396145325


In [31]:
!cat scores.txt

task1_precision:0.4132231404958678
task1_recall:0.7537688442211056
task1_f1:0.5338078291814947
task2_unb:0.3854660347551343
task2_sha:0.28099173553719003
task2_pre:0.28436018957345977
task2_aut:0.19487179487179487
task2_met:0.3023255813953488
task2_com:0.310838445807771
task2_the:0.2105263157894737
task2_avg:0.28134001396145325


In [38]:
!cat scores.txt

task1_precision:0.4005235602094241
task1_recall:0.7688442211055276
task1_f1:0.5266781411359726
task2_unb:0.3854660347551343
task2_sha:0.28099173553719003
task2_pre:0.28436018957345977
task2_aut:0.19487179487179487
task2_met:0.3023255813953488
task2_com:0.310838445807771
task2_the:0.2105263157894737
task2_avg:0.28134001396145325
