In [None]:
!ls

README.md                             main.py
[34m__pycache__[m[m                           model.py
data.py                               optimizer.py
entity_linker.py                      requirements.in
enwiki_20180420_entity_linker.pkl     requirements.txt
enwiki_20180420_entity_linker.pkl.bz2 [34mreuters-21578[m[m
enwiki_20180420_lg1_300d.pkl          reuters21578.tar
enwiki_20180420_lg1_300d.pkl.bz2      train.py
enwiki_20180420_lg1_300d.pkl.bz2.1


In [None]:
!pwd

/Users/liguolun/Desktop/COURSES/Fourth year/CSC413/Final Project/NABE/wikipedia2vec/examples/text_classification


# Imports

In [None]:
import functools
import logging
import os
import random
import re
import unicodedata
from collections import Counter

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.datasets import fetch_20newsgroups
from tqdm import tqdm

PAD_TOKEN = '<PAD>'
WHITESPACE_REGEXP = re.compile(r'\s+')

logger = logging.getLogger(__name__)

# Class and Helper Functions

In [None]:
class Dataset:
    """A named collection of labelled instances, each tagged with a fold.

    The constructor stores its three arguments unchanged as ``name``,
    ``instances`` and ``label_names``.
    """

    def __init__(self, name, instances, label_names):
        self.name, self.instances, self.label_names = name, instances, label_names

    def __iter__(self):
        # Generator-based iteration over the stored instances, in order.
        yield from self.instances

    def __len__(self):
        return len(self.instances)

    def get_instances(self, fold=None):
        """Return the instances whose ``fold`` equals ``fold``.

        With ``fold=None`` the stored ``instances`` object itself is returned
        (not a copy).
        """
        if fold is None:
            return self.instances
        return [item for item in self.instances if item.fold == fold]
class DatasetInstance:
    """One example: its text, its label and the name of the fold it belongs to."""

    def __init__(self, text, label, fold):
        self.text, self.label, self.fold = text, label, fold

In [None]:
# Central feature-extraction step.
# Builds the word and entity vocabularies, then returns a dictionary holding word_ids, entity_ids, prior_probs and label features for each of the train, dev and test folds.
def generate_features(dataset, tokenizer, entity_linker, min_count, max_word_length, max_entity_length):
    """Build the vocabularies and fixed-length feature arrays for every fold.

    Args:
        dataset: iterable of instances exposing ``text``, ``label`` and ``fold``,
            and providing ``get_instances(fold)``.
        tokenizer: object whose ``tokenize(text)`` returns tokens with a ``text`` attribute.
        entity_linker: object whose ``detect_mentions(text)`` returns mentions with
            ``title`` and ``prior_prob`` attributes.
        min_count: minimum number of occurrences (over the whole dataset) for a
            word or entity title to be kept in its vocabulary.
        max_word_length: length of every ``word_ids`` array.
        max_entity_length: length of every ``entity_ids`` / ``prior_probs`` array.

    Returns:
        dict with keys ``train``, ``dev`` and ``test`` (lists of per-instance dicts
        holding ``word_ids``, ``entity_ids``, ``prior_probs`` and ``label``) plus
        ``word_vocab`` and ``entity_vocab`` (name -> integer id, where id 0 is
        ``PAD_TOKEN``).
    """

    # Each text is processed twice (vocabulary pass, then feature pass); the
    # caches make the second pass reuse the first pass's results.
    @functools.lru_cache(maxsize=None)
    def tokenize(text):
        return tokenizer.tokenize(text)

    @functools.lru_cache(maxsize=None)
    def detect_mentions(text):
        return entity_linker.detect_mentions(text)

    def create_numpy_sequence(source_sequence, length, dtype):
        # Truncate to `length` items and right-pad with zeros (0 is the PAD id).
        ret = np.zeros(length, dtype=dtype)
        source_sequence = source_sequence[:length]
        ret[:len(source_sequence)] = source_sequence
        return ret

    logger.info('Creating vocabulary...')
    word_counter = Counter()
    entity_counter = Counter()
    # Counts are taken over every instance of the dataset, whatever its fold.
    for instance in tqdm(dataset):
        word_counter.update(t.text for t in tokenize(instance.text))
        entity_counter.update(m.title for m in detect_mentions(instance.text))

    # Word vocabulary: ids start at 1, id 0 is reserved for padding.
    words = [word for word, count in word_counter.items() if count >= min_count]
    word_vocab = {word: index for index, word in enumerate(words, 1)}
    word_vocab[PAD_TOKEN] = 0

    # Entity vocabulary, built the same way from the mention titles.
    entity_titles = [title for title, count in entity_counter.items() if count >= min_count]
    entity_vocab = {title: index for index, title in enumerate(entity_titles, 1)}
    entity_vocab[PAD_TOKEN] = 0

    ret = dict(train=[], dev=[], test=[], word_vocab=word_vocab, entity_vocab=entity_vocab)

    # 'dev' is the validation fold (used for tuning / early stopping).
    for fold in ('train', 'dev', 'test'):
        for instance in dataset.get_instances(fold):
            # Tokens that did not reach min_count are dropped, not mapped to an UNK id.
            word_ids = [word_vocab[token.text] for token in tokenize(instance.text) if token.text in word_vocab]
            entity_ids = []
            prior_probs = []
            for mention in detect_mentions(instance.text):
                # mention.title is the key used in entity_vocab
                # (presumably the title of the linked entity - TODO confirm against entity_linker.py).
                if mention.title in entity_vocab:
                    entity_ids.append(entity_vocab[mention.title])
                    prior_probs.append(mention.prior_prob)

            # np.int was a deprecated alias of the builtin int and was removed in
            # NumPy 1.24; np.int64 is what it resolved to on 64-bit Linux/macOS.
            ret[fold].append(dict(word_ids=create_numpy_sequence(word_ids, max_word_length, np.int64),
                                  entity_ids=create_numpy_sequence(entity_ids, max_entity_length, np.int64),
                                  prior_probs=create_numpy_sequence(prior_probs, max_entity_length, np.float32),
                                  label=instance.label))

    return ret


In [None]:
def normalize_text(text):
    """Lower-case ``text``, collapse each whitespace run to one space and strip accents."""
    collapsed = WHITESPACE_REGEXP.sub(' ', text.lower())

    # Remove accents (https://stackoverflow.com/a/518232): decompose to NFD,
    # drop the combining marks (category 'Mn'), then recompose to NFC.
    decomposed = unicodedata.normalize('NFD', collapsed)
    without_marks = ''.join(filter(lambda ch: unicodedata.category(ch) != 'Mn', decomposed))
    return unicodedata.normalize('NFC', without_marks)

In [None]:
def load_20ng_dataset(dev_size=0.05):
    """Load 20 Newsgroups and carve a dev fold out of the training subset.

    Args:
        dev_size: fraction of the (shuffled) training documents moved to the
            ``dev`` fold.

    Returns:
        Dataset named ``'20ng'`` whose instances are tagged ``dev``, ``train``
        or ``test`` (in that order) and whose ``label_names`` are the
        ``target_names`` reported by ``fetch_20newsgroups``.
    """
    train_data = []
    test_data = []
    label_names = None

    for fold in ('train', 'test'):
        dataset_obj = fetch_20newsgroups(subset=fold, shuffle=False)
        if label_names is None:
            # Reuse the names from the first fetch instead of fetching the
            # dataset a third time only to read 'target_names'.
            label_names = dataset_obj['target_names']

        for text, label in zip(dataset_obj['data'], dataset_obj['target']):
            text = normalize_text(text)
            if fold == 'train':
                train_data.append((text, label))
            else:
                test_data.append((text, label))

    n_dev = min(max(int(len(train_data) * dev_size), 0), len(train_data))
    random.shuffle(train_data)
    # Split on an explicit index: with n_dev == 0 the slices [-0:] / [:-0]
    # would put every document in 'dev' and leave 'train' empty.
    split_at = len(train_data) - n_dev

    instances = []
    instances += [DatasetInstance(text, label, 'dev') for text, label in train_data[split_at:]]
    instances += [DatasetInstance(text, label, 'train') for text, label in train_data[:split_at]]
    instances += [DatasetInstance(text, label, 'test') for text, label in test_data]

    return Dataset('20ng', instances, label_names)


In [None]:
def load_r8_dataset(dataset_path, dev_size=0.05):
    """Load the eight-class Reuters subset from the ``.sgm`` files in ``dataset_path``.

    A document is kept when it has exactly one topic, that topic is one of the
    eight ``label_names``, its ``topics`` attribute is ``'YES'`` and its
    ``lewissplit`` attribute is ``'TRAIN'`` or ``'TEST'``.

    Args:
        dataset_path: directory containing the ``.sgm`` files.
        dev_size: fraction of the (shuffled) training documents moved to the
            ``dev`` fold.

    Returns:
        Dataset named ``'r8'`` whose instances are tagged ``dev``, ``train`` or
        ``test`` (in that order); labels are indices into ``label_names``.
    """
    label_names = ['grain', 'earn', 'interest', 'acq', 'trade', 'crude', 'ship', 'money-fx']
    label_index = {t: i for i, t in enumerate(label_names)}

    train_data = []
    test_data = []

    for file_name in sorted(os.listdir(dataset_path)):
        # Only the .sgm files hold documents.
        if not file_name.endswith('.sgm'):
            continue
        with open(os.path.join(dataset_path, file_name), encoding='ISO-8859-1') as f:
            # BeautifulSoup parses the markup so each <reuters> element
            # (one document) can be visited in turn.
            for node in BeautifulSoup(f.read(), 'html.parser').find_all('reuters'):
                topic_names = [n.text for n in node.topics.find_all('d')]
                # Keep single-topic documents whose topic is one of the eight classes.
                if len(topic_names) != 1 or topic_names[0] not in label_index:
                    continue
                if node['topics'] != 'YES':
                    continue

                split_name = node['lewissplit']
                if split_name == 'TRAIN':
                    target = train_data
                elif split_name == 'TEST':
                    target = test_data
                else:
                    continue
                # Normalise only the documents that are actually kept.
                target.append((normalize_text(node.find('text').text), label_index[topic_names[0]]))

    n_dev = min(max(int(len(train_data) * dev_size), 0), len(train_data))
    random.shuffle(train_data)
    # Split on an explicit index: with n_dev == 0 the slices [-0:] / [:-0]
    # would put every document in 'dev' and leave 'train' empty.
    split_at = len(train_data) - n_dev

    instances = []
    instances += [DatasetInstance(text, label, 'dev') for text, label in train_data[split_at:]]
    instances += [DatasetInstance(text, label, 'train') for text, label in train_data[:split_at]]
    instances += [DatasetInstance(text, label, 'test') for text, label in test_data]

    return Dataset('r8', instances, label_names)

# Analysis of load_r8_dataset()

In [None]:
def load_r8_dataset(dataset_path, dev_size=0.05):
    """Load the eight-class Reuters subset from the ``.sgm`` files in ``dataset_path``.

    NOTE(review): this notebook defines ``load_r8_dataset`` in an earlier cell
    as well; running this cell replaces that definition.

    A document is kept when it has exactly one topic, that topic is one of the
    eight ``label_names``, its ``topics`` attribute is ``'YES'`` and its
    ``lewissplit`` attribute is ``'TRAIN'`` or ``'TEST'``.

    Args:
        dataset_path: directory containing the ``.sgm`` files.
        dev_size: fraction of the (shuffled) training documents moved to the
            ``dev`` fold.

    Returns:
        Dataset named ``'r8'`` whose instances are tagged ``dev``, ``train`` or
        ``test`` (in that order); labels are indices into ``label_names``.
    """
    label_names = ['grain', 'earn', 'interest', 'acq', 'trade', 'crude', 'ship', 'money-fx']
    label_index = {t: i for i, t in enumerate(label_names)}

    train_data = []
    test_data = []

    for file_name in sorted(os.listdir(dataset_path)):
        # Only the .sgm files hold documents.
        if not file_name.endswith('.sgm'):
            continue
        with open(os.path.join(dataset_path, file_name), encoding='ISO-8859-1') as f:
            # BeautifulSoup parses the markup so each <reuters> element
            # (one document) can be visited in turn.
            for node in BeautifulSoup(f.read(), 'html.parser').find_all('reuters'):
                topic_names = [n.text for n in node.topics.find_all('d')]
                # Keep single-topic documents whose topic is one of the eight classes.
                if len(topic_names) != 1 or topic_names[0] not in label_index:
                    continue
                if node['topics'] != 'YES':
                    continue

                split_name = node['lewissplit']
                if split_name == 'TRAIN':
                    target = train_data
                elif split_name == 'TEST':
                    target = test_data
                else:
                    continue
                # Normalise only the documents that are actually kept.
                target.append((normalize_text(node.find('text').text), label_index[topic_names[0]]))

    n_dev = min(max(int(len(train_data) * dev_size), 0), len(train_data))
    random.shuffle(train_data)
    # Split on an explicit index: with n_dev == 0 the slices [-0:] / [:-0]
    # would put every document in 'dev' and leave 'train' empty.
    split_at = len(train_data) - n_dev

    instances = []
    instances += [DatasetInstance(text, label, 'dev') for text, label in train_data[split_at:]]
    instances += [DatasetInstance(text, label, 'train') for text, label in train_data[:split_at]]
    instances += [DatasetInstance(text, label, 'test') for text, label in test_data]

    return Dataset('r8', instances, label_names)

In [None]:
# Notebook-level stand-ins for the loader's arguments, so its body can be run step by step.
dataset_path = "reuters-21578"  # defined in main()
dev = 0.05
all_files = sorted(os.listdir(dataset_path))
sgm_files = list(filter(lambda fname: fname.endswith('.sgm'), all_files))

In [None]:
def normalize_text(text):
    """Lower-case ``text``, collapse each whitespace run to one space and strip accents.

    NOTE(review): ``normalize_text`` is also defined in an earlier cell of this
    notebook; this cell re-defines it.
    """
    collapsed = WHITESPACE_REGEXP.sub(' ', text.lower())

    # Remove accents (https://stackoverflow.com/a/518232): decompose to NFD,
    # drop the combining marks (category 'Mn'), then recompose to NFC.
    decomposed = unicodedata.normalize('NFD', collapsed)
    without_marks = ''.join(filter(lambda ch: unicodedata.category(ch) != 'Mn', decomposed))
    return unicodedata.normalize('NFC', without_marks)

In [None]:
    # Step-by-step copy of the loading loop from load_r8_dataset(), run at notebook
    # level so train_data / test_data can be inspected in the following cells.
    label_names = ['grain', 'earn', 'interest', 'acq', 'trade', 'crude', 'ship', 'money-fx']
    # Map each label name to its position in label_names.
    label_index = {t: i for i, t in enumerate(label_names)}

    train_data = []
    test_data = []

    for file_name in sorted(os.listdir(dataset_path)):
        if file_name.endswith('.sgm'): # "HTML format" - the .sgm files are parsed with html.parser below
            with open(os.path.join(dataset_path, file_name), encoding='ISO-8859-1') as f:
                for node in BeautifulSoup(f.read(), 'html.parser').find_all('reuters'): # BeautifulSoup parses the markup; each <reuters> element is visited in turn
                    text = normalize_text(node.find('text').text)
                    # Text of every <d> element found under the node's <topics> child.
                    label_nodes = [n.text for n in node.topics.find_all('d')]
                    # Skip documents that do not have exactly one topic.
                    if len(label_nodes) != 1:
                        continue

                    # Empty when the single topic is not one of the eight label_names.
                    labels = [label_index[l] for l in label_nodes if l in label_index]
                    if len(labels) == 1:
                        # node['...'] reads an attribute of the <reuters> tag itself.
                        if node['topics'] != 'YES':
                            continue
                        if node['lewissplit'] == 'TRAIN':
                            train_data.append((text, labels[0]))
                        elif node['lewissplit'] == 'TEST':
                            test_data.append((text, labels[0]))
                        else:
                            continue

In [None]:
# Preview a few (text, label) pairs instead of dumping the whole list into the output.
train_data[:5]

In [None]:
labels = [data[1] for data in train_data]
print(min(labels), max(labels))

0 7


In [None]:
    # dev_size is only ever assigned inside the loader functions, so on a fresh
    # kernel this cell raised NameError; derive it from the `dev` ratio set above.
    # (Unlike load_r8_dataset, train_data is not shuffled here, so the dev fold
    # is simply the last dev_size training documents.)
    dev_size = min(max(int(len(train_data) * dev), 0), len(train_data))
    # Explicit split index: [-0:] / [:-0] would mis-split when dev_size == 0.
    split_at = len(train_data) - dev_size

    instances = []
    instances += [DatasetInstance(text, label, 'dev') for text, label in train_data[split_at:]]
    instances += [DatasetInstance(text, label, 'train') for text, label in train_data[:split_at]]
    instances += [DatasetInstance(text, label, 'test') for text, label in test_data]

In [None]:
len(instances)

7674

In [None]:
# `unique` is not defined anywhere in the notebook; use np.unique on the labels
# of the (text, label) pairs. NOTE(review): intent inferred from the later
# np.unique(...) cells - confirm.
np.unique([label for _text, label in train_data])

In [None]:
text, label = train_data[-1]
print(text)
print()
print(label)

 australian unions launch new south wales strikes sydney, april 8 - australian trade unions said they have launched week-long strikes and other industrial action in new south wales (nsw) to protest against new laws that would reduce injury compensation payments. union sources said talks with the state government broke down last night, but the two sides are scheduled to meet later today in an attempt to find a compromise. rail freight and shipping cargo movements in the country's most populous state were the first to be affected, and union officials said almost every business sector will be hit unless there is a quick settlement. the state government recently introduced a new workers' compensation act which would cut the cash benefits to injured workers by up to a third. the act is now awaiting parliamentary ratification. nsw state premier barrie unsworth has said workers' compensation has risen steeply in recent years and the proposed cuts would save hundreds of mlns of dollars a year

In [None]:
ag_news_path = 'sentiment_datasets/ag_news_csv'
ag_news_path

'sentiment_datasets/ag_news_csv'

In [None]:
# pd.read_csv() needs a path. NOTE(review): 'train.csv' with header=None is
# assumed from the load_data() cell below - confirm this was the intent.
a = pd.read_csv(os.path.join(ag_news_path, 'train.csv'), header=None)

In [None]:
import pandas as pd
def load_data(filename, data_path = ag_news_path):
    """Read the header-less CSV ``filename`` under ``data_path``.

    The three columns are named ``rating``, ``topic`` and ``description``;
    the result is a NumPy array whose rows are ``[description, rating]``.
    """
    frame = pd.read_csv(os.path.join(data_path, filename), header=None)
    frame.columns = ['rating', 'topic','description']
    return frame[['description','rating']].to_numpy()

In [None]:
train_data = load_data('train.csv')
test_data = load_data('test.csv')

In [None]:
dev_ratio=0.05
dev_size = round(len(train_data) * dev_ratio)
print(dev_size)
# train_data is a 2-D ndarray here. random.shuffle() swaps rows through views,
# which overwrites rows with copies of other rows; np.random.shuffle permutes
# the rows in place along the first axis without losing any.
np.random.shuffle(train_data)
dev_data = train_data[:dev_size]
train_data = train_data[dev_size:]

274

In [None]:
train_data.T[1].max()

4

In [None]:
train_data.T[1].min()

1

In [None]:
test_data.T[1].max()

4

In [None]:
np.unique(test_data.T[1])

array([1, 2, 3, 4], dtype=object)

In [None]:
np.unique(train_data.T[1])

array([1, 2, 3, 4], dtype=object)

In [None]:
# Start from an empty list: `instances` still holds the R8 instances built in an
# earlier cell, and appending to it would mix the two datasets.
instances = []
instances += [DatasetInstance(data[0], data[1], 'train') for data in train_data]
instances += [DatasetInstance(data[0], data[1], 'dev') for data in dev_data]
instances += [DatasetInstance(data[0], data[1], 'test') for data in test_data]
Dataset('agnews', instances, [1,2,3,4])

__main__.Dataset

In [None]:
def load_agnews_dataset(agnews_path, dev_ratio = 0.05, sample_ratio = 0.2):
    """Load a random subsample of ``train.csv`` / ``test.csv`` found under ``agnews_path``.

    Args:
        agnews_path: directory containing ``train.csv`` and ``test.csv``
            (header-less, three columns).
        dev_ratio: fraction of the subsampled training rows moved to the ``dev`` fold.
        sample_ratio: fraction of each file's rows to keep.

    Returns:
        Dataset named ``'agnews'`` whose instances are tagged ``train``, ``dev``
        or ``test`` (in that order), with ``label_names`` ``[1, 2, 3, 4]``.
    """
    def load_data(filename, data_path = agnews_path):
        data_df = pd.read_csv(os.path.join(data_path, filename), header=None)
        data_df.columns = ['rating', 'topic','description']
        return data_df[['description','rating']].to_numpy()

    def subsample(rows):
        # Draw WITHOUT replacement. random.choices() samples with replacement,
        # which duplicates rows and lets the same example land in both the
        # train and the dev fold after the split below.
        n_rows = rows.shape[0]
        n_keep = min(max(round(sample_ratio * n_rows), 0), n_rows)
        return [rows[i] for i in random.sample(range(n_rows), n_keep)]

    train_data = subsample(load_data('train.csv'))
    test_data = subsample(load_data('test.csv'))

    # train / dev split
    dev_size = round(len(train_data) * dev_ratio)
    print(dev_size)
    random.shuffle(train_data)  # train_data is a plain list of rows here, so this is safe
    dev_data = train_data[:dev_size]
    train_data = train_data[dev_size:]

    instances = []
    instances += [DatasetInstance(data[0], data[1], 'train') for data in train_data]
    instances += [DatasetInstance(data[0], data[1], 'dev') for data in dev_data]
    instances += [DatasetInstance(data[0], data[1], 'test') for data in test_data]
    labels = [1,2,3,4]
    return Dataset('agnews', instances, labels)

In [None]:
!ls $pwd

README.md                             main.py
[34m__pycache__[m[m                           model.py
data.py                               optimizer.py
entity_linker.py                      requirements.in
enwiki_20180420_entity_linker.pkl     requirements.txt
enwiki_20180420_entity_linker.pkl.bz2 [34mreuters-21578[m[m
enwiki_20180420_lg1_300d.pkl          reuters21578.tar
enwiki_20180420_lg1_300d.pkl.bz2      train.py
enwiki_20180420_lg1_300d.pkl.bz2.1


In [1]:
import time 

In [2]:
t1 = time.time()
print(time.time() - t1)

5.602836608886719e-05


In [3]:
#20news group, accuracy should be around 0.86
t1 = time.time()
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=20ng --patience=3
print(f"Total time elapsed: {time.time() - t1}")

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 02:51:07,005] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|████████████████████████████████████| 18846/18846 [01:01<00:00, 307.19it/s]
epoch: 0 loss: 2.87369680: 100%|██████████████| 336/336 [02:47<00:00,  2.01it/s]
accuracy (dev): 0.2478
f-measure (dev): 0.1900
epoch: 1 loss: 2.58604074: 100%|██████████████| 336/336 [02:47<00:00,  2.01it/s]
accuracy (dev): 0.5876
f-measure (dev): 0.5260
epoch: 2 loss: 1.88835907: 100%|██████████████| 336/336 [02:48<00:00,  2.00it/s]
accuracy (dev): 0.7274
f-measure (dev): 0.6931
epoch: 3 loss: 0.95640075: 100%|██████████████| 336/336 [02:47<00:00,  2.00it/s]
accuracy (dev): 0.8566
f-measure (dev): 0.8392
epoch: 4 loss: 0.45326430: 100%|██████████████| 336/336 [02:47<00:00,  2.00it/s]
accura

In [4]:
#r8 dataset, accuracy should be around 0.97
t1 = time.time()
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=r8 --dataset-path=reuters-21578 --patience=3
print(f"Total time elapsed: {time.time() - t1}")

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 03:40:26,676] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|██████████████████████████████████████| 7674/7674 [00:09<00:00, 827.66it/s]
epoch: 0 loss: 0.66338152: 100%|██████████████| 163/163 [00:19<00:00,  8.35it/s]
accuracy (dev): 0.7372
f-measure (dev): 0.2005
epoch: 1 loss: 0.13576712: 100%|██████████████| 163/163 [00:25<00:00,  6.45it/s]
accuracy (dev): 0.9343
f-measure (dev): 0.7219
epoch: 2 loss: 0.14597020: 100%|██████████████| 163/163 [00:23<00:00,  6.92it/s]
accuracy (dev): 0.9526
f-measure (dev): 0.8994
epoch: 3 loss: 0.01007028: 100%|██████████████| 163/163 [00:23<00:00,  6.93it/s]
accuracy (dev): 0.9891
f-measure (dev): 0.9719
epoch: 4 loss: 0.01432292: 100%|██████████████| 163/163 [00:23<00:00,  6.91it/s]
accura

In [5]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 03:57:38,251] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 03:57:38,251] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 03:57:39,976] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 25520/25520 [00:06<00:00, 4025.51it/s]
epoch: 0 loss: 0.21575215: 100%|██████████████| 713/713 [01:12<00:00,  9.88it/s]
accuracy (dev): 0.8467
f-measure (dev): 0.8478
epoch: 1 loss: 0.14972335: 100%|██████████████| 713/713 [01:11<00:00,  9.91it/s]
accuracy (dev): 0.8833
f-measure (dev): 0.8852
epoch: 2 loss: 0.09223567: 100%|██████████████| 713/713 [01:11<00:00,  9.92i

In [7]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --patience=3 --dev-size=0.1

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 04:12:06,137] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 04:12:06,137] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
2400
[2021-04-18 04:12:07,671] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3586.37it/s]
epoch: 0 loss: 0.62562615: 100%|██████████████| 675/675 [01:04<00:00, 10.49it/s]
accuracy (dev): 0.8450
f-measure (dev): 0.8450
epoch: 1 loss: 0.30967689: 100%|██████████████| 675/675 [01:17<00:00,  8.73it/s]
accuracy (dev): 0.8804
f-measure (dev): 0.8808
epoch: 2 loss: 0.33121398: 100%|██████████████| 675/675 [01:14<00:00,  9.04i

In [8]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.4 --patience=3 --dev-size=0.1

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 04:39:13,773] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 04:39:13,773] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
4800
[2021-04-18 04:39:15,490] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 55600/55600 [00:15<00:00, 3481.83it/s]
epoch: 0 loss: 0.51255274: 100%|████████████| 1350/1350 [03:09<00:00,  7.11it/s]
accuracy (dev): 0.8792
f-measure (dev): 0.8788
epoch: 1 loss: 0.36382210: 100%|████████████| 1350/1350 [03:15<00:00,  6.90it/s]
accuracy (dev): 0.9031
f-measure (dev): 0.9030
epoch: 2 loss: 0.49471223: 100%|████████████| 1350/1350 [03:14<00:00,  6.95i

In [9]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.5 --patience=3 --dev-size=0.1

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 04:59:35,574] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 04:59:35,574] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
6000
[2021-04-18 04:59:37,260] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 67600/67600 [00:17<00:00, 3919.99it/s]
epoch: 0 loss: 0.22995307: 100%|████████████| 1688/1688 [04:12<00:00,  6.68it/s]
accuracy (dev): 0.8872
f-measure (dev): 0.8874
epoch: 1 loss: 0.27880809: 100%|████████████| 1688/1688 [04:29<00:00,  6.27it/s]
accuracy (dev): 0.9068
f-measure (dev): 0.9071
epoch: 2 loss: 0.00934730: 100%|████████████| 1688/1688 [04:28<00:00,  6.30i

In [10]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.6 --patience=3 --dev-size=0.1

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 05:31:34,793] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 05:31:34,793] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
7200
[2021-04-18 05:31:36,411] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 79600/79600 [00:19<00:00, 4072.95it/s]
epoch: 0 loss: 0.24468315: 100%|████████████| 2025/2025 [05:24<00:00,  6.23it/s]
accuracy (dev): 0.8903
f-measure (dev): 0.8904
epoch: 1 loss: 0.36283419: 100%|████████████| 2025/2025 [05:49<00:00,  5.80it/s]
accuracy (dev): 0.9185
f-measure (dev): 0.9189
epoch: 2 loss: 0.08021113: 100%|████████████| 2025/2025 [05:41<00:00,  5.92i

In [None]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.6 --patience=3 --dev-size=0.1

In [None]:
# It turns out that using 20% of the training data is enough.

In [None]:
#min_count=3

In [2]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --min-count=1

{'patience': 3, 'min_count': 1, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:06:19,164] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:06:19,164] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:06:21,111] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3833.76it/s]
epoch: 0 loss: 0.67812389: 100%|██████████████| 713/713 [02:40<00:00,  4.43it/s]
accuracy (dev): 0.8683
f-measure (dev): 0.8680
epoch: 1 loss: 0.11133724: 100%|██████████████| 713/713 [02:41<00:00,  4.41it/s]
accuracy (dev): 0.8967
f-measure (dev): 0.8961
epoch: 2 loss: 0.00968423: 100%|██████████████| 713/713 [02:42<00:00,  4.39it/s

In [19]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --min-count=5

{'patience': 3, 'min_count': 5, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:21:22,918] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:21:22,918] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:21:24,425] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:10<00:00, 3142.73it/s]
epoch: 0 loss: 0.95184708: 100%|██████████████| 713/713 [00:59<00:00, 12.01it/s]
accuracy (dev): 0.8575
f-measure (dev): 0.8575
epoch: 1 loss: 1.12400377: 100%|██████████████| 713/713 [01:01<00:00, 11.62it/s]
accuracy (dev): 0.8933
f-measure (dev): 0.8934
epoch: 2 loss: 0.06516819: 100%|██████████████| 713/713 [01:04<00:00, 11.10it/s

In [20]:
#max_word_length=43

In [21]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --max-word-length=21

{'patience': 3, 'max_word_length': 21, 'min_count': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:26:49,046] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:26:49,046] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:26:50,457] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3671.73it/s]
epoch: 0 loss: 0.41725725: 100%|██████████████| 713/713 [01:19<00:00,  8.97it/s]
accuracy (dev): 0.8525
f-measure (dev): 0.8521
epoch: 1 loss: 0.22154669: 100%|██████████████| 713/713 [01:19<00:00,  8.91it/s]
accuracy (dev): 0.8967
f-measure (dev): 0.8963
epoch: 2 loss: 0.27551037: 100%|██████████████| 713/713 [01:19<00:00,  8.93it/

In [22]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --max-word-length=85

{'patience': 3, 'max_word_length': 85, 'min_count': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:33:47,715] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:33:47,715] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:33:49,084] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3877.65it/s]
epoch: 0 loss: 0.39765996: 100%|██████████████| 713/713 [01:19<00:00,  9.02it/s]
accuracy (dev): 0.8483
f-measure (dev): 0.8476
epoch: 1 loss: 0.19685851: 100%|██████████████| 713/713 [01:20<00:00,  8.89it/s]
accuracy (dev): 0.8908
f-measure (dev): 0.8903
epoch: 2 loss: 0.30737793: 100%|██████████████| 713/713 [01:19<00:00,  8.94it/

In [23]:
#max_entity_length=256

In [24]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --max-entity-length=128

{'patience': 3, 'max_entity_length': 128, 'min_count': None, 'max_word_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:42:07,309] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:42:07,309] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:42:08,701] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3706.85it/s]
epoch: 0 loss: 0.39748710: 100%|██████████████| 713/713 [01:06<00:00, 10.74it/s]
accuracy (dev): 0.8483
f-measure (dev): 0.8476
epoch: 1 loss: 0.19038668: 100%|██████████████| 713/713 [01:09<00:00, 10.27it/s]
accuracy (dev): 0.8933
f-measure (dev): 0.8928
epoch: 2 loss: 0.30851525: 100%|██████████████| 713/713 [01:09<00:00, 10.31it

In [25]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --max-entity-length=512

{'patience': 3, 'max_entity_length': 512, 'min_count': None, 'max_word_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 09:48:07,116] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 09:48:07,116] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 09:48:08,490] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3915.10it/s]
epoch: 0 loss: 0.39748758: 100%|██████████████| 713/713 [01:43<00:00,  6.91it/s]
accuracy (dev): 0.8483
f-measure (dev): 0.8476
epoch: 1 loss: 0.19039702: 100%|██████████████| 713/713 [02:05<00:00,  5.70it/s]
accuracy (dev): 0.8933
f-measure (dev): 0.8928
epoch: 2 loss: 0.30850285: 100%|██████████████| 713/713 [01:43<00:00,  6.88it

In [26]:
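# Batch-size sweep: 8, 16, 32, 64, 128, 256.
# batch_size=32 appears to be the default (runs without --batch-size also take 713 steps/epoch).
# NOTE: the 8/16/32 runs pass --dim-size=20 while the 64/128/256 runs omit it,
# so speed and accuracy are not directly comparable between the two groups.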

In [39]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --batch-size=8

{'patience': 3, 'dim_size': 20, 'batch_size': 8, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 05:09:30,065] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:09:30,065] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 05:09:31,536] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:09<00:00, 3419.15it/s]
epoch: 0 loss: 0.25296992: 100%|███████████| 2850/2850 [00:28<00:00, 101.52it/s]
accuracy (dev): 0.8758
f-measure (dev): 0.8760
epoch: 1 loss: 0.76977217: 100%|████████████| 2850/2850 [00:32<00:00, 87.98it/s]
accuracy (dev): 0.8967
f-measure (dev): 0.8964
epoch: 2 loss: 0.08509895: 100%|████████████| 2850/2850 [00:33<

In [37]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --batch-size=16

{'patience': 3, 'dim_size': 20, 'batch_size': 16, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:19:19,356] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:19:19,356] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:19:20,588] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3829.93it/s]
epoch: 0 loss: 0.45790142: 100%|████████████| 1425/1425 [00:14<00:00, 98.41it/s]
accuracy (dev): 0.8767
f-measure (dev): 0.8766
epoch: 1 loss: 0.72059411: 100%|████████████| 1425/1425 [00:15<00:00, 90.52it/s]
accuracy (dev): 0.9000
f-measure (dev): 0.8999
epoch: 2 loss: 0.29207978: 100%|████████████| 1425/1425 [00:17

In [40]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --batch-size=32

{'patience': 3, 'dim_size': 20, 'batch_size': 32, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 05:12:31,186] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:12:31,186] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 05:12:32,616] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3831.11it/s]
epoch: 0 loss: 0.61241001: 100%|██████████████| 713/713 [00:07<00:00, 91.26it/s]
accuracy (dev): 0.8692
f-measure (dev): 0.8686
epoch: 1 loss: 0.73502123: 100%|██████████████| 713/713 [00:08<00:00, 88.20it/s]
accuracy (dev): 0.8917
f-measure (dev): 0.8915
epoch: 2 loss: 0.28671446: 100%|██████████████| 713/713 [00:07

In [28]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --batch-size=64

{'patience': 3, 'batch_size': 64, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 10:08:26,756] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 10:08:26,757] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 10:08:28,209] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 4095.04it/s]
epoch: 0 loss: 0.52649426: 100%|██████████████| 357/357 [00:51<00:00,  6.88it/s]
accuracy (dev): 0.8383
f-measure (dev): 0.8376
epoch: 1 loss: 0.20958732: 100%|██████████████| 357/357 [00:57<00:00,  6.19it/s]
accuracy (dev): 0.8917
f-measure (dev): 0.8912
epoch: 2 loss: 0.34953910: 100%|██████████████| 357/357 [00:51<00:00,  6.88it/

In [41]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --batch-size=128

{'patience': 3, 'batch_size': 128, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-19 05:28:33,981] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:28:33,981] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 05:28:35,402] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3600.74it/s]
epoch: 0 loss: 0.44953221: 100%|██████████████| 179/179 [00:40<00:00,  4.40it/s]
accuracy (dev): 0.8750
f-measure (dev): 0.8748
epoch: 1 loss: 0.16732767: 100%|██████████████| 179/179 [00:40<00:00,  4.47it/s]
accuracy (dev): 0.8942
f-measure (dev): 0.8937
epoch: 2 loss: 0.24385175: 100%|██████████████| 179/179 [00:

In [42]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --batch-size=256

{'patience': 3, 'batch_size': 256, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-19 05:32:15,137] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:32:15,137] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 05:32:16,452] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 4009.61it/s]
epoch: 0 loss: 0.83935887: 100%|████████████████| 90/90 [00:35<00:00,  2.52it/s]
accuracy (dev): 0.8600
f-measure (dev): 0.8595
epoch: 1 loss: 0.19403049: 100%|████████████████| 90/90 [00:35<00:00,  2.52it/s]
accuracy (dev): 0.8925
f-measure (dev): 0.8924
epoch: 2 loss: 0.27995002: 100%|████████████████| 90/90 [00:

In [29]:
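# Learning-rate sweep: 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3 (all with --dim-size=20).
# learning_rate=0.003 appears to be the default: the explicit 0.003 run reproduces the
# losses and dev scores of the --dim-size=20 run that sets no learning rate.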

In [36]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --learning-rate=0.0003

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.0003, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:17:11,250] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:17:11,250] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:17:12,540] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3690.70it/s]
epoch: 0 loss: 1.39465201: 100%|██████████████| 713/713 [00:07<00:00, 89.70it/s]
accuracy (dev): 0.2875
f-measure (dev): 0.1469
epoch: 1 loss: 1.14501047: 100%|██████████████| 713/713 [00:08<00:00, 86.82it/s]
accuracy (dev): 0.8483
f-measure (dev): 0.8471
epoch: 2 loss: 0.78791499: 100%|██████████████| 713/713 [0

In [30]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --learning-rate=0.001

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.001, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:10:36,326] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:10:36,326] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:10:37,686] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3550.86it/s]
epoch: 0 loss: 1.23200202: 100%|██████████████| 713/713 [00:08<00:00, 86.66it/s]
accuracy (dev): 0.8108
f-measure (dev): 0.8096
epoch: 1 loss: 0.65674126: 100%|██████████████| 713/713 [00:08<00:00, 84.09it/s]
accuracy (dev): 0.8717
f-measure (dev): 0.8713
epoch: 2 loss: 0.37715742: 100%|██████████████| 713/713 [00

In [33]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --learning-rate=0.003

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.003, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:13:57,651] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:13:57,652] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:13:58,873] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 3973.34it/s]
epoch: 0 loss: 0.61241001: 100%|██████████████| 713/713 [00:07<00:00, 89.20it/s]
accuracy (dev): 0.8692
f-measure (dev): 0.8686
epoch: 1 loss: 0.73502123: 100%|██████████████| 713/713 [00:08<00:00, 87.37it/s]
accuracy (dev): 0.8917
f-measure (dev): 0.8915
epoch: 2 loss: 0.28671446: 100%|██████████████| 713/713 [00

In [31]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --learning-rate=0.01

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.01, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:11:59,517] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:11:59,518] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:12:00,765] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3636.88it/s]
epoch: 0 loss: 0.33002666: 100%|██████████████| 713/713 [00:08<00:00, 86.96it/s]
accuracy (dev): 0.8842
f-measure (dev): 0.8844
epoch: 1 loss: 0.62666595: 100%|██████████████| 713/713 [00:08<00:00, 85.15it/s]
accuracy (dev): 0.8892
f-measure (dev): 0.8885
epoch: 2 loss: 0.20031598: 100%|██████████████| 713/713 [00:

In [38]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3  --dim-size=20 --learning-rate=0.03

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.03, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 04:55:16,136] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 04:55:16,137] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 04:55:18,002] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3525.84it/s]
epoch: 0 loss: 0.29083192: 100%|██████████████| 713/713 [00:07<00:00, 89.65it/s]
accuracy (dev): 0.8700
f-measure (dev): 0.8698
epoch: 1 loss: 0.57669520: 100%|██████████████| 713/713 [00:08<00:00, 89.09it/s]
accuracy (dev): 0.8775
f-measure (dev): 0.8773
epoch: 2 loss: 0.07009823: 100%|██████████████| 713/713 [00:

In [34]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3  --dim-size=20 --learning-rate=0.1

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.1, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:15:02,322] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:15:02,323] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:15:03,681] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:09<00:00, 3390.14it/s]
epoch: 0 loss: 0.18787357: 100%|██████████████| 713/713 [00:07<00:00, 90.68it/s]
accuracy (dev): 0.8508
f-measure (dev): 0.8496
epoch: 1 loss: 1.00940502: 100%|██████████████| 713/713 [00:07<00:00, 89.50it/s]
accuracy (dev): 0.8383
f-measure (dev): 0.8380
epoch: 2 loss: 0.40640652: 100%|██████████████| 713/713 [00:0

In [35]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3  --dim-size=20 --learning-rate=0.3

{'patience': 3, 'dim_size': 20, 'learning_rate': 0.3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 22:16:24,117] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 22:16:24,117] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 22:16:25,290] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 4276.68it/s]
epoch: 0 loss: 0.40930349: 100%|██████████████| 713/713 [00:07<00:00, 93.40it/s]
accuracy (dev): 0.7992
f-measure (dev): 0.7976
epoch: 1 loss: 4.57277536: 100%|██████████████| 713/713 [00:08<00:00, 87.52it/s]
accuracy (dev): 0.7983
f-measure (dev): 0.7982
epoch: 2 loss: 2.88950706: 100%|██████████████| 713/713 [00:0

In [None]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=7 --dim-size=10

In [2]:
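# Warmup sweep: warmup_epochs = 0, 1, 2, 4, 8, 16, 32 (all with --dim-size=20).
# warmup_epochs=2 appears to be the default: the explicit 2 run reproduces the
# losses and dev scores of the --dim-size=20 run that sets no warmup.
# NOTE: the 0 and 1 runs use --sample_ratio=0.3 --dev-size=0.33; the others use --sample_ratio=0.2.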

In [16]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=20 --warmup-epochs=0

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 0, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:54:41,968] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:54:41,968] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:54:43,333] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3813.97it/s]
epoch: 0 loss: 0.36341110: 100%|██████████████| 754/754 [00:08<00:00, 86.58it/s]
accuracy (dev): 0.8760
f-measure (dev): 0.8758
epoch: 1 loss: 0.34367085: 100%|██████████████| 754/754 [00:08<00:00, 87.30it/s]
accuracy (dev): 0.8843
f-measure (dev): 0.8844
epoch: 2 loss: 0.26877916: 100%|██████████████| 754/754 [00:08

In [17]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=20 --warmup-epochs=1

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 1, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:55:58,204] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:55:58,204] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:55:59,413] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:09<00:00, 4725.41it/s]
epoch: 0 loss: 0.49007526: 100%|██████████████| 754/754 [00:08<00:00, 88.61it/s]
accuracy (dev): 0.8682
f-measure (dev): 0.8680
epoch: 1 loss: 0.50555015: 100%|██████████████| 754/754 [00:08<00:00, 85.57it/s]
accuracy (dev): 0.8845
f-measure (dev): 0.8846
epoch: 2 loss: 0.29995775: 100%|██████████████| 754/754 [00:08

In [18]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --warmup-epochs=2

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 2, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:57:14,215] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:57:14,215] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 06:57:15,436] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 4165.14it/s]
epoch: 0 loss: 0.61241001: 100%|██████████████| 713/713 [00:07<00:00, 92.28it/s]
accuracy (dev): 0.8692
f-measure (dev): 0.8686
epoch: 1 loss: 0.73502123: 100%|██████████████| 713/713 [00:08<00:00, 88.79it/s]
accuracy (dev): 0.8917
f-measure (dev): 0.8915
epoch: 2 loss: 0.28671446: 100%|██████████████| 713/713 [00:08<

In [19]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --warmup-epochs=4

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 4, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:58:17,565] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:58:17,566] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 06:58:18,784] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3947.68it/s]
epoch: 0 loss: 1.03458762: 100%|██████████████| 713/713 [00:07<00:00, 93.45it/s]
accuracy (dev): 0.8500
f-measure (dev): 0.8488
epoch: 1 loss: 0.67376113: 100%|██████████████| 713/713 [00:08<00:00, 88.44it/s]
accuracy (dev): 0.8833
f-measure (dev): 0.8831
epoch: 2 loss: 0.32969204: 100%|██████████████| 713/713 [00:08<

In [20]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --warmup-epochs=8

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 8, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:59:21,285] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:59:21,285] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 06:59:22,617] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 4060.41it/s]
epoch: 0 loss: 1.31058002: 100%|██████████████| 713/713 [00:07<00:00, 91.19it/s]
accuracy (dev): 0.6783
f-measure (dev): 0.6657
epoch: 1 loss: 0.69626158: 100%|██████████████| 713/713 [00:08<00:00, 88.49it/s]
accuracy (dev): 0.8750
f-measure (dev): 0.8745
epoch: 2 loss: 0.40284395: 100%|██████████████| 713/713 [00:08<

In [21]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --warmup-epochs=16

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 16, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 07:00:42,949] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 07:00:42,950] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 07:00:44,235] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:07<00:00, 3992.63it/s]
epoch: 0 loss: 1.38557351: 100%|██████████████| 713/713 [00:07<00:00, 92.75it/s]
accuracy (dev): 0.3283
f-measure (dev): 0.2106
epoch: 1 loss: 1.03833508: 100%|██████████████| 713/713 [00:08<00:00, 88.08it/s]
accuracy (dev): 0.8608
f-measure (dev): 0.8598
epoch: 2 loss: 0.58301306: 100%|██████████████| 713/713 [00:08

In [24]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dim-size=20 --warmup-epochs=32

{'patience': 3, 'dim_size': 20, 'warmup_epochs': 32, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 07:17:02,343] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 07:17:02,343] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-19 07:17:03,645] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3770.99it/s]
epoch: 0 loss: 1.40541923: 100%|██████████████| 713/713 [00:07<00:00, 89.96it/s]
accuracy (dev): 0.2675
f-measure (dev): 0.1071
epoch: 1 loss: 1.27761734: 100%|██████████████| 713/713 [00:08<00:00, 87.30it/s]
accuracy (dev): 0.6733
f-measure (dev): 0.6597
epoch: 2 loss: 1.01901221: 100%|██████████████| 713/713 [00:08

In [5]:
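# Dropout sweep: dropout_prob = 0.2, 0.8, 0.95.
# dropout_prob=0.4 is presumably the default -- TODO confirm in main.py.
# NOTE: the 0.95 run also changes --sample_ratio, --dev-size and --dim-size.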

In [8]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dropout-prob=0.2

{'patience': 3, 'dropout_prob': 0.2, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 17:12:03,754] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 17:12:03,754] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 17:12:05,048] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3589.20it/s]
epoch: 0 loss: 0.37532973: 100%|██████████████| 713/713 [01:18<00:00,  9.03it/s]
accuracy (dev): 0.8558
f-measure (dev): 0.8552
epoch: 1 loss: 0.20480226: 100%|██████████████| 713/713 [01:18<00:00,  9.07it/s]
accuracy (dev): 0.8942
f-measure (dev): 0.8939
epoch: 2 loss: 0.19900559: 100%|██████████████| 713/713 [01:19<00:00,  8.99it

In [9]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.2 --patience=3 --dropout-prob=0.8

{'patience': 3, 'dropout_prob': 0.8, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 17:18:59,141] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 17:18:59,141] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
1200
[2021-04-18 17:19:00,428] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 31600/31600 [00:08<00:00, 3917.79it/s]
epoch: 0 loss: 0.62678760: 100%|██████████████| 713/713 [01:18<00:00,  9.13it/s]
accuracy (dev): 0.8242
f-measure (dev): 0.8234
epoch: 1 loss: 0.28783217: 100%|██████████████| 713/713 [01:19<00:00,  9.02it/s]
accuracy (dev): 0.8742
f-measure (dev): 0.8737
epoch: 2 loss: 0.44036064: 100%|██████████████| 713/713 [01:18<00:00,  9.10it

In [28]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dropout-prob=0.95 --dim-size=20

{'patience': 3, 'dropout_prob': 0.95, 'dim_size': 20, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 21:57:44,415] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 21:57:44,415] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 21:57:45,740] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3550.20it/s]
epoch: 0 loss: 1.27766168: 100%|██████████████| 754/754 [00:09<00:00, 80.84it/s]
accuracy (dev): 0.7878
f-measure (dev): 0.7869
epoch: 1 loss: 1.17743349: 100%|██████████████| 754/754 [00:09<00:00, 80.61it/s]
accuracy (dev): 0.8348
f-measure (dev): 0.8349
epoch: 2 loss: 1.01345861: 100%|██████████████| 754/754 [00

In [None]:
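# Test the effect of using ~10% of all training data as the dev set
# (--sample_ratio=0.3 x --dev-size=0.33 ~= 0.099; the logged dev size goes from 1200 to 11880).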

In [10]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 17:28:30,315] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 17:28:30,315] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 17:28:31,547] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4191.76it/s]
epoch: 0 loss: 0.27541691: 100%|██████████████| 754/754 [01:30<00:00,  8.30it/s]
accuracy (dev): 0.8519
f-measure (dev): 0.8518
epoch: 1 loss: 0.22377980: 100%|██████████████| 754/754 [01:35<00:00,  7.91it/s]
accuracy (dev): 0.8846
f-measure (dev): 0.8847
epoch: 2 loss: 0.07205369: 100%|██████████████| 754/754 [01:34<00:00,  7.94

In [None]:
# Test the effect of the embedding dimension (dim_size).
# Conclusion: dim_size=10 is enough.

In [14]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=300

{'patience': 3, 'dim_size': 300, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 19:05:35,377] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 19:05:35,377] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 19:05:36,666] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4090.30it/s]
epoch: 0 loss: 0.27541691: 100%|██████████████| 754/754 [01:35<00:00,  7.90it/s]
accuracy (dev): 0.8519
f-measure (dev): 0.8518
epoch: 1 loss: 0.22377980: 100%|██████████████| 754/754 [01:36<00:00,  7.82it/s]
accuracy (dev): 0.8846
f-measure (dev): 0.8847
epoch: 2 loss: 0.07205369: 100%|██████████████| 754/754 [0

In [17]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=100

{'patience': 3, 'dim_size': 100, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 19:23:50,312] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 19:23:50,312] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 19:23:51,606] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4170.89it/s]
epoch: 0 loss: 0.24819005: 100%|██████████████| 754/754 [00:34<00:00, 21.72it/s]
accuracy (dev): 0.8662
f-measure (dev): 0.8660
epoch: 1 loss: 0.09105700: 100%|██████████████| 754/754 [00:33<00:00, 22.47it/s]
accuracy (dev): 0.8864
f-measure (dev): 0.8864
epoch: 2 loss: 0.25775036: 100%|██████████████| 754/754 [0

In [21]:

!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=20

{'patience': 3, 'dim_size': 20, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 21:05:35,404] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 21:05:35,404] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 21:05:36,762] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4024.08it/s]
epoch: 0 loss: 0.72138524: 100%|██████████████| 754/754 [00:08<00:00, 84.65it/s]
accuracy (dev): 0.8514
f-measure (dev): 0.8510
epoch: 1 loss: 0.55409890: 100%|██████████████| 754/754 [00:09<00:00, 81.93it/s]
accuracy (dev): 0.8799
f-measure (dev): 0.8798
epoch: 2 loss: 0.35931602: 100%|██████████████| 754/754 [00

In [18]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=10

{'patience': 3, 'dim_size': 10, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 19:51:28,122] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 19:51:28,123] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 19:51:29,409] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4250.16it/s]
epoch: 0 loss: 0.74027365: 100%|█████████████| 754/754 [00:07<00:00, 100.71it/s]
accuracy (dev): 0.8449
f-measure (dev): 0.8444
epoch: 1 loss: 0.15134956: 100%|██████████████| 754/754 [00:07<00:00, 95.28it/s]
accuracy (dev): 0.8763
f-measure (dev): 0.8762
epoch: 2 loss: 0.08319517: 100%|██████████████| 754/754 [00

In [20]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=5

{'patience': 3, 'dim_size': 5, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 20:14:54,517] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 20:14:54,517] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 20:14:56,057] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3716.81it/s]
epoch: 0 loss: 0.95982498: 100%|█████████████| 754/754 [00:07<00:00, 107.11it/s]
accuracy (dev): 0.8396
f-measure (dev): 0.8389
epoch: 1 loss: 0.58879846: 100%|█████████████| 754/754 [00:07<00:00, 104.32it/s]
accuracy (dev): 0.8746
f-measure (dev): 0.8745
epoch: 2 loss: 0.24550717: 100%|█████████████| 754/754 [00:0

In [19]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=2

{'patience': 3, 'dim_size': 2, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 20:13:03,787] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 20:13:03,787] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 20:13:05,095] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4036.79it/s]
epoch: 0 loss: 1.29236352: 100%|██████████████| 754/754 [00:07<00:00, 96.11it/s]
accuracy (dev): 0.6465
f-measure (dev): 0.6451
epoch: 1 loss: 0.79416919: 100%|██████████████| 754/754 [00:07<00:00, 94.45it/s]
accuracy (dev): 0.7620
f-measure (dev): 0.7638
epoch: 2 loss: 0.71892554: 100%|██████████████| 754/754 [00:

In [22]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=1

{'patience': 3, 'dim_size': 1, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 21:13:51,745] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 21:13:51,746] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 21:13:53,088] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3902.51it/s]
epoch: 0 loss: 1.30217612: 100%|█████████████| 754/754 [00:06<00:00, 123.28it/s]
accuracy (dev): 0.4295
f-measure (dev): 0.2911
epoch: 1 loss: 0.91083628: 100%|█████████████| 754/754 [00:06<00:00, 119.65it/s]
accuracy (dev): 0.5133
f-measure (dev): 0.4638
epoch: 2 loss: 1.24727368: 100%|█████████████| 754/754 [00:0

In [None]:
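#test patience
#conclusion: patience doesn't have any effect
#(in the recorded output, the patience=30 and patience=15 runs are identical to the patience=3 runs with the same dim_size)
#NOTE(review): the recorded outputs for --patience=7 and --patience=15 report dim_size 30 and 10,
#but their commands pass --dim-size=20, so those outputs are stale; re-run before comparing.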

In [46]:
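#test weight-decay (baseline: --weight-decay=0)
#NOTE(review): the command passes --dim-size=20 but the recorded output reports dim_size=30,
#which is what the other weight-decay runs use; confirm the intended value and re-run.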
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=20 --weight-decay=0

{'patience': 3, 'dim_size': 30, 'weight_decay': 0.0, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 05:53:30,209] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:53:30,209] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 05:53:31,667] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3707.38it/s]
epoch: 0 loss: 0.48876694: 100%|██████████████| 754/754 [00:11<00:00, 63.27it/s]
accuracy (dev): 0.8562
f-measure (dev): 0.8557
epoch: 1 loss: 0.14257501: 100%|██████████████| 754/754 [00:11<00:00, 63.83it/s]
accuracy (dev): 0.8871
f-measure (dev): 0.8872
epoch: 2 loss: 0.04594561: 100%|██████████████| 754/754 [00:

In [26]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=7 --dim-size=20

{'patience': 7, 'dim_size': 30, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 21:46:13,087] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 21:46:13,087] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 21:46:14,432] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4081.58it/s]
epoch: 0 loss: 0.48916975: 100%|██████████████| 754/754 [00:11<00:00, 63.69it/s]
accuracy (dev): 0.8561
f-measure (dev): 0.8556
epoch: 1 loss: 0.14455046: 100%|██████████████| 754/754 [00:11<00:00, 64.55it/s]
accuracy (dev): 0.8870
f-measure (dev): 0.8871
epoch: 2 loss: 0.04804389: 100%|██████████████| 754/754 [00

In [24]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=15 --dim-size=20

{'patience': 15, 'dim_size': 10, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-18 21:17:49,943] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-18 21:17:49,943] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-18 21:17:51,362] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3648.51it/s]
epoch: 0 loss: 0.74027365: 100%|█████████████| 754/754 [00:07<00:00, 100.07it/s]
accuracy (dev): 0.8449
f-measure (dev): 0.8444
epoch: 1 loss: 0.15134956: 100%|██████████████| 754/754 [00:07<00:00, 94.72it/s]
accuracy (dev): 0.8763
f-measure (dev): 0.8762
epoch: 2 loss: 0.08319517: 100%|██████████████| 754/754 [0

In [23]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=30 --dim-size=20

{'patience': 30, 'dim_size': 20, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 07:10:30,661] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 07:10:30,662] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 07:10:31,915] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3566.21it/s]
epoch: 0 loss: 0.72138524: 100%|██████████████| 754/754 [00:09<00:00, 80.03it/s]
accuracy (dev): 0.8514
f-measure (dev): 0.8510
epoch: 1 loss: 0.55409890: 100%|██████████████| 754/754 [00:09<00:00, 81.94it/s]
accuracy (dev): 0.8799
f-measure (dev): 0.8798
epoch: 2 loss: 0.35931602: 100%|██████████████| 754/754 [0

In [47]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=0.1

{'patience': 3, 'dim_size': 30, 'weight_decay': 0.1, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 05:55:05,578] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 05:55:05,578] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 05:55:06,844] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4030.41it/s]
epoch: 0 loss: 0.49332750: 100%|██████████████| 754/754 [00:12<00:00, 61.87it/s]
accuracy (dev): 0.8560
f-measure (dev): 0.8555
epoch: 1 loss: 0.16265494: 100%|██████████████| 754/754 [00:12<00:00, 62.13it/s]
accuracy (dev): 0.8858
f-measure (dev): 0.8858
epoch: 2 loss: 0.06735275: 100%|██████████████| 754/754 [00:

In [1]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=0.2

{'patience': 3, 'dim_size': 30, 'weight_decay': 0.2, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:16:59,115] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:16:59,115] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:17:00,949] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3626.21it/s]
epoch: 0 loss: 0.49898657: 100%|██████████████| 754/754 [00:11<00:00, 62.86it/s]
accuracy (dev): 0.8550
f-measure (dev): 0.8545
epoch: 1 loss: 0.18366091: 100%|██████████████| 754/754 [00:12<00:00, 62.37it/s]
accuracy (dev): 0.8841
f-measure (dev): 0.8841
epoch: 2 loss: 0.09034809: 100%|██████████████| 754/754 [00:

In [2]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=0.4

{'patience': 3, 'dim_size': 30, 'weight_decay': 0.4, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:18:35,687] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:18:35,687] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:18:36,829] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:09<00:00, 4439.41it/s]
epoch: 0 loss: 0.51324546: 100%|██████████████| 754/754 [00:11<00:00, 67.54it/s]
accuracy (dev): 0.8543
f-measure (dev): 0.8538
epoch: 1 loss: 0.22821410: 100%|██████████████| 754/754 [00:12<00:00, 61.54it/s]
accuracy (dev): 0.8811
f-measure (dev): 0.8812
epoch: 2 loss: 0.14779682: 100%|██████████████| 754/754 [00:

In [3]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=0.8

{'patience': 3, 'dim_size': 30, 'weight_decay': 0.8, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:19:56,220] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:19:56,221] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:19:57,596] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4102.77it/s]
epoch: 0 loss: 0.55153555: 100%|██████████████| 754/754 [00:11<00:00, 64.88it/s]
accuracy (dev): 0.8519
f-measure (dev): 0.8513
epoch: 1 loss: 0.33243099: 100%|██████████████| 754/754 [00:11<00:00, 66.12it/s]
accuracy (dev): 0.8740
f-measure (dev): 0.8739
epoch: 2 loss: 0.28969181: 100%|██████████████| 754/754 [00:

In [4]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=1.6

{'patience': 3, 'dim_size': 30, 'weight_decay': 1.6, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:22:18,310] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:22:18,310] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:22:19,651] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4152.70it/s]
epoch: 0 loss: 0.65671206: 100%|██████████████| 754/754 [00:11<00:00, 64.35it/s]
accuracy (dev): 0.8484
f-measure (dev): 0.8476
epoch: 1 loss: 0.59035248: 100%|██████████████| 754/754 [00:11<00:00, 64.17it/s]
accuracy (dev): 0.8583
f-measure (dev): 0.8580
epoch: 2 loss: 0.58102101: 100%|██████████████| 754/754 [00:

In [5]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=3.2

{'patience': 3, 'dim_size': 30, 'weight_decay': 3.2, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:23:38,451] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:23:38,451] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:23:39,674] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4051.79it/s]
epoch: 0 loss: 0.90111160: 100%|██████████████| 754/754 [00:12<00:00, 62.45it/s]
accuracy (dev): 0.8380
f-measure (dev): 0.8366
epoch: 1 loss: 1.04934573: 100%|██████████████| 754/754 [00:12<00:00, 61.27it/s]
accuracy (dev): 0.8328
f-measure (dev): 0.8318
epoch: 2 loss: 1.04461682: 100%|██████████████| 754/754 [00:

In [51]:
#test weight-decay
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30 --weight-decay=10

{'patience': 3, 'dim_size': 30, 'weight_decay': 10.0, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 06:01:08,554] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 06:01:08,554] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 06:01:09,807] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:10<00:00, 4183.78it/s]
epoch: 0 loss: 1.32853770: 100%|██████████████| 754/754 [00:11<00:00, 64.01it/s]
accuracy (dev): 0.7848
f-measure (dev): 0.7803
epoch: 1 loss: 1.35569000: 100%|██████████████| 754/754 [00:11<00:00, 64.42it/s]
accuracy (dev): 0.6753
f-measure (dev): 0.6599
epoch: 2 loss: 1.33920133: 100%|██████████████| 754/754 [00

In [None]:
#embedding weights not trainable - horrible performance (dev accuracy only 0.2971 -> 0.3303 over the first two epochs)

In [2]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 --dim-size=30

{'patience': 3, 'dim_size': 30, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 22:07:49,809] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 22:07:49,809] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 22:07:51,194] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3922.37it/s]
epoch: 0 loss: 1.38938236: 100%|█████████████| 754/754 [00:03<00:00, 223.87it/s]
accuracy (dev): 0.2971
f-measure (dev): 0.1990
epoch: 1 loss: 1.38208997: 100%|█████████████| 754/754 [00:03<00:00, 228.04it/s]
accuracy (dev): 0.3303
f-measure (dev): 0.2901
epoch: 2 loss: 1.38477361: 100%|█████████████| 754/754 [00:

In [None]:
#embedding weights pretrained

In [4]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

{'patience': 3, 'dim_size': 30, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True}
[2021-04-19 22:11:23,504] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 22:11:23,504] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-19 22:11:24,891] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3577.56it/s]
epoch: 0 loss: 0.67419654: 100%|██████████████| 754/754 [00:11<00:00, 63.98it/s]
accuracy (dev): 0.7652
f-measure (dev): 0.7649
epoch: 1 loss: 0.52082962: 100%|██████████████| 754/754 [00:14<00:00, 52.42it/s]
accuracy (dev): 0.7940
f-measure (dev): 0.7940
epoch: 2 loss: 0.41320327: 100%|██████████████| 754/754 [00

In [1]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=1 --dev-size=0.1 --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-19 23:25:08,485] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-19 23:25:08,486] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
12000
[2021-04-19 23:25:10,636] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|█████████████████████████████████| 127600/127600 [00:27<00:00, 4645.75it/s]
epoch: 0 loss: 0.54806042: 100%|████████████| 3375/3375 [01:04<00:00, 52.72it/s]
accuracy (dev): 0.8040
f-measure (dev): 0.8032
epoch: 1 loss: 0.53157073: 100%|████████████| 3375/3375 [01:03<00:00, 53.16it/s]
accuracy (dev): 0.8145
f-measure (dev): 0.8145
epoch: 2 loss: 0.35161698: 100%|████████████| 3375/3375 [0

In [None]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

In [1]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 05:32:28,691] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-20 05:32:28,692] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-20 05:32:30,635] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3414.17it/s]
trainable params:1207
epoch: 0 loss: 0.67419654: 100%|██████████████| 754/754 [00:14<00:00, 50.99it/s]
accuracy (dev): 0.7652
f-measure (dev): 0.7649
epoch: 1 loss: 0.52082962: 100%|██████████████| 754/754 [00:13<00:00, 56.61it/s]
accuracy (dev): 0.7940
f-measure (dev): 0.7940
epoch: 2 loss: 0.41320327: 100%|████

In [2]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 05:35:08,380] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-20 05:35:08,380] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-20 05:35:10,290] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:11<00:00, 3855.83it/s]
trainable params:15403207
epoch: 0 loss: 0.27541691: 100%|██████████████| 754/754 [01:39<00:00,  7.57it/s]
accuracy (dev): 0.8519
f-measure (dev): 0.8518
epoch: 1 loss: 0.22377980: 100%|██████████████| 754/754 [01:39<00:00,  7.59it/s]
accuracy (dev): 0.8846
f-measure (dev): 0.8847
epoch: 2 loss: 0.07205369: 100%|

# Positional Encodings

In [None]:
#1. positional encodings with pretrained embeddings (code changed; the command line is the same as in the runs above)

In [2]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 12:08:03,702] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-20 12:08:03,702] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-20 12:08:04,867] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3526.80it/s]
trainable params:1207
epoch: 0 loss: 0.87837654: 100%|██████████████| 754/754 [00:38<00:00, 19.65it/s]
accuracy (dev): 0.7407
f-measure (dev): 0.7411
epoch: 1 loss: 0.51857001: 100%|██████████████| 754/754 [00:40<00:00, 18.51it/s]
accuracy (dev): 0.7748
f-measure (dev): 0.7732
epoch: 2 loss: 0.49885586: 100%|████

In [20]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=r8 --dataset-path='reuters-21578' --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 14:38:46,368] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|██████████████████████████████████████| 7674/7674 [00:10<00:00, 708.77it/s]
trainable params:2411
epoch: 0 loss: 0.97391814: 100%|██████████████| 163/163 [00:23<00:00,  7.07it/s]
accuracy (dev): 0.6606
f-measure (dev): 0.1781
epoch: 1 loss: 0.75208479: 100%|██████████████| 163/163 [00:22<00:00,  7.11it/s]
accuracy (dev): 0.7190
f-measure (dev): 0.2974
epoch: 2 loss: 0.74639136: 100%|██████████████| 163/163 [00:23<00:00,  6.99it/s]
accuracy (dev): 0.7920
f-measure (dev): 0.4914
epoch: 3 loss: 0.40875810: 100%|██████████████| 163/163 [00:23<00:00,  6.90it/s]
accuracy (dev): 0.8285
f-measure (dev): 0.6631
epoch: 4 loss: 0.49731544: 100%|██████████████| 

In [19]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset='20ng' --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 14:33:28,148] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|████████████████████████████████████| 18846/18846 [01:11<00:00, 262.90it/s]
trainable params:6023
epoch: 0 loss: 2.99712372: 100%|██████████████| 336/336 [00:15<00:00, 21.41it/s]
accuracy (dev): 0.0938
f-measure (dev): 0.0640
epoch: 1 loss: 2.86046410: 100%|██████████████| 336/336 [00:15<00:00, 21.25it/s]
accuracy (dev): 0.2796
f-measure (dev): 0.2223
epoch: 2 loss: 2.61102104: 100%|██████████████| 336/336 [00:15<00:00, 21.52it/s]
accuracy (dev): 0.3681
f-measure (dev): 0.3296
epoch: 3 loss: 2.36069298: 100%|██████████████| 336/336 [00:15<00:00, 21.46it/s]
accuracy (dev): 0.4319
f-measure (dev): 0.3667
epoch: 4 loss: 2.22352386: 100%|██████████████| 

In [None]:
#2. Positional Embeddings with learned embeddings

In [14]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 12:39:32,008] [INFO] Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. (_init_num_threads@utils.py:129)
[2021-04-20 12:39:32,008] [INFO] NumExpr defaulting to 8 threads. (_init_num_threads@utils.py:141)
11880
[2021-04-20 12:39:33,374] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|███████████████████████████████████| 43600/43600 [00:12<00:00, 3512.63it/s]
trainable params:15403207
epoch: 0 loss: 0.39047870: 100%|██████████████| 754/754 [02:05<00:00,  6.03it/s]
accuracy (dev): 0.8428
f-measure (dev): 0.8425
epoch: 1 loss: 0.16040744: 100%|██████████████| 754/754 [02:27<00:00,  5.10it/s]
accuracy (dev): 0.8829
f-measure (dev): 0.8830
epoch: 2 loss: 0.09728407: 100%|

In [17]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=r8 --dataset-path='reuters-21578' --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 13:04:51,824] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|██████████████████████████████████████| 7674/7674 [00:10<00:00, 725.20it/s]
trainable params:11137211
epoch: 0 loss: 0.72213984: 100%|██████████████| 163/163 [00:40<00:00,  4.01it/s]
accuracy (dev): 0.7263
f-measure (dev): 0.2145
epoch: 1 loss: 0.16910115: 100%|██████████████| 163/163 [00:42<00:00,  3.79it/s]
accuracy (dev): 0.9343
f-measure (dev): 0.7388
epoch: 2 loss: 0.14280237: 100%|██████████████| 163/163 [00:43<00:00,  3.78it/s]
accuracy (dev): 0.9489
f-measure (dev): 0.8898
epoch: 3 loss: 0.01714265: 100%|██████████████| 163/163 [00:42<00:00,  3.80it/s]
accuracy (dev): 0.9818
f-measure (dev): 0.9652
epoch: 4 loss: 0.01747063: 100%|████████████

In [18]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset='20ng' --patience=3 

{'patience': 3, 'min_count': None, 'max_word_length': None, 'max_entity_length': None, 'batch_size': None, 'learning_rate': None, 'weight_decay': None, 'warmup_epochs': None, 'dropout_prob': None, 'use_gpu': False, 'use_word': True, 'dim_size': 300}
[2021-04-20 13:13:06,080] [INFO] Creating vocabulary... (generate_features@data.py:65)
100%|████████████████████████████████████| 18846/18846 [01:08<00:00, 274.94it/s]
trainable params:59540423
epoch: 0 loss: 2.97698307: 100%|██████████████| 336/336 [03:09<00:00,  1.78it/s]
accuracy (dev): 0.1381
f-measure (dev): 0.1094
epoch: 1 loss: 2.77377367: 100%|██████████████| 336/336 [03:07<00:00,  1.79it/s]
accuracy (dev): 0.4513
f-measure (dev): 0.3911
epoch: 2 loss: 2.17071080: 100%|██████████████| 336/336 [03:08<00:00,  1.78it/s]
accuracy (dev): 0.6283
f-measure (dev): 0.5866
epoch: 3 loss: 1.31894052: 100%|██████████████| 336/336 [03:12<00:00,  1.75it/s]
accuracy (dev): 0.7628
f-measure (dev): 0.7276
epoch: 4 loss: 0.87232262: 100%|████████████

In [None]:
#3. Without Positional Encoding with learned embeddings -> data already obtained

In [None]:
#3. Without Positional Encoding with pretrained embeddings -> data already obtained

In [None]:
!python main.py train-classifier enwiki_20180420_lg1_300d.pkl enwiki_20180420_entity_linker.pkl --dataset=agnews --dataset-path='sentiment_datasets/ag_news_csv' --sample_ratio=0.3 --dev-size=0.33 --patience=3 

In [8]:
import torch
# Scratch check: an index vector 0..255 split by odd/even stride.
# torch.arange builds the int64 sequence directly instead of materialising a
# Python range and converting it element by element.
pe = torch.arange(256)
# Odd-indexed entries (1, 3, 5, ...): half of the 256 elements.
pe[1::2].shape

torch.Size([128])

In [9]:
pe[0::2].shape

torch.Size([128])

In [46]:
A = torch.tensor([[0,1],[3,4]])
B = torch.tensor([[0,1],[3,10]])

In [49]:
A[B == 0] = 1
A

tensor([[1, 1],
        [3, 4]])

In [41]:
# `math` is imported here so this cell runs on a fresh kernel without relying
# on a cell further down the notebook having been executed first.
import math

d_model = 300
# dtype check: an integer arange multiplied by a Python float, then exp().
torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model)).dtype

torch.float32

In [36]:
print('a', 2)

a 2


In [13]:
import torch
a = torch.tensor([[1,2],[3,4]])
a

tensor([[1, 2],
        [3, 4]])

In [31]:
a + torch.tensor([[1,1]])

tensor([[2, 3],
        [4, 5]])

In [14]:
a == 0

tensor([[False, False],
        [False, False]])

In [15]:
a[a==0]

tensor([], dtype=torch.int64)

In [20]:
import torch.nn as nn
import math
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to an input, then apply dropout.

    The encoding table is computed once in ``__init__`` and stored as a
    buffer named ``pe`` with shape ``(1, max_len, d_model)``. Even-indexed
    columns hold ``sin(position * div_term)`` and odd-indexed columns hold
    ``cos(position * div_term)``.

    Args:
        d_model: Number of columns in the encoding table; the last dimension
            of the input to ``forward`` must broadcast against it.
        dropout: Probability passed to ``nn.Dropout``.
        max_len: Number of positions precomputed in the table.
    """

    def __init__(self, d_model, dropout, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model)
        # Build both factors as float up front rather than relying on
        # int -> float type promotion in the multiplication below.
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model))
        angles = position * div_term
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns,
        # so drop the surplus frequency to keep the shapes aligned.
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``dropout(x + pe[:, :x.size(1)])``.

        Dimension 1 of ``x`` is used as the position axis: the table is
        sliced to ``x.size(1)`` rows and broadcast over dimension 0.
        """
        # A registered buffer does not require grad, so it can be added
        # directly; no autograd wrapper is needed.
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

In [26]:
pe = PositionalEncoding(300, 0.3, 256)
pe.pe.shape

torch.Size([1, 256, 300])

In [28]:
pe.pe

tensor([[[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  ...,  1.0000e+00,
           0.0000e+00,  1.0000e+00],
         [ 8.4147e-01,  5.4030e-01,  8.0782e-01,  ...,  1.0000e+00,
           1.0633e-04,  1.0000e+00],
         [ 9.0930e-01, -4.1615e-01,  9.5231e-01,  ...,  1.0000e+00,
           2.1267e-04,  1.0000e+00],
         ...,
         [ 9.9482e-01, -1.0162e-01, -7.3692e-01,  ...,  9.9959e-01,
           2.6899e-02,  9.9964e-01],
         [ 4.5200e-01, -8.9202e-01,  1.1171e-01,  ...,  9.9959e-01,
           2.7005e-02,  9.9964e-01],
         [-5.0639e-01, -8.6230e-01,  8.6861e-01,  ...,  9.9958e-01,
           2.7112e-02,  9.9963e-01]]])

In [27]:
pe.pe.repeat(32, 1, 1).shape

torch.Size([32, 256, 300])

## Test on Other datasets.

# Comparison Between Datasets


In [None]:
#Conclusion: We were not able to figure out why the pretrained models perform worse on AG News.

## 20 Newsgroup

In [4]:
from sklearn.datasets import fetch_20newsgroups
ng20_train = fetch_20newsgroups(subset='train')

In [6]:
ng20_train.keys()

dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR'])

In [8]:
ng20_train['target']

array([7, 4, 4, ..., 3, 1, 8])

In [9]:
ng20_train['target_names']

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

In [12]:
print(ng20_train['data'][0])

From: lerxst@wam.umd.edu (where's my thing)
Subject: WHAT car is this!?
Nntp-Posting-Host: rac3.wam.umd.edu
Organization: University of Maryland, College Park
Lines: 15

 I was wondering if anyone out there could enlighten me on this car I saw
the other day. It was a 2-door sports car, looked to be from the late 60s/
early 70s. It was called a Bricklin. The doors were really small. In addition,
the front bumper was separate from the rest of the body. This is 
all I know. If anyone can tellme a model name, engine specs, years
of production, where this car is made, history, or whatever info you
have on this funky looking car, please e-mail.

Thanks,
- IL
   ---- brought to you by your neighborhood Lerxst ----







In [14]:
print(ng20_train['data'][20])

From: keith@cco.caltech.edu (Keith Allan Schneider)
Subject: Re: <<Pompous ass
Organization: California Institute of Technology, Pasadena
Lines: 16
NNTP-Posting-Host: punisher.caltech.edu

livesey@solntze.wpd.sgi.com (Jon Livesey) writes:

[...]
>>The "`little' things" above were in reference to Germany, clearly.  People
>>said that there were similar things in Germany, but no one could name any.
>That's not true.  I gave you two examples.  One was the rather
>pevasive anti-semitism in German Christianity well before Hitler
>arrived.  The other was the system of social ranks that were used
>in Imperail Germany and Austria to distinguish Jews from the rest 
>of the population.

These don't seem like "little things" to me.  At least, they are orders
worse than the motto.  Do you think that the motto is a "little thing"
that will lead to worse things?

keith



In [18]:
print(ng20_train['target_names'] [ ng20_train['target'][20] ] )

alt.atheism


In [19]:
print(ng20_train['data'][40])

From:  (Sean Garrison)
Subject: Re: Bonilla
Nntp-Posting-Host: berkeley-kstar-node.net.yale.edu
Organization: Yale Univeristy
Lines: 37

In article <1993Apr17.213553.2181@organpipe.uug.arizona.edu>,
krueger@helium.gas.uug.arizona.edu (theodore r krueger) wrote:
 
> Isn't it funny that  a white person calls comeone a "nigger" and gets banned 
> for a year, but a black person calls someone a "faggot" and there is no 
> consequence?

> Ted


Ted, you're missing a vital point.  As Roger Lustig pointed out in a
previous response, the reason why Schott was banned from baseball was
because she had been known to call and think in a racially biased manner on
a constant basis.  Such thoughts affected her hiring practices.  Bonilla,
on the other hand, was found to have mentioned this one word a single time.
 If he had been known to go around, criticizing homosexuals, it would be a
different story.  Furthermore, he is merely an athlete.  He doesn't have to
hire anyone as Schott had to do.  Dave Pa

In [20]:
print(ng20_train['target_names'] [ ng20_train['target'][40] ] )

rec.sport.baseball


# Test Positional Encoding

In [35]:
import torch
import math

In [36]:
d_model = 30
max_len = 100

In [37]:
pe = torch.zeros(max_len, d_model)

In [38]:
# Position indices 0..max_len-1 as a column vector of shape (max_len, 1), so
# that multiplying by the 1-D div_term broadcasts to one row per position.
position = torch.arange(0, max_len).unsqueeze(1)

In [39]:
position.shape

torch.Size([100, 1])

In [40]:
# One scale factor per even column index k in [0, d_model):
# exp(-k * ln(10000) / d_model), i.e. 10000 ** (-k / d_model).
div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))

In [41]:
div_term

tensor([1.0000e+00, 5.4117e-01, 2.9286e-01, 1.5849e-01, 8.5770e-02, 4.6416e-02,
        2.5119e-02, 1.3594e-02, 7.3564e-03, 3.9811e-03, 2.1544e-03, 1.1659e-03,
        6.3096e-04, 3.4146e-04, 1.8479e-04])

In [42]:
# Even-indexed columns take the sine of position * div_term ...
pe[:, 0::2] = torch.sin(position * div_term)
# ... and odd-indexed columns take the cosine of the same product.
pe[:, 1::2] = torch.cos(position * div_term)
# Prepend a size-1 leading dimension: (max_len, d_model) -> (1, max_len, d_model).
# NOTE(review): this rebinds `pe`, so the cell is not idempotent. Re-running it
# without first re-running the `torch.zeros` cell would apply the slice
# assignments above to a 3-D tensor, where they no longer index the columns.
pe = pe.unsqueeze(0)

In [43]:
pe

tensor([[[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  ...,  1.0000e+00,
           0.0000e+00,  1.0000e+00],
         [ 8.4147e-01,  5.4030e-01,  5.1514e-01,  ...,  1.0000e+00,
           1.8479e-04,  1.0000e+00],
         [ 9.0930e-01, -4.1615e-01,  8.8306e-01,  ...,  1.0000e+00,
           3.6957e-04,  1.0000e+00],
         ...,
         [ 3.7961e-01, -9.2515e-01,  7.9173e-01,  ...,  9.9945e-01,
           1.7923e-02,  9.9984e-01],
         [-5.7338e-01, -8.1929e-01,  3.6391e-01,  ...,  9.9944e-01,
           1.8108e-02,  9.9984e-01],
         [-9.9921e-01,  3.9821e-02, -1.6791e-01,  ...,  9.9943e-01,
           1.8293e-02,  9.9983e-01]]])

In [44]:
pe.shape

torch.Size([1, 100, 30])

In [45]:
pe[:, :2]

tensor([[[0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
          1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
          0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
          1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00,
          0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
          1.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00],
         [8.4147e-01, 5.4030e-01, 5.1514e-01, 8.5711e-01, 2.8870e-01,
          9.5742e-01, 1.5783e-01, 9.8747e-01, 8.5664e-02, 9.9632e-01,
          4.6399e-02, 9.9892e-01, 2.5116e-02, 9.9968e-01, 1.3593e-02,
          9.9991e-01, 7.3564e-03, 9.9997e-01, 3.9811e-03, 9.9999e-01,
          2.1544e-03, 1.0000e+00, 1.1659e-03, 1.0000e+00, 6.3096e-04,
          1.0000e+00, 3.4146e-04, 1.0000e+00, 1.8479e-04, 1.0000e+00]]])