# A simple implementation of skipgrams with negative sampling
Author: Pierre Nugues

Adapted from _Distributed Representations of Words and Phrases and their Compositionality_, Sect. 2.2, by Mikolov et al. 2013.

The imports

In [1]:
import math, random
import os
from collections import Counter
from itertools import accumulate
from random import shuffle, randint

import numpy as np
import regex as re
import tensorflow as tf
from scipy.spatial.distance import cosine
from tensorflow.keras import backend
from tensorflow.keras.layers import Dense, Embedding, Lambda, Average, GlobalAveragePooling1D, Dot, Input, Reshape, Activation
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm

## Parameters

The embedding size, context size, and negative counts

In [2]:
# Dimension of the embedding vectors
embedding_dim = 100
# Number of context words taken on each side of the center word
w_size = 2
# Full window size: left context + center word + right context
c_size = w_size * 2 + 1
# Number of negative pairs drawn for each positive pair
K_NEG = 5
# Threshold used in the discard probability of the downsampling
t = 1e-3
# Exponent applied to the counts before drawing the negative samples
power = 0.75

## The Corpus

We select a dataset and execute locally or on colab

In [3]:
dataset = 'homer'  # 'homer', 'dickens', 'selma', or 'big'
colab = False # On my machine or on colab
debug = False  # NOTE(review): not read by any of the visible cells
DOWNSAMPLING = False  # If True, the subsampled word sequence replaces the original one

In [4]:
# Root folder of the corpora: the mounted drive on colab, a relative path locally
BASE_PATH = '/content/drive/My Drive/Colab Notebooks/' if colab else '../../../'

In [5]:
if colab:
    # Imported conditionally: google.colab is presumably only installed on colab
    from google.colab import drive
    # Mount point matching the colab value of BASE_PATH
    drive.mount('/content/drive')

We read the files from a folder

In [6]:
def get_files(dir, suffix):
    """
    Lists the entries of a folder whose names end with a given suffix
    :param dir: the folder to scan
    :param suffix: the ending the names must have
    :return: the matching names, without the folder prefix
    """
    return [name for name in os.listdir(dir) if name.endswith(suffix)]


def load_corpus(path):
    """
    Reads and concatenates all the text files of a folder
    :param path: the folder containing the corpus files
    :return: the concatenated contents of the files whose names end with 'txt'
    """
    # Sorted so that the concatenation order does not depend on os.listdir()
    files = [os.path.join(path, file) for file in sorted(get_files(path, 'txt'))]
    print(files)
    texts = []
    for file in files:
        # Context manager: the handle is closed after reading.
        # Explicit UTF-8, as for the Homer files, instead of the platform default.
        with open(file, encoding='utf-8') as f:
            texts.append(f.read())
    # One join instead of repeated string concatenation
    return ''.join(texts)

In [7]:
# Load the corpus selected by `dataset` and the words used to probe the embeddings.
# A single if/elif chain: exactly one branch runs, and an unknown name fails here
# instead of raising a NameError on `text` in a later cell.
if dataset == 'homer':
    #text = 'Sing, O goddess, the anger of Achilles son of Peleus'.lower()
    with open(BASE_PATH + 'corpus/iliad.mb.txt', encoding='utf-8') as f:
        text1 = f.read().lower()
    with open(BASE_PATH + 'corpus/odyssey.mb.txt', encoding='utf-8') as f:
        text2 = f.read().lower()
    text = text1 + text2
    test_words = ['he', 'she', 'ulysses', 'penelope', 'achaeans', 'trojans']
elif dataset == 'dickens':
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
elif dataset == 'selma':
    path = BASE_PATH + 'corpus/Selma/'
    text = load_corpus(path)
    test_words = ['han', 'hon', 'att', 'bord', 'bordet', 'måndag', 'söndag', 'man', 'kvinna', 'kung', 'drottning',
                  'pojke', 'flicka']
elif dataset == 'big':
    # Dickens and Norvig corpora concatenated
    path = BASE_PATH + 'corpus/Dickens/'
    text = load_corpus(path)
    path = BASE_PATH + 'corpus/Norvig/'
    text += load_corpus(path)
    test_words = ['he', 'she', 'paris', 'london', 'table', 'rare', 'monday', 'sunday', 'man', 'woman', 'king', 'queen', 'boy',
                  'girl']
else:
    raise ValueError(f"Unknown dataset: {dataset!r}")

## Processing the Corpus

### Tokenizing

We set all the text in lowercase and extract the words as sequences of letters

In [8]:
text = text.lower()
# Raw string: '\p' is not a valid Python escape sequence and triggers a warning
# in a plain string literal. \p{L} (any Unicode letter) is a feature of the
# `regex` module, imported here as `re`.
word_seq = re.findall(r'\p{L}+', text)
word_seq[:5]

['book', 'i', 'the', 'quarrel', 'between']

### Downsampling

We can downsample the frequent words. We first count the words, then we randomly discard some words in the text, depending on their frequency. Frequent words will often be discarded. Rare words, never. We will have to count them again after sampling.

In [9]:
# Frequency of each word and total number of tokens, before any downsampling
counts = Counter(word_seq)
word_cnt = sum(counts.values())
word_cnt

272712

In [10]:
counts['the'], counts['he'], counts['penelope']

(15905, 4746, 110)

The discard probability threshold, following § 2.3 of the paper

In [11]:
# Discard probability of each word: 1 - sqrt(t / f(w)), where f(w) is the
# relative frequency of the word, floored at 0 for the rare words
discard_probs = {
    word: max(0, 1 - math.sqrt(t / (cnt / word_cnt)))
    for word, cnt in counts.items()
}

In [12]:
discard_probs['the'], discard_probs['he'], discard_probs.get('penelope')

(0.8690560952429589, 0.7602888379452187, 0)

In [13]:
# A word is kept when a uniform draw in [0, 1) exceeds its discard probability
subsampled_word_seq = [word for word in word_seq
                       if discard_probs[word] < np.random.random()]

In [14]:
# The subsampled sequence replaces the original one only when the switch is on
if DOWNSAMPLING:
    word_seq = subsampled_word_seq

### Counting the words

In [15]:
# Recount: word_seq is the subsampled sequence when DOWNSAMPLING is True
counts = Counter(word_seq)
word_cnt = sum(counts.values())
word_cnt

272712

In [16]:
counts['the'], counts['he'], counts['penelope']

(15905, 4746, 110)

In [17]:
counts['the']/word_cnt, counts['he']/word_cnt, counts['penelope']/word_cnt

(0.05832159934289653, 0.01740297456657573, 0.00040335592126492415)

We extract the unique words

In [18]:
# The vocabulary in alphabetical order
unique_words = sorted(counts)
unique_words[:10]

['a',
 'abantes',
 'abarbarea',
 'abas',
 'abate',
 'abated',
 'abetting',
 'abhorred',
 'abians',
 'abide']

In [19]:
# Number of distinct words; passed as first argument to the Embedding layers below
vocab_size = len(unique_words)
vocab_size

9725

### Indices

And we create indices

In [20]:
# Two-way mapping between the words and their ranks in the sorted vocabulary
word2idx = dict(zip(unique_words, range(len(unique_words))))
idx2word = dict(enumerate(unique_words))

We map the words to their indices and we get the sequence of word indices

In [21]:
# The corpus as a sequence of word indices
widx_seq = [word2idx.get(word) for word in word_seq]
widx_seq[:5]

[1037, 4334, 8518, 6666, 897]

### Power transform

We apply a power transform to the word counts and we return the power-transformed probabilities:
$$
\frac{\text{cnt}(w)^\text{power}}{\sum_i \text{cnt}(w_i)^\text{power}}
$$

In [22]:
def power_transform(counts, power):
    """
    Raises the counts to a power and normalizes them into probabilities
    :param counts: a mapping from the words to their counts
    :param power: the exponent applied to each count
    :return: a dictionary mapping each word to cnt(w)^power / sum_i cnt(w_i)^power
    """
    weights = {word: math.pow(cnt, power) for word, cnt in counts.items()}
    total = sum(weights.values())
    return {word: weight / total for word, weight in weights.items()}

In [23]:
trfmd_probs = power_transform(counts, power)

In [24]:
trfmd_probs['the'], trfmd_probs['he'], trfmd_probs.get('penelope')

(0.020224400021262735, 0.008165282068782646, 0.00048503145644814705)

### Negative sampling
For each positive pair, a word and a context word, we draw $k$ words randomly to form negative pairs.

We build the index and probability lists for the random choice function

In [25]:
# The same probabilities, keyed by word index instead of by word
trfmd_probs_idx = {word2idx[k]: v for k, v in trfmd_probs.items()}

`random.choices` needs the index and the probabilities

In [26]:
# Unzip into two aligned tuples: the word indices and their probabilities
draw_idx, probs = zip(*trfmd_probs_idx.items())

Given the words in the context, we draw $k$ times as many words.

In [27]:
random.choices(draw_idx, weights=probs, k=K_NEG * 2 * w_size)

[3562,
 5982,
 4334,
 4160,
 9163,
 794,
 5756,
 4010,
 4132,
 2947,
 184,
 8989,
 7237,
 2313,
 4132,
 7399,
 4839,
 3569,
 7372,
 9399]

## The pairs

For all the words, we form positive and negative pairs. We extract the context words of a word from its neighbors in the word sequence to form the positive pairs and at random to form the negative ones.

In [28]:
# The cumulative weights do not change inside the loop. Computing them once
# stops random.choices() from re-accumulating the whole vocabulary at every
# position; the draws themselves are unchanged.
cum_probs = list(accumulate(probs))
n_pos = 2 * w_size      # positive pairs per center word: its context words
n_neg = K_NEG * n_pos   # negative pairs per center word

X_i = []  # input (center) word of each pair
X_c = []  # context word of each pair: a real neighbor or a random draw
y = []    # 1 for a positive pair, 0 for a negative one
# The first and last w_size positions are skipped so that every center word
# has a complete window. Iterating over a range gives tqdm the total.
for idx in tqdm(range(w_size, len(widx_seq) - w_size)):
    X_i += [widx_seq[idx]] * (n_pos + n_neg)
    # Positive pairs: the w_size words on the left, then those on the right
    X_c += widx_seq[idx - w_size:idx] + widx_seq[idx + 1:idx + w_size + 1]
    # Negative pairs: words drawn from the power-transformed distribution
    X_c += random.choices(draw_idx, cum_weights=cum_probs, k=n_neg)
    y += [1] * n_pos + [0] * n_neg

272708it [01:22, 3313.80it/s]


We build two inputs: The left input is the input word and the right one is a context word.

In [29]:
# Convert the Python lists to NumPy arrays before passing them to model.fit
y = np.array(y)
X_i = np.array(X_i)
X_c = np.array(X_c)

## The Architecture

And now the architecture

In [30]:
# Left branch: the index of the input (center) word and its embedding
i_word = Input(shape=(1,))
i_embedding = Embedding(vocab_size, embedding_dim, input_length=1)(i_word)

# Right branch: the index of the context word, with its own embedding table
c_word = Input(shape=(1,))
c_embedding = Embedding(vocab_size, embedding_dim, input_length=1)(c_word)

# Dot product of the two embeddings along the last (embedding) axis
dot_prod = Dot(axes=-1)([i_embedding, c_embedding])
# NOTE(review): Dense(1) puts a trainable weight and bias between the dot product
# and the sigmoid, whereas the paper applies the sigmoid to the dot product
# directly -- confirm this is intended
output = Dense(1, activation='sigmoid')(dot_prod)
model = Model([i_word, c_word], output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 1, 100)       972500      input_1[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 1, 100)       972500      input_2[0][0]                    
______________________________________________________________________________________________

In [31]:
def loss_neg(y_true, y_pred):
    """
    Loss of the negative sampling: the element-wise binary cross-entropy
    -(y * log(p) + (1 - y) * log(1 - p))
    :param y_true: the labels, 1 for a positive pair and 0 for a negative one
    :param y_pred: the predicted probabilities
    :return: the loss tensor; no reduction is applied here
    """
    y_true = tf.cast(y_true, tf.float32)
    # Keep the probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise give log(0) = -inf and propagate inf/NaN into the loss
    eps = tf.keras.backend.epsilon()
    y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)
    log_y_pred_1 = tf.math.log(y_pred)
    log_y_pred_0 = tf.math.log(1.0 - y_pred)
    loss = -tf.math.add(tf.math.multiply(y_true, log_y_pred_1),
                       tf.math.multiply(1.0 - y_true, log_y_pred_0))
    return loss

In [32]:
model.compile(loss=loss_neg, optimizer='rmsprop')

In [33]:
model.fit([X_i, X_c], y, epochs=2, batch_size=512)

Epoch 1/2
Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7fe3949292e0>

In [34]:
def most_sim_vecs(vector, U, nbr_words=10):
    """
    Ranks the rows of a matrix by their cosine distance to a vector
    :param vector: the query vector
    :param U: the matrix whose rows are compared with the vector
    :param nbr_words: the number of row indices to return
    :return: the indices of the closest rows, the first-ranked one being skipped
    """
    # Cosine distance, not cosine similarity: 0 between equal vectors, at most 2.
    # All-zero rows receive the maximal distance.
    distances = []
    for row in U:
        distances.append(cosine(vector, row) if np.any(row) else 2)
    ranking = sorted(range(len(distances)), key=distances.__getitem__)
    # The top-ranked row is dropped: it is the query itself when it is a row of U
    return ranking[1:nbr_words + 1]

In [35]:
# The first weight array of the model is used as the word vectors
vectors = model.get_weights()[0]
most_sim_words = {}
for w in test_words:
    # Indices of the closest vectors, mapped back to their words
    neighbor_ids = most_sim_vecs(vectors[word2idx[w]], vectors)
    most_sim_words[w] = [idx2word.get(i) for i in neighbor_ids]
    print(w, most_sim_words[w])

he ['she', 'they', 'so', 'it', 'even', 'as', 'there', 'thus', 'we', 'ulysses']
she ['he', 'they', 'even', 'thus', 'there', 'ulysses', 'it', 'so', 'minerva', 'much']
ulysses ['achilles', 'minerva', 'telemachus', 'hector', 'heaven', 'jove', 'menelaus', 'this', 'himself', 'patroclus']
penelope ['juno', 'o', 'ill', 'enough', 'alive', 'always', 'yourself', 'already', 'moreover', 'care']
achaeans ['trojans', 'gods', 'other', 'argives', 'men', 'city', 'suitors', 'sea', 'danaans', 'others']
trojans ['sea', 'city', 'achaeans', 'argives', 'other', 'ground', 'ships', 'danaans', 'wall', 'body']


In [36]:
"""
base 2 epochs
he ['i', 'you', 'a', 'his', 'him', 'as', 'with', 'that', 'for', 'in']
she ['her', 'who', 'no', 'or', 'their', 'this', 'be', 'your', 'one', 'said']
ulysses ['s', 'out', 'us', 'minerva', 'there', 'achilles', 'now', 'went', 'has', 'came']
penelope ['each', 'hands', 'gone', 'battle', 'looking', 'drink', 'telemachus', 'priam', 'same', 'fight']
achaeans ['ships', 'trojans', 'city', 'gods', 'sea', 'suitors', 'town', 'darkness', 'meanwhile', 'house']
trojans ['sea', 'achaeans', 'ships', 'suitors', 'city', 'getting', 'goddess', 'gods', 'house', 'before']
"""

"\nbase 2 epochs\nhe ['i', 'you', 'a', 'his', 'him', 'as', 'with', 'that', 'for', 'in']\nshe ['her', 'who', 'no', 'or', 'their', 'this', 'be', 'your', 'one', 'said']\nulysses ['s', 'out', 'us', 'minerva', 'there', 'achilles', 'now', 'went', 'has', 'came']\npenelope ['each', 'hands', 'gone', 'battle', 'looking', 'drink', 'telemachus', 'priam', 'same', 'fight']\nachaeans ['ships', 'trojans', 'city', 'gods', 'sea', 'suitors', 'town', 'darkness', 'meanwhile', 'house']\ntrojans ['sea', 'achaeans', 'ships', 'suitors', 'city', 'getting', 'goddess', 'gods', 'house', 'before']\n"

In [37]:
"""
power transform 2 epochs
he ['i', 'you', 'as', 'him', 'not', 'that', 'it', 'they', 'was', 'had']
she ['your', 'us', 'take', 'back', 'some', 's', 'may', 'go', 'let', 'which']
ulysses ['this', 'what', 'could', 'spoke', 'can', 'make', 'answered', 'made', 'hector', 'telemachus']
penelope ['find', 'let', 'than', 'think', 'noble', 'bring', 'back', 'himself', 'olympus', 'great']
achaeans ['among', 'gods', 'out', 'same', 'ships', 'city', 'where', 'house', 'gates', 'suitors']
trojans ['achaeans', 'ranks', 'rest', 'among', 'ground', 'others', 'suitors', 'island', 'dust', 'gates']
"""

"\npower transform 2 epochs\nhe ['i', 'you', 'as', 'him', 'not', 'that', 'it', 'they', 'was', 'had']\nshe ['your', 'us', 'take', 'back', 'some', 's', 'may', 'go', 'let', 'which']\nulysses ['this', 'what', 'could', 'spoke', 'can', 'make', 'answered', 'made', 'hector', 'telemachus']\npenelope ['find', 'let', 'than', 'think', 'noble', 'bring', 'back', 'himself', 'olympus', 'great']\nachaeans ['among', 'gods', 'out', 'same', 'ships', 'city', 'where', 'house', 'gates', 'suitors']\ntrojans ['achaeans', 'ranks', 'rest', 'among', 'ground', 'others', 'suitors', 'island', 'dust', 'gates']\n"

In [38]:
"""
power transform 2 epochs, neg loss
he ['you', 'i', 'it', 'cover', 'will', 'have', 'that', 'him', 'was', 'heal']
she ['ulysses', 'troy', 'did', 'long', 'jove', 'battle', 'very', 'put', 'her', 'think']
ulysses ['jove', 'be', 'battle', 'go', 'did', 'thus', 'after', 'think', 'friends', 'never']
penelope ['round', 'house', 'keep', 'host', 'strength', 'achaeans', 'presently', 'whom', 'servants', 'both']
achaeans ['into', 'house', 'other', 'gods', 'fire', 'both', 'city', 'host', 'king', 'back']
trojans ['into', 'other', 'about', 'themselves', 'away', 'sea', 'through', 'among', 'achaeans', 'back']"""

"\npower transform 2 epochs, neg loss\nhe ['you', 'i', 'it', 'cover', 'will', 'have', 'that', 'him', 'was', 'heal']\nshe ['ulysses', 'troy', 'did', 'long', 'jove', 'battle', 'very', 'put', 'her', 'think']\nulysses ['jove', 'be', 'battle', 'go', 'did', 'thus', 'after', 'think', 'friends', 'never']\npenelope ['round', 'house', 'keep', 'host', 'strength', 'achaeans', 'presently', 'whom', 'servants', 'both']\nachaeans ['into', 'house', 'other', 'gods', 'fire', 'both', 'city', 'host', 'king', 'back']\ntrojans ['into', 'other', 'about', 'themselves', 'away', 'sea', 'through', 'among', 'achaeans', 'back']"

In [39]:
"""
power transform 2 epochs, neg loss
he ['she', 'they', 'it', 'so', 'even', 'there', 'we', 'i', 'as', 'ulysses']
she ['he', 'they', 'ulysses', 'minerva', 'it', 'even', 'thus', 'achilles', 'there', 'much']
ulysses ['achilles', 'telemachus', 'minerva', 'heaven', 'hector', 'menelaus', 'apollo', 'this', 'jove', 'himself']
penelope ['eumaeus', 'already', 'juno', 'ill', 'sleep', 'kill', 'alone', 'antinous', 'return', 'alive']
achaeans ['gods', 'trojans', 'argives', 'city', 'suitors', 'danaans', 'other', 'wall', 'whole', 'men']
trojans ['city', 'sea', 'achaeans', 'argives', 'gods', 'wall', 'ground', 'other', 'suitors', 'whole']
"""

"\npower transform 2 epochs, neg loss\nhe ['she', 'they', 'it', 'so', 'even', 'there', 'we', 'i', 'as', 'ulysses']\nshe ['he', 'they', 'ulysses', 'minerva', 'it', 'even', 'thus', 'achilles', 'there', 'much']\nulysses ['achilles', 'telemachus', 'minerva', 'heaven', 'hector', 'menelaus', 'apollo', 'this', 'jove', 'himself']\npenelope ['eumaeus', 'already', 'juno', 'ill', 'sleep', 'kill', 'alone', 'antinous', 'return', 'alive']\nachaeans ['gods', 'trojans', 'argives', 'city', 'suitors', 'danaans', 'other', 'wall', 'whole', 'men']\ntrojans ['city', 'sea', 'achaeans', 'argives', 'gods', 'wall', 'ground', 'other', 'suitors', 'whole']\n"

In [40]:
"""
power transform 8 epochs, neg loss
he ['you', 'i', 'it', 'cover', 'will', 'have', 'that', 'him', 'was', 'heal']
she ['ulysses', 'troy', 'did', 'long', 'jove', 'battle', 'very', 'put', 'her', 'think']
ulysses ['jove', 'be', 'battle', 'go', 'did', 'thus', 'after', 'think', 'friends', 'never']
penelope ['round', 'house', 'keep', 'host', 'strength', 'achaeans', 'presently', 'whom', 'servants', 'both']
achaeans ['into', 'house', 'other', 'gods', 'fire', 'both', 'city', 'host', 'king', 'back']
trojans ['into', 'other', 'about', 'themselves', 'away', 'sea', 'through', 'among', 'achaeans', 'back']
"""

"\npower transform 8 epochs, neg loss\nhe ['you', 'i', 'it', 'cover', 'will', 'have', 'that', 'him', 'was', 'heal']\nshe ['ulysses', 'troy', 'did', 'long', 'jove', 'battle', 'very', 'put', 'her', 'think']\nulysses ['jove', 'be', 'battle', 'go', 'did', 'thus', 'after', 'think', 'friends', 'never']\npenelope ['round', 'house', 'keep', 'host', 'strength', 'achaeans', 'presently', 'whom', 'servants', 'both']\nachaeans ['into', 'house', 'other', 'gods', 'fire', 'both', 'city', 'host', 'king', 'back']\ntrojans ['into', 'other', 'about', 'themselves', 'away', 'sea', 'through', 'among', 'achaeans', 'back']\n"

In [41]:
"""
power transform 2 epochs, neg loss
he ['she', 'they', 'i', 'we', 'it', 'there', 'who', 'not', 'been', 'so']
she ['he', 'they', 'we', 'i', 'there', 'it', 'who', 'so', 'ever', 'never']
paris ['wold', 'immediate', 'france', 'spring', 'scorn', 'amazement', 'india', 'families', 'behalf', 'emphasis']
london ['themselves', 'death', 'itself', 'either', 'dinner', 'confidence', 'getting', 'waiting', 'purpose', 'law']
table ['fire', 'window', 'street', 'whole', 'chair', 'light', 'air', 'corner', 'glass', 'dark']
rare ['selfish', 'responsible', 'famous', 'convenient', 'evremonde', 'reward', 'learning', 'deaf', 'affliction', 'shy']
monday ['succeeding', 'musing', 'flowing', 'lightning', 'labouring', 'boxes', 'twist', 'yorkshire', 'subsequent', 'frost']
sunday ['trial', 'brothers', 'birds', 'gloomy', 'fields', 'song', 'cottage', 'instrument', 'aspect', 'pot']
man ['gentleman', 'lady', 'woman', 'time', 'young', 'house', 'day', 'little', 'old', 'room']
woman ['girl', 'gentleman', 'child', 'moment', 'voice', 'thing', 'boy', 'word', 'lady', 'morning']
king ['establishment', 'ship', 'beadle', 'roof', 'furniture', 'brothers', 'birds', 'news', 'worst', 'history']
queen ['bills', 'shivering', 'exertion', 'liquor', 'labour', 'horrible', 'dish', 'railway', 'monseigneur', 'painted']
boy ['child', 'captain', 'girl', 'name', 'doctor', 'friend', 'woman', 'mother', 'heart', 'father']
girl ['doctor', 'fellow', 'master', 'chapter', 'captain', 'person', 'child', 'matter', 'family', 'word']

"""

"\npower transform 2 epochs, neg loss\nhe ['she', 'they', 'i', 'we', 'it', 'there', 'who', 'not', 'been', 'so']\nshe ['he', 'they', 'we', 'i', 'there', 'it', 'who', 'so', 'ever', 'never']\nparis ['wold', 'immediate', 'france', 'spring', 'scorn', 'amazement', 'india', 'families', 'behalf', 'emphasis']\nlondon ['themselves', 'death', 'itself', 'either', 'dinner', 'confidence', 'getting', 'waiting', 'purpose', 'law']\ntable ['fire', 'window', 'street', 'whole', 'chair', 'light', 'air', 'corner', 'glass', 'dark']\nrare ['selfish', 'responsible', 'famous', 'convenient', 'evremonde', 'reward', 'learning', 'deaf', 'affliction', 'shy']\nmonday ['succeeding', 'musing', 'flowing', 'lightning', 'labouring', 'boxes', 'twist', 'yorkshire', 'subsequent', 'frost']\nsunday ['trial', 'brothers', 'birds', 'gloomy', 'fields', 'song', 'cottage', 'instrument', 'aspect', 'pot']\nman ['gentleman', 'lady', 'woman', 'time', 'young', 'house', 'day', 'little', 'old', 'room']\nwoman ['girl', 'gentleman', 'child',

In [42]:
"""
power transform 2 epochs, cross entropy loss
he ['she', 'they', 'i', 'it', 'we', 'there', 'never', 'topographical', 'ever', 'who']
she ['he', 'they', 'we', 'i', 'it', 'there', 'never', 'ever', 'who', 'much']
paris ['wold', 'france', 'mutton', 'chancery', 'coketown', 'advance', 'immediate', 'chesney', 'walks', 'despair']
london ['themselves', 'itself', 'death', 'dinner', 'speaking', 'several', 'either', 'ten', 'walking', 'immediately']
table ['window', 'fire', 'street', 'corner', 'whole', 'ground', 'light', 'glass', 'chair', 'wall']
rare ['severe', 'suspicious', 'trifling', 'distinction', 'unpleasant', 'emphatic', 'ale', 'fearful', 'failing', 'monstrous']
monday ['deck', 'peeping', 'tiptoe', 'kneeling', 'rushing', 'eastern', 'floating', 'shuddering', 'glittering', 'balls']
sunday ['blank', 'christmas', 'lonely', 'nights', 'travelling', 'notes', 'counter', 'porter', 'fields', 'french']
man ['gentleman', 'lady', 'woman', 'young', 'time', 'old', 'boy', 'little', 'house', 'day']
woman ['girl', 'gentleman', 'boy', 'child', 'lady', 'fellow', 'person', 'thing', 'moment', 'matter']
king ['establishment', 'inside', 'chief', 'ghost', 'roof', 'clerk', 'worst', 'bar', 'ship', 'furniture']
queen ['instruments', 'adventure', 'organ', 'glorious', 'scheme', 'estate', 'expense', 'list', 'footing', 'style']
boy ['child', 'girl', 'captain', 'doctor', 'woman', 'name', 'poor', 'fellow', 'matter', 'morning']
girl ['child', 'doctor', 'boy', 'fellow', 'matter', 'captain', 'woman', 'master', 'family', 'major']

"""

"\npower transform 2 epochs, cross entropy loss\nhe ['she', 'they', 'i', 'it', 'we', 'there', 'never', 'topographical', 'ever', 'who']\nshe ['he', 'they', 'we', 'i', 'it', 'there', 'never', 'ever', 'who', 'much']\nparis ['wold', 'france', 'mutton', 'chancery', 'coketown', 'advance', 'immediate', 'chesney', 'walks', 'despair']\nlondon ['themselves', 'itself', 'death', 'dinner', 'speaking', 'several', 'either', 'ten', 'walking', 'immediately']\ntable ['window', 'fire', 'street', 'corner', 'whole', 'ground', 'light', 'glass', 'chair', 'wall']\nrare ['severe', 'suspicious', 'trifling', 'distinction', 'unpleasant', 'emphatic', 'ale', 'fearful', 'failing', 'monstrous']\nmonday ['deck', 'peeping', 'tiptoe', 'kneeling', 'rushing', 'eastern', 'floating', 'shuddering', 'glittering', 'balls']\nsunday ['blank', 'christmas', 'lonely', 'nights', 'travelling', 'notes', 'counter', 'porter', 'fields', 'french']\nman ['gentleman', 'lady', 'woman', 'young', 'time', 'old', 'boy', 'little', 'house', 'day']