# Generator prototypes for user collections

In [22]:
# Seed vocabulary for the "user collection": lowercase, single-token
# Middle-earth character names that every later cell uses as positives.
bag_of_words = [
    'aragorn',
    'bilbo',
    'baggins',
    'boromir',
    'eowyn',
    'faramir',
    'frodo',
    'galadriel',
    'gandalf',
    'gollum',
    'legolas',
    'pippin',
    'samwise',
    'saruman',
    'sauron',
    'smaug'
]

I got them from [wiki](https://en.wikipedia.org/wiki/List_of_Middle-earth_characters). To be honest, I cherry-picked them, taking only those I had heard of :D

Also, not all the words are covered in the w2v version I am using, for example I tried adding Theoden, but there is no such word in the word2vec :(

## Word2Vec

In [59]:
import random
from collections import defaultdict
from copy import copy
from operator import itemgetter
from pprint import pprint

import gensim.downloader
import numpy as np
from gensim.models import Word2Vec, KeyedVectors

In [3]:
# Ask gensim's downloader for the 'glove-twitter-200' vectors; with
# return_path=True it hands back the path of the model file rather than a
# loaded model.
external_model_path = gensim.downloader.load('glove-twitter-200', return_path=True)
# Load that file as KeyedVectors; every later cell queries this `w2v` object.
w2v = KeyedVectors.load_word2vec_format(external_model_path)

### Possible strategies

#### Sampling positives for empty input

In [28]:
# Empty input: the only positives are 3 collection words picked at random.
# NOTE(review): no random seed is set in the visible cells, so the sample
# (and therefore the suggestions) changes on every run.
positives = random.sample(bag_of_words, k=3)
print(positives)
w2v.most_similar(positive=positives)

['pippin', 'saruman', 'frodo']


[('gandalf', 0.7178667783737183),
 ('sauron', 0.6690242886543274),
 ('legolas', 0.6201995015144348),
 ('aragorn', 0.6091588735580444),
 ('gollum', 0.5979213118553162),
 ('boromir', 0.5526005029678345),
 ('baggins', 0.5374102592468262),
 ('arwen', 0.5329122543334961),
 ('gimli', 0.5312138795852661),
 ('tyrion', 0.5231751799583435)]

#### All words as positives for empty input

In [48]:
# Empty input: every collection word is used as a positive.
# copy() gives the query its own list, leaving bag_of_words untouched.
positives = copy(bag_of_words)
print(positives)
w2v.most_similar(positive=positives)

['aragorn', 'bilbo', 'baggins', 'boromir', 'eowyn', 'faramir', 'frodo', 'galadriel', 'gandalf', 'gollum', 'legolas', 'pippin', 'samwise', 'saruman', 'sauron', 'smaug']


[('arwen', 0.694290816783905),
 ('gimli', 0.6431220173835754),
 ('thranduil', 0.6006397008895874),
 ('thorin', 0.5981868505477905),
 ('tauriel', 0.5963426828384399),
 ('smeagol', 0.5961745977401733),
 ('tyrion', 0.596038818359375),
 ('lotr', 0.5953423380851746),
 ('tywin', 0.5831868648529053),
 ('javert', 0.581065833568573)]

#### Sampling words for input with weights

In [49]:
# Non-empty input: the input word carries weight 0.5 and two randomly
# sampled collection words carry 0.25 each (weights passed as (word, weight)
# tuples).
input_name = 'meadows'
positives = [(name, 0.25) for name in random.sample(bag_of_words, k=2)]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.5)])

[('baggins', 0.25), ('sauron', 0.25)]


[('shire', 0.5350311994552612),
 ('greenway', 0.5055404305458069),
 ('meadow', 0.489088773727417),
 ('racetrack', 0.4845874011516571),
 ('frodo', 0.47548049688339233),
 ('ridge', 0.46909940242767334),
 ('grove', 0.4554486572742462),
 ('tavern', 0.449339359998703),
 ('hills', 0.4483398497104645),
 ('bilbo', 0.44480064511299133)]

In [55]:
# Non-empty input, same sampling strategy as for 'meadows': input word at
# weight 0.5 plus two randomly sampled collection words at 0.25 each.
input_name = 'bow'
positives = [(name, 0.25) for name in random.sample(bag_of_words, k=2)]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.5)])

[('gandalf', 0.25), ('boromir', 0.25)]


[('bows', 0.5288364887237549),
 ('legolas', 0.5241125226020813),
 ('bandana', 0.5125737190246582),
 ('tie', 0.509882390499115),
 ('headband', 0.5009905099868774),
 ('cross', 0.49508795142173767),
 ('aragorn', 0.4888427257537842),
 ('rings', 0.48588308691978455),
 ('frodo', 0.4845319092273712),
 ('lord', 0.48201265931129456)]

Those above definitely weren't the first generated suggestions; `legolas` appeared only after `gandalf` did.

So maybe we should think about sampling more iconic names, which we can detect by measuring each name's average similarity to all the others. At least that was my hypothesis, and it turned out to be true for this example. This way the selected words may lean the results more towards the topic. It could also work the other way, so this is a field for experimenting.

In [60]:
# Rank the collection words by how "central" they are: for every word, gather
# its similarity to each *other* word in the bag and average those values.
# A higher mean means the word sits closer to the rest of the collection.
scores = defaultdict(list)
for word in bag_of_words:
    for word2 in bag_of_words:
        if word == word2:
            continue  # a word's similarity to itself says nothing about the others
        scores[word].append(w2v.similarity(word, word2))

# The loop variable is deliberately not called `scores`, so it cannot be
# confused with the dict being iterated.
avg_scores = {word: np.mean(similarities) for word, similarities in scores.items()}
pprint(sorted(avg_scores.items(), key=itemgetter(1), reverse=True))

[('gandalf', 0.512717),
 ('frodo', 0.50498706),
 ('aragorn', 0.47652453),
 ('legolas', 0.47038412),
 ('gollum', 0.4569007),
 ('saruman', 0.4318012),
 ('sauron', 0.42934752),
 ('galadriel', 0.41067117),
 ('boromir', 0.3977174),
 ('bilbo', 0.38802972),
 ('baggins', 0.38202074),
 ('pippin', 0.3767573),
 ('faramir', 0.3521212),
 ('smaug', 0.35127968),
 ('eowyn', 0.31487483),
 ('samwise', 0.31416324)]


#### All the words as positives

In [68]:
# Non-empty input: all collection words share a total weight of 0.6, split
# evenly (0.6 / len(bag_of_words) each), and the input word gets 0.4.
input_name = 'meadows'
positives = [(name, 0.6 / len(bag_of_words)) for name in bag_of_words]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.4)])

[('aragorn', 0.0375), ('bilbo', 0.0375), ('baggins', 0.0375), ('boromir', 0.0375), ('eowyn', 0.0375), ('faramir', 0.0375), ('frodo', 0.0375), ('galadriel', 0.0375), ('gandalf', 0.0375), ('gollum', 0.0375), ('legolas', 0.0375), ('pippin', 0.0375), ('samwise', 0.0375), ('saruman', 0.0375), ('sauron', 0.0375), ('smaug', 0.0375)]


[('shire', 0.5253511071205139),
 ('thranduil', 0.5192374587059021),
 ('tywin', 0.508065402507782),
 ('meadow', 0.4798588752746582),
 ('fassbender', 0.4752791225910187),
 ('hedlund', 0.4702310860157013),
 ('primrose', 0.46888819336891174),
 ('hitchcock', 0.46849772334098816),
 ('claflin', 0.4677443504333496),
 ('arwen', 0.46700409054756165)]

In [70]:
# Non-empty input with 'legolas' held out of the positives, to see whether
# the model proposes it on its own for the input 'bow'.
# NOTE(review): the per-word weight still divides by len(bag_of_words), so the
# remaining positives sum to slightly less than 0.6 (15 * 0.0375 = 0.5625 in
# the printed output) — confirm whether that is intended.
input_name = 'bow'
positives = [(name, 0.6 / len(bag_of_words)) for name in bag_of_words if not name == 'legolas']
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.4)])

[('aragorn', 0.0375), ('bilbo', 0.0375), ('baggins', 0.0375), ('boromir', 0.0375), ('eowyn', 0.0375), ('faramir', 0.0375), ('frodo', 0.0375), ('galadriel', 0.0375), ('gandalf', 0.0375), ('gollum', 0.0375), ('pippin', 0.0375), ('samwise', 0.0375), ('saruman', 0.0375), ('sauron', 0.0375), ('smaug', 0.0375)]


[('legolas', 0.5950618982315063),
 ('arwen', 0.5251522660255432),
 ('rings', 0.5174819231033325),
 ('bows', 0.5130518674850464),
 ('tyrion', 0.49903130531311035),
 ('headband', 0.49528932571411133),
 ('lord', 0.49409282207489014),
 ('lotr', 0.4822024703025818),
 ('bling', 0.4780886173248291),
 ('ring', 0.47767069935798645)]

In [71]:
# Non-empty input: the full collection as positives (0.6 total, split evenly)
# plus the input word 'bow' at 0.4. 'legolas' is included among the positives.
input_name = 'bow'
positives = [(name, 0.6 / len(bag_of_words)) for name in bag_of_words]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.4)])

[('aragorn', 0.0375), ('bilbo', 0.0375), ('baggins', 0.0375), ('boromir', 0.0375), ('eowyn', 0.0375), ('faramir', 0.0375), ('frodo', 0.0375), ('galadriel', 0.0375), ('gandalf', 0.0375), ('gollum', 0.0375), ('legolas', 0.0375), ('pippin', 0.0375), ('samwise', 0.0375), ('saruman', 0.0375), ('sauron', 0.0375), ('smaug', 0.0375)]


[('arwen', 0.5389956831932068),
 ('rings', 0.5178107023239136),
 ('tyrion', 0.509496808052063),
 ('bows', 0.503146767616272),
 ('lotr', 0.49924659729003906),
 ('hobbit', 0.4895883798599243),
 ('headband', 0.4891901910305023),
 ('lord', 0.4891796410083771),
 ('thranduil', 0.48799988627433777),
 ('smeagol', 0.48408451676368713)]

It works pretty well, but it needed some weight tuning, and I'm not entirely sure it will work the same way for other topics; maybe they need different weights. So it's a field to research too!

W2V also does not return the names we have passed in as positives, so I needed to remove `legolas` from the positives to get it, but I think we can work around this in the implementation. We need to be sure this will work well, though.

`arwen` is also a good suggestion coming from the w2v, as she is the one legolas falls in love with in the Hobbit films. And she shoots a bow too. If I remember it right :)

Basically adding all the names seems to be more stable, at least for a small number of words.

#### Adding negatives for the most similar words with input to prevent from obvious suggestions

In [77]:
# Non-empty input: full collection as positives (0.6 total) plus the input
# word at 0.4. The negatives are hand-picked words that are very similar to
# the input itself, each weighted 0.1, to push away the obvious suggestions.
input_name = 'meadows'
positives = [(name, 0.6 / len(bag_of_words)) for name in bag_of_words]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.4)], negative=[('meadow', 0.1),
                                                                     ('greenway', 0.1),
                                                                     ('ridge', 0.1),
                                                                     ('grove', 0.1)])

[('aragorn', 0.0375), ('bilbo', 0.0375), ('baggins', 0.0375), ('boromir', 0.0375), ('eowyn', 0.0375), ('faramir', 0.0375), ('frodo', 0.0375), ('galadriel', 0.0375), ('gandalf', 0.0375), ('gollum', 0.0375), ('legolas', 0.0375), ('pippin', 0.0375), ('samwise', 0.0375), ('saruman', 0.0375), ('sauron', 0.0375), ('smaug', 0.0375)]


[('creek', 0.5875210762023926),
 ('hills', 0.5491596460342407),
 ('hill', 0.533150851726532),
 ('mckinley', 0.5321338772773743),
 ('greenwood', 0.5306721329689026),
 ('oaks', 0.5208395719528198),
 ('primrose', 0.5163178443908691),
 ('salem', 0.5148237347602844),
 ('brook', 0.5136245489120483),
 ('valley', 0.5130434036254883)]

In [78]:
w2v.most_similar('meadows')

[('meadow', 0.5784050226211548),
 ('greenway', 0.560979962348938),
 ('ridge', 0.5486966371536255),
 ('grove', 0.5073105692863464),
 ('hills', 0.49844449758529663),
 ('creek', 0.498390793800354),
 ('racetrack', 0.4840754568576813),
 ('richland', 0.47176942229270935),
 ('greenwood', 0.4684500992298126),
 ('lancaster', 0.46673813462257385)]

Doesn't seem to work at all! :)

And the logic is too complicated, and certainly not explainable; I just had an idea and tried it out :D

In [79]:
# Same negative-words experiment as with weight 0.1, but each negative is
# weighted only 0.01 to see how a much weaker push changes the suggestions.
input_name = 'meadows'
positives = [(name, 0.6 / len(bag_of_words)) for name in bag_of_words]
print(positives)
w2v.most_similar(positive=positives + [(input_name, 0.4)], negative=[('meadow', 0.01),
                                                                     ('greenway', 0.01),
                                                                     ('ridge', 0.01),
                                                                     ('grove', 0.01)])

[('aragorn', 0.0375), ('bilbo', 0.0375), ('baggins', 0.0375), ('boromir', 0.0375), ('eowyn', 0.0375), ('faramir', 0.0375), ('frodo', 0.0375), ('galadriel', 0.0375), ('gandalf', 0.0375), ('gollum', 0.0375), ('legolas', 0.0375), ('pippin', 0.0375), ('samwise', 0.0375), ('saruman', 0.0375), ('sauron', 0.0375), ('smaug', 0.0375)]


[('shire', 0.5264033079147339),
 ('thranduil', 0.5127642154693604),
 ('tywin', 0.5043323040008545),
 ('primrose', 0.47820809483528137),
 ('hedlund', 0.4708022475242615),
 ('hitchcock', 0.4665117859840393),
 ('fassbender', 0.4655265510082245),
 ('claflin', 0.46373462677001953),
 ('steed', 0.46329405903816223),
 ('wayland', 0.46223318576812744)]

For smaller weights it seems to have almost no effect, only removing the negative words from the suggestions, which we can do by hand without applying some weird strategies.

### Research

#### Inter-domain similarities

In [39]:
w2v.similarity('legolas', 'arrow')

0.2811519

In [40]:
w2v.similarity('legolas', 'bow')

0.21900098

In [41]:
w2v.similarity('legolas', 'bowman')

0.10048075

In [37]:
w2v.similarity('shire', 'meadows')

0.31756023

In [38]:
w2v.similarity('rohan', 'horse')

0.06317839

In [45]:
w2v.similarity('shire', 'frodo')

0.39044446

In [46]:
w2v.similarity('shire', 'hobbit')

0.3783192

#### Adjectives' similarities

In [80]:
w2v.similarity('legolas', 'accurate')

0.08787117

In [81]:
w2v.similarity('legolas', 'brave')

0.13840021

In [85]:
w2v.similarity('legolas', 'handsome')

0.23254476

In [86]:
w2v.similarity('legolas', 'elf')

0.31121325

## Adjectives

In [88]:
# took them from here -> https://grammar.yourdictionary.com/parts-of-speech/adjectives/list-of-adjective-words.html
all_adjectives = \
['adorable', 'adventurous', 'aggressive', 'agreeable', 'alert', 'alive', 'amused', 'angry', 'annoyed', 
 'annoying', 'anxious', 'arrogant', 'ashamed', 'attractive', 'average', 'awful', 'bad', 'beautiful', 
 'better', 'bewildered', 'black', 'bloody', 'blue', 'blue-eyed', 'blushing', 'bored', 'brainy', 'brave', 
 'breakable', 'bright', 'busy', 'calm', 'careful', 'cautious', 'charming', 'cheerful', 'clean', 'clear', 
 'clever', 'cloudy', 'clumsy', 'colorful', 'combative', 'comfortable', 'concerned', 'condemned', 'confused', 
 'cooperative', 'courageous', 'crazy', 'creepy', 'crowded', 'cruel', 'curious', 'cute', 'dangerous', 'dark', 
 'dead', 'defeated', 'defiant', 'delightful', 'depressed', 'determined', 'different', 'difficult', 'disgusted', 
 'distinct', 'disturbed', 'dizzy', 'doubtful', 'drab', 'dull', 'eager', 'easy', 'elated', 'elegant', 'embarrassed',
 'enchanting', 'encouraging', 'energetic', 'enthusiastic', 'envious', 'evil', 'excited', 'expensive', 'exuberant',
 'fair', 'faithful', 'famous', 'fancy', 'fantastic', 'fierce', 'filthy', 'fine', 'foolish', 'fragile', 'frail', 
 'frantic', 'friendly', 'frightened', 'funny', 'gentle', 'gifted', 'glamorous', 'gleaming', 'glorious', 'good', 
 'gorgeous', 'graceful', 'grieving', 'grotesque', 'grumpy', 'handsome', 'happy', 'healthy', 'helpful', 'helpless', 
 'hilarious', 'homeless', 'homely', 'horrible', 'hungry', 'hurt', 'ill', 'important', 'impossible', 'inexpensive', 
 'innocent', 'inquisitive', 'itchy', 'jealous', 'jittery', 'jolly', 'joyous', 'kind', 'lazy', 'light', 'lively', 
 'lonely', 'long', 'lovely', 'lucky', 'magnificent', 'misty', 'modern', 'motionless', 'muddy', 'mushy', 
 'mysterious', 'nasty', 'naughty', 'nervous', 'nice', 'nutty', 'obedient', 'obnoxious', 'odd', 'old-fashioned', 
 'open', 'outrageous', 'outstanding', 'panicky', 'perfect', 'plain', 'pleasant', 'poised', 'poor', 'powerful', 
 'precious', 'prickly', 'proud', 'putrid', 'puzzled', 'quaint', 'real', 'relieved', 'repulsive', 'rich', 'scary', 
 'selfish', 'shiny', 'shy', 'silly', 'sleepy', 'smiling', 'smoggy', 'sore', 'sparkling', 'splendid', 'spotless', 
 'stormy', 'strange', 'stupid', 'successful', 'super', 'talented', 'tame', 'tasty', 'tender', 'tense', 'terrible', 
 'thankful', 'thoughtful', 'thoughtless', 'tired', 'tough', 'troubled', 'ugliest', 'ugly', 'uninterested', 
 'unsightly', 'unusual', 'upset', 'uptight', 'vast', 'victorious', 'vivacious', 'wandering', 'weary', 'wicked', 
 'wide-eyed', 'wild', 'witty', 'worried', 'worrisome', 'wrong', 'zany', 'zealous']

In [90]:
# Keep only the adjectives the embedding model knows, preserving their order,
# then show how many survived next to the size of the full list.
adjectives = list(filter(lambda candidate: candidate in w2v, all_adjectives))
len(adjectives), len(all_adjectives)

(228, 228)

In [91]:
def get_3_best_adjectives(word: str) -> list[str]:
    """Return the three adjectives most similar to ``word``, best first.

    Each entry of the notebook-level ``adjectives`` list is scored with
    ``w2v.similarity(word, adjective)``; the list is then ordered by that
    score in descending order and cut to its first three entries.
    """
    similarity_by_adjective = {}
    for adjective in adjectives:
        similarity_by_adjective[adjective] = w2v.similarity(word, adjective)

    ranked = sorted(adjectives, key=similarity_by_adjective.__getitem__, reverse=True)
    return ranked[:3]

In [93]:
for word in bag_of_words:
    print(f'{word: >15} -> {get_3_best_adjectives(word)}')

        aragorn -> ['obedient', 'misty', 'glorious']
          bilbo -> ['victorious', 'long', 'grumpy']
        baggins -> ['agreeable', 'cooperative', 'wandering']
        boromir -> ['misty', 'obedient', 'defiant']
          eowyn -> ['misty', 'disturbed', 'obedient']
        faramir -> ['bewildered', 'putrid', 'obedient']
          frodo -> ['misty', 'grumpy', 'adorable']
      galadriel -> ['zealous', 'misty', 'obedient']
        gandalf -> ['misty', 'dark', 'angry']
         gollum -> ['precious', 'grotesque', 'misty']
        legolas -> ['adorable', 'misty', 'charming']
         pippin -> ['jolly', 'nutty', 'misty']
        samwise -> ['obedient', 'courageous', 'defiant']
        saruman -> ['bewildered', 'misty', 'unsightly']
         sauron -> ['misty', 'unsightly', 'grotesque']
          smaug -> ['misty', 'dead', 'mysterious']


Those are not the best suggestions, but they definitely have good examples, like:

* glorious aragorn
* precious/grotesque gollum
* charming legolas
* jolly/nutty pippin
* courageous samwise
* bewildered/unsightly saruman
* mysterious smaug

And we also have some strange examples:

* long bilbo
* grumpy frodo
* dark/angry gandalf :D

This generator prototype seems to be an interesting enhancement for the empty input with somewhat smaller weight than others.

I am not sure how to use it with input words; the only way I can think of is using it without the bag of words, just for the input word, to find corresponding adjectives. Another way would be to extract all the adjectives from the bag of words and use them with the input if it is a noun.

We could also use this for suggesting names to add to the collection. For example, we process the wikipedia, extract all the names and also pass them through a few generators. This way we will not only suggest the raw names, but already processed ones.

### Substring Matching

We could use the bag of words as the input to substring match generator to get new suggestions for empty input.

The other way around is we can extract affixes, remove the popular ones, so only the topic-related will stay, and then we can use them for the input words, but I am not sure how to compose them well with the input.

In [94]:
# Load the suggestable-domains file as a list of raw text lines.
# Leading/trailing newlines are stripped before splitting so the list has no
# empty first or last entry. Nothing is parsed here: judging by the printed
# matches further down, each line looks like `name.eth,score,status`.
with open('../../data/suggestable_domains.csv', 'r', encoding='utf-8') as f:
    domains = f.read().strip('\n').split('\n')

In [95]:
def substring_match(word: str) -> list[str]:
    """Return every entry of ``domains`` that contains ``word``.

    The check is a plain, case-sensitive substring test against the whole raw
    line, and matches are returned in the order they appear in ``domains``.
    """
    matches = []
    for line in domains:
        if word in line:
            matches.append(line)
    return matches

In [96]:
pprint(substring_match('legolas'))

['-legolas.eth,0.695852534562212,taken',
 'legendlegolas.eth,0.9285714285721828,taken',
 'legolasgreenleaf.eth,0.9285714285714477,taken',
 '🧝\u200d♂legolas.eth,0.33410138248847926,taken',
 '0xlegolas.eth,0.7649769585253455,taken',
 'legolasofthewoodlandrealm.eth,0.9285714285714286,taken',
 '🧝🏻\u200d♂legolas.eth,0.3317972350230415,taken',
 'legolas.eth,0.9285714913851298,available',
 '🧝🏼\u200d♂legolas.eth,0.3317972350230415,taken',
 'legolasexchange.eth,0.9285714285747219,available',
 '_legolas.eth,0.9101382488479263,taken',
 'legolas-exchange.eth,0.6774193548387097,available']


In [97]:
pprint(substring_match('gandalf'))

['gandalf.eth,0.9285715041313891,taken',
 'blockchaingandalf.eth,0.9285714285714286,taken',
 '$gandalf.eth,0.33870967741935487,taken',
 'btcgandalf.eth,0.9285714285714616,taken',
 'gandalfcrypto.eth,0.9285714285715281,taken',
 'gandalf🧙\u200d♂.eth,0.33410138248847926,taken',
 'gandalftherich.eth,0.9285714285715743,on_sale',
 'gandalf868.eth,0.7626728110599078,taken',
 'neongandalf.eth,0.9285714285716994,taken',
 'greygandalf.eth,0.9285714285723679,taken',
 'gandalfthejedi.eth,0.9285714285714315,taken',
 'gandalfthegrey.eth,0.9285714285714501,taken',
 'gandalfthewhite.eth,0.928571428571635,taken',
 'gandalfthegrail.eth,0.9285714285714295,taken',
 'gandalfeducator.eth,0.9285714285716153,taken',
 'gandalfthegwei.eth,0.9285714285714286,taken',
 'gandalf1818.eth,0.76036866359447,taken',
 'gandalfgreyhame.eth,0.9285714285714286,taken',
 'gandalfmd.eth,0.9285714285743195,taken',
 'gandalf-the-rich.eth,0.6774193548387097,taken',
 'pinkgandalf.eth,0.9285714285735718,taken',
 'ethgandalf.eth,0.9

`gandalfthegrey`, `gandalfwhite`, `gandalfwizard`, `gandalfthejedi` xd, `gandalf🧙\u200d♂`, `greygandalf` - those are interesting and related affixes. If we have the way to compose them nicely with the input names, then it may be quite cool!

For this one specifically we could somehow :) detect that `gandalf` is a name, so these affixes could be used in the `PersonNameGenerator`, producing suggestions like `josiahthebrown`, `josiahthewizard` instead of boring and topic-unrelated affixes.

In [98]:
pprint(substring_match('frodo'))

['frodohobbs.eth,0.9285714285715324,taken',
 'masterfrodo.eth,0.9285714285750503,on_sale',
 'cryptofrodo.eth,0.928571428571541,taken',
 'frodozee.eth,0.9285714285714928,taken',
 'drunkfrodo.eth,0.928571428572046,taken',
 'frodon.eth,0.9285714285744536,on_sale',
 'frodonsacquet.eth,0.9285714285714286,taken',
 '_frodo.eth,0.9147465437788018,taken',
 'frodobagless.eth,0.9285714285714288,taken',
 'alfrodo.eth,0.928571428578812,taken',
 'frodobalings.eth,0.9285714285714286,taken',
 'frodobolson.eth,0.9285714285714286,taken',
 'frodolives.eth,0.9285714285739415,on_sale',
 'frodooo.eth,0.928571428572022,taken',
 'forfrodo.eth,0.9285714289196829,on_sale',
 'frodoholder.eth,0.9285714285724552,taken',
 'frodokem.eth,0.9285714285714286,taken',
 'frodobagner.eth,0.9285714285714286,taken',
 'frodo.eth,0.9285715148708166,taken',
 'frododickbags.eth,0.9285714285714286,taken',
 '0xfrodo.eth,0.7695852534562212,taken',
 'frodobagners.eth,0.9285714285714286,taken',
 'frodos.eth,0.9285714286057772,taken',

In [99]:
pprint(substring_match('gollum'))

['gollumnft.eth,0.9285714285714286,taken',
 'gollums.eth,0.9285714285867164,on_sale',
 '_gollum.eth,0.912442396313364,taken',
 '0xgollum.eth,0.7672811059907835,taken',
 'gollumvault.eth,0.9285714285715748,taken',
 'gollum.eth,0.9285714683464906,taken',
 'ensgollum.eth,0.928571428571449,taken',
 'mrgollum.eth,0.9285714285729352,taken']


In [100]:
pprint(substring_match('sauron'))

['theeyeofsauron.eth,0.9285714285714304,taken',
 'eyeofsauron.eth,0.9285714285714519,taken',
 'metasauron.eth,0.9285714285717702,taken',
 'sonsofsauron.eth,0.9285714285714597,taken',
 'greateyeofsauron.eth,0.9285714285714286,taken',
 '0xsauron.eth,0.7672811059907835,taken',
 'sauron.eth,0.9285714647432534,taken',
 '_sauron.eth,0.912442396313364,taken']


I really like `eyeofsauron` :)

The biggest challenge with all of this is to extract those affixes wisely. In case of all affixes we could select them by hand, here we would need to not only make sure they are not boring like those for `gollum`, but also filter out all the meaningless, like the ones generated for `frodo`.

# Topics / Meta-topics

For now I don't have any entirely new suggestions here, only the idea of biasing the W2V with words related to the topic.