# proof of concept: шляпа
Играем в шляпу (один загадывает, остальные отгадывают)

Текущие особенности реализации:
- проверена только работа fasttext и только с помощью одним подходом к подбору слов для загадывания и отгадывания
- модели недотренированы
- тексты "грязные"
- слово "угадано", если загаданное слово составляет его часть (word in guess) -- переменная CRITERIA

In [1]:
import re
import requests
import warnings
from collections import namedtuple

import numpy as np
import pandas as pd
import fasttext
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display

## Обучим несколько моделей 

In [2]:
%%time

file_path = '20-newsgroups/all_texts.preprocessed.txt'

model_skipgram = fasttext.train_unsupervised(file_path, model='skipgram', dim=5)
model_cbow = fasttext.train_unsupervised(file_path, model='cbow', dim=16)
model_skipgram2 = fasttext.train_unsupervised(file_path, model='skipgram', dim=10)

CPU times: user 12min 32s, sys: 4.11 s, total: 12min 36s
Wall time: 1min 16s


In [3]:
!wc -l {file_path}

 1719263 20-newsgroups/all_texts.preprocessed.txt


In [4]:
!head {file_path}


newsgroup: sci . electronics
document_id: 52434
from: et@teal . csn . org  ( eric h .  taylor ) 
subject: re: help_with_tracking_device

in article <00969fba . e640ff10@aesop . rutgers . edu> mcdonald@aesop . rutgers . edu writes:
>[ .  .  . ]
>there are a variety of water-proof housings i could use but the real meat
>of the problem is the electronics .  .  . hence this posting .   what kind of


In [5]:
len(model_cbow.words)

76430

In [6]:
model_cbow['song']

array([-2.1965523 ,  1.9783983 ,  2.7871406 ,  0.8564429 , -1.15721   ,
        1.1739218 , -0.14424281, -3.0943472 , -2.3887136 ,  1.0926764 ,
       -0.15824847,  0.1018669 ,  1.3388399 ,  1.9259685 , -1.4959589 ,
        0.11113074], dtype=float32)

In [7]:
!mkdir models
model_skipgram.save_model('models/skipgram.model')
model_skipgram2.save_model('models/skipgram2.model')
model_cbow.save_model('models/cbow.model')

mkdir: models: File exists


In [8]:
!ls -lh models

total 579968
-rw-r--r--  1 aguschin  staff   133M Sep  8 14:37 cbow.model
-rw-r--r--  1 aguschin  staff    42M Sep  8 14:37 skipgram.model
-rw-r--r--  1 aguschin  staff    83M Sep  8 14:37 skipgram2.model


## Реализации классов для игроков

In [9]:
class AbstractPlayer:
    def __init__(self):
        raise NotImplementedError()

    def explain(self, word, n_words):
        raise NotImplementedError()
        
    def guess(self, words, n_words):
        raise NotImplementedError()


if hasattr(model_skipgram2, 'get_nearest_neighbors'):
    print('using latest fasttext version')
    
    class LocalFasttextPlayer(AbstractPlayer):
        def __init__(self, model):
            self.model = model

        def find_words_for_sentence(self, sentence, n_closest):
            neighbours = self.model.get_nearest_neighbors(sentence)
            words = [word for similariry, word in neighbours][:n_closest]
            return words

        def explain(self, word, n_words):
            return self.find_words_for_sentence(word, n_words)

        def guess(self, words, n_words):
            return self.find_words_for_sentence(' '.join(words), n_words)
else:
    print('using older fasttext version')
    class LocalFasttextPlayer(AbstractPlayer):
        def __init__(self, model):
            self.model = model
            self.words = model.get_words()
            self.matrix = np.concatenate([model[word].reshape(1, -1) for word in self.words], axis=0)

        def find_words_for_vector(self, vector, n_closest):
            sims = cosine_similarity(vector.reshape(1, -1), self.matrix).ravel()
            word_sims = pd.Series(sims, index=self.model.get_words()).sort_values(ascending=False)
            return list(word_sims.head(n_closest).index)

        def find_words_for_sentence(self, sentence, n_closest):
            vector = self.model.get_sentence_vector(sentence)
            return self.find_words_for_vector(vector, n_closest)

        def explain(self, word, n_words):
            return self.find_words_for_sentence(word, n_words)

        def guess(self, words, n_words):
            return self.find_words_for_sentence(' '.join(words), n_words)


class RemotePlayer(AbstractPlayer):
    def __init__(self, url):
        self.url = url
        
    def explain(self, word, n_words):
        response = requests.get(self.url + '/explain', {'word': word, 'n_words': n_words})
        if response.status_code == 200:
            word_list = response.json()
        else:
            warnings.warn(f'request failed: {response.status_code}')
            word_list = []
        return word_list
    
    def guess(self, words, n_words):
        response = requests.get(self.url + '/guess', {'words': words, 'n_words': n_words})
        if response.status_code == 200:
            word_list = response.json()
        else:
            warnings.warn(f'request failed: {response.status_code}')
            word_list = []
        return word_list

using latest fasttext version


In [10]:
remote_player = RemotePlayer('https://obscure-everglades-02893.herokuapp.com')
# remote_player = RemotePlayer('http://127.0.0.1:5000')
print(remote_player.explain('zen', 10))
print(remote_player.guess(['zen', 'desk', 'word'], 5))

['zen', 'sin;', '>[i', 'lover', 'mad', 'rant', 'scorn', '>honestly', '*laugh*', 'forever']
['have:', '"boggs"', '>[lotsa', 'qualifier', '[now']


In [11]:
local_player = LocalFasttextPlayer(model_skipgram)
print(local_player.explain('zen', 10))
print(local_player.guess(['zen', 'desk', 'word'], 5))

['>[i', 'sin;', 'forever', '"aunt', 'wiccan', 'rant', 'damns', 'insane', 'before:', 'thine']
['spx', 'suggest:', 'sunlight', 'umbdr522', 'in:']


## Игра

In [13]:
class Game:
    def __init__(self, players, words, criteria, n_rounds, n_explain_words, n_guessing_words):
        assert len(players) >= 2
        assert criteria in ('hard', 'soft')
        self.players = players
        self.words = words
        self.criteria = criteria
        self.n_rounds = n_rounds
        self.n_explain_words = n_explain_words
        self.n_guessing_words = n_guessing_words
        
    def remove_repeated_words(self, words):
        unique_words = []
        for c in words:
            if not c in unique_words:
                unique_words.append(c)
        return unique_words
        
    def create_word_list(self, player, word, n_words):
        if self.criteria == 'hard':
            explain_words = explain_words[:n_words]
        explain_words = player.explain(word, n_words)
        explain_words = [re.sub(r'[^\w]', '', c) for c in explain_words]
        explain_words = [c for c in explain_words if word not in c]
        explain_words = self.remove_repeated_words(explain_words)
        if self.criteria == 'soft':
            explain_words = explain_words[:n_words]
        return explain_words

    def play(self, explaining_player, guessing_players, word, criteria, verbose=False):

        if verbose:
            print(f'HOST to EXPLAINING PLAYER: the word is "{word}"')

        guessing_by = self.create_word_list(explaining_player.api, word, N_EXPLAIN_WORDS)
        if verbose:
            print(f'PLAYER 1 to HOST: my wordlist is {guessing_by}')

        df = []
        for i in range(1, len(guessing_by) + 1):
            if len(guessing_players) == 0:
                break
            game_round = {}
            if verbose:
                print(f'\n===ROUND {i}===\n')
            sentence = guessing_by[:i]
            if verbose:
                print(f"HOST: {sentence}")
            game_round.update({f'Explanation for "{word}" ({explaining_player.name})': sentence})
            for player in guessing_players:
                guessed_words = player.api.guess(sentence, N_GUESSING_WORDS)
                if criteria == 'soft':
                    guessed = any([word in c for c in guessed_words])
                else:
                    guessed = word in guessed_words
                if guessed:
                    guessing_players = [p for p in guessing_players if p != player]
                if verbose:
                    print(f'GUESSING PLAYER ({player.name}) to HOST: {guessed_words}')
                    print(f'HOST: {guessed}')
                game_round.update({f'Guess ({player.name})': guessed_words})
            df.append(game_round)
        return pd.DataFrame(df)

    def run(self):
        np.random.shuffle(self.words)
        igame = 0
        for r in range(self.n_rounds):
            for explaining_player in self.players:
                guessing_players = [p for p in self.players if p != explaining_player]
                word = self.words[igame]
                igame += 1
                df = self.play(explaining_player, guessing_players, word, criteria=self.criteria, verbose=False)
                display(df)
            
        
player = namedtuple('Player', ['name', 'api'])

N_EXPLAIN_WORDS = 10
N_GUESSING_WORDS = 5
N_ROUNDS = 1
CRITERIA = 'soft'

PLAYERS = [
    player('skipgram team', RemotePlayer('https://obscure-everglades-02893.herokuapp.com')),
    player('skipgram2 team', LocalFasttextPlayer(model_skipgram2)),
    player('cbow team', LocalFasttextPlayer(model_cbow))
]

WORDS = [
    'play', 'master', 'word', 'cocoa', 'coffee',
    'september', 'jungle', 'spell', 'python',
    'cat', 'joy', 'sadness', 'small', 'stick'
]
# WORDS = ['september'] * 10

game = Game(PLAYERS, WORDS, CRITERIA, N_ROUNDS, N_EXPLAIN_WORDS, N_GUESSING_WORDS)
game.run()

Unnamed: 0,"Explanation for ""play"" (skipgram team)",Guess (cbow team),Guess (skipgram2 team)
0,[suckers],"[swimmers, burners, muggers, wohlers, bimmers]","[>cubs, kettle, bandwagon, snuck, >go]"
1,"[suckers, walks]","[fuhr-bashers, strangers, knuckles, left-winge...","[blowout, cross-checking, suckers, >cubs, bummin]"
2,"[suckers, walks, comeback]","[comeback, horseback, >shortstops, watchit, ho...","[bummin, slugger, kettle, >cubs, >>season]"
3,"[suckers, walks, comeback, nmmthe]","[tradgedy, gummint, >shortstops, dole, coaches]","[>grew, kettle, bummin, >cubs, sushi]"
4,"[suckers, walks, comeback, nmmthe, hammer]","[toughen, shitty, homesick, dickhead, budweiser]","[sushi, chernobyl, blowout, >grew, bummin]"


Unnamed: 0,"Explanation for ""stick"" (skipgram2 team)",Guess (cbow team),Guess (skipgram team)
0,[jiggled],"[leaned, stowed, climbed, drooled, clamped]","[jiggled, welded, thumbs, 50+mph, 35%]"
1,"[jiggled, chopped]","[skipped, shopped, calmed, bopped, chopped]","[*still*, >tires, >nine, elevators, cut]"
2,"[jiggled, chopped, stomping]","[sparkling, splashing, wafffling, digging, pun...","[>tires, f*cked, bottled, sport-bike, *up*]"
3,"[jiggled, chopped, stomping, luckily]","[scratchy, downhill, sparkling, downing, >back]","[elevators, squat, *still*, ""down, straight]"
4,"[jiggled, chopped, stomping, luckily, cloves]","[stoufflet, puppet, shx, scratchy, walk]","[*still*, elevators, straight, squat, >playing]"
5,"[jiggled, chopped, stomping, luckily, cloves, ...","[galloping, shear, tooke, skipped, shx]","[elevators, *still*, stick, >revolver, squat]"
6,"[jiggled, chopped, stomping, luckily, cloves, ...","[shx, waaaay, scratchy, stoufflet, bummer]",
7,"[jiggled, chopped, stomping, luckily, cloves, ...","[scratchy, touchdown, downhill, >back, flurry]",
8,"[jiggled, chopped, stomping, luckily, cloves, ...","[scratchy, stoufflet, scratch, shx, >flash]",
9,"[jiggled, chopped, stomping, luckily, cloves, ...","[stoufflet, scratchy, shx, waaaay, touchdown]",


Unnamed: 0,"Explanation for ""cocoa"" (cbow team)",Guess (skipgram team),Guess (skipgram2 team)
0,[coy],"[coy, brader, gaijin@ale, shaig@composer, heaton]","[rlm@helen, surfcty, laquey, paeth, mech]"
1,"[coy, shield]","[reprint, ""partition, denon, zildjian, peri[jo...","[sandwich, cutlass, piloting, lubbock, vascar]"
2,"[coy, shield, poer]","[pinnacle, >votes, in-between, fbi;, amphiboly]","[|than, pallas, sandwich, tinseltown, no-brainer]"
3,"[coy, shield, poer, steet]","[hydroxyl, vascar, ""14-2"", blizzard, >rules]","[|than, piloting, sandwich, siphoned, pallas]"
4,"[coy, shield, poer, steet, panic]","[omelette, stamped, corrallary, >delivered, >>...","[|than, |>on, jking, panacea, artisitc]"
5,"[coy, shield, poer, steet, panic, swiss]","[crafty, pocono, tofranil, *unconfirmed*, puppy]","[|than, smallish, eves, dusk, ^~]"
6,"[coy, shield, poer, steet, panic, swiss, brims...","[tofranil, *unconfirmed*, crafty, inquired, helm]","[|than, smallish, dusk, pallas, caelum]"
7,"[coy, shield, poer, steet, panic, swiss, brims...","[headbangers, opps, rippling;, >week, eyesore]","[|than, pallas, water-head, tinseltown, seront]"
8,"[coy, shield, poer, steet, panic, swiss, brims...","[>week, rippling;, month;, eyesore, tofranil]","[|than, tinseltown, pallas, >cam, sapphire]"
9,"[coy, shield, poer, steet, panic, swiss, brims...","[squirrel, particular--have, anchor, torso, >`]","[|than, pallas, tinseltown, seront, caelum]"
