# proof of concept: шляпа
Играем в шляпу (один загадывает, остальные отгадывают)

Текущие особенности реализации:
- проверена только работа fasttext и только с помощью одним подходом к подбору слов для загадывания и отгадывания
- модели недотренированы
- тексты "грязные"
- слово "угадано", если загаданное слово составляет его часть (word in guess) -- переменная CRITERIA

In [1]:
import re
import requests
import warnings
from collections import namedtuple

import numpy as np
import pandas as pd
import fasttext
from sklearn.metrics.pairwise import cosine_similarity
from IPython.display import display

## Обучим несколько моделей 

In [2]:
%%time

file_path = '20-newsgroups/all_texts.preprocessed.txt'

model_skipgram = fasttext.train_unsupervised(file_path, model='skipgram', dim=5)
model_cbow = fasttext.train_unsupervised(file_path, model='cbow', dim=16)
model_skipgram2 = fasttext.train_unsupervised(file_path, model='skipgram', dim=10)

CPU times: user 15min 57s, sys: 7.74 s, total: 16min 5s
Wall time: 1min 41s


In [3]:
!wc -l {file_path}

 1719263 20-newsgroups/all_texts.preprocessed.txt


In [4]:
!head {file_path}


newsgroup: sci . electronics
document_id: 52434
from: et@teal . csn . org  ( eric h .  taylor ) 
subject: re: help_with_tracking_device

in article <00969fba . e640ff10@aesop . rutgers . edu> mcdonald@aesop . rutgers . edu writes:
>[ .  .  . ]
>there are a variety of water-proof housings i could use but the real meat
>of the problem is the electronics .  .  . hence this posting .   what kind of


In [5]:
len(model_cbow.words)

76430

In [6]:
model_cbow['song']

array([-1.7037996 ,  2.4888937 ,  2.6892757 ,  1.0378067 , -1.2333224 ,
        1.5700542 ,  1.1323178 , -3.8242252 , -2.200785  ,  1.6522602 ,
        0.13677068, -0.07873544,  1.2488995 ,  1.4893962 , -0.8431773 ,
       -0.41610107], dtype=float32)

In [7]:
!mkdir models
model_skipgram.save_model('models/skipgram.model')
model_skipgram2.save_model('models/skipgram2.model')
model_cbow.save_model('models/cbow.model')

mkdir: models: File exists


In [8]:
!ls -lh models

total 579968
-rw-r--r--  1 aguschin  staff   133M Sep 11 15:51 cbow.model
-rw-r--r--  1 aguschin  staff    42M Sep 11 15:51 skipgram.model
-rw-r--r--  1 aguschin  staff    83M Sep 11 15:51 skipgram2.model


## Реализации классов для игроков

In [9]:
class AbstractPlayer:
    def __init__(self):
        raise NotImplementedError()

    def explain(self, word, n_words):
        raise NotImplementedError()
        
    def guess(self, words, n_words):
        raise NotImplementedError()


if hasattr(model_skipgram2, 'get_nearest_neighbors'):
    print('using latest fasttext version')
    
    class LocalFasttextPlayer(AbstractPlayer):
        def __init__(self, model):
            self.model = model

        def find_words_for_sentence(self, sentence, n_closest):
            neighbours = self.model.get_nearest_neighbors(sentence)
            words = [word for similariry, word in neighbours][:n_closest]
            return words

        def explain(self, word, n_words):
            return self.find_words_for_sentence(word, n_words)

        def guess(self, words, n_words):
            return self.find_words_for_sentence(' '.join(words), n_words)
else:
    print('using older fasttext version')
    class LocalFasttextPlayer(AbstractPlayer):
        def __init__(self, model):
            self.model = model
            self.words = model.get_words()
            self.matrix = np.concatenate([model[word].reshape(1, -1) for word in self.words], axis=0)

        def find_words_for_vector(self, vector, n_closest):
            sims = cosine_similarity(vector.reshape(1, -1), self.matrix).ravel()
            word_sims = pd.Series(sims, index=self.model.get_words()).sort_values(ascending=False)
            return list(word_sims.head(n_closest).index)

        def find_words_for_sentence(self, sentence, n_closest):
            vector = self.model.get_sentence_vector(sentence)
            return self.find_words_for_vector(vector, n_closest)

        def explain(self, word, n_words):
            return self.find_words_for_sentence(word, n_words)

        def guess(self, words, n_words):
            return self.find_words_for_sentence(' '.join(words), n_words)


class RemotePlayer(AbstractPlayer):
    def __init__(self, url):
        self.url = url
        
    def explain(self, word, n_words):
        response = requests.get(self.url + '/explain', {'word': word, 'n_words': n_words})
        if response.status_code == 200:
            word_list = response.json()
        else:
            warnings.warn(f'request failed: {response.status_code}')
            word_list = []
        return word_list
    
    def guess(self, words, n_words):
        response = requests.get(self.url + '/guess', {'words': words, 'n_words': n_words})
        if response.status_code == 200:
            word_list = response.json()
        else:
            warnings.warn(f'request failed: {response.status_code}')
            word_list = []
        return word_list

using latest fasttext version


In [10]:
remote_player = RemotePlayer('https://obscure-everglades-02893.herokuapp.com')
# remote_player = RemotePlayer('http://127.0.0.1:5000')
print(remote_player.explain('zen', 10))
print(remote_player.guess(['zen', 'desk', 'word'], 5))

['zen', 'sin;', '>[i', 'lover', 'mad', 'rant', 'scorn', '>honestly', '*laugh*', 'forever']
['have:', '"boggs"', '>[lotsa', 'qualifier', '[now']


In [11]:
local_player = LocalFasttextPlayer(model_skipgram)
print(local_player.explain('zen', 10))
print(local_player.guess(['zen', 'desk', 'word'], 5))

['forever', '"dear', 'sin;', 'thine', '"someone', 'bear-stearns', 'insane', '"who', 'heartless', 'reminds']
['imagine-32', 'xrastool', 'xptablechildposition', 'in:', 'spx']


## Игра

In [17]:
class Game:
    def __init__(self, players, words, criteria, n_rounds, n_explain_words, n_guessing_words):
        assert len(players) >= 2
        assert criteria in ('hard', 'soft')
        self.players = players
        self.words = words
        self.criteria = criteria
        self.n_rounds = n_rounds
        self.n_explain_words = n_explain_words
        self.n_guessing_words = n_guessing_words
        
    def remove_repeated_words(self, words):
        unique_words = []
        for c in words:
            if not c in unique_words:
                unique_words.append(c)
        return unique_words
        
    def create_word_list(self, player, word, n_words):
        if self.criteria == 'hard':
            explain_words = explain_words[:n_words]
        explain_words = player.explain(word, n_words)
        explain_words = [re.sub(r'[^\w]', '', c) for c in explain_words]
        explain_words = [c for c in explain_words if word not in c]
        explain_words = self.remove_repeated_words(explain_words)
        if self.criteria == 'soft':
            explain_words = explain_words[:n_words]
        return explain_words
    
    def check_criteria(self, word, guessed_words):
        if self.criteria == 'soft':
            guessed = any([word in c for c in guessed_words])
        else:
            guessed = word in guessed_words
        return guessed
    
    def play_round(self, explaining_player, guessing_players, word, sentence, verbose=False):
        game_round = {}
        if verbose:
            print(f"HOST: {sentence}")
        game_round.update({f'Explanation for "{word}" ({explaining_player.name})': sentence})
        for player in guessing_players:
            guessed_words = player.api.guess(sentence, self.n_guessing_words)
            guessed = self.check_criteria(word, guessed_words)
            if guessed:
                guessing_players = [p for p in guessing_players if p != player]
            if verbose:
                print(f'GUESSING PLAYER ({player.name}) to HOST: {guessed_words}')
                print(f'HOST: {guessed}')
            game_round.update({f'Guess ({player.name})': guessed_words})
        return game_round, guessing_players

    def play(self, explaining_player, guessing_players, word, criteria, verbose=False):

        if verbose:
            print(f'HOST to EXPLAINING PLAYER: the word is "{word}"')

        guessing_by = self.create_word_list(explaining_player.api, word, self.n_explain_words)
        if verbose:
            print(f'PLAYER 1 to HOST: my wordlist is {guessing_by}')

        df = []
        for i in range(1, len(guessing_by) + 1):
            if len(guessing_players) == 0:
                break
            if verbose:
                print(f'\n===ROUND {i}===\n')
            game_round, guessing_players = self.play_round(
                explaining_player=explaining_player,
                guessing_players=guessing_players,
                word=word,
                sentence=guessing_by[:i],
                verbose=verbose
            )
            df.append(game_round)
        return pd.DataFrame(df)

    def run(self):
        np.random.shuffle(self.words)
        igame = 0
        for r in range(self.n_rounds):
            for explaining_player in self.players:
                guessing_players = [p for p in self.players if p != explaining_player]
                word = self.words[igame]
                igame += 1
                df = self.play(explaining_player, guessing_players, word, criteria=self.criteria, verbose=False)
                display(df)
            
        
player = namedtuple('Player', ['name', 'api'])

N_EXPLAIN_WORDS = 10
N_GUESSING_WORDS = 5
N_ROUNDS = 1
CRITERIA = 'soft'

PLAYERS = [
    player('skipgram team', RemotePlayer('https://obscure-everglades-02893.herokuapp.com')),
    player('skipgram2 team', LocalFasttextPlayer(model_skipgram2)),
    player('cbow team', LocalFasttextPlayer(model_cbow))
]

WORDS = [
    'play', 'master', 'word', 'cocoa', 'coffee',
    'september', 'jungle', 'spell', 'python',
    'cat', 'joy', 'sadness', 'small', 'stick'
]
# WORDS = ['september'] * 10

game = Game(PLAYERS, WORDS, CRITERIA, N_ROUNDS, N_EXPLAIN_WORDS, N_GUESSING_WORDS)
game.run()

Unnamed: 0,"Explanation for ""cat"" (skipgram team)",Guess (cbow team),Guess (skipgram2 team)
0,[surname],"[basename, >name, stat, rename, checkpoint]","[yip, enter:, >address, turn:, <----excuse]"
1,"[surname, hit]","[surname, row, lumme, stat, snatch]","[28%, yip, ]i, surname, [-h|-]"
2,"[surname, hit, y]","[surname, stat, lumme, iue, redin]","[28%, @u, que:, syd, lgtgah]"
3,"[surname, hit, y, m]","[surname, stat, lumme, iue, redin]","[28%, que:, @u, syd, lgtgah]"
4,"[surname, hit, y, m, r]","[surname, stat, lumme, iue, putnam]","[syd, 28%, @u, [-h|-, que:]"
5,"[surname, hit, y, m, r, gi]","[surname, greenleaf, green$$, putnam, edirne]","[[-h|-, 28%, que:, syd, <alt><f1>]"
6,"[surname, hit, y, m, r, gi, 9]","[surname, stat, iue, putnam, lumme]","[28%, que:, [-h|-, surname, @u]"
7,"[surname, hit, y, m, r, gi, 9, ]","[surname, stat, iue, lumme, putnam]","[@u, [-h|-, que:, 28%, surname]"
8,"[surname, hit, y, m, r, gi, 9, , 4]","[surname, stat, iue, lumme, putnam]","[@u, [-h|-, 28%, que:, surname]"


Unnamed: 0,"Explanation for ""word"" (skipgram2 team)",Guess (cbow team),Guess (skipgram team)
0,[incorrectly],"[correctly, explicitely, #however, explictly, ...","[incorrectly, know;, whenever, >>>does, anyway-]"
1,"[incorrectly, incorrect]","[incorrect"", incorrect, incorrect;, correct, ""...","[have;, ""values"", #your, technically-hip, corr..."
2,"[incorrectly, incorrect, see]","[incorrect"", incorrect;, incorrect, correct;, ...","[yours, question:, ""cure"", ""m"", technically-hip]"
3,"[incorrectly, incorrect, see, bible]","[incorrect"", incorrect, incorrect;, incorrectl...","[sourse, question, word, what, too""]"
4,"[incorrectly, incorrect, see, bible, problem]","[incorrect"", incorrect, incorrect;, mention;, ...",
5,"[incorrectly, incorrect, see, bible, problem, ...","[>interpretation, ""original"", misinterpretatio...",
6,"[incorrectly, incorrect, see, bible, problem, ...","[>interpretation, >description, ""original"", mi...",
7,"[incorrectly, incorrect, see, bible, problem, ...","[>interpretation, >context, interpretation, bi...",


Unnamed: 0,"Explanation for ""spell"" (cbow team)",Guess (skipgram team),Guess (skipgram2 team)
0,[guesser],"[guesser, gekko, ios, ""m, ""catch]","[ahold, s**t, wasnt, hopalonga, dice]"
1,"[guesser, looks]","[trivia:, >btw>>, axer, >mom, umbs]","[:no, labelled, job"", *by, [tt+=-_]]"
2,"[guesser, looks, gobbledygook]","[-when, *vomit*, >rumor, _am_, >btw>>]","[soem, #$^&, gobbledygook, ayoob, wsidom]"
3,"[guesser, looks, gobbledygook, doncha]","[-when, >rumor, *vomit*, >>]i, _am_]","[jurassic, wsidom, soem, gobbledygook, ayoob]"
4,"[guesser, looks, gobbledygook, doncha, reardon]","[misspelt, >>]i, ^~, dweeb, doncha]","[wsidom, gotti, -teddy, >>mark, ""close]"
5,"[guesser, looks, gobbledygook, doncha, reardon...","[-ed, soem, jurassic, ^~, b^]","[wsidom, gotti, >>mark, -teddy, donoghue]"
6,"[guesser, looks, gobbledygook, doncha, reardon...","[b^, fermented, jurassic, jokes:, dweeb]","[wsidom, gotti, -teddy, }>}so, :-]]"
7,"[guesser, looks, gobbledygook, doncha, reardon...","[n>crap, soem, -ed, cornflakes, wrung]","[wsidom, gotti, -teddy, donoghue, doolittle]"
8,"[guesser, looks, gobbledygook, doncha, reardon...","[soem, wrung, jurassic, delhi, -ed]","[wsidom, -teddy, gotti, donoghue, doolittle]"
