# 5 букв
Приложуха для игры в "5 букв" тинькоф
Задача - за 6 ходов отгадать существительное из 5 букв
Вторая версия - автоматическая


## Служебные ячейки
Их нужно запустить, иначе работать не будет

In [1]:
# Imports

import pandas as pd
import re
# from collections import Counter

In [3]:
# Functions

# function to read external source of words
def get_words (url, letters = 5):
    ''' parse local or web-based txt o csv with nouns. Has 2 variables:
    url - e.g. https://raw.githubusercontent.com/Harrix/Russian-Nouns/main/dist/russian_nouns.txt 
    letters - int, 5 by default '''

    df = pd.read_csv(url, names=["noun"]) # read vocabulaty

    df = df.loc[df['noun'].str.len() == letters] # filter 5 letters only
    # s = pd.Series(df['noun'])
    return pd.Series(df['noun'])

# finctions to get frequencies of the words 

def get_rus_nouns_freq(words):
    '''get russina words freq from НКРЯ-based dict http://dict.ruslang.ru/freq.php from local file'''
    Path = 'src\\freqrnc2011.csv'
    WordFreq = pd.read_csv(Path, sep="\t")
    WordFreq = WordFreq[WordFreq['PoS'] == "s"].loc[ :,['Lemma','Freq(ipm)']]
    
    sf = pd.DataFrame(words)
    sf = sf.merge(WordFreq, how='left', left_on='noun', right_on='Lemma')
    return sf.loc[:, ['noun','Freq(ipm)']].sort_values(by=['Freq(ipm)'],ascending=False)

# function to count letter freq in dictionary - returns data frame
def letter_freq(words):
    ''' returns frequencies of letters in passed pandas/numPy series of words '''
    AllWords = "".join(str(i) for i in words)
    # print(AllWords)
    LettersCount = {}
    for i in AllWords:
        LettersCount[i] = LettersCount.get(i,0) + 1
        # print(i, LettersCount.get(i,0))
    # print(LettersCount)
    LettFreq = pd.DataFrame.from_dict(LettersCount, orient="index",columns=['freq']).reset_index().sort_values(by = 'freq', ascending=False) #calculate letters frequency
    # print(LettFreq)
    # LettFreq = LettFreq.sort_values(by = 'freq', ascending=False)
    # print(LettFreq)
    # return LettersCount
    return LettFreq

# function to count letter freq in dictionary - returns dicstionary

def letter_freq_dict(words):
    ''' returns dictionary with frequencies of letters in passed pandas/numPy series of words '''
    AllWords = "".join(str(i) for i in words)
    # print(AllWords)
    LettersCount = {}
    for i in AllWords:
        LettersCount[i] = LettersCount.get(i,0) + 1
        # print(i, LettersCount.get(i,0))
    # print(LettersCount)
    # LettFreq = pd.DataFrame.from_dict(LettersCount, orient="index",columns=['freq']).reset_index().sort_values(by = 'freq', ascending=False) #calculate letters frequency
    # print(LettFreq)
    # LettFreq = LettFreq.sort_values(by = 'freq', ascending=False)
    # print(LettFreq)
    return LettersCount


# Function to get top letters in dictionary

def top_letters (words, topammount=10):
    '''Returns string of lwtters with top frequencies in passed pandas/numPy series of words
        topammount - number of letters (10 by default)'''
    LettersCount = letter_freq_dict(words)
    df = pd.DataFrame.from_dict(LettersCount, orient="index",columns=['freq']).reset_index().sort_values(by = 'freq', ascending=False)
    TopLetters = df['index'][0:topammount].to_string(index=False).replace('\n','')
    return TopLetters

# Function to get one word with most frequent letters

def first_word(words):
    '''function that generates first word with most frequent letters in passed pandas/numPy series of words'''

    TopLeters = top_letters(words) # get letters with highest frequencies
    # WordSeries = pd.Series()
    WordSeries = words[words.str.match( r'(^[{}]+$)'.format(TopLeters)) == True] #filter words contains only top letters
    WordSeries = WordSeries[WordSeries.str.match( r'^(?:([а-я])(?!.*\1))*$') ==True  ] #filter out words with repeating letters
    WordSeries = pd.DataFrame(WordSeries) #convert do Data Frame
    
    #Calculate frequencies of all letters in a word

    # Set temp variables
    WordLettFreq = 0
    WordLettFreqTotal = []
    FreqDict = letter_freq_dict(words) 
    for i in WordSeries['noun']: #cycle through table to get each word
        for l in i:              #cycle through letters in word to get letters grequency summ
            WordLettFreq = WordLettFreq + FreqDict.get(l)
        # print(i,a)
        WordLettFreqTotal.append(WordLettFreq)

    WordSeries['freq'] = WordLettFreqTotal #add word letters frequencies to the table


    return WordSeries.loc[WordSeries['freq'].idxmax()]['noun']  # get the word with largest freq summ


# finction to create mask ouptut string
def create_mask_inst(letter, mask, mask_type):
    ''' 
    create a value for a word mask
    letter - current processed letter
    cu
    types: 
    1 - правильная буква в правильном месте
    2 - правильная буква в неправильном месте
    3 - непавильная буква
    '''
    # d = dict({"1":"","2":"","3":})
    # d = dict()

    if mask_type == "1":
        return letter
    elif mask_type == "2":
        if mask == ".":
            return "[^{}]".format(letter)
        else:
            return "[^{}{}]".format(mask[2:-1],letter)
    
    else:
        return "."

# function that parse responce and return compiled regex

def parse_response2(word, response, source =("", "", [".",".",".",".","."]) ):
    '''Return regex: mask, bad letters, good letters'''
    
    # print(source)
    yes_letters = source[0]
    no_letters = source[1]
    known_word = source[2]
    # print(known_word)
    # print(word)
    LetterNum = 0  #counter for position in a response
    for i in response:
        # print (i)
        LetterNum = LetterNum + 1
        # print(LetterNum)
        l = word[LetterNum-1]
        # print(l)
        known_word[LetterNum-1] = create_mask_inst(l,known_word[LetterNum-1],i )    #create mask
        if i == "3":                                                                #add letters to list of wrong letters
            no_letters = no_letters + l
        elif i == "2":
            yes_letters = yes_letters + word[LetterNum-1]                           #add letters to list of exact letters
    
    return yes_letters, no_letters, known_word

# function that returns a worda to regex

def get_candidate2 (words, source):
    '''Returns single word that matches passed conditions.
        source variable shoult contain 3 strings for regex:
            words - series of words (pandas series)
            regex - list of compiled regex
            word_number - number of words to show, 1 by default
        '''
    
    yes_letters = source[0]
    no_letters = source[1]
    known_word = source[2]

    known_word = "".join(map(str,known_word))
    reg_word = re.compile( r'^{}$'.format(known_word),re.VERBOSE | re.IGNORECASE)   #regex for word mask

    reg_isnot = re.compile( r'(^[^{}]+$)'.format(no_letters),re.VERBOSE | re.IGNORECASE)    #regex for letters not in word


    good_letters_seq = str()                                                        #cycle to create source string for a regex for lettters in a word
    for i in yes_letters:
        # print(i)
        good_letters_seq = good_letters_seq+"(?=.*"+i+")"                           

    reg_isin = re.compile( r"{}".format(good_letters_seq), re.VERBOSE | re.IGNORECASE)  #regex for letters in a word

    regex = (reg_word, reg_isnot, reg_isin)                                             #group resulting regex
    
    candidates = words
    for i in regex:                                                                     #filter dictionary with regex
        candidates = candidates[candidates.str.match(i)== True]
    return candidates.iloc[0]

# function that parse responce and return compiled regex

def parse_response(word, response, yes_letters = "", no_letters = "",known_word = [".",".",".",".","."] ):
    '''Return regex: mask, bad letters, good letters'''
    LetterNum = 0  #counter for position in a response
    for i in response:
        LetterNum = LetterNum + 1
        l = word[LetterNum-1]
        known_word[LetterNum-1] = create_mask_inst(l,known_word[LetterNum-1],i )    #create mask
        if i == "3":                                                                #add letters to list of wrong letters
            no_letters = no_letters + l
        elif i == "2":
            yes_letters = yes_letters + word[LetterNum-1]                           #add letters to list of exact letters
    

    known_word = "".join(map(str,known_word))
    reg_word = re.compile( r'^{}$'.format(known_word),re.VERBOSE | re.IGNORECASE)   #regex for word mask

    reg_isnot = re.compile( r'(^[^{}]+$)'.format(no_letters),re.VERBOSE | re.IGNORECASE)    #regex for letters not in word


    good_letters_seq = str()                                                        #cycle to create source string for a regex for lettters in a word
    for i in yes_letters:
        # print(i)
        good_letters_seq = good_letters_seq+"(?=.*"+i+")"                           

    reg_isin = re.compile( r"{}".format(good_letters_seq), re.VERBOSE | re.IGNORECASE)  #regex for letters in a word

    return reg_word, reg_isnot, reg_isin

# function that returns a worda to regex

def get_candidate (words, regex, word_number = 1):
    '''Returns words that matches passed regex.
        words - series of words
        regex - list of compiled regex
        word_number - number of words to show, 1 by default
        '''
    candidates = words
    for i in regex:
        candidates = candidates[candidates.str.match(i)== True]
    return candidates.head(word_number)


In [82]:
#

# load nouns library
# source: https://github.com/Harrix/Russian-Nouns/tree/main/dist


Url = "https://raw.githubusercontent.com/Harrix/Russian-Nouns/main/dist/russian_nouns.txt"
s = get_words(Url)
FirstWord = first_word(s) # initial word for a game
print(FirstWord)


трико


In [74]:

Moves = 0
CurrentWord = FirstWord
r = ("", "", [".",".",".",".","."])
while Moves < 6:
    Moves += 1
    Response = input()
    if Response == "11111":
        break
    r = parse_response2(CurrentWord, Response,r)
    CurrentWord = get_candidate2(s, r)
    print(CurrentWord)



print(FirstWord)
Response = "33333"
r = parse_response2(FirstWord,Response)
r
SecondWord = get_candidate2(s,r)
SecondWord

трико


'абзац'

In [77]:
r

('', 'трико', ['.', '.', '.', '.', '.'])

In [34]:
create_mask_inst("d",r[2][1],"2")

'[^d]'

In [78]:
Response = "33333"

r2 = parse_response2(SecondWord,Response, source = r )
r2
ThirdWord = get_candidate2(s,r2)
ThirdWord

'ведун'

In [5]:
print(FirstWord)
Response = "33333"
r = parse_response(FirstWord,Response)
get_candidate(s,r)

трико


16    абзац
Name: noun, dtype: object

In [81]:
Moves = 0
CurrentWord = FirstWord
r = ("", "", [".",".",".",".","."])
while Moves < 6:
    Moves += 1
    print(Moves)
    Response = input()
    r = parse_response2(CurrentWord, Response,r)
    CurrentWord = get_candidate2(s, r)
    print(CurrentWord)


0
абзац
0
абзац
0


IndexError: string index out of range