In [14]:
import xml.etree.ElementTree as ET

In [126]:
def Verdict_to_list(dict_name):
    """Load a tab-separated Verdict dictionary file into a list of records.

    Args:
        dict_name: Path to the UTF-8 encoded dictionary file.

    Returns:
        A list with one item per line of the file; each item is the list of
        tab-separated fields of that line. The empty string that follows a
        final newline is not included.
    """
    # The context manager closes the handle even if reading fails.
    with open(dict_name, 'r', encoding='utf-8') as dict_file:
        lines = dict_file.read().split('\n')
    # Drop only the empty remainder after a trailing newline, so a file that
    # does not end with a newline keeps its last record.
    if lines and lines[-1] == '':
        lines.pop()
    return [line.split('\t') for line in lines]
    
def Verdict_translate_adjectives(adjs_source, dict_name):
    """Collect Verdict dictionary translations for a group of adjectives.

    A dictionary record contributes its first field when one of the source
    adjectives is contained in the record's third field and the record's
    second field is 'Прилагательное' (the adjective part-of-speech tag).

    Args:
        adjs_source: Iterable of source adjectives.
        dict_name: Path of the Verdict dictionary, passed to Verdict_to_list.

    Returns:
        A set with the first field of every matching record.
    """
    entries = Verdict_to_list(dict_name)
    return {
        entry[0]
        for adjective in adjs_source
        for entry in entries
        if adjective in entry[2] and entry[1] == 'Прилагательное'
    }

def Verdict_translate_nouns(nouns_source, dict_name, word_to_word=True):
    """Translate nouns with the Verdict dictionary.

    Only records whose second field is 'Существительное' (the noun
    part-of-speech tag) are considered. For each source noun, its first
    space-separated token is compared with the third field of every record,
    in file order:

    * a record whose third field equals the token is an exact match;
    * until the first exact match has been seen, a record whose third field
      contains the token as one of its space-separated words also counts.

    Args:
        nouns_source: Sequence of source nouns (strings).
        dict_name: Path of the Verdict dictionary, passed to Verdict_to_list.
        word_to_word: If true (default), return a single translation per
            noun; otherwise return every collected translation.

    Returns:
        A list aligned with ``nouns_source``. With ``word_to_word`` true each
        item is the first field of the last collected match, or '' when
        nothing matched. Otherwise each item is a one-key dict mapping the
        source noun to the list of first fields of all collected matches.
    """
    # Keep only well-formed noun records; lines with fewer than three fields
    # (e.g. blank lines) cannot be matched and are skipped.
    noun_records = [
        record for record in Verdict_to_list(dict_name)
        if len(record) >= 3 and record[1] == 'Существительное'
    ]

    nouns_target = []
    for noun in nouns_source:
        head = noun.split(' ')[0]
        matches = []
        exact_found = False
        for record in noun_records:
            if record[2] == head:
                matches.append(record[0])
                exact_found = True
            elif not exact_found and head in record[2].split(' '):
                # Partial matches only count before the first exact match.
                matches.append(record[0])
        if word_to_word:
            nouns_target.append(matches[-1] if matches else '')
        else:
            nouns_target.append({noun: matches})
    return nouns_target

def metrics_accuracy(word_predict, words):
    """Print how the predicted words compare with the reference words.

    Two lines are printed: the count and set of predicted words that are
    also in ``words`` (labelled 'нужных прилагательных', "needed
    adjectives"), then the count and set of predicted words that are not in
    ``words`` (labelled 'ненужных прилагательных', "unneeded adjectives").

    Args:
        word_predict: Iterable of predicted words.
        words: Iterable of reference words.
    """
    # Accept any iterable; set operations below need real sets.
    predicted = set(word_predict)
    expected = set(words)
    hits = predicted & expected
    extras = predicted - expected
    print(len(hits), 'нужных прилагательных:', hits)
    print(len(extras), 'ненужных прилагательных:', extras)
    
def parser(file_name):
    """Parse the XML file at ``file_name`` and return its root element."""
    return ET.parse(file_name).getroot()

def adj_translate_freedict(adjs_source, root):
    """Gather FreeDict translations for the given adjectives.

    Every ``entry`` element under ``root`` whose headword (the text of the
    first grandchild, ``entry[0][0]``) is in ``adjs_source`` is inspected.
    From its ``sense`` elements that either have ``n == '1'`` or carry no
    attributes at all, the text of every ``quote`` element is collected.

    Args:
        adjs_source: Container of source adjectives (membership is tested).
        root: Root element of the parsed dictionary.

    Returns:
        A set of collected quote texts.
    """
    translations = set()
    for entry in root.iter('entry'):
        headword = entry[0][0].text
        if headword not in adjs_source:
            continue
        for sense in entry.iter('sense'):
            # Skip senses that have attributes but are not numbered '1'.
            if sense.attrib and sense.get('n') != '1':
                continue
            translations.update(quote.text for quote in sense.iter('quote'))
    return translations

def nom_translate_freedict(nouns_source, root):
    """Look up FreeDict translations for each noun, keeping input order.

    For every noun, all ``entry`` elements under ``root`` whose headword
    (``entry[0][0].text``) equals the noun are scanned; quote texts are taken
    from ``sense`` elements that have ``n == '1'`` or no attributes.

    Args:
        nouns_source: Sequence of source nouns.
        root: Root element of the parsed dictionary.

    Returns:
        A list aligned with ``nouns_source``; each item is the (possibly
        empty) list of quote texts found for that noun.
    """
    def first_sense_quotes(noun):
        found = []
        for entry in root.iter('entry'):
            if entry[0][0].text != noun:
                continue
            for sense in entry.iter('sense'):
                if sense.get('n') == '1' or not sense.attrib:
                    found.extend(quote.text for quote in sense.iter('quote'))
        return found

    return [first_sense_quotes(noun) for noun in nouns_source]

def nom_back_translate_Free(original, translated, root):
    """Pick, per noun, a candidate translation confirmed by back-translation.

    For position ``i``, each candidate in ``translated[i]`` is looked up as a
    headword (``entry[0][0].text``) in the dictionary under ``root``. A
    candidate is confirmed when one of its ``quote`` texts, taken from
    ``sense`` elements with ``n == '1'`` or no attributes, equals
    ``original[i]``. The last confirmed candidate wins.

    Args:
        original: Sequence of source nouns.
        translated: Sequence of candidate lists, aligned with ``original``.
        root: Root element of the parsed reverse dictionary.

    Returns:
        A list with one string per item of ``translated``: the confirmed
        candidate, or '' when none was confirmed.
    """
    confirmed = []
    for position, candidates in enumerate(translated):
        chosen = ''
        for candidate in candidates:
            for entry in root.iter('entry'):
                headword = entry[0][0].text
                if headword != candidate:
                    continue
                for sense in entry.iter('sense'):
                    if sense.attrib and sense.get('n') != '1':
                        continue
                    if any(quote.text == original[position]
                           for quote in sense.iter('quote')):
                        chosen = headword
        confirmed.append(chosen)
    return confirmed

def adj_back_translate_Free(original, translated, root):
    """Filter adjective translations by back-translating them with FreeDict.

    For each candidate in ``translated``, the quote texts of all matching
    entries (headword ``entry[0][0].text`` equal to the candidate) are
    gathered from ``sense`` elements with ``n == '1'`` or no attributes. A
    candidate is removed only when it has back-translations and none of them
    is in ``original``; candidates with no back-translations are kept.

    Args:
        original: Iterable of source adjectives.
        translated: Collection of candidate translations; it is copied, not
            modified.
        root: Root element of the parsed reverse dictionary.

    Returns:
        A copy of ``translated`` without the rejected candidates.
    """
    source_words = set(original)
    kept = translated.copy()
    for candidate in translated:
        back_translations = set()
        for entry in root.iter('entry'):
            if entry[0][0].text != candidate:
                continue
            for sense in entry.iter('sense'):
                if sense.get('n') == '1' or not sense.attrib:
                    back_translations.update(
                        quote.text for quote in sense.iter('quote'))
        # Reject only candidates that translate back to something else.
        if back_translations and source_words.isdisjoint(back_translations):
            kept.remove(candidate)
    return kept


def freedict_holes(Verd, Free):
    """Fill empty slots of ``Free`` with the values at the same index in ``Verd``.

    ``Free`` is modified in place: wherever it holds '' and ``Verd`` holds a
    non-empty value at that index, the ``Verd`` value is copied over.

    Args:
        Verd: Sequence of fallback values, indexed like ``Free``.
        Free: List to patch.

    Returns:
        The same ``Free`` list, after patching.
    """
    for index, current in enumerate(Free):
        if current != '':
            continue
        fallback = Verd[index]
        if fallback != '':
            Free[index] = fallback
    return Free

def translate_adjs_with_Freedict(dict_source_to_target, dict_target_to_source, adjs_source):
    """Translate adjectives with FreeDict and filter them by back-translation.

    Both dictionary files are parsed, the adjectives are translated with the
    source-to-target dictionary, and the candidates are then filtered by
    adj_back_translate_Free using the target-to-source dictionary.

    Args:
        dict_source_to_target: Path of the source-to-target dictionary XML.
        dict_target_to_source: Path of the target-to-source dictionary XML.
        adjs_source: Source adjectives.

    Returns:
        The result of adj_back_translate_Free for the translated candidates.
    """
    forward_root = parser(dict_source_to_target)
    reverse_root = parser(dict_target_to_source)
    candidates = adj_translate_freedict(adjs_source, forward_root)
    return adj_back_translate_Free(adjs_source, candidates, reverse_root)

def translate_nouns_with_Freedict(dict_source_to_target, dict_target_to_source, nouns_source):
    """Translate nouns with FreeDict and confirm them by back-translation.

    Both dictionary files are parsed, the nouns are translated with the
    source-to-target dictionary, and one candidate per noun is then selected
    by nom_back_translate_Free using the target-to-source dictionary.

    Args:
        dict_source_to_target: Path of the source-to-target dictionary XML.
        dict_target_to_source: Path of the target-to-source dictionary XML.
        nouns_source: Sequence of source nouns.

    Returns:
        The list returned by nom_back_translate_Free: one string per noun,
        '' where no candidate was confirmed.
    """
    root_source_to_target = parser(dict_source_to_target)
    root_target_to_source = parser(dict_target_to_source)
    # The noun helpers in this file are named nom_*, not nouns_*.
    candidates = nom_translate_freedict(nouns_source, root_source_to_target)
    return nom_back_translate_Free(nouns_source, candidates, root_target_to_source)

In [137]:
def translate_adjs_with_Freedict_and_Verdict(Freedict_source_to_target, Freedict_target_to_source, adjs_source_en, adjs_source_ru, Verdict_name):
    """Combine FreeDict and Verdict adjective translations.

    Args:
        Freedict_source_to_target: Path of the FreeDict source-to-target XML.
        Freedict_target_to_source: Path of the FreeDict target-to-source XML.
        adjs_source_en: Adjectives passed to translate_adjs_with_Freedict.
        adjs_source_ru: Adjectives passed to Verdict_translate_adjectives.
        Verdict_name: Path of the Verdict dictionary file.

    Returns:
        The union (``|``) of the two translation results.
    """
    from_freedict = translate_adjs_with_Freedict(
        Freedict_source_to_target,
        Freedict_target_to_source,
        adjs_source_en,
    )
    from_verdict = Verdict_translate_adjectives(adjs_source_ru, Verdict_name)
    return from_freedict | from_verdict

def translate_nouns_with_Freedict_and_Verdict(Freedict_source_to_target, Freedict_target_to_source, nouns_source, Verdict_name):
    """Translate nouns with FreeDict, using Verdict where FreeDict has no result.

    The FreeDict result is computed first, then the Verdict result; both are
    handed to freedict_holes, which fills the empty FreeDict slots from the
    Verdict list.

    Args:
        Freedict_source_to_target: Path of the FreeDict source-to-target XML.
        Freedict_target_to_source: Path of the FreeDict target-to-source XML.
        nouns_source: Sequence of source nouns.
        Verdict_name: Path of the Verdict dictionary file.

    Returns:
        The list returned by freedict_holes.
    """
    freedict_nouns = translate_nouns_with_Freedict(
        Freedict_source_to_target,
        Freedict_target_to_source,
        nouns_source,
    )
    verdict_nouns = Verdict_translate_nouns(nouns_source, Verdict_name)
    return freedict_holes(verdict_nouns, freedict_nouns)