# General preparations

In [1]:
from __future__ import annotations
import codecs
from collections import namedtuple
from enum import Enum
from collections import defaultdict
import re
import random

# Dictionary entry: its default form, the parsed inflection table (the record
# parsers below key it by Quantity) and a traits dict keyed by trait Enum class.
WordRecord = namedtuple("WordRecord", ["defaultform", "data", "traits"])

# One word slot of a phrase rule: its part of speech and a traits dict
# (as produced by parseTraits).
PhraseWordRecord = namedtuple("PhraseWordRecord", ["partofspeech", "traits"])

class Animate(Enum):
    """Animacy trait of a word."""
    Animate = 0
    Inanimate = 1

    @staticmethod
    def convert(string: str) -> Animate:
        """Return the member for a textual tag ('anim' or 'nanim').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'anim': Animate.Animate,
            'nanim': Animate.Inanimate,
        }
        return switcher[string]
    
class Gender(Enum):
    """Grammatical gender trait of a word."""
    Male = 0
    Female = 1
    Neuter = 2

    @staticmethod
    def convert(string: str) -> Gender:
        """Return the member for a textual tag ('M', 'F' or 'N').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'M': Gender.Male,
            'F': Gender.Female,
            'N': Gender.Neuter,
        }
        return switcher[string]
    
class Tense(Enum):
    """Verb tense trait."""
    Present = 0
    Past = 1
    Future = 2

    @staticmethod
    def convert(string: str) -> Tense:
        """Return the member for a textual tag ('present', 'past' or 'future').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'present': Tense.Present,
            'past': Tense.Past,
            'future': Tense.Future,
        }
        return switcher[string]
    
class Perfectivity(Enum):
    """Verb aspect trait (perfect / imperfect)."""
    Perfect = 0
    Imperfect = 1

    @staticmethod
    def convert(string: str) -> Perfectivity:
        """Return the member for a textual tag ('perf' or 'imperf').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'perf': Perfectivity.Perfect,
            'imperf': Perfectivity.Imperfect,
        }
        return switcher[string]
    
class PartOfSpeech(Enum):
    """Part of speech of a dictionary entry or phrase slot."""
    Noun = 0
    Pronoun = 1
    Adjective = 2
    Verb = 3

    @staticmethod
    def convert(string: str) -> PartOfSpeech:
        """Return the member for a textual tag ('N', 'Pr', 'Adj' or 'V').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag.
        """
        switcher = {
            'N': PartOfSpeech.Noun,
            'Pr': PartOfSpeech.Pronoun,
            'Adj': PartOfSpeech.Adjective,
            'V': PartOfSpeech.Verb,
        }
        return switcher[string]
    
class Quantity(Enum):
    """Grammatical number trait (singular / plural)."""
    Singular = 0
    Plural = 1

    @staticmethod
    def convert(string: str) -> Quantity:
        """Return the member for a textual tag ('sing' or 'pl').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'sing': Quantity.Singular,
            'pl': Quantity.Plural,
        }
        return switcher[string]

class Case(Enum):
    """Grammatical case trait."""
    Nominative = 0
    Genitive = 1
    Dative = 2
    Accusative = 3
    Instrumental = 4
    Prepositional = 5

    @staticmethod
    def convert(string: str) -> Case:
        """Return the member for a textual tag ('nom', 'gen', 'dat', 'acc', 'inst', 'prep').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'nom': Case.Nominative,
            'gen': Case.Genitive,
            'dat': Case.Dative,
            'acc': Case.Accusative,
            'inst': Case.Instrumental,
            'prep': Case.Prepositional,
        }
        return switcher[string]

class PhraseHead(Enum):
    """Role of a word slot inside a phrase rule: the head or a dependent."""
    Head = 0
    Slave = 1

    @staticmethod
    def convert(string: str) -> PhraseHead:
        """Return the member for a textual tag ('head' or 'slave').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'head': PhraseHead.Head,
            'slave': PhraseHead.Slave,
        }
        return switcher[string]
    
class Person(Enum):
    """Grammatical person trait."""
    P1 = 0
    P2 = 1
    P3 = 2

    @staticmethod
    def convert(string: str) -> Person:
        """Return the member for a textual tag ('1P', '2P' or '3P').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            '1P': Person.P1,
            '2P': Person.P2,
            '3P': Person.P3,
        }
        return switcher[string]
    
class Transitivity(Enum):
    """Verb transitivity trait."""
    Transitive = 0
    Intransitive = 1

    @staticmethod
    def convert(string: str) -> Transitivity:
        """Return the member for a textual tag ('tr' or 'intr').

        Declared static so it can be called on the class or on a member.
        Raises KeyError for an unknown tag; parseTrait relies on that to
        move on to the next trait type.
        """
        switcher = {
            'tr': Transitivity.Transitive,
            'intr': Transitivity.Intransitive,
        }
        return switcher[string]
    
cases = [Case.Nominative, Case.Genitive, Case.Dative, Case.Accusative, Case.Instrumental,
         Case.Prepositional]
persons = [Person.P1, Person.P2, Person.P3]
genders = [Gender.Male, Gender.Female, Gender.Neuter]
wordTraits = [Animate, Gender, Tense, Perfectivity, Quantity, Case, PhraseHead, Person, Transitivity]
partsOfSpeech = [PartOfSpeech.Noun, PartOfSpeech.Pronoun, PartOfSpeech.Adjective, PartOfSpeech.Verb]
quantities = [Quantity.Singular, Quantity.Plural]
perfTenses = [Tense.Past, Tense.Future]
imperfTenses = [Tense.Past, Tense.Present]
animacy = [Animate.Animate, Animate.Inanimate]
perfectivities = [Perfectivity.Perfect, Perfectivity.Imperfect]
transitivities = [Transitivity.Transitive, Transitivity.Intransitive]

startingNT = "S"

def loadFromFile(path):
    """Read a UTF-8 text file and return its contents split into lines.

    Uses the builtin ``open`` with an explicit encoding instead of the
    deprecated ``codecs.open``; the resulting list of lines is the same.

    :param path: path of the file to read.
    :return: list of lines without their line terminators.
    """
    with open(path, 'r', encoding='utf-8') as dct:
        return dct.read().splitlines()
    
def parseTrait(tr):
    """Resolve a textual trait tag to a ``(traitType, member)`` pair.

    The trait types in ``wordTraits`` are tried in order and the first one
    whose ``convert`` accepts the tag wins. Returns None when no trait type
    recognises the tag.
    """
    for candidate in wordTraits:
        try:
            member = candidate.convert(tr)
        except KeyError:
            # Not a tag of this trait type; try the next one.
            continue
        return (candidate, member)
    return None

def parseTraits(trs):
    """Parse a bracketed, ';'-separated trait list into a traits dict.

    :param trs: trait specification such as ``"[head;nom]"``; anything of
        length <= 2 (e.g. ``"[]"``, ``""`` or an empty list) means "no traits".
    :return: dict with one key per class in ``wordTraits``; unspecified
        traits map to None.
    :raises ValueError: if a tag is not recognised by any trait type
        (previously this surfaced as an unrelated unpacking TypeError).
    """
    res = dict.fromkeys(wordTraits)
    if len(trs) > 2:
        # Drop the surrounding brackets before splitting.
        for trait in trs[1:-1].split(';'):
            parsed = parseTrait(trait)
            if parsed is None:
                raise ValueError("Unknown trait tag", trait)
            trType, val = parsed
            res[trType] = val
    return res

# Loading dictionary

## Parsing

### Custom parsers

#### Nouns

In [2]:
def parseCases(caseString):
    """Parse a bracketed, '|'-separated case table.

    Returns an empty list when the table holds a single field (no forms),
    raises ValueError when it holds between two and five fields, and
    otherwise returns a dict pairing the entries of ``cases`` with the forms
    in order.
    """
    forms = caseString[1:-1].split('|')
    formCount = len(forms)
    if formCount == 1:
        return []
    if formCount < 6:
        raise ValueError("Invalid case data", forms)
    return {case: form for case, form in zip(cases, forms)}

def parseNounRec(splitRec):
    """Build a WordRecord for a noun entry.

    :param splitRec: fields of the entry after the part-of-speech tag:
        default form, singular case table, plural case table, traits.
    :return: WordRecord whose data maps Quantity to the parsed case table.
    :raises ValueError: if any field fails to parse; the original error is
        attached as the explicit cause so the reason is not lost.
    """
    try:
        return WordRecord(splitRec[0], {Quantity.Singular: parseCases(splitRec[1]),
                              Quantity.Plural: parseCases(splitRec[2])}, parseTraits(splitRec[3]))
    except ValueError as err:
        raise ValueError("Failed to import entry:", splitRec[0]) from err

#### Adjectives

In [3]:
def parseAnimCases(caseString):
    """Parse a case table whose accusative form depends on animacy.

    Used for the male and plural forms of adjectives. The fourth field holds
    two ';'-separated forms, stored as a dict keyed by Animate (animate form
    first); the remaining fields are paired with ``cases`` in order.
    """
    forms = caseString[1:-1].split('|')
    accusativeForms = forms[3].split(';')
    byCase = dict(zip(cases, forms))
    byCase[Case.Accusative] = {
        Animate.Animate: accusativeForms[0],
        Animate.Inanimate: accusativeForms[1],
    }
    return byCase

def parseAdjSingCases(caseString):
    """Parse the singular forms of an adjective, grouped by gender.

    :param caseString: bracketed, space-separated list of ``GENDER:TABLE``
        items, where GENDER is a Gender tag and TABLE is a case table.
    :return: dict mapping Gender to its case table; the male table is parsed
        with parseAnimCases, the others with parseCases.

    A plain dict is used on purpose: ``defaultdict(Gender)`` would try to call
    the Enum class with no arguments on a missing key and raise TypeError
    instead of the expected KeyError.
    """
    resCases = {}
    for caseStringGen in caseString[1:-1].split(' '):
        tmp = caseStringGen.split(':')
        gend = Gender.convert(tmp[0])
        parseTable = parseAnimCases if gend == Gender.Male else parseCases
        resCases[gend] = parseTable(tmp[1])
    return resCases

def parseAdjRec(splitRec):
    """Build a WordRecord for an adjective entry.

    ``splitRec`` holds, in order: default form, singular forms (per gender),
    plural forms (animacy-aware case table) and traits.
    """
    defaultForm = splitRec[0]
    singularForms = parseAdjSingCases(splitRec[1])
    pluralForms = parseAnimCases(splitRec[2])
    forms = {Quantity.Singular: singularForms, Quantity.Plural: pluralForms}
    return WordRecord(defaultForm, forms, parseTraits(splitRec[3]))

#### Verbs

In [4]:
def parsePastPersonalForms(formString):
    """Parse the past-tense forms of a verb.

    With exactly three '|'-separated forms the result is a dict pairing them
    with ``genders`` in order; otherwise only the first form is returned as a
    string.
    """
    forms = formString[1:-1].split('|')
    if len(forms) != 3:
        return forms[0]
    return dict(zip(genders, forms))

def parsePersonalForms(formString):
    """Parse '|'-separated personal verb forms into a dict keyed by Person.

    Forms are paired with ``persons`` in order; surplus forms are ignored.
    """
    personForms = formString[1:-1].split('|')
    return {person: form for person, form in zip(persons, personForms)}

def parseVerbForms(formString):
    """Parse the forms of a verb for one grammatical number, grouped by tense.

    :param formString: bracketed, space-separated list of ``TENSE:FORMS``
        items, where TENSE is a Tense tag.
    :return: dict mapping Tense to its forms; past forms are parsed with
        parsePastPersonalForms, the others with parsePersonalForms.

    A plain dict is used on purpose: ``defaultdict(Tense)`` would try to call
    the Enum class with no arguments on a missing key and raise TypeError
    instead of the expected KeyError. The loop variable no longer shadows the
    ``formString`` parameter.
    """
    resForms = {}
    for tenseEntry in formString[1:-1].split(' '):
        tmp = tenseEntry.split(':')
        tense = Tense.convert(tmp[0])
        parseForms = parsePastPersonalForms if tense == Tense.Past else parsePersonalForms
        resForms[tense] = parseForms(tmp[1])
    return resForms

def perfectivityParser(data):
    """Return the Perfectivity named by the first '|'-separated field of a bracketed string."""
    firstField, _, _ = data[1:-1].partition('|')
    return Perfectivity.convert(firstField)
        
def parseVerbRec(splitRec):
    """Build a WordRecord for a verb entry.

    ``splitRec`` holds, in order: default form, singular forms, plural forms
    and traits.
    """
    defaultForm = splitRec[0]
    singularForms = parseVerbForms(splitRec[1])
    pluralForms = parseVerbForms(splitRec[2])
    forms = {Quantity.Singular: singularForms, Quantity.Plural: pluralForms}
    return WordRecord(defaultForm, forms, parseTraits(splitRec[3]))

#### Pronouns

In [5]:
def parse3rdPerson(caseString):
    """Parse the singular forms of a third-person pronoun, grouped by gender.

    :param caseString: bracketed, space-separated list of ``GENDER:TABLE``
        items, where GENDER is a Gender tag and TABLE is a case table.
    :return: an empty list when the string holds a single item, otherwise a
        dict mapping Gender to the table parsed by parseCases.

    A plain dict is used on purpose: ``defaultdict(Gender)`` would try to call
    the Enum class with no arguments on a missing key and raise TypeError
    instead of the expected KeyError.
    """
    gens = caseString[1:-1].split(' ')
    if len(gens) == 1:
        return []
    res = {}
    for gen in gens:
        genCases = gen.split(':')
        res[Gender.convert(genCases[0])] = parseCases(genCases[1])
    return res

def parsePronounRec(splitRec):
    """Build a WordRecord for a pronoun entry.

    ``splitRec`` holds, in order: default form, singular forms, plural forms
    and traits. Third-person pronouns have per-gender singular forms
    (parse3rdPerson); all other forms are plain case tables (parseCases).
    """
    traits = parseTraits(splitRec[3])
    parseSingular = parse3rdPerson if traits[Person] == Person.P3 else parseCases
    defaultForm = splitRec[0]
    forms = {
        Quantity.Singular: parseSingular(splitRec[1]),
        Quantity.Plural: parseCases(splitRec[2]),
    }
    return WordRecord(defaultForm, forms, traits)

### Master parser function

In [6]:
# Record parsers listed in the same order as partsOfSpeech, so that zipping
# the two lists maps each part of speech to its parser.
parsers = [parseNounRec, parsePronounRec, parseAdjRec, parseVerbRec]
partOfSpeechParsers = dict(zip(partsOfSpeech, parsers))

def parseRecord(rec):
    """Parse one tab-separated dictionary line.

    The first field is the part-of-speech tag; the remaining fields are handed
    to the parser registered for that part of speech. Returns a
    ``(partOfSpeech, WordRecord)`` pair. An unknown tag raises KeyError.
    """
    posTag, *fields = rec.split('\t')
    partofspeech = PartOfSpeech.convert(posTag)
    parseFields = partOfSpeechParsers[partofspeech]
    return (partofspeech, parseFields(fields))

# Loading rules

## Loading & parsing phrase structure: words and their traits

In [7]:
def parseRightParts(rightPart):
    """Parse the right-hand side of a phrase rule.

    Alternatives are separated by '|', word slots within an alternative by a
    single space, and a slot has the form ``POS`` or ``POS-TRAITS``. Returns a
    list of alternatives, each a list of PhraseWordRecord.
    """
    alternatives = []
    for alternative in rightPart.split('|'):
        words = []
        for slot in alternative.split(' '):
            fields = slot.split('-')
            # A slot without a '-TRAITS' suffix gets an empty trait spec.
            traitSpec = fields[1] if len(fields) > 1 else []
            words.append(PhraseWordRecord(PartOfSpeech.convert(fields[0]),
                                          parseTraits(traitSpec)))
        alternatives.append(words)
    return alternatives

def getPhraseStruct(rules):
    """Map each phrase rule's left-hand side to its parsed right-hand side.

    Every rule has the form ``LHS:=RHS``; a later rule with the same LHS
    replaces an earlier one.
    """
    splitRules = (rule.split(':=') for rule in rules)
    return {sides[0]: parseRightParts(sides[1]) for sides in splitRules}

## Loading & parsing non-terminal symbols, denoting overall sentence structure

In [8]:
def parseSentHeads(rightPart):
    """Return the items of ``rightPart`` as a new list, in order."""
    return list(rightPart)

def getSentStruct(rules):
    """Parse sentence-structure rules into a dict of alternatives per head.

    Each rule has the form ``<marker>HEAD:=A B|C D``: the first character is
    dropped, HEAD is the non-terminal and every '|'-separated alternative is
    a whitespace-separated list of symbols.

    :param rules: iterable of rule strings.
    :return: dict mapping each head to a list of alternatives (lists of
        symbols). The start symbol ``startingNT`` is always present.

    Heads other than the start symbol are stored under their own key; the
    previous code tried to index the start symbol's list with the head string
    and raised TypeError for them.
    """
    rulesFinal = {startingNT: []}
    for rule in rules:
        sentHead = rule[1:].split(':=')
        head, body = sentHead[0], sentHead[1]
        alternatives = rulesFinal.setdefault(head, [])
        for part in body.split('|'):
            alternatives.append(parseSentHeads(part.split()))
    return rulesFinal

### Master parser function

In [9]:
def parseRules(rulesSet):
    """Split rule lines by kind and parse both groups.

    Empty lines are skipped, lines starting with '!' are sentence-structure
    rules, lines starting with '#' are ignored and everything else is a
    phrase rule. Returns ``(sentenceStructure, phraseStructure)``.
    """
    sentRules, phraseRules = [], []
    for rule in rulesSet:
        if not rule:
            continue
        marker = rule[0]
        if marker == '!':
            sentRules.append(rule)
        elif marker != '#':
            phraseRules.append(rule)
    sentStruct = getSentStruct(sentRules)
    phraseStruct = getPhraseStruct(phraseRules)
    return (sentStruct, phraseStruct)

# Binding the components together: sentence generation

In [10]:
# Traits that updateWordTraits may copy from the lingering agreement into a
# word, per part of speech (lists are zipped with partsOfSpeech, so order matters).
nounTraits = [Animate, Gender, Quantity, Case, PhraseHead]
pronounTraits = [Animate, Gender, Quantity, Case, PhraseHead, Person]
adjectiveTraits = [Animate, Gender, Quantity, Case, PhraseHead]
verbTraits = [Gender, Tense, Perfectivity, Quantity, PhraseHead, Person, Transitivity]
posTraits = dict(zip(partsOfSpeech, [nounTraits, pronounTraits, adjectiveTraits, verbTraits]))

# Traits kept in the lingering agreement by updateHeadLingeringAgreement,
# keyed by the part of speech of the phrase head; all other traits are reset to None.
passingHeadNounTraits = [Animate, Gender, Quantity, Person]
passingHeadPronounTraits = [Gender, Quantity, Person]
passingHeadAdjectiveTraits = []
passingHeadVerbTraits = [Transitivity]
passingHeadPosTraits = dict(zip(partsOfSpeech, [passingHeadNounTraits, passingHeadPronounTraits,
                                                passingHeadAdjectiveTraits, passingHeadVerbTraits]))
# Traits kept in the lingering agreement by updateLingeringAgreement,
# keyed by the part of speech of the phrase head; all other traits are reset to None.
passingNounTraits = [Animate, Gender, Quantity, Case]
passingPronounTraits = [Gender, Quantity, Person, Case]
passingAdjectiveTraits = [] # ??? (left open by the original author)
passingVerbTraits = []
passingPosTraits = dict(zip(partsOfSpeech, [passingNounTraits, passingPronounTraits,
                                                passingAdjectiveTraits, passingVerbTraits]))

def updateWordTraits(word, agr, pos):
    """Fill the word's unset traits from the agreement dict, in place.

    Only the traits listed in ``posTraits[pos]`` are considered, and a trait
    that already has a value is left untouched.
    """
    traits = word.traits
    for trait in posTraits[pos]:
        if traits[trait] is None:
            traits[trait] = agr[trait]

def updateLingeringAgreement(word, agr, pos):
    """Overwrite the agreement dict in place from ``word``.

    Traits listed in ``passingPosTraits[pos]`` take the word's value; every
    other trait in the agreement is reset to None.
    """
    passing = passingPosTraits[pos]
    for trait in agr:
        agr[trait] = word.traits[trait] if trait in passing else None
            
def updateHeadLingeringAgreement(word, agr, pos):
    """Overwrite the agreement dict in place from the phrase head ``word``.

    Traits listed in ``passingHeadPosTraits[pos]`` take the word's value;
    every other trait in the agreement is reset to None.
    """
    passing = passingHeadPosTraits[pos]
    for trait in agr:
        agr[trait] = word.traits[trait] if trait in passing else None

### Adjective

In [11]:
def adjAgreement(word, lingeringAgreement):
    """Complete the traits of an adjective slot, in place.

    Traits are first copied from ``lingeringAgreement`` (updateWordTraits);
    whatever is still unset is then chosen here. Returns None.

    Fixes relative to the previous version:
    * animacy is drawn from ``animacy`` (it used to be drawn from
      ``perfectivities``, which stored a Perfectivity value under Animate);
    * an inanimate adjective without a gender gets Gender.Neuter, matching
      nounAgreement, instead of keeping Gender unset.
    """
    updateWordTraits(word, lingeringAgreement, word.partofspeech)
    traits = word.traits
    if traits[Quantity] is None:
        traits[Quantity] = random.choice(quantities)
    if traits[Animate] is None:
        traits[Animate] = random.choice(animacy)
    if traits[Gender] is None:
        if traits[Animate] == Animate.Animate:
            # genders[:-1] leaves out Neuter for animate words.
            traits[Gender] = random.choice(genders[:-1])
        else:
            traits[Gender] = Gender.Neuter
    traits[Person] = Person.P3
    if traits[Case] is None:
        # NOTE(review): with an Intransitive agreement the case stays unset,
        # as in nounAgreement — confirm this is intended.
        if lingeringAgreement[Transitivity] == Transitivity.Transitive:
            traits[Case] = Case.Accusative
        elif lingeringAgreement[Transitivity] is None:
            traits[Case] = Case.Nominative

## Verb

In [12]:
def verbAgreement(word, lingeringAgreement):
    """Complete the traits of a verb slot, in place.

    Traits are first copied from ``lingeringAgreement`` (updateWordTraits);
    unset transitivity, perfectivity and tense are then chosen at random
    (tense from the list matching the perfectivity), and a past-tense verb
    without a gender gets a random one. Returns None.
    """
    updateWordTraits(word, lingeringAgreement, word.partofspeech)
    traits = word.traits
    if traits[Transitivity] is None:
        traits[Transitivity] = random.choice(transitivities)
    if traits[Perfectivity] is None:
        traits[Perfectivity] = random.choice(perfectivities)
    if traits[Tense] is None:
        isPerfect = traits[Perfectivity] == Perfectivity.Perfect
        traits[Tense] = random.choice(perfTenses if isPerfect else imperfTenses)
    if traits[Tense] == Tense.Past and traits[Gender] is None:
        traits[Gender] = random.choice(genders)

## Noun

In [13]:
def nounAgreement(word, lingeringAgreement):
    """Complete the traits of a noun slot, in place.

    Traits are first copied from ``lingeringAgreement`` (updateWordTraits).
    Unset quantity and animacy are chosen at random; an unset gender becomes
    a random non-neuter gender for animate nouns and Neuter otherwise. Person
    is always set to P3. An unset case becomes Accusative after a transitive
    agreement and Nominative when the agreement has no transitivity; it stays
    unset otherwise. Returns None.
    """
    updateWordTraits(word, lingeringAgreement, word.partofspeech)
    traits = word.traits
    if traits[Quantity] is None:
        traits[Quantity] = random.choice(quantities)
    if traits[Animate] is None:
        traits[Animate] = random.choice(animacy)
    if traits[Gender] is None:
        isAnimate = traits[Animate] == Animate.Animate
        traits[Gender] = random.choice(genders[:-1]) if isAnimate else Gender.Neuter
    traits[Person] = Person.P3
    if traits[Case] is None:
        verbTransitivity = lingeringAgreement[Transitivity]
        if verbTransitivity == Transitivity.Transitive:
            traits[Case] = Case.Accusative
        elif verbTransitivity is None:
            traits[Case] = Case.Nominative

## Pronoun

In [14]:
def pronounAgreement(word, lingeringAgreement):
    """Complete the traits of a pronoun slot, in place.

    Traits are first copied from ``lingeringAgreement`` (updateWordTraits).
    Unset quantity is random; a singular pronoun without gender gets a random
    one. An unset case becomes Accusative after a transitive agreement and
    Nominative when the agreement has no transitivity. An unset person is
    random; unset animacy is Animate for first/second person and random for
    third person. Returns None.
    """
    updateWordTraits(word, lingeringAgreement, word.partofspeech)
    traits = word.traits
    if traits[Quantity] is None:
        traits[Quantity] = random.choice(quantities)
    if traits[Quantity] == Quantity.Singular and traits[Gender] is None:
        traits[Gender] = random.choice(genders)
    if traits[Case] is None:
        verbTransitivity = lingeringAgreement[Transitivity]
        if verbTransitivity == Transitivity.Transitive:
            traits[Case] = Case.Accusative
        elif verbTransitivity is None:
            traits[Case] = Case.Nominative
    # The person is needed in either animacy branch, so settle it first.
    if traits[Person] is None:
        traits[Person] = random.choice(persons)
    if traits[Animate] is None:
        if traits[Person] != Person.P3:
            traits[Animate] = Animate.Animate
        else:
            traits[Animate] = random.choice(animacy)

## Dictionaries

In [15]:
# Agreement functions listed in the same order as partsOfSpeech, so that
# zipping the two lists maps each part of speech to its agreement setter.
agreement = [nounAgreement, pronounAgreement, adjAgreement, verbAgreement]
agreementSetter = dict(zip(partsOfSpeech, agreement))

In [50]:
def findWord(dct, traits, pos):
    """Pick a dictionary word of part of speech ``pos`` and return the inflected form matching ``traits``.

    :param dct: mapping from PartOfSpeech to a list of WordRecord.
    :param traits: traits dict of the slot (keyed by trait Enum class).
    :param pos: PartOfSpeech of the slot.
    :return: the selected form, or None when ``pos`` is a pronoun with no
        entry of the requested person (or ``pos`` is not handled here).

    Pronouns: the first entry with the requested person is used. Verbs and
    nouns: a random entry among those matching the relevant traits.
    Adjectives: a random entry. ``random.choice`` raises IndexError when the
    candidate pool is empty.
    """
    if pos == PartOfSpeech.Pronoun:
        for entry in dct[pos]:
            if entry.traits[Person] != traits[Person]:
                continue
            forms = entry.data[traits[Quantity]]
            # Only third-person singular forms are additionally split by gender.
            if traits[Person] == Person.P3 and traits[Quantity] == Quantity.Singular:
                forms = forms[traits[Gender]]
            return forms[traits[Case]]
        return None

    if pos == PartOfSpeech.Verb:
        candidates = [entry for entry in dct[pos]
                      if entry.traits[Transitivity] == traits[Transitivity]
                      and entry.traits[Perfectivity] == traits[Perfectivity]]
        chosen = random.choice(candidates)
        forms = chosen.data[traits[Quantity]][traits[Tense]]
        if traits[Tense] != Tense.Past:
            return forms[traits[Person]]
        if traits[Quantity] == Quantity.Plural:
            # Plural past is returned as stored, without gender lookup.
            return forms
        return forms[traits[Gender]]

    if pos == PartOfSpeech.Noun:
        candidates = [entry for entry in dct[pos]
                      if entry.traits[Animate] == traits[Animate]
                      and entry.traits[Gender] == traits[Gender]]
        chosen = random.choice(candidates)
        return chosen.data[traits[Quantity]][traits[Case]]

    if pos == PartOfSpeech.Adjective:
        chosen = random.choice(list(dct[pos]))
        if traits[Quantity] == Quantity.Plural:
            forms = chosen.data[traits[Quantity]][traits[Case]]
            if traits[Case] != Case.Accusative:
                return forms
            return forms[traits[Animate]]
        forms = chosen.data[traits[Quantity]][traits[Gender]][traits[Case]]
        # Only the male accusative is split by animacy in the singular.
        if traits[Case] == Case.Accusative and traits[Gender] == Gender.Male:
            return forms[traits[Animate]]
        return forms

    return None
    

def getHeadInd(phraseRule):
    """Return the index of the first slot marked as phrase head, or 0 if none is marked."""
    for index, slot in enumerate(phraseRule):
        if slot.traits[PhraseHead] == PhraseHead.Head:
            return index
    return 0


def sentGenerator(dct, sentPattern, rules):
    """Generate one sentence as a list of word forms.

    :param dct: mapping from PartOfSpeech to a list of WordRecord (passed to findWord).
    :param sentPattern: sequence of phrase non-terminals; each must be a key of ``rules``.
    :param rules: mapping from non-terminal to a list of alternative phrases,
        each a list of PhraseWordRecord.
    :return: list with one findWord result per word slot, in sentence order.

    NOTE: the agreement setters write into the ``traits`` dicts of the
    PhraseWordRecord objects taken from ``rules``, so ``rules`` is modified
    by this call.
    """
    lingeringAgreementEmpty = True
    # Traits carried over from one phrase to the next.
    lingeringAgreement = dict()
    finalSentence = []
    for pat in sentPattern:
        curPhrase = random.choice(rules[pat]) # choosing a right-part (phrase) from available for the current non-terminal
        headInd = getHeadInd(curPhrase) # getting the head of the phrase
        if lingeringAgreementEmpty:
            # First phrase only: seed the agreement with the head's own traits.
            for tr in wordTraits:
                lingeringAgreement.update({tr: curPhrase[headInd].traits[tr]})
            lingeringAgreementEmpty = False
        # Holds the agreement setters' return values; the setters defined in
        # this file return None and this list is not read afterwards.
        grammaticalPhrase = [None] * len(curPhrase)
        # The head is settled first, then its passing traits become the agreement
        # that the remaining words of the phrase are matched against.
        grammaticalPhrase[headInd] = agreementSetter[curPhrase[headInd].partofspeech](curPhrase[headInd], lingeringAgreement)
        updateLingeringAgreement(curPhrase[headInd], lingeringAgreement, curPhrase[headInd].partofspeech)
        
        for i in range(0, len(curPhrase)):
            if i != headInd:
                grammaticalPhrase[i] = agreementSetter[curPhrase[i].partofspeech](curPhrase[i], lingeringAgreement)
            # A word is looked up for every slot, the head included.
            finalSentence.append(findWord(dct, curPhrase[i].traits, curPhrase[i].partofspeech))
        # Before moving to the next phrase, keep only the head's "passing head" traits.
        updateHeadLingeringAgreement(curPhrase[headInd], lingeringAgreement, curPhrase[headInd].partofspeech)
    return finalSentence

## Master "main" function

In [51]:
def main(dctPath = './dictionary.txt', rulesPath = './rules.txt'):
    """Load the dictionary and the rules, then generate one random sentence.

    :param dctPath: path of the tab-separated dictionary file.
    :param rulesPath: path of the rules file.
    :return: list of word forms produced by sentGenerator.

    The dictionary is a plain dict with one list per part of speech
    (``defaultdict(PartOfSpeech)`` could not build a default value anyway).
    Blank dictionary lines are skipped, as parseRules already does for blank
    rule lines, instead of failing on an empty part-of-speech tag.
    """
    dct = {part: [] for part in partsOfSpeech}
    for entry in loadFromFile(dctPath):
        if not entry.strip():
            continue
        partOfSpeech, rec = parseRecord(entry)
        dct[partOfSpeech].append(rec)

    sentStruct, rules = parseRules(loadFromFile(rulesPath))

    return sentGenerator(dct, random.choice(sentStruct[startingNT]), rules)

# Test area

In [71]:
# Generate one sentence using main()'s default dictionary and rules paths.
main()

['мы', 'съедим', 'вас']