In [90]:
import csv


class Verb:
    """A single verb entry: its base form plus a table of inflected forms.

    ``versions`` maps a grammar tag (as built by the loader) to the word
    form that carries that tag.
    """

    base: str
    versions: dict[str, str]

    def __init__(self):
        # Start empty; the loader fills both attributes in afterwards.
        self.base, self.versions = "", {}

    def __str__(self):
        # Two lines: the base form first, then the raw dict of forms.
        parts = (f"base: {self.base}", f"versions: {self.versions}")
        return "\n".join(parts)

    def __repr__(self):
        # The debugging representation is the same text as str().
        return str(self)
    
class Verbs:
    """Collection of verbs loaded from a CSV file.

    Attributes:
        verbs: every loaded ``Verb``, in file order.
        categorized: maps each word form found in the file (including the
            untouched first-column cell) to the ``Verb`` it belongs to.
    """

    verbs: list[Verb]
    categorized: dict[str, Verb]

    @classmethod
    def from_file(cls, path: str = "verbs.csv"):
        """Build a ``Verbs`` instance from a comma-separated file.

        The first row is a header. In every following row the first cell is
        the base form and each other cell is an inflected form whose grammar
        tag is ``"<base>_<header name>"`` upper-cased.

        Args:
            path: location of the CSV file; defaults to ``"verbs.csv"``.

        Returns:
            The populated instance. An empty file yields an instance with no
            verbs.
        """
        instance = cls()
        instance.verbs = []
        instance.categorized = {}
        # newline="" is what the csv module requires of the file object so
        # that line endings inside quoted fields are handled by the reader.
        with open(path, "r", encoding="utf-8", newline="") as f:
            reader = csv.reader(f, delimiter=",")
            # Default to an empty header so an empty file does not raise
            # StopIteration.
            names = next(reader, [])
            for row in reader:
                if not row:
                    # Blank line in the file: nothing to turn into a verb.
                    continue
                verb = Verb()
                instance.verbs.append(verb)
                base = ""
                for i, (name, form) in enumerate(zip(names, row)):
                    # Every cell, the base included, resolves to this verb.
                    instance.categorized[form] = verb
                    if i == 0:
                        # 0 stands in for a space inside the verb's base.
                        base = form.replace(" ", "0")
                        verb.base = base
                        continue
                    verb.versions[(base + "_" + name).upper()] = form
        return instance
                
    
class Noun:
    """A single noun entry: its base form plus a table of inflected forms.

    ``versions`` maps a grammar tag (as built by the loader) to the word
    form that carries that tag.
    """

    base: str
    versions: dict[str, str]

    def __init__(self):
        # Start empty; the loader fills both attributes in afterwards.
        self.base, self.versions = "", {}

    def __str__(self):
        # Two lines: the base form first, then the raw dict of forms.
        parts = (f"base: {self.base}", f"versions: {self.versions}")
        return "\n".join(parts)

    def __repr__(self):
        # The debugging representation is the same text as str().
        return str(self)
    
class Nouns:
    """Collection of nouns loaded from a CSV file.

    Attributes:
        nouns: every loaded ``Noun``, in file order.
        categorized: maps each word form found in the file to the ``Noun``
            it belongs to.
    """

    nouns: list[Noun]
    categorized: dict[str, Noun]

    @classmethod
    def from_file(cls, path: str = "nouns.csv", forms_per_noun: int = 14):
        """Build a ``Nouns`` instance from a comma-separated file.

        The first row is a header. Each following row is cut into consecutive
        groups of ``forms_per_noun`` columns; every group becomes one ``Noun``
        whose base is the group's first cell. Each cell is stored under the
        grammar tag ``"<base>_<header name>"`` upper-cased.

        Args:
            path: location of the CSV file; defaults to ``"nouns.csv"``.
            forms_per_noun: number of columns that make up one noun.

        Returns:
            The populated instance. An empty file yields an instance with no
            nouns.

        Raises:
            ValueError: if ``forms_per_noun`` is smaller than 1.
        """
        if forms_per_noun < 1:
            raise ValueError("forms_per_noun must be a positive integer")
        instance = cls()
        instance.nouns = []
        instance.categorized = {}
        # newline="" is what the csv module requires of the file object so
        # that line endings inside quoted fields are handled by the reader.
        with open(path, "r", encoding="utf-8", newline="") as f:
            reader = csv.reader(f, delimiter=",")
            # Default to an empty header so an empty file does not raise
            # StopIteration.
            names = next(reader, [])
            for row in reader:
                base = ""
                for i, (name, form) in enumerate(zip(names, row)):
                    if i % forms_per_noun == 0:
                        # First column of a group: start a new noun here.
                        noun = Noun()
                        instance.nouns.append(noun)
                        base = form
                        noun.base = base
                    noun.versions[(base + "_" + name).upper()] = form
                    instance.categorized[form] = noun
        return instance
    
# Load both word tables from their default CSV files (paths are relative to the
# current working directory). Parser reads these two module-level names.
verbs = Verbs.from_file()
nouns = Nouns.from_file()
# Show the first loaded noun: its base form and its dict of inflected forms.
print(nouns.nouns[0])

base: zamek
versions: {'ZAMEK_SG_NOM_M': 'zamek', 'ZAMEK_SG_GEN_M': 'zamku', 'ZAMEK_SG_DAT_M': 'zamkowi', 'ZAMEK_SG_ACC_M': 'zamek', 'ZAMEK_SG_INS_M': 'zamkiem', 'ZAMEK_SG_LOC_M': 'zamku', 'ZAMEK_SG_VOC_M': 'zamku', 'ZAMEK_PL_NOM_M': 'zamki', 'ZAMEK_PL_GEN_M': 'zamków', 'ZAMEK_PL_DAT_M': 'zamkom', 'ZAMEK_PL_ACC_M': 'zamki', 'ZAMEK_PL_INS_M': 'zamkami', 'ZAMEK_PL_LOC_M': 'zamkach', 'ZAMEK_PL_VOC_M': 'zamki'}


In [165]:
class Parser:
    """Checks a "noun verb noun" sentence word by word.

    Each word is looked up in a noun table and a verb table. A word found in
    neither table is treated as a prefix still being typed, and matching
    forms are returned as suggestions. A known word that breaks a rule
    produces a single error message.

    Both tables default to the module-level ``nouns`` and ``verbs`` objects,
    which are looked up on every call. Other tables can be supplied to the
    constructor instead.
    """

    _CASES = ("NOM", "GEN", "DAT", "ACC", "INS", "LOC", "VOC")
    _GENDERS = ("M", "F", "N")
    _NUMBERS = ("SG", "PL")

    def __init__(self, noun_table=None, verb_table=None):
        """Create a parser.

        Args:
            noun_table: object exposing ``nouns`` (list of entries with
                ``base`` and ``versions``) and ``categorized`` (form -> entry).
                ``None`` means "use the module-level ``nouns``".
            verb_table: same shape with a ``verbs`` list. ``None`` means "use
                the module-level ``verbs``".
        """
        self._noun_table = noun_table
        self._verb_table = verb_table

    def parse(self, string: str) -> list[str]:
        """Validate ``string`` and return suggestions or one error message.

        Only the first three whitespace-separated words are inspected:
        subject noun, verb, object noun.

        Args:
            string: the (possibly unfinished) sentence.

        Returns:
            * ``[]`` when every inspected word is valid (or there are none);
            * a list of completions when the first problematic word is
              unknown (possibly empty if nothing starts with it);
            * a one-element list with an error message when a known word
              violates a rule.

        Raises:
            ValueError: if a word is registered as a noun but none of that
                noun's forms equals it (inconsistent table).
        """
        words = string.split()
        if not words:
            return []
        classified = [self._classify(word) for word in words]

        # --- first word: subject, must be a noun in the nominative --------
        kind, entry = classified[0]
        if kind is None:
            return self._suggest_nouns(words[0], "NOM")
        if kind == "verb":
            return ["First word should be a noun not a verb."]
        readings = self._noun_readings(entry, words[0])
        # One spelling may carry several tags; accept it if any tag is NOM.
        subject = next((r for r in readings if r[1] == "NOM"), None)
        if subject is None:
            return [f"Subject should be in nominative form. But is {readings[0][1]}"]
        noun_number, _, noun_gender = subject

        if len(words) == 1:
            return []

        # --- second word: verb, must agree with the subject ---------------
        kind, entry = classified[1]
        if kind is None:
            # TODO suggest not only when the keyword is matched, ma -> mają
            return self._suggest_verbs(words[1], noun_number, noun_gender)
        if kind == "noun":
            return ["Second word should be a verb not a noun."]
        # TODO add suggestions to fixes
        verb_readings = self._verb_readings(entry, words[1])
        if not verb_readings:
            # The word is known only as a base form, not as an inflected form.
            return [f"You have to conjugate the verb to be number: {noun_number}, gender: {noun_gender}, person: 3rd"]
        agreeing = next(
            (r for r in verb_readings
             if self._verb_error(r, noun_number, noun_gender) is None),
            None,
        )
        if agreeing is None:
            # No reading fits; explain using the first one.
            return [self._verb_error(verb_readings[0], noun_number, noun_gender)]
        # Second tag field of the verb: the case required of the object noun.
        verb_conjugation = agreeing[1]

        if len(words) == 2:
            return []

        # --- third word: object, must be a noun in the required case ------
        kind, entry = classified[2]
        if kind is None:
            return self._suggest_nouns(words[2], verb_conjugation)
        if kind == "verb":
            return ["Third word should be a noun not a verb."]
        readings = self._noun_readings(entry, words[2])
        if all(r[1] != verb_conjugation for r in readings):
            return [f"Noun should match the verb conjugation: {verb_conjugation}. But is {readings[0][1]}"]

        return []

    def categorize(self, word: str):
        """Return the noun entry for ``word``, else the verb entry, else None."""
        return self._classify(word)[1]

    # ------------------------------------------------------------ helpers

    def _nouns(self):
        """Return the injected noun table, or the module-level ``nouns``."""
        table = getattr(self, "_noun_table", None)
        return nouns if table is None else table

    def _verbs(self):
        """Return the injected verb table, or the module-level ``verbs``."""
        table = getattr(self, "_verb_table", None)
        return verbs if table is None else table

    def _classify(self, word: str):
        """Return ``(kind, entry)``; kind is "noun", "verb" or None."""
        # Nouns take precedence when a form exists in both tables.
        entry = self._nouns().categorized.get(word)
        if entry is not None:
            return "noun", entry
        entry = self._verbs().categorized.get(word)
        if entry is not None:
            return "verb", entry
        return None, None

    @staticmethod
    def _noun_readings(entry, word: str) -> list[tuple[str, ...]]:
        """Return every ``(number, case, gender)`` tag whose form is ``word``."""
        # rsplit keeps a base that itself contains "_" in one piece.
        readings = [
            tuple(tag.rsplit("_", 3)[1:])
            for tag, form in entry.versions.items()
            if form == word
        ]
        if not readings:
            raise ValueError("Something unexpected happened.")
        return readings

    @staticmethod
    def _verb_readings(entry, word: str) -> list[tuple[str, ...]]:
        """Return every six-field tag tuple of ``entry`` whose form is ``word``."""
        return [
            tuple(tag.rsplit("_", 6)[1:])
            for tag, form in entry.versions.items()
            if form == word
        ]

    @staticmethod
    def _verb_error(reading, noun_number: str, noun_gender: str):
        """Return why ``reading`` disagrees with the subject, or None if it agrees."""
        verb_number, _case, verb_gender, verb_pronoun, _time, _type = reading
        if verb_number != noun_number:
            return f"Verb should match the noun number: {noun_number}. But is {verb_number}."
        # "-" marks a verb form that is not tied to a gender.
        if verb_gender != noun_gender and verb_gender != "-":
            return f"Verb gender should match the noun gender: {noun_gender}. But is {verb_gender}."
        if verb_pronoun != "3":
            return f"Verb should be in 3rd person. But is {verb_pronoun}."
        return None

    def _suggest_nouns(self, prefix: str, case: str) -> list[str]:
        """Return noun forms in ``case`` (singular, then plural) starting with ``prefix``."""
        suggestions: list[str] = []
        for noun in self._nouns().nouns:
            stem = noun.base.upper()
            for number in self._NUMBERS:
                form = None
                # Use the first gender tag this noun actually has.
                for gender in self._GENDERS:
                    tag = f"{stem}_{number}_{case}_{gender}"
                    if tag in noun.versions:
                        form = noun.versions[tag]
                        break
                if form and form.startswith(prefix):
                    suggestions.append(form)
        return suggestions

    def _suggest_verbs(self, prefix: str, noun_number: str, noun_gender: str) -> list[str]:
        """Return 3rd-person verb forms agreeing with the subject that start with ``prefix``."""
        suggestions: list[str] = []
        for verb in self._verbs().verbs:
            stem = verb.base.upper()
            for case in self._CASES:
                for gender in (noun_gender, "-"):
                    head = f"{stem}_{noun_number}_{case}_{gender}_3"
                    # Same lookup order as the tag suffixes are listed here.
                    for tail in ("PRES_IND", "PAST_IND", "-_PRE"):
                        form = verb.versions.get(f"{head}_{tail}")
                        if form and form.startswith(prefix):
                            suggestions.append(form)
        return suggestions

In [168]:
# Demo: create a parser and check a two-word input.
my_parser = Parser()

# Left as a bare expression so the notebook displays the returned list
# (the recorded output below shows the "second word should be a verb" message).
my_parser.parse("pies pies")

['Second word should be a verb not a noun.']