In [None]:
import pandas as pd
import pickle
from datetime import datetime

In [None]:
def declanate(word:str, fem:bool)->dict:
    """Build the singular case forms of a word from its ending.

    The rules are purely suffix-based: the ending of ``word`` selects one of
    a handful of patterns and the case endings are swapped in accordingly.

    Parameters
    ----------
    word : str
        The word in its nominative singular form. Trailing whitespace is
        ignored.
    fem : bool
        Only consulted for words ending in "s"/"š" that did not already match
        the "us"/"is" rules: a truthy value selects the feminine pattern, a
        falsy value the masculine one.

    Returns
    -------
    dict
        Maps "nominative", "genitive", "dative", "accusative" and "locative"
        to the corresponding form. A word whose ending matches no rule is
        returned unchanged for every case.
    """
    # rstrip() instead of dropping exactly one character: several trailing
    # spaces (or a tab/newline) must not leak into the suffix checks below.
    word = word.rstrip()

    minus_one = word[:-1]
    minus_two = word[:-2]

    # Order matters: "us" and "is" have to be tested before the generic
    # "s"/"š" rule, otherwise they would never be reached.
    if word.endswith("us"):
        oblique = (word, minus_one + "m", minus_one, minus_two + "ū")
    elif word.endswith("is"):
        oblique = (minus_two + "ja", minus_one + "m", minus_one, minus_two + "ī")
    elif word.endswith("a"):
        oblique = (word + "s", word + "i", minus_one + "u", minus_one + "ā")
    elif word.endswith("e"):
        oblique = (word + "s", word + "i", minus_one + "i", minus_one + "ē")
    elif word.endswith(("s", "š")):
        if not fem:
            oblique = (minus_one + "a", minus_one + "am", minus_one + "u", minus_one + "ā")
        else:
            oblique = (word, minus_one + "ij", minus_one + "i", minus_one + "ī")
    else:
        # Unknown ending: leave the word untouched in every case.
        oblique = (word, word, word, word)

    genitive, dative, accusative, locative = oblique
    return {
        "nominative": word,
        "genitive": genitive,
        "dative": dative,
        "accusative": accusative,
        "locative": locative,
    }

# Cabinet Members and Presidents

In [None]:
# Read the cabinet-member spreadsheet (OpenDocument format, hence the "odf"
# engine), then show its dimensions and first rows as a quick sanity check.
df = pd.read_excel(io="./data/CabinetMembersLatvia.ods", engine="odf")
print(df.shape)
df.head()

# Cabinet Member Forms

In [None]:
def resolve_time(input_str:str):
    """Turn a dash-separated "day-month-year" string into a datetime.

    The string is split on "-" and each piece converted to int; the first
    three pieces are read as day, month and year. Any further pieces are
    ignored.
    """
    numbers = list(map(int, input_str.split("-")))
    year, month, day = numbers[2], numbers[1], numbers[0]
    return datetime(year=year, month=month, day=day)


# One output record per (source row, grammatical case, name style). For every
# case two records are emitted, in this order:
#   1. "<declined first name(s)> <declined last name>"
#   2. "<first initial>. <declined last name>"
data = []

cases = ["nominative", "genitive", "dative", "accusative", "locative"]

for _, row in df.iterrows():
    fem = row["gender"] == "f"

    # Decline each whitespace-separated part of the first name on its own and
    # glue the results back together; a single-part name is just the
    # one-element case of the same rule.
    declined_parts = [declanate(part, fem) for part in row["person_name"].split()]
    name_forms = {
        case: " ".join(part_forms[case] for part_forms in declined_parts)
        for case in cases
    }
    lastname_forms = declanate(row["person_lastname"], fem)

    # Everything below is identical for all ten records of this row, so it is
    # prepared once instead of once per record.
    base = dict(row.items())
    base["from"] = resolve_time(base["from"])
    base["to"] = resolve_time(base["to"])
    initial = row["person_name"][0] + "."

    for case in cases:
        data.append({**base, "names": name_forms[case] + " " + lastname_forms[case]})
        data.append({**base, "names": initial + " " + lastname_forms[case]})

df2 = pd.DataFrame(data)
print(df2.shape)
df2.head()
    

In [None]:
# Show the last 20 rows of df2.
df2.tail(20)

In [None]:
# Manual override for "VVF" (Vaira Vīķe-Freiberga). declanate() only changes
# the end of a string, so the first half of the hyphenated surname is left
# uninflected; the correct forms are written in by hand here.
# NOTE(review): labels 2430-2439 are hard-coded and presumably are her ten
# rows in df2 -- this depends on the row order of the input file.
vvf_names = [
    "Vaira Vīķe-Freiberga",
    "V. Vīķe-Freiberga",
    "Vairas Vīķes-Freibergas",
    "V. Vīķes-Freibergas",
    "Vairai Vīķei-Freibergai",
    "V. Vīķei-Freibergai",
    "Vairu Vīķi-Freibergu",
    "V. Vīķi-Freibergu",
    "Vairā Vīķē-Freibergā",
    "V. Vīķē-Freibergā",
]

for row_label, fixed_name in enumerate(vvf_names, start=2430):
    # Boolean mask rather than a direct label lookup: a missing label then
    # simply matches nothing.
    df2.loc[df2.index == row_label, 'names'] = fixed_name


df2.tail(10)

In [None]:
# Collapse the per-form rows into one row per (person_id, cabinet_no): the
# scalar columns keep their first value, the name forms are gathered in a list.
aggregations = {
    "cabinet_no": "first",
    "person_id": "first",
    "ministry": "first",
    "from": "first",
    "to": "first",
    "names": list,
}
df3 = df2.groupby(["person_id", "cabinet_no"]).agg(aggregations)

# The first collected form of each group becomes the display name.
df3["person_name"] = df3["names"].map(lambda forms: forms[0])
df3.head()

In [None]:
# Show the last 20 rows of df3.
df3.tail(20)

In [None]:
# Copy the wanted columns out of df3 as plain lists, which gives a new frame
# with a default integer index instead of df3's group index, then order the
# rows by cabinet and ministry.
cols = ["cabinet_no", "person_id", "ministry", "from", "to", "names", "person_name"]
df4 = pd.DataFrame({col: list(df3[col]) for col in cols}).sort_values(["cabinet_no", "ministry"])
print(df4.shape)
df4.head()

In [None]:
# Convert df4 into a list of dicts, one dict per row.
dfl = df4.to_dict(orient= "records")

# Institutions

In [None]:
# Noun appended to every ministry name (Latvian for "ministry"). It was
# misspelled "minitrija", which put the typo into every generated form.
word = "ministrija"

# Singular case forms of the noun, using the feminine rules.
word_forms = declanate(word, True)

institution_forms = []
# Values of the "ministry" column that get no institution record.
# NOTE(review): these look like office titles rather than ministries.
bads = ["Prezidente", "Prezidents", "Premjers"]

# unique() keeps first-appearance order, so the records come out in the same
# order on every run; iterating a set of strings does not guarantee that.
for item in df["ministry"].unique():
    if item in bads:
        continue
    institution_forms.append({
        "cabinet_no": 1,
        "person_id": item,   # the ministry name is reused as the identifier
        "ministry": item,
        # Fixed validity window applied to every institution record.
        "from": datetime(1998, 1, 1),
        "to": datetime(2024, 1, 1),
        "names": [item + " " + form for form in word_forms.values()],
    })

len(institution_forms)

In [None]:
# Spot-check one generated institution record (the element at position 10).
institution_forms[10]

In [None]:
# Rebuild the combined list from df4 rather than extending dfl in place, so
# re-running this cell cannot append the institution records a second time.
dfl = df4.to_dict(orient="records") + institution_forms

In [None]:
# Serialise the combined record list to a pickle file in the working directory.
with open("Cabinet_data_NER_LV.pkl", mode="wb") as pkl_out:
    pickle.dump(dfl, pkl_out)