In [1]:
import pandas as pd
import pickle
from datetime import datetime

In [2]:
def declanate(word:str, fem:bool)->list:
    """Provides singular declanation for a given word"""
    
    if word.endswith(" "):
        word = word[:-1]
    
    res = {"nominative" : word}
    if word.endswith("us"):
        res["genitive"] = word
        res["dative"] = word[:-1]+"m"
        res["accusative"] = word[:-1]
        res["locative"] = word[:-2]+"ū"

    elif word.endswith("is"):
        res["genitive"] = word[:-2]+"ja"
        res["dative"] = word[:-1]+"m"
        res["accusative"] = word[:-1]
        res["locative"] = word[:-2]+"ī"  
    
    elif word.endswith("a"):
        res["genitive"] = word+"s"
        res["dative"] = word+"i"
        res["accusative"] = word[:-1]+"u"
        res["locative"] = word[:-1]+"ā"

    elif word.endswith("e"):
        res["genitive"] = word+"s"
        res["dative"] = word+"i"
        res["accusative"] = word[:-1]+"i"
        res["locative"] = word[:-1]+"ē"  
        
    elif word.endswith("s") or word.endswith("š"):
    
        if fem == False:            
            res["genitive"] = word[:-1]+"a"
            res["dative"] = word[:-1]+"am"
            res["accusative"] = word[:-1]+"u"
            res["locative"] = word[:-1]+"ā"      
            
        else:
            res["genitive"] = word
            res["dative"] = word[:-1]+"ij"
            res["accusative"] = word[:-1]+"i"
            res["locative"] = word[:-1]+"ī"      
    else:
        res["genitive"] = word
        res["dative"] = word
        res["accusative"] = word
        res["locative"] = word      

            
    return res

# Cabinet Members and presidents

In [3]:
df = pd.read_excel("./data/CabinetMembersLatvia.ods", engine = "odf")
print(df.shape)
df.head()

(244, 8)


Unnamed: 0,person_id,person_name,person_lastname,gender,ministry,from,to,cabinet_no
0,a_skele,Andris,Šķēle,m,Premjers,16-07-1999,05-05-2000,27
1,g_kristovskis,Ģirts Valdis,Kristovskis,m,Aizsardzības,16-07-1999,05-05-2000,27
2,i_berzins,Indulis,Bērziņš,m,Ārlietu,16-07-1999,05-05-2000,27
3,v_makarovs,Vladimirs,Makarovs,m,Ekonomikas,16-07-1999,06-04-2000,27
4,e_krastins,Edmunds,Krastiņš,m,Finanšu,16-07-1999,05-05-2000,27


# Cabinet Member Forms

In [22]:
def resolve_time(input_str:str):
    parts = [int(i) for i in input_str.split("-")]
    dt = datetime(parts[2], parts[1], parts[0])
    return dt


data = []

cases = ["nominative", "genitive", "dative", "accusative", "locative"]

for index, row in df.iterrows():
    fem = False
    if row["gender"] == "f":
        fem = True
    if " " not in row["person_name"]:
        name_forms = declanate(row["person_name"], fem)
    else:
        parts = row["person_name"].split()
        name_forms = {"nominative" : "", "genitive" : "", "dative" : "", "accusative" : "", "locative" : ""}
        for p in parts:
            pforms = declanate(p, fem)
            for key, value in pforms.items():
                name_forms[key] += value + " "
        for key, value in name_forms.items():
            name_forms[key] = value[:-1]
            
    lastname_forms = declanate(row["person_lastname"], fem)
    
    for case in cases:
        d = {key:value for key, value in row.items()}
        d["from"] = resolve_time(d["from"])
        d["to"]=resolve_time(d["to"])
        
        name_form = name_forms[case]+" "+lastname_forms[case]
        d["names"] = name_form
        data.append(d)
        
        d = {key:value for key, value in row.items()}
        d["from"] = resolve_time(d["from"])
        d["to"]=resolve_time(d["to"])
        
        name_form = row["person_name"][0]+". "+lastname_forms[case]
        d["names"] = name_form
        data.append(d)
        
df2 = pd.DataFrame(data)
print(df2.shape)
df2.head()
    

(2440, 9)


Unnamed: 0,person_id,person_name,person_lastname,gender,ministry,from,to,cabinet_no,names
0,a_skele,Andris,Šķēle,m,Premjers,1999-07-16,2000-05-05,27,Andris Šķēle
1,a_skele,Andris,Šķēle,m,Premjers,1999-07-16,2000-05-05,27,A. Šķēle
2,a_skele,Andris,Šķēle,m,Premjers,1999-07-16,2000-05-05,27,Andrja Šķēles
3,a_skele,Andris,Šķēle,m,Premjers,1999-07-16,2000-05-05,27,A. Šķēles
4,a_skele,Andris,Šķēle,m,Premjers,1999-07-16,2000-05-05,27,Andrim Šķēlei


In [23]:
df2.tail()

Unnamed: 0,person_id,person_name,person_lastname,gender,ministry,from,to,cabinet_no,names
2435,v_freiberga,Vaira Viķe,Freiberga,f,Prezidente,1999-07-08,2007-07-08,6,V. Freibergai
2436,v_freiberga,Vaira Viķe,Freiberga,f,Prezidente,1999-07-08,2007-07-08,6,Vairu Viķi Freibergu
2437,v_freiberga,Vaira Viķe,Freiberga,f,Prezidente,1999-07-08,2007-07-08,6,V. Freibergu
2438,v_freiberga,Vaira Viķe,Freiberga,f,Prezidente,1999-07-08,2007-07-08,6,Vairā Viķē Freibergā
2439,v_freiberga,Vaira Viķe,Freiberga,f,Prezidente,1999-07-08,2007-07-08,6,V. Freibergā


In [24]:
df3 = df2.groupby(["person_id", "cabinet_no"]).agg({"cabinet_no": "first", "person_id":"first", "ministry":"first", "from":"first", "to":"first", "names":lambda x: list(x)})
df3["person_name"] = df3.apply(lambda x : x["names"][0], axis = 1)
df3.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,cabinet_no,person_id,ministry,from,to,names,person_name
person_id,cabinet_no,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
a_aksenoks,29,29,a_aksenoks,Tieslietu,2002-11-07,2004-03-09,"[Aivars Aksenoks, A. Aksenoks, Aivara Aksenoka...",Aivars Aksenoks
a_aseradens,39,39,a_aseradens,Ekonomikas,2016-02-11,2019-01-23,"[Arvils Ašeradens, A. Ašeradens, Arvila Ašerad...",Arvils Ašeradens
a_auders,29,29,a_auders,Veselības,2003-01-16,2003-03-20,"[Āris Auders, Ā. Auders, Ārja Audera, Ā. Auder...",Āris Auders
a_bastiks,30,30,a_bastiks,Bērnu un ģimenes lietu,2004-05-27,2004-12-02,"[Ainars Baštiks, A. Baštiks, Ainara Baštika, A...",Ainars Baštiks
a_bastiks,31,31,a_bastiks,Bērnu un ģimenes lietu,2004-12-02,2006-11-07,"[Ainars Baštiks, A. Baštiks, Ainara Baštika, A...",Ainars Baštiks


In [25]:
df4 = pd.DataFrame()
cols = ["cabinet_no", "person_id", "ministry", "from", "to", "names", "person_name"]
for col in cols:
    df4[col] = list(df3[col])
df4 = df4.sort_values(["cabinet_no", "ministry"])
print(df4.shape)
df4.head()

(238, 7)


Unnamed: 0,cabinet_no,person_id,ministry,from,to,names,person_name
228,6,v_freiberga,Prezidente,1999-07-08,2007-07-08,"[Vaira Viķe Freiberga, V. Freiberga, Vairas Vi...",Vaira Viķe Freiberga
234,7,v_zatlers,Prezidents,2007-07-08,2011-07-08,"[Valdis Zatlers, V. Zatlers, Valdja Zatlera, V...",Valdis Zatlers
6,8,a_berzins,Prezidents,2011-07-08,2015-07-08,"[Andris Bērziņš, A. Bērziņš, Andrja Bērziņa, A...",Andris Bērziņš
199,9,r_vejonis,Prezidents,2015-07-08,2019-07-08,"[Raimonds Vējonis, R. Vējonis, Raimonda Vējonj...",Raimonds Vējonis
72,10,e_levits,Prezidents,2019-07-08,2023-07-08,"[Eglis Levits, E. Levits, Eglja Levita, E. Lev...",Eglis Levits


In [26]:
dfl = df4.to_dict(orient= "records")

# Institutions

In [27]:
word = "minitrija"

word_forms = declanate(word, True)

institution_forms = []
bads = ["Prezidente", "Prezidents", "Premjers"]

for item in list(set(df.ministry)):
    if item not in  bads:
        forms = []
        for item2 in word_forms.values():
            plh = item + " " + item2
            forms.append(plh)
        d = {"cabinet_no" : 1, 
            "person_id" : item, 
            "ministry" : item, 
            "from" : datetime(1998,1,1), 
            "to" : datetime(2024, 1, 1), 
            "names" : forms}
            
            
            
        institution_forms.append(d)
        
len(institution_forms)

17

In [28]:
institution_forms[10]

{'cabinet_no': 1,
 'person_id': 'Ekonomikas',
 'ministry': 'Ekonomikas',
 'from': datetime.datetime(1998, 1, 1, 0, 0),
 'to': datetime.datetime(2024, 1, 1, 0, 0),
 'names': ['Ekonomikas minitrija',
  'Ekonomikas minitrijas',
  'Ekonomikas minitrijai',
  'Ekonomikas minitriju',
  'Ekonomikas minitrijā']}

In [29]:
dfl += institution_forms

In [30]:
with open("Cabinet_data_NER_LV.pkl", "wb") as file:
    pickle.dump(dfl, file)

In [None]:
list(set(df.ministry))

In [None]:
def declanate(word:str, fem:bool)->list:
    """Provides singular declanation for a given word"""
    
    res = {"nominative" : word}

    
    if fem == False:
        if word.endswith("us"):
            res["genitive"] = word
            res["dative"] = word[:-1]+"m"
            res["accusative"] = word[:-1]
            res["locative"] = word[:-2]+"ū"
            
        elif word.endswith("is"):
            res["genitive"] = word[:-2]+"ja"
            res["dative"] = word[:-1]+"m"
            res["accusative"] = word[:-1]
            res["locative"] = word[:-2]+"ī"  
            
        elif word.endswith("s") or word.endswith("š"):
            res["genitive"] = word[:-1]+"a"
            res["dative"] = word[:-1]+"am"
            res["accusative"] = word[:-1]+"u"
            res["locative"] = word[:-1]+"ā"      
            
    else:
        if word.endswith("a"):
            res["genitive"] = word+"s"
            res["dative"] = word+"i"
            res["accusative"] = word[:-1]+"u"
            res["locative"] = word[:-1]+"ā"
            
        elif word.endswith("e"):
            res["genitive"] = word+"s"
            res["dative"] = word+"i"
            res["accusative"] = word[:-1]+"i"
            res["locative"] = word[:-1]+"ē"  
            
        elif word.endswith("s") or word.endswith("s"):
            res["genitive"] = word
            res["dative"] = word[:-1]+"ij"
            res["accusative"] = word[:-1]+"i"
            res["locative"] = word[:-1]+"ī"      
            
            
    return res