In [1]:
import os

import requests
import pandas as pd

In [None]:
# The Ordbank API key is read from the ORDBANK_API_KEY environment variable so
# that the secret never has to be typed into (or committed with) the notebook.
# When the variable is unset the key is the empty string, as it was before.
headers = {'x-api-key': os.environ.get("ORDBANK_API_KEY", "")}

In [3]:
def parse_as_list(s):
    """Turn a brace-wrapped, comma-separated string into a list of strings.

    Leading and trailing ``{`` / ``}`` characters are stripped from ``s``, the
    remainder is split on commas, and surrounding whitespace is removed from
    every piece. An input with nothing between the braces yields ``[""]``.
    """
    inner = s.strip("{}")
    items = []
    for piece in inner.split(","):
        items.append(piece.strip())
    return items

In [None]:
# Load the linked-lemma tables for Nynorsk ("nno") and Bokmål ("nob").
# The "lemma_id" and "lemma" cells are stripped of braces and split on commas
# while reading, so each cell ends up as a Python list (ints for the ids,
# strings for the lemmata).
nno_lemmata = pd.read_csv(
    "../../data/lemma-multi/nno.csv",
    converters={
        "lemma_id": lambda cell: list(map(int, cell.strip("{}").split(","))),
        "lemma": parse_as_list,
    },
)
nob_lemmata = pd.read_csv(
    "../../data/lemma-multi/nob.csv",
    converters={
        "lemma_id": lambda cell: list(map(int, cell.strip("{}").split(","))),
        "lemma": parse_as_list,
    },
)

In [5]:
def query_ordbank_api(api_url):
    """Send a GET request to ``api_url`` and return the decoded JSON body.

    The request carries the notebook-level ``headers`` (which hold the API
    key). Any status code other than 200 results in ``None``.
    """
    response = requests.get(api_url, headers=headers)
    if response.status_code != 200:
        return None
    return response.json()

In [6]:
def get_linked_lemmata(target_id, lang="nob"):
    """Return all lemmata that share a row with ``target_id`` in the lemma table.

    Parameters
    ----------
    target_id : int
        Lemma id to look for inside the ``lemma_id`` lists of the table.
    lang : str
        ``"nob"`` selects ``nob_lemmata``, ``"nno"`` selects ``nno_lemmata``.

    Returns
    -------
    list of (lemma_id, lemma) tuples or None
        The id/lemma pairs of the first row whose ``lemma_id`` list contains
        ``target_id``. ``None`` when no row contains it or when ``lang`` is
        neither ``"nob"`` nor ``"nno"``.
    """
    if lang == "nob":
        df = nob_lemmata
    elif lang == "nno":
        df = nno_lemmata
    else:
        # Unknown language: there is no table to search. Returned explicitly
        # instead of relying on a swallowed exception.
        return None

    # Walk the rows in table order and stop at the first one that lists the
    # target id; its ids and lemmata are aligned position by position.
    for ids, lemmas in zip(df["lemma_id"], df["lemma"]):
        if target_id in ids:
            return list(zip(ids, lemmas))

    return None

In [7]:
def get_inflection_forms(lemma_id, lang=None):
    """Yield ``(word_form, tags)`` pairs for every inflected form of a lemma.

    Parameters
    ----------
    lemma_id : int
        Lemma id sent to the Ordbank API as the query.
    lang : str, optional
        Language code put into the request URL. When omitted, the
        notebook-level variable ``lang`` is used (``"nob"`` if that variable
        does not exist), which keeps existing one-argument calls working.

    Yields
    ------
    tuple
        ``(word_form, tags)`` where ``tags`` is the entry's tag list joined
        with underscores. Only the first lemma of the response is read.
        Nothing is yielded when the request fails or returns no lemma.
    """
    if lang is None:
        # Backward compatibility: this function used to read the global `lang`.
        lang = globals().get("lang", "nob")

    lemma_URL = f"https://clarino.uib.no/ordbank-api-prod/lemmas?query={lemma_id}&stubs=false&include_dict_links=true&extended_vocabulary=true&language={lang}&search_inflection=false"
    # Use the shared helper so a non-200 answer becomes None rather than an
    # exception raised while decoding or indexing the body.
    lemmas = query_ordbank_api(lemma_URL)
    if not lemmas:
        return

    for paradigm in lemmas[0]["paradigm_info"]:
        for entry in paradigm["inflection"]:
            yield (entry["word_form"], "_".join(entry["tags"]))

In [8]:
def search(lang="nob", word_form=None, word_class=None):
    """Collect the inflected forms of every lemma matching a word form.

    The Ordbank API is asked for lemmata that have ``word_form`` among their
    inflected forms. Hits whose ``word_class`` equals the requested one are
    expanded to their linked lemmata (``get_linked_lemmata``), and the
    inflection forms of each resulting lemma are gathered.

    Parameters
    ----------
    lang : str
        Language code used in the request URL and for the linked-lemma lookup.
    word_form : str
        Word form to search for; inflection search is enabled in the URL.
    word_class : str
        Word class sent to the API and used to filter the hits. With the
        default ``None`` the text ``None`` is put into the URL and hits are
        compared against ``None``.

    Returns
    -------
    set of (word_form, tags) tuples
        Empty when the API request yields no usable response.
    """
    search_URL = f"https://clarino.uib.no/ordbank-api-prod/lemmas?query={word_form}&stubs=false&include_dict_links=true&extended_vocabulary=true&language={lang}&search_inflection=true&word_class={word_class}"
    search_response = query_ordbank_api(search_URL)

    wordform_tag_set = set()
    if not search_response:
        return wordform_tag_set

    # Several hits can link to the same lemmata; remember which ids were
    # already requested so each inflection table is downloaded only once.
    fetched_lemma_ids = set()

    for entry in search_response:
        if entry["word_class"] != word_class:
            continue
        target_lemma_id = entry["id"]
        # Try to find linked lemmata; fall back to the hit itself.
        linked_lemmata = get_linked_lemmata(target_lemma_id, lang=lang)
        if linked_lemmata is None:
            linked_lemmata = [(target_lemma_id,)]
        for alternative in linked_lemmata:
            alternative_lemma_id = alternative[0]
            if alternative_lemma_id in fetched_lemma_ids:
                continue
            fetched_lemma_ids.add(alternative_lemma_id)
            # NOTE: called with the lemma id only, so the language of this
            # request is taken from the notebook-level `lang` variable; keep
            # that global in sync with the `lang` argument passed here.
            wordform_tag_set.update(get_inflection_forms(alternative_lemma_id))

    return wordform_tag_set

# First example: sykehus in bokmål

In [15]:
# Parameters for this example: the language code, the word form to start from
# (any inflected form may be used) and the word class to look up.
lang, word_form, word_class = "nob", "sykehus", "NOUN"

In [16]:
# Run the lookup with the parameters chosen above; the returned set of
# (word form, tags) pairs is displayed as the cell output.
search(lang=lang, word_form=word_form, word_class=word_class)

{('sjukehus', 'Plur_Ind'),
 ('sjukehus', 'Sing_Ind'),
 ('sjukehusa', 'Plur_Def'),
 ('sjukehusene', 'Plur_Def'),
 ('sjukehuset', 'Sing_Def'),
 ('sykehus', 'Plur_Ind'),
 ('sykehus', 'Sing_Ind'),
 ('sykehusa', 'Plur_Def'),
 ('sykehusene', 'Plur_Def'),
 ('sykehuset', 'Sing_Def')}

# Second example: kaste in bokmål

In [11]:
# Parameters for this example: the language code, the word form to start from
# (any inflected form may be used) and the word class to look up.
lang, word_form, word_class = "nob", "kaste", "VERB"

In [12]:
# Run the lookup with the parameters chosen above; the returned set of
# (word form, tags) pairs is displayed as the cell output.
search(lang=lang, word_form=word_form, word_class=word_class)

{('kast', 'Imp'),
 ('kasta', '<PerfPart>'),
 ('kasta', 'Adj_<PerfPart>_Def_Sing'),
 ('kasta', 'Adj_<PerfPart>_Masc/Fem_Ind_Sing'),
 ('kasta', 'Adj_<PerfPart>_Neuter_Ind_Sing'),
 ('kasta', 'Adj_<PerfPart>_Plur'),
 ('kasta', 'Past'),
 ('kaste', 'Inf'),
 ('kastede', 'Adj_<PerfPart>_Def_Sing'),
 ('kastede', 'Adj_<PerfPart>_Plur'),
 ('kastende', 'Adj_<PresPart>'),
 ('kaster', 'Pres'),
 ('kastes', 'Inf_Pass'),
 ('kastes', 'Pres_Pass'),
 ('kastet', '<PerfPart>'),
 ('kastet', 'Adj_<PerfPart>_Masc/Fem_Ind_Sing'),
 ('kastet', 'Adj_<PerfPart>_Neuter_Ind_Sing'),
 ('kastet', 'Past'),
 ('kastete', 'Adj_<PerfPart>_Def_Sing'),
 ('kastete', 'Adj_<PerfPart>_Plur'),
 (None, '')}

# Third example: vi/me

In [13]:
# Parameters for this example: the language code, the word form to start from
# (any inflected form may be used) and the word class to look up.
lang, word_form, word_class = "nno", "me", "PRON"

In [14]:
# Run the lookup with the parameters chosen above; the returned set of
# (word form, tags) pairs is displayed as the cell output.
search(lang=lang, word_form=word_form, word_class=word_class)

{('me', 'Nom'), ('oss', 'Acc'), ('vi', 'Nom')}