In [197]:
import requests
import json
import re
from bs4 import BeautifulSoup

In [311]:
# Address of the local HTTP API.
# NOTE(review): no code visible in this notebook reads `url`; invoke() uses its own
# hard-coded 'http://127.0.0.1:8765' instead — confirm whether this is still needed.
url = "http://localhost:8765"
# Leading words treated as articles: when a phrase starts with one of these,
# extract_main_word() returns the word that follows it.
articles = ["der", "die", "das"]
# Placeholder pronouns that extract_main_word() strips out of a phrase.
# The combined "x/y" forms are listed before their single-word parts.
pronouns = ["etwas/jemanden", "jemanden/etwas", "sich", "jemanden", "jemandem", "etwas"]
# Prepositions that extract_main_word() strips out of a phrase.
prepositions = ["an", "auf", "hinter", "neben", "in", "über", "unter", "vor", "zwischen", "bei"]

In [262]:
import json
import urllib.request

def request(action, **params):
    """Build the payload dictionary for one API call.

    Args:
        action: Name of the action to perform (e.g. 'findNotes').
        **params: Keyword arguments forwarded unchanged as the action's parameters.

    Returns:
        A dict with the keys 'action', 'params' and 'version' (always 6).
    """
    payload = dict(action=action, params=params, version=6)
    return payload

def invoke(action, **params):
    """Send one action to the local HTTP API on 127.0.0.1:8765 and return its result.

    The payload is built with request(), serialised as UTF-8 JSON and sent in the
    body of the HTTP request. The reply must be a JSON object with exactly the two
    fields 'error' and 'result'.

    Args:
        action: Name of the action to perform.
        **params: Parameters of the action, forwarded to request().

    Returns:
        The value of the reply's 'result' field.

    Raises:
        Exception: If the reply does not have exactly two fields, lacks 'error'
            or 'result', or carries a non-null 'error' (the error value is used
            as the exception argument).
    """
    requestJson = json.dumps(request(action, **params)).encode('utf-8')
    http_request = urllib.request.Request('http://127.0.0.1:8765', requestJson)
    # Use the response as a context manager so the connection is closed even when
    # JSON decoding fails; previously the response object was never closed.
    with urllib.request.urlopen(http_request) as http_response:
        response = json.load(http_response)
    if len(response) != 2:
        raise Exception('response has an unexpected number of fields')
    if 'error' not in response:
        raise Exception('response is missing required error field')
    if 'result' not in response:
        raise Exception('response is missing required result field')
    if response['error'] is not None:
        raise Exception(response['error'])
    return response['result']

In [314]:
def extract_main_word(phrase):
    """Reduce a vocabulary phrase to the single word that should be looked up.

    Processing steps:
      1. Remove '[sound:...]' tags, commas, '|' characters and parenthesised text.
      2. Remove every whole-word occurrence of the module-level `pronouns`.
      3. Remove every whole-word occurrence of the module-level `prepositions`.
      4. Split the remainder on whitespace and pick the word.

    Args:
        phrase: The raw phrase text.

    Returns:
        The only remaining word if exactly one is left; the word after the
        article if the remainder starts with one of `articles`; otherwise ''
        (including when nothing at all is left after stripping).
    """
    # Drop audio tags, separators and parenthesised remarks.
    phrase = re.sub(r'\[sound:.*?\]|,|\||\(.*?\)', '', phrase)

    pronoun_pattern = r'\b(' + '|'.join(re.escape(item) for item in pronouns) + r')\b'
    phrase = re.sub(pronoun_pattern, '', phrase)

    preposition_pattern = r'\b(?:' + '|'.join(re.escape(item) for item in prepositions) + r')\b'
    phrase = re.sub(preposition_pattern, '', phrase)

    words = phrase.split()
    if not words:
        # Nothing left (empty phrase, or only pronouns/prepositions): indexing
        # words[0] here used to raise IndexError.
        return ''
    if len(words) == 1:
        return words[0]
    if words[0] in articles:
        return words[1]
    return ''

def add_note_frequency(note):
    """Fill in the 'Frequency' field of a note when it is still empty.

    Does nothing if the field already has a value or if no main word can be
    extracted from the note's 'Front' field. Otherwise the frequency of the
    main word is looked up, printed together with the phrase and word, and
    written back via the 'updateNoteFields' action.

    Args:
        note: Note dict providing 'noteId' and 'fields' (with 'Frequency' and
            'Front' entries, each holding a 'value').
    """
    fields = note['fields']
    if fields['Frequency']['value'] != '':
        return

    front_text = fields['Front']['value']
    main_word = extract_main_word(front_text)
    if main_word == '':
        return

    frequency = get_freuqency(main_word)
    print(front_text, main_word, frequency)
    update = {'id': note["noteId"], 'fields': {"Frequency": str(frequency)}}
    invoke('updateNoteFields', note=update)

def get_freuqency(word):
    """Look up the frequency value of a word via the DWDS frequency API.

    The function name keeps its original spelling because other code calls it.

    Args:
        word: The word to look up.

    Returns:
        The value of the 'frequency' key in the JSON reply.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    response = requests.get(
        'https://www.dwds.de/api/frequency/',
        # Let requests URL-encode the word instead of concatenating it raw, so
        # characters such as '&', '#' or spaces cannot corrupt the query string.
        params={'q': word},
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=30,
    )
    # Fail with a clear HTTP error rather than an obscure JSON decoding error.
    response.raise_for_status()
    return response.json()['frequency']
    
def get_proved_ipa(word):
    """Return the IPA transcription of a word from the DWDS IPA API, if proved.

    Only the first entry of the reply is considered, and only when its
    'status' is 'proved'.

    Args:
        word: The word to look up.

    Returns:
        The 'ipa' value of the first entry when it is marked 'proved';
        otherwise a single space ' ' (also for non-200 replies, which are
        printed, and for an empty result list).
    """
    res = requests.get(
        'https://www.dwds.de/api/ipa/',
        # Let requests URL-encode the word instead of concatenating it raw.
        params={'q': word},
        # Without a timeout a stalled connection would block the notebook forever.
        timeout=30,
    )
    if res.status_code != 200:
        print("Error: ", word, res.status_code, res.text)
        return ' '

    entries = res.json()
    if not entries:
        # An empty reply used to raise IndexError on entries[0].
        return ' '

    first_entry = entries[0]
    if first_entry.get('status') == 'proved':
        return first_entry['ipa']
    return ' '

def add_note_ipa(note):
    """Fill in the 'IPA' field of a note when it is still empty.

    Does nothing if the field already has a value or if no main word can be
    extracted from the note's 'Front' field. Otherwise the result of
    get_proved_ipa() for the main word is printed next to the word and
    written back via the 'updateNoteFields' action.

    Args:
        note: Note dict providing 'noteId' and 'fields' (with 'IPA' and
            'Front' entries, each holding a 'value').
    """
    fields = note['fields']
    if fields['IPA']['value'] != '':
        return

    main_word = extract_main_word(fields['Front']['value'])
    if main_word == '':
        return

    ipa = get_proved_ipa(main_word)
    print(main_word, ipa)
    update = {'id': note["noteId"], 'fields': {"IPA": ipa}}
    invoke('updateNoteFields', note=update)

In [319]:
# Backfill cell: fetch every note of one deck and let the helpers fill in the
# IPA and Frequency fields that are still empty. Performs network calls to the
# local note API and, through the helpers, to the DWDS lookups.
deckname = "A2-18"
ids = invoke('findNotes', query="deck:"+deckname)
print(len(ids))  # number of note ids returned by the query
notes = invoke('notesInfo', notes=ids)

for note in notes:
    # Each helper returns early when its own field already has a value.
    add_note_ipa(note)
    add_note_frequency(note)

27


In [320]:
# CSS class strings used by get_dw_words() to locate the vocabulary on the page:
# `de_class` selects the <a> elements holding the German phrases,
# `en_class` selects the <span> elements holding the English phrases.
# NOTE(review): these look like auto-generated class names and may change when
# the site is redeployed — verify against the live page if scraping returns nothing.
de_class = "sc-giDImq cdzKxF"
en_class = "sc-iwCbjw kqDtNB"

def add_note(phrase_item, deckname):
    """Create one note of model 'Smart' in the given deck via the 'addNote' action.

    Args:
        phrase_item: A 4-item sequence (de_phrase, en_phrase, freq, ipa); it is
            unpacked into the note's 'Front', 'Back', 'Frequency' and 'IPA' fields.
        deckname: Name of the deck the note is added to.
    """
    front, back, frequency, ipa_text = phrase_item
    note_fields = {
        'Front': front,
        'Back': back,
        'Frequency': frequency,
        'IPA': ipa_text,
    }
    new_note = {'deckName': deckname, 'modelName': 'Smart', 'fields': note_fields}
    invoke('addNote', note=new_note)

def get_dw_words(dw_url):
    """Scrape the vocabulary of one page into note items.

    German phrases are read from the <a> elements with class `de_class`,
    English phrases from the <span> elements with class `en_class`; the two
    lists are paired by position. Newlines are removed from the English
    phrase. When a main word can be extracted from the German phrase, its
    frequency and IPA are looked up; otherwise both are left empty.

    Args:
        dw_url: URL of the vocabulary page to download.

    Returns:
        A list of (de_phrase, en_phrase, freq, ipa) tuples of strings. If the
        page yields different numbers of German and English phrases, an error
        is printed and an empty list is returned, so callers can always
        iterate the result.

    Raises:
        requests.HTTPError: If the page request answers with an error status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    # Without a timeout a stalled connection would block the notebook forever.
    response = requests.get(dw_url, timeout=30)
    # Do not silently scrape an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    de_phrases = [tag.text for tag in soup.find_all("a", class_=de_class)]
    en_phrases = [tag.text for tag in soup.find_all("span", class_=en_class)]

    if len(de_phrases) != len(en_phrases):
        # Positional pairing would be unreliable, so give up on this page.
        print("Error: the number of German and English phrases differs "
              "({} vs {})".format(len(de_phrases), len(en_phrases)))
        return []

    items = []
    for de_phrase, en_phrase in zip(de_phrases, en_phrases):
        en_phrase = en_phrase.replace('\n', '')
        de_word = extract_main_word(de_phrase)
        if de_word == '':
            # No single word to look up: keep the phrase without extra data.
            items.append((de_phrase, en_phrase, '', ''))
        else:
            freq = get_freuqency(de_word)
            ipa = get_proved_ipa(de_word)
            items.append((de_phrase, en_phrase, str(freq), ipa))
    return items

In [322]:
# Vocabulary page URL and deck name.
# NOTE(review): presumably the arguments intended for add_dw_words(); no call is
# visible in this cell — confirm. `deckname` is also assigned in an earlier cell
# ("A2-18"), so its value depends on which cell ran last.
dw_url = "https://learngerman.dw.com/en/feste-und-feiertage/l-38263408/lv"
deckname = "A2-11-15"

def add_dw_words(dw_url, deckname):
    """Scrape the vocabulary of one page and add every item as a note.

    Args:
        dw_url: URL of the vocabulary page, passed to get_dw_words().
        deckname: Name of the deck the notes are added to.
    """
    # get_dw_words() may yield nothing usable (None) when the page cannot be
    # paired up; treat that as "no items" instead of failing on iteration.
    items = get_dw_words(dw_url) or []
    for item in items:
        # The helper defined in this notebook is add_note(); the previously
        # called add_note_item() is not defined in the visible notebook.
        add_note(item, deckname)