In [3]:
import csv
import requests
from pathlib import Path
from random import shuffle
from bs4 import BeautifulSoup
from datetime import datetime
from audio_builder import AzureAudio

In [4]:
BASE_URL = "https://context.reverso.net/"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:87.0) Gecko/20100101 Firefox/87.0'
}

In [28]:
class ReversoFavs2Anki():
    def __init__(self, username: str,
                    audio: bool=False, 
                    src_lang: str='de', 
                    trg_lang: str='es',
                    headers=None) -> None:
        self.username = username
        self.start = 0
        self.length = 50
        self.src_lang = src_lang
        self.trg_lang = trg_lang
        self.audio = audio
        self.headers = headers

    def proccess_favs(self):
        data = self.get_favs(self.username, self.start, self.length)
        content = data['results']
        total_results = data['numTotalResults'] 

        if total_results > self.length:
            new_start = self.length
            new_length = total_results - self.length
            data = self.get_favs(self.username, new_start, new_length)
            content.extend(data['results']) 
        
        words_list = self.create_word_list(content)

        if len(words_list) < 1:
            print('No hay palabras nuevas para añadir')
            return

        self.create_csv(words_list)
        self.update_last_exec_date()
        return
    
    def create_csv(self, words_list):
        csv_file = Path("../files/words_list.csv")
        if csv_file.exists():
            csv_file.unlink()
        
        csv_file.touch()

        print(words_list)
        fieldnames = dict(words_list[0].keys())

        with open(csv_file, 'w', encoding='utf-8', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(list(fieldnames))
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            for w in words_list:
                writer.writerow(w)
        
        print(
        f'Se ha completado el proceso correctamente, se han añadido {len(words_list)} palabras')


    def create_word_list(self, data):
        src_l = self.src_lang
        trg_l = self.trg_lang
        words_dict = {}
        words_list = []

        last_date_exec = self.get_last_exec_date()
        if last_date_exec:
            try:
                last_date_exec = datetime.strptime(last_date_exec, "%Y-%m-%dT%H:%M:%SZ")
            except Exception as e:
                print(e) # TODO: cambiar a logging

        # Remove html tags in the sentences of the API
        clean_sentence = lambda w:BeautifulSoup(w, features="html.parser").get_text()
        
        for word in data:
            word_date = datetime.strptime(word['creationDate'], "%Y-%m-%dT%H:%M:%SZ")
            if word_date < last_date_exec:
                return words_list

            words_dict = {
                f'{src_l}_word': word[f'srcText'],
                f'{trg_l}_word': word[f'trgText'],
                f'{src_l}_sentence': clean_sentence(word['srcContext']),
                f'{trg_l}_sentence': clean_sentence(word['trgContext']),
                f'{src_l}_audio': '',
                'tag': '',
            }

            tag = self.get_word_tag(words_dict[f'{src_l}_word'], words_dict[f'{trg_l}_word'])
            words_dict['tag'] = tag

            if tag == 'sustantivo' and src_l == 'de':
                words_dict[f'{src_l}_word'] = self.get_noun_article(words_dict[f'{src_l}_word'])

            if self.audio:
                words_dict[f'{src_l}_audio'] = self.get_sentence_audio(f'{src_l}_sentence')

            words_list.append(words_dict)

        return words_list


    def get_favs(self, username: str, start: int, length: int):
        favs_url = "bst-web-user/user/favourites/shared"
        params = {
            'userName': username,
            'start': start,
            'length': length,
            'order': 10
        }

        try:
            req = requests.get(BASE_URL + favs_url, params=params, headers=self.headers)
            req.raise_for_status()

            return req.json()
        except requests.exceptions.HTTPError as e:
            print(e) # TODO: change to logging

    def get_word_tag(self, src_word, trg_word):
        tags = {'adj.': 'adjetivo', 
                'nn.': 'sustantivo',
                'nm.': 'sustantivo',
                'nf.': 'sustantivo', 
                'adv.': 'adverbio', 
                'v.': 'verbo', 
                'conj./prep.': 'conjuncion/preposicion'}

        query_url = "bst-query-service"
        data = {
            'source_lang': self.src_lang,
            'source_text': src_word,
            'target_lang': self.trg_lang,
            'target_text': trg_word,
            'mode': 0,
            'npage': 1,
        }

        try:
            req = requests.post(BASE_URL + query_url, json=data, headers=self.headers)
            req.raise_for_status()
            json = req.json()
            
            if len(json['dictionary_entry_list']):
                tag = json['dictionary_entry_list'][0]['pos']

                if tag in tags.keys():
                    tag = tags[tag] or ''
                    return tag

            return ''
        except requests.exceptions.HTTPError as e:
            print(e) # TODO cambiar a logging

    def get_noun_article(self, de_noun):
        leo_url = f"https://dict.leo.org/alemán-español/{de_noun}"

        try:
            req = requests.get(leo_url, headers=self.headers)
            soup = BeautifulSoup(req.text, "html.parser")
            de_noun = soup.select("#section-subst td[lang='de'] samp")

            de_article = de_noun[0].text.split(' ')[0] or ''
            de_plural = de_noun[0].find('small').text or ''
            de_noun = f"{de_article} {de_noun} - {de_plural}"
            return de_noun

        except requests.exceptions.HTTPError as e:
            print(e) # TODO: cambiar a logging

    def get_sentence_audio(self, sentence):
        audio_builder = AzureAudio()
        audio_name = audio_builder.get_audio(sentence, self.src_lang)
        
        return audio_name


    def get_last_exec_date(self):
        file_path = Path("../files/exec_date.txt")
        
        if not file_path.exists():
            file_path.touch()
            return
        
        with open(file_path, 'r') as file:
            date = file.readline()
        
        return date

    def update_last_exec_date(self):
        pass
            
            

In [29]:
from pprint import pprint
if __name__ == '__main__':
    rev = ReversoFavs2Anki('lmirandam07', audio=False, headers=HEADERS)

In [30]:
rev.proccess_favs()

[{'de_word': 'davon abhängen', 'es_word': 'depender', 'de_sentence': '"Die Zukunft deines Sohnes könnte davon abhängen".', 'es_sentence': 'El futuro de tu hijo podría depender de ello.', 'de_audio': 'azure-c2639ea4-041f-433b-9fe6-96de72606196.mp3', 'tag': 'verbo'}, {'de_word': 'Ungefähr', 'es_word': 'alrededor', 'de_sentence': 'Ungefähr fünfzig Leute starben vor Ort.', 'es_sentence': 'Alrededor de 50 personas murieron en el lugar.', 'de_audio': 'azure-dcea2a6e-c539-471f-b5aa-93fe8cf371d1.mp3', 'tag': 'adverbio'}, {'de_word': 'beeindrucken', 'es_word': 'impresionar', 'de_sentence': 'Damit ich einen Typen beeindrucken kann.', 'es_sentence': 'Así puedo impresionar a un tipo.', 'de_audio': 'azure-3ecf60f6-0569-4e5a-9523-4ec60aff7eba.mp3', 'tag': 'verbo'}, {'de_word': 'Kriegen', 'es_word': 'conseguir', 'de_sentence': 'Du musst nur zehn Punkte kriegen.', 'es_sentence': 'Lo único que debes conseguir son 10 puntos.', 'de_audio': 'azure-699d26d9-9a4d-4b90-bfa4-229c2869ca1a.mp3', 'tag': 'verbo'}

ValueError: dictionary update sequence element #0 has length 7; 2 is required