In [19]:
import os
import requests
from bs4 import BeautifulSoup
from unidecode import unidecode

### Sinônimos

In [46]:
def find_synonyms(word):
    """Look up the synonyms listed for a word on 'sinonimos.com.br'.

    The word is transliterated to ASCII with ``unidecode`` to build the page
    URL, the page is downloaded, and the text of every
    ``<a class="sinonimo">`` element is collected.

    Args:
        word: The word to look up.

    Returns:
        A sorted list of the distinct synonym strings found on the page.
        When nothing is found, a warning is printed and ``[word]`` is
        returned so callers always get a non-empty list.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the request fails or times out.
    """
    url = f'https://www.sinonimos.com.br/{unidecode(word)}/'

    # Without a timeout a single stalled connection would block the caller forever.
    req = requests.get(url, timeout=10)
    soup = BeautifulSoup(req.content, "html.parser")

    # Get all <a> tags with class 'sinonimo'.
    # Deduplicate first and sort last: a set is unordered, so sorting before
    # the set() conversion (as was done previously) had no lasting effect.
    synonyms = sorted({s.text for s in soup.find_all('a', {'class': 'sinonimo'})})

    if len(synonyms) == 0:
        print ('Nenhum resultado encontrado.')
        print (f"Verificar ortografia da palavra '{word}' e fazer a busca novamente.")
        return [word]

    return synonyms

In [49]:
directory = 'dicts'

# The de-duplicated results are written to '<directory>/final'; create the
# folder up front so opening the output file cannot fail on a fresh checkout.
os.makedirs(os.path.join(directory, 'final'), exist_ok=True)

# NOTE(review): every file below is opened with the platform default encoding.
# Consider passing an explicit encoding once the encoding of the existing
# word lists has been confirmed.

# Loop over files in the directory; the context manager closes the scandir
# iterator even if a lookup raises.
with os.scandir(directory) as entries:
    for filename in entries:  # each item is an os.DirEntry, not a plain name
        if not filename.path.endswith('_adjectives.txt'):
            continue

        # Read the initial set of words, ignoring blank lines so that no
        # lookup is issued for an empty string.
        with open(filename.path) as f:
            lines = f.readlines()
        words = [stripped for stripped in (line.strip() for line in lines) if stripped]

        # Get the synonyms and append them to the end of the same file
        # (the source word list is extended in place).
        with open(filename.path, 'a') as f_in:
            # If the file does not end with a newline, the first synonym would
            # otherwise be glued onto the last original word.
            if lines and not lines[-1].endswith('\n'):
                f_in.write('\n')

            for word in words:
                for synonym in find_synonyms(word):
                    f_in.write(synonym + '\n')

        # Remove duplicates: copy each distinct line, in first-seen order, to
        # '<directory>/final/<first three characters of the file name>_adj.txt'.
        lines_seen = set()  # holds lines already written
        out_path = os.path.join(directory, 'final', f'{filename.name[:3]}_adj.txt')
        with open(filename.path, 'r') as infile, open(out_path, 'w') as outfile:
            for line in infile:
                if line not in lines_seen:  # not a duplicate
                    outfile.write(line)
                    lines_seen.add(line)

dicts\neg_adjectives.txt
dicts\pos_adjectives.txt


### Conjugações Verbais

In [2]:
def find_verb_conjugations(verb):
    """Fetch the conjugated forms of a Portuguese verb from 'conjugacao.com.br'.

    The page ``https://www.conjugacao.com.br/verbo-<verb>/`` is downloaded
    and the text of every ``<span class="f">`` element is collected.

    Args:
        verb: The verb to look up; it is inserted into the URL as given.

    Returns:
        A sorted list of the distinct strings found on the page. When
        nothing is found, a warning is printed and ``[verb]`` is returned so
        callers always get a non-empty list.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` when the request fails or times out.
    """
    url = f'https://www.conjugacao.com.br/verbo-{verb}/'

    # Without a timeout a single stalled connection would block the caller forever.
    req = requests.get(url, timeout=10)
    soup = BeautifulSoup(req.content, "html.parser")

    # Get all <span> tags with class 'f' (presumably the conjugated forms —
    # confirm against the page markup), deduplicated and sorted.
    verbs = sorted({s.text for s in soup.find_all('span', {'class': 'f'})})

    if len(verbs) == 0:
        print ('Nenhum resultado encontrado.')
        print (f"Verificar ortografia da palavra '{verb}' e fazer a busca novamente.")
        return [verb]

    return verbs