## Exercício de Lematização em Processamento de Linguagem Natural

In [3]:
import string
from nltk.stem import WordNetLemmatizer


In [4]:
lemas_manuais = {
    'running': 'run',
    'better': 'good',
    'studies': 'study',
    'wolves': 'wolf',
    'mice': 'mouse',
    'children': 'child',
    'was': 'be',
    'were': 'be',
    'ate': 'eat',
    'swimming': 'swim',
    'parties': 'party',
    'leaves': 'leaf',
    'knives': 'knife',
    'happier': 'happy',
    'studying': 'study',
    'played': 'play',
    'goes': 'go',
    'driving': 'drive',
    'talked': 'talk',
    'playing': 'play',
    'howled': 'howl',
    'scurried': 'scurry',
    'used': 'use',
    'taking': 'take',
    'cars': 'car',
    'chefs': 'chef',
    'dishes': 'dish'
}


In [5]:
frases = [
    "The children were playing in the leaves yesterday.",
    "She studies computer science and is taking three courses.",
    "The wolves howled at the moon while mice scurried in the grass.",
    "He was driving faster than the cars around him.",
    "The chefs used sharp knives to prepare the tastiest dishes."
]


In [6]:
def limpar_frase(frase):
    frase = frase.lower()  # Coloca em minúsculas
    frase = ''.join(caractere for caractere in frase if not caractere.isdigit())  # Remove dígitos
    for pontuacao in string.punctuation:
        frase = frase.replace(pontuacao, '')  # Remove pontuação
    frase = frase.strip()
    return frase


In [7]:
def lematizar_frase(frase, dicionario_lemas):
    frase_limpa = limpar_frase(frase)
    palavras = frase_limpa.split()
    palavras_lematizadas = [dicionario_lemas.get(palavra, palavra) for palavra in palavras]  # Substitui se estiver no dicionário
    return ' '.join(palavras_lematizadas)


In [8]:
frases_lematizadas = [lematizar_frase(frase, lemas_manuais) for frase in frases]

In [11]:
for frase_original, frase_lematizada in zip(frases, frases_lematizadas):
    print({frase_lematizada})



{'the child be play in the leaf yesterday'}
{'she study computer science and is take three courses'}
{'the wolf howl at the moon while mouse scurry in the grass'}
{'he be drive faster than the car around him'}
{'the chef use sharp knife to prepare the tastiest dish'}


In [12]:
lemmatizador = WordNetLemmatizer()


In [13]:
def lematizar_com_nltk(frase):
    frase_limpa = limpar_frase(frase)
    palavras = frase_limpa.split()
    # Para simplificar, consideraremos todas como verbo (pos='v')
    palavras_lematizadas = [lemmatizador.lemmatize(palavra, pos='v') for palavra in palavras]
    return ' '.join(palavras_lematizadas)


In [19]:
frases_lematizadas_nltk = [lematizar_com_nltk(frase) for frase in frases]
frases_lematizadas_manual = [lematizar_frase(frase, lemas_manuais) for frase in frases]


In [23]:
print("== Comparação das frases ==")

for frase_original, frase_manual, frase_nltk in zip(frases, frases_lematizadas_manual, frases_lematizadas_nltk):
    print(f"Frase Original: {frase_original}")
    print(f"Lematização Manual: {frase_manual}")
    print(f"Lematização NLTK: {frase_nltk}")
    print('*' * 80)


== Comparação das frases ==
Frase Original: The children were playing in the leaves yesterday.
Lematização Manual: the child be play in the leaf yesterday
Lematização NLTK: the children be play in the leave yesterday
********************************************************************************
Frase Original: She studies computer science and is taking three courses.
Lematização Manual: she study computer science and is take three courses
Lematização NLTK: she study computer science and be take three course
********************************************************************************
Frase Original: The wolves howled at the moon while mice scurried in the grass.
Lematização Manual: the wolf howl at the moon while mouse scurry in the grass
Lematização NLTK: the wolves howl at the moon while mice scurry in the grass
********************************************************************************
Frase Original: He was driving faster than the cars around him.
Lematização Manual: he b