# Ejercicio práctico de utilización de la librería NLTK

In [2]:
# INSTALACION DE LA LIBRERIA
!pip install nltk



In [8]:
# Import the library so its functions are available to the rest of the notebook.
import nltk
import warnings
# Install a blanket "ignore" filter: no Python warnings are shown from here on.
warnings.filterwarnings("ignore")

**EJEMPLO DE TOKENIZACIÓN:**

División de una oración en palabras
individuales. Usando NLTK, se puede tokenizar
una oración en palabras individuales, lo que
facilita el análisis posterior del texto.

In [9]:
from nltk.tokenize import word_tokenize

# Fetch the Punkt sentence-tokenizer models that word_tokenize relies on.
nltk.download('punkt')

sentence = "NLTK es una biblioteca de procesamiento de lenguaje natural."
# The sentence is Spanish, so select the Spanish Punkt model explicitly;
# word_tokenize otherwise falls back to its default, language="english".
tokens = word_tokenize(sentence, language="spanish")
print(tokens)

['NLTK', 'es', 'una', 'biblioteca', 'de', 'procesamiento', 'de', 'lenguaje', 'natural', '.']


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


**EJEMPLO DE DERIVACIÓN**

Reducción de palabras a su raíz (*stem*). Con
NLTK se puede aplicar un algoritmo de derivación
(*stemming*), como el de Porter, que recorta los
sufijos de cada palabra para obtener su forma base.

In [10]:
from nltk.stem import PorterStemmer

# Sample English words in inflected forms.
words = ["running", "plays", "jumped"]
stemmer = PorterStemmer()
# Run the Porter stemmer over every word, keeping the input order.
stems = list(map(stemmer.stem, words))
print(stems)

['run', 'play', 'jump']


**EJEMPLO DE ETIQUETADO**

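Etiquetado gramatical de palabras en una
oración. Utilizando NLTK, se puede etiquetar
cada palabra en una oración con su función
gramatical. El etiquetador por defecto de NLTK
está entrenado con textos en inglés, por lo que
las etiquetas obtenidas para una oración en
español son solo ilustrativas.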

In [11]:
from nltk import pos_tag
from nltk.tokenize import word_tokenize

# Fetch the pretrained averaged-perceptron model used by pos_tag.
nltk.download('averaged_perceptron_tagger')

sentence = "NLTK es una biblioteca de procesamiento de lenguaje natural."
# The sentence is Spanish, so use the Spanish Punkt model rather than the
# English default when splitting it into tokens.
tokens = word_tokenize(sentence, language="spanish")
# NOTE(review): the tagger is made explicit here (lang="eng" is pos_tag's
# default). It is an English model, so the tags it assigns to Spanish tokens
# are not linguistically reliable (e.g. "es" comes out as a conjunction);
# treat the result as a demonstration of the API only.
tagged_words = pos_tag(tokens, lang="eng")
print(tagged_words)

[('NLTK', 'NNP'), ('es', 'CC'), ('una', 'JJ'), ('biblioteca', 'NN'), ('de', 'IN'), ('procesamiento', 'FW'), ('de', 'FW'), ('lenguaje', 'FW'), ('natural', 'JJ'), ('.', '.')]


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


# Ejemplo básico de clasificación de texto utilizando el clasificador Naive Bayes de NLTK

In [12]:
import nltk
import random

In [13]:
# Labelled dataset: each entry is a (review sentence, sentiment label) pair,
# where the label is either "positive" or "negative".
data = [
    ("I love this movie", "positive"),
    ("This movie is terrible", "negative"),
    ("This movie is great", "positive"),
    ("I dislike this movie", "negative"),
    ("This film is amazing", "positive"),
    ("The acting in this movie is phenomenal", "positive"),
    ("I enjoyed watching my time on this film", "positive"),
    ("Thoroughly enjoyed this movie", "positive"),
    ("The plot of this movie was captivating", "positive"),
    ("I found this movie to be very dull", "negative"),
    ("This film was good but needed more substance", "positive"),
    ("I found this movie fun but with issues", "positive"),
    ("The storyline was predictable and unoriginal", "negative"),
    ("I was disappointed by the lack of character development", "negative"),
    ("The dialogue felt forced and unnatural", "negative"),
    ("I was pleasantly surprised by how much I enjoyed this film", "positive"),
    ("The acting left me feeling unsatisfied and confused", "negative"),
    ("This movie exceeded my expectations", "positive"),
    ("The performances by the actors were lackluster", "negative"),
]

In [14]:
# Data preprocessing: tokenization and feature extraction.
def preprocess(text):
    """Turn a piece of text into a bag-of-words feature dictionary.

    Args:
        text: The string to tokenize with ``nltk.word_tokenize``.

    Returns:
        A dict with one key per distinct token, each mapped to ``True``.
        Tokens are used exactly as produced by the tokenizer (no case
        folding), and repeated tokens collapse into a single key.
    """
    return dict.fromkeys(nltk.word_tokenize(text), True)

In [15]:
# Apply the preprocessing step to the dataset: every labelled sentence becomes
# a (feature dict, label) pair, in the same order as `data`.
featuresets = [
    (preprocess(review), sentiment)
    for review, sentiment in data
]

In [16]:
# Split the data into training and test sets: the first 16 examples are used
# for training and whatever follows is held out. The order is kept as-is
# (no shuffling takes place).
train_set = featuresets[:16]
test_set = featuresets[16:]

In [17]:
# Train a Naive Bayes classifier on the training portion of the feature sets.
classifier = nltk.classify.NaiveBayesClassifier.train(train_set)

In [18]:
# Evaluate the classifier on the held-out test set: the fraction of test
# examples whose predicted label matches the gold label.
accuracy = nltk.classify.accuracy(classifier=classifier, gold=test_set)
print("Accuracy:", accuracy)

Accuracy: 1.0


In [19]:
# Classify a new, unseen text: it goes through the same preprocessing used
# for the training data before being handed to the classifier.
new_text = "This movie is amazing"
new_text_features = preprocess(new_text)
predicted_label = classifier.classify(featureset=new_text_features)
print("Predicted label:", predicted_label)

Predicted label: positive
