<a href="https://colab.research.google.com/github/khimaja/Natural-Language-Processing-/blob/main/NLP_file1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.corpus import words

# Fetch the tokenizer model and the two word lists this cell relies on
for resource in ('punkt', 'stopwords', 'words'):
    nltk.download(resource)

# Input sentence (an alternative sample is kept below for reference)
# text = "NLTK is a powerful Python library for natural language processing tasks."
text = "Submiting the 4 lab expermens before the dedline."

# Step 1 - Tokenization: split the sentence into word and punctuation tokens
tokens = word_tokenize(text)
print("Tokenization:")
print(tokens)

# Step 2 - Filtration: keep only tokens made up entirely of letters/digits
filtered_tokens = list(filter(str.isalnum, tokens))
print("\nFiltration:")
print(filtered_tokens)

# Step 3 - Script validation: keep tokens whose lowercase form appears in
# the NLTK `words` corpus (a set makes each lookup constant-time)
english_words = set(words.words())
valid_words = list(
    filter(lambda token: token.lower() in english_words, filtered_tokens)
)
print("\nScript Validation:")
print(valid_words)

# Step 4 - Stop word removal: discard tokens found in the English stop list
stop_words = set(stopwords.words('english'))
filtered_words = [
    token for token in valid_words if not token.lower() in stop_words
]
print("\nStop Word Removal:")
print(filtered_words)

# Step 5 - Stemming: reduce every surviving token with the Porter stemmer
porter = PorterStemmer()
stemmed_words = list(map(porter.stem, filtered_words))
print("\nStemming:")
print(stemmed_words)


Tokenization:
['Submiting', 'the', '4', 'lab', 'expermens', 'before', 'the', 'dedline', '.']

Filtration:
['Submiting', 'the', '4', 'lab', 'expermens', 'before', 'the', 'dedline']

Script Validation:
['the', 'lab', 'before', 'the']

Stop Word Removal:
['lab']

Stemming:
['lab']


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!


EXPERIMENT 2


In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import words
from nltk.stem import WordNetLemmatizer

# Download NLTK resources
nltk.download('punkt')
nltk.download('words')
nltk.download('wordnet')

# Sample text
text = "The rusty key turned with a groan, revealing a dusty attic. Sunlight streamed across Amelia, illuminating forgotten treasures. A chipped music box, a faded teddy bear, and a worn leather journal whispered tales of a life long past. A smile tugged at Amelia's lips - a treasure trove of memories awaited."

# Tokenization
tokens = word_tokenize(text)

# Build the vocabulary once, as a set. Calling words.words() inside the loop
# rebuilt the full word list for every token and then scanned it linearly;
# a set gives the same membership answers with constant-time lookups.
english_words = set(words.words())

# Morphological analysis (Lemmatization)
# Only tokens whose lowercase form is in the vocabulary are analysed.
# lemmatize() is called without a POS argument, so it uses its default.
lemmatizer = WordNetLemmatizer()
morphological_analysis = [
    (word, lemmatizer.lemmatize(word))
    for word in tokens
    if word.lower() in english_words
]

# Displaying output: one "token: lemma" line per analysed token
print("Morphological Analysis (Lemmatization):")
for word, lemma in morphological_analysis:
    print(f"{word}: {lemma}")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Morphological Analysis (Lemmatization):
The: The
rusty: rusty
key: key
turned: turned
with: with
a: a
groan: groan
revealing: revealing
a: a
dusty: dusty
attic: attic
Sunlight: Sunlight
across: across
Amelia: Amelia
illuminating: illuminating
forgotten: forgotten
A: A
chipped: chipped
music: music
box: box
a: a
faded: faded
bear: bear
and: and
a: a
worn: worn
leather: leather
journal: journal
whispered: whispered
tales: tale
of: of
a: a
life: life
long: long
past: past
A: A
smile: smile
at: at
Amelia: Amelia
a: a
treasure: treasure
trove: trove
of: of


EXPERIMENT 3


In [None]:
import nltk
from nltk.corpus import movie_reviews
from nltk import bigrams, word_tokenize
from collections import Counter
import random

# Fetch the corpora this cell relies on
for resource in ('movie_reviews', 'punkt'):
    nltk.download(resource)

# Pair every review's word list with its category label
documents = []
for category in movie_reviews.categories():
    for fileid in movie_reviews.fileids(category):
        documents.append((list(movie_reviews.words(fileid)), category))

# Randomise document order in place
# NOTE(review): `documents` is not read again in the visible cells - confirm
# whether it is still needed.
random.shuffle(documents)

# Lowercase the whole corpus, then keep only purely alphabetic tokens
all_words = list(map(str.lower, movie_reviews.words()))
tokens = [word.lower() for word in filter(str.isalpha, all_words)]

# Adjacent token pairs across the corpus
ngrams = list(bigrams(tokens))

# Tally how often each distinct bigram occurs
bigram_counts = Counter()
bigram_counts.update(ngrams)

# Function to predict next word
def predict_next_word(previous_word, counts=None):
    """Return the word that most frequently follows ``previous_word``.

    Args:
        previous_word: Word to look up as the first element of a bigram.
        counts: Optional mapping of ``(first, second)`` tuples to their
            frequencies. When omitted, the module-level ``bigram_counts``
            is used, which keeps existing one-argument calls working.

    Returns:
        The follower with the highest bigram frequency, or ``None`` when
        ``previous_word`` never appears as the first element of a bigram.
        On ties, the follower encountered first while iterating wins.
    """
    if counts is None:
        counts = bigram_counts

    # Iterating the mapping yields each distinct bigram exactly once, so a
    # follower must be ranked by its stored frequency. Counting occurrences
    # in the key list would give every follower a score of 1.
    followers = {
        second: frequency
        for (first, second), frequency in counts.items()
        if first == previous_word
    }
    if not followers:
        return None
    return max(followers, key=followers.get)

# Try the predictor on a sample word and report its suggestion
previous_word = 'movie'
next_word = predict_next_word(previous_word)
message = f"Predicted next word after '{previous_word}': {next_word}"
print(message)


[nltk_data] Downloading package movie_reviews to /root/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Predicted next word after 'movie': made


EXPERIMENT 4

In [None]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag

# Download NLTK resources
# word_tokenize needs the punkt model and pos_tag needs the perceptron tagger
# model. Without these downloads the cell only works when the data is already
# present from an earlier session, so it can fail on a fresh kernel.
# NOTE(review): recent NLTK releases may look for 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead - confirm against the installed version.
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

# Sample text
# text = "NLTK is a powerful Python library for natural language processing tasks."
text = "Imagine all the people living life in peace."

# Tokenization
tokens = word_tokenize(text)

# POS tagging: produces a list of (token, tag) pairs
pos_tags = pos_tag(tokens)

# Displaying output
print("POS tagging:")
print(pos_tags)


POS tagging:
[('Imagine', 'NNP'), ('all', 'PDT'), ('the', 'DT'), ('people', 'NNS'), ('living', 'VBG'), ('life', 'NN'), ('in', 'IN'), ('peace', 'NN'), ('.', '.')]
