<a href="https://colab.research.google.com/github/prakashksarangi/Python-Project-Code/blob/main/nlp_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# prompt: Automatic Text Summarization Methods using Machine Learning

import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from nltk.tokenize import RegexpTokenizer
import heapq
from nltk.tokenize import sent_tokenize, word_tokenize
import string
nltk.download('punkt')

def preprocess_text(text):
    """Split a document into sentences and a cleaned list of its words.

    Args:
        text: The raw document as a single string.

    Returns:
        A ``(words, sentences)`` tuple. ``sentences`` is the output of
        ``sent_tokenize`` on ``text``. ``words`` contains every run of word
        characters found in those sentences, lowercased, with English
        stopwords removed (duplicates are kept, in document order).
    """
    sentences = sent_tokenize(text)

    # Matching runs of word characters drops punctuation as a side effect.
    word_splitter = RegexpTokenizer(r'\w+')
    english_stopwords = set(stopwords.words('english'))

    words = []
    for sentence in sentences:
        for token in word_splitter.tokenize(sentence):
            lowered = token.lower()
            # Stopwords are compared after lowercasing, as the corpus is lowercase.
            if lowered not in english_stopwords:
                words.append(lowered)

    return words, sentences

def calculate_sentence_scores(words, sentences):
    """Score each sentence by the summed document frequency of its words.

    Args:
        words: Preprocessed document words (lowercased, punctuation and
            stopwords removed), e.g. the first value returned by
            ``preprocess_text``.
        sentences: The sentences to score.

    Returns:
        A dict mapping each sentence string to its score. Sentences that
        contain none of ``words`` are left out of the dict.
    """
    word_freq = FreqDist(words)
    # Use the same word-character tokenizer that preprocess_text uses to build
    # `words`; a different tokenizer splits hyphenated words, decimals and
    # contractions differently, so those tokens would never match word_freq.
    tokenizer = RegexpTokenizer(r'\w+')
    sentence_scores = {}

    for sentence in sentences:
        # Sentences are the dict keys, so a repeated sentence must not be
        # scored a second time (that would double its score).
        if sentence in sentence_scores:
            continue

        score = sum(
            word_freq.get(token, 0)
            for token in tokenizer.tokenize(sentence.lower())
        )
        # Every word present in word_freq has a count >= 1, so a zero score
        # means no word matched; such sentences are omitted.
        if score:
            sentence_scores[sentence] = score

    return sentence_scores

def generate_summary(text, num_sentences=5):
    """Build an extractive summary from the highest-scoring sentences.

    Args:
        text: The document to summarize.
        num_sentences: Maximum number of sentences to keep (default 5).

    Returns:
        The selected sentences joined by single spaces, ordered from the
        highest score to the lowest.
    """
    tokens, sentence_list = preprocess_text(text)
    scores = calculate_sentence_scores(tokens, sentence_list)

    # Iterating the dict yields the sentences; rank them by their score.
    best_sentences = heapq.nlargest(
        num_sentences,
        scores,
        key=lambda sentence: scores[sentence],
    )
    return ' '.join(best_sentences)

# Sample passage about the Amazon Rainforest used to demonstrate the summarizer
text = '''
The Amazon Rainforest, often referred to as the "Lungs of the Earth," is the largest rainforest on the planet, covering approximately 5.5 million square kilometers. It is home to an incredible diversity of flora and fauna, with millions of species of plants, animals, and insects. The Amazon plays a crucial role in maintaining the global climate by absorbing carbon dioxide and producing oxygen. However, it faces severe threats from deforestation, illegal logging, mining, and agricultural expansion.
The rainforest's rich biodiversity is under constant threat due to human activities. Deforestation, primarily for cattle ranching and agriculture, has led to the destruction of vast areas of the Amazon. This loss of habitat endangers numerous species, some of which may become extinct before they are even discovered by science. Additionally, the indigenous communities living in the Amazon rely on the forest for their traditional way of life, and the destruction of their lands has severe consequences for their cultures and livelihoods.
Efforts to protect the Amazon Rainforest involve conservation initiatives, legal regulations, and international agreements aimed at preserving this invaluable ecosystem. Organizations and governments worldwide are working to combat deforestation, enforce stricter environmental laws, and support sustainable practices. It's crucial to raise awareness and take action to safeguard the Amazon Rainforest for future generations and the health of the planet.
'''

# Summarize the passage with the default sentence count and show the result
summary = generate_summary(text)
print("Summary:", summary, sep="\n")


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


Summary:

The Amazon Rainforest, often referred to as the "Lungs of the Earth," is the largest rainforest on the planet, covering approximately 5.5 million square kilometers. Efforts to protect the Amazon Rainforest involve conservation initiatives, legal regulations, and international agreements aimed at preserving this invaluable ecosystem. Additionally, the indigenous communities living in the Amazon rely on the forest for their traditional way of life, and the destruction of their lands has severe consequences for their cultures and livelihoods. It's crucial to raise awareness and take action to safeguard the Amazon Rainforest for future generations and the health of the planet. The Amazon plays a crucial role in maintaining the global climate by absorbing carbon dioxide and producing oxygen.


In [None]:
import nltk
nltk.download('punkt')
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer

def extractive_summarization(text, num_sentences=3):
    """Summarize ``text`` with sumy's LSA summarizer.

    Args:
        text: The plain-text document to summarize.
        num_sentences: Number of sentences requested from the summarizer
            (default 3).

    Returns:
        The sentences chosen by the summarizer, converted to strings and
        joined by single spaces.
    """
    # Parse the raw string into a sumy document using English tokenization.
    document = PlaintextParser.from_string(text, Tokenizer("english")).document

    lsa = LsaSummarizer()
    chosen_sentences = lsa(document, num_sentences)

    # The summarizer yields sentence objects; stringify them before joining.
    return " ".join(map(str, chosen_sentences))

# Demonstrate the LSA summarizer on a sample passage about the Amazon Rainforest
original_text = """
The Amazon Rainforest, often referred to as the "Lungs of the Earth," is the largest rainforest on the planet, covering approximately 5.5 million square kilometers. It is home to an incredible diversity of flora and fauna, with millions of species of plants, animals, and insects. The Amazon plays a crucial role in maintaining the global climate by absorbing carbon dioxide and producing oxygen. However, it faces severe threats from deforestation, illegal logging, mining, and agricultural expansion.
The rainforest's rich biodiversity is under constant threat due to human activities. Deforestation, primarily for cattle ranching and agriculture, has led to the destruction of vast areas of the Amazon. This loss of habitat endangers numerous species, some of which may become extinct before they are even discovered by science. Additionally, the indigenous communities living in the Amazon rely on the forest for their traditional way of life, and the destruction of their lands has severe consequences for their cultures and livelihoods.
Efforts to protect the Amazon Rainforest involve conservation initiatives, legal regulations, and international agreements aimed at preserving this invaluable ecosystem. Organizations and governments worldwide are working to combat deforestation, enforce stricter environmental laws, and support sustainable practices. It's crucial to raise awareness and take action to safeguard the Amazon Rainforest for future generations and the health of the planet.
"""

# Summarize with the default sentence count, then show input and output
summary = extractive_summarization(original_text)
print(f"Original Text:\n {original_text}")
print(f"\nSummarized Text:\n {summary}")


Original Text:
 
The Amazon Rainforest, often referred to as the "Lungs of the Earth," is the largest rainforest on the planet, covering approximately 5.5 million square kilometers. It is home to an incredible diversity of flora and fauna, with millions of species of plants, animals, and insects. The Amazon plays a crucial role in maintaining the global climate by absorbing carbon dioxide and producing oxygen. However, it faces severe threats from deforestation, illegal logging, mining, and agricultural expansion.
The rainforest's rich biodiversity is under constant threat due to human activities. Deforestation, primarily for cattle ranching and agriculture, has led to the destruction of vast areas of the Amazon. This loss of habitat endangers numerous species, some of which may become extinct before they are even discovered by science. Additionally, the indigenous communities living in the Amazon rely on the forest for their traditional way of life, and the destruction of their lands 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
!pip install sumy


Collecting sumy
  Downloading sumy-0.11.0-py2.py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.3/97.3 kB[0m [31m893.9 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt<0.7,>=0.6.1 (from sumy)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting breadability>=0.1.20 (from sumy)
  Downloading breadability-0.1.20.tar.gz (32 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pycountry>=18.2.23 (from sumy)
  Downloading pycountry-23.12.11-py3-none-any.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: breadability, docopt
  Building wheel for breadability (setup.py) ... [?25l[?25hdone
  Created wheel for breadability: filename=breadability-0.1.20-py2.py3-none-any.whl size=21691 sha256=7ec336c194b635fcdbf0b1b77eb217ff4c509783672ea71f2cbc9127834a0807
  St

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense

# Sample dataset (replace with your own dataset).
# The two lists are treated as parallel: tags[i] is the label of questions[i].
questions = ["What is the capital of France?", "How does photosynthesis work?", "What are the benefits of exercise?"]
tags = ["geography", "biology", "health"]

# Tokenize and pad sequences
max_words = 10000
max_len = 20

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(questions)
sequences = tokenizer.texts_to_sequences(questions)
X = pad_sequences(sequences, maxlen=max_len)

# Convert tags to one-hot encoding.
# The label of a question is its tag at the same position, NOT whether the tag
# name happens to occur inside the question text (which it never does for this
# data and would leave every label at zero).
class_names = list(dict.fromkeys(tags))  # unique tags, first-seen order
class_index = {name: idx for idx, name in enumerate(class_names)}
num_classes = len(class_names)
y = np.zeros((len(questions), num_classes), dtype=np.int32)

for row, tag in enumerate(tags):
    y[row, class_index[tag]] = 1

# Define the CNN model
embedding_dim = 50
filter_sizes = [3, 4, 5]
num_filters = 64

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))

# In a Sequential model these convolutions are stacked: each kernel size is
# applied to the output of the previous convolution, not to the embeddings.
for filter_size in filter_sizes:
    model.add(Conv1D(num_filters, filter_size, activation='relu'))

model.add(GlobalMaxPooling1D())
# One independent sigmoid per class, paired with binary cross-entropy below.
model.add(Dense(num_classes, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
epochs = 10
batch_size = 32

model.fit(X, y, epochs=epochs, batch_size=batch_size)

# Example prediction
new_question = ["What are the symptoms of a common cold?"]
new_sequence = tokenizer.texts_to_sequences(new_question)
new_X = pad_sequences(new_sequence, maxlen=max_len)

predictions = model.predict(new_X)
# Report every class whose predicted probability reaches the 0.5 threshold.
predicted_tags = [class_names[i] for i in range(num_classes) if predictions[0, i] >= 0.5]

print("Input Question:", new_question[0])
print("Predicted Tags:", predicted_tags)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Input Question: What are the symptoms of a common cold?
Predicted Tags: []


In [None]:
from nltk.corpus import wordnet
import nltk
nltk.download('wordnet')
def dictionary_lookup(word):
    """Print the WordNet entries found for ``word``.

    For every synset returned by WordNet, three lines are printed: the synset
    name, its definition, and its list of usage examples.

    Args:
        word: The word to look up.

    Returns:
        None. The results are written to standard output only.
    """
    for entry in wordnet.synsets(word):
        print("Synset:", entry.name())
        print("Definition:", entry.definition())
        print("Examples:", entry.examples())

# Example usage
word_to_lookup = "play"
# dictionary_lookup prints its results itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
dictionary_lookup(word_to_lookup)



[nltk_data] Downloading package wordnet to /root/nltk_data...


Synset: play.n.01
Definition: a dramatic work intended for performance by actors on a stage
Examples: ['he wrote several plays but only one was produced on Broadway']
Synset: play.n.02
Definition: a theatrical performance of a drama
Examples: ['the play lasted two hours']
Synset: play.n.03
Definition: a preset plan of action in team sports
Examples: ['the coach drew up the plays for her team']
Synset: maneuver.n.03
Definition: a deliberate coordinated movement requiring dexterity and skill
Examples: ['he made a great maneuver', 'the runner was out on a play by the shortstop']
Synset: play.n.05
Definition: a state in which action is feasible
Examples: ['the ball was still in play', "insiders said the company's stock was in play"]
Synset: play.n.06
Definition: utilization or exercise
Examples: ['the play of the imagination']
Synset: bid.n.02
Definition: an attempt to get something
Examples: ['they made a futile play for power', 'he made a bid to gain attention']
Synset: play.n.08
Definit