<a href="https://colab.research.google.com/github/madhura2024/deep_learning/blob/main/summarization_nlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import nltk
# Download the NLTK data packages one at a time, in a fixed order.
for _nltk_resource in (
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger_eng',
    'stopwords',
    'wordnet',
    'averaged_perceptron_tagger',
    'maxent_ne_chunker',
    'maxent_ne_chunker_tab',
    'words',
):
    nltk.download(_nltk_resource)

# IPython/Colab shell escape (not plain Python syntax): runs `pip install gensim`
# in the notebook's shell before gensim is imported further down.
!pip install gensim

import re
import numpy as np

from nltk.tokenize import sent_tokenize, word_tokenize, WordPunctTokenizer
from nltk.stem import PorterStemmer, RegexpStemmer, SnowballStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk import pos_tag, ne_chunk

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from gensim.models import Word2Vec


def _clean_sentence(sentence, stop_words, lemmatizer):
    """Return *sentence* lower-cased, letters-only, stop-word-free and lemmatized."""
    letters_only = re.sub(r'[^a-zA-Z]', ' ', sentence.lower())
    # str.split() with no argument already collapses whitespace runs and
    # ignores leading/trailing blanks, so no extra normalisation is needed.
    return " ".join(
        lemmatizer.lemmatize(word)
        for word in letters_only.split()
        if word not in stop_words
    )


def summarize_text(text, summary_ratio=0.3):
    """Print *text* followed by an extractive TF-IDF summary of it.

    The text is split into sentences; each sentence is normalised
    (lower-cased, non-letters removed, English stop words dropped, remaining
    words lemmatized) and scored by the sum of its TF-IDF weights. The
    highest-scoring sentences are printed, best first.

    Args:
        text: The document to summarize.
        summary_ratio: Fraction of the sentence count to keep. At least one
            sentence is requested whenever this rounds down to zero.

    Returns:
        None. The original text and the summary are written to stdout.
    """
    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    cleaned_sentences = [
        _clean_sentence(sentence, stop_words, lemmatizer)
        for sentence in sentences
    ]

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(cleaned_sentences)
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no sentence
        # has a usable token, e.g. empty or stop-word-only input. Score every
        # sentence as zero so the stable sort falls back to document order.
        row_scores = np.zeros(len(sentences))
    else:
        # One sparse row-sum instead of densifying each row separately;
        # asarray/ravel flattens the (n, 1) result to a 1-D array.
        row_scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel()

    # Keyed by the sentence text, so a sentence that occurs more than once
    # is ranked (and printed) only once.
    sentence_scores = dict(zip(sentences, row_scores))

    summary_len = max(1, int(len(sentences) * summary_ratio))
    ranked_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )
    summary = ranked_sentences[:summary_len]

    print("\nORIGINAL TEXT:\n")
    print(text)

    print("\nSUMMARY:\n")
    for sentence in summary:
        print(sentence)

if __name__ == "__main__":
    # Guarded so that importing this file does not block waiting on input();
    # inside a notebook cell __name__ is "__main__", so the prompt still runs.
    user_text = input("Enter text to summarize:\n")
    summarize_text(user_text)


  s = re.sub('\s+', ' ', s).strip()
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent

Enter text to summarize:
Ilya glanced back, dark eyes flashing just for Shane. “He gets that from you.”  “Obviously,” Shane deadpanned. “All the best parts.”  It smelled of yeast, honey, and butter. Anya snored under the table, defeated after three hopeful sniffs at fallen crumbs. Outside, a few yellow leaves scraped along the deck. The weather report promised rain tonight.  Ilya glanced at the recipe on his phone. “Mom used to sing while she cooked,” he mumbled. “We had an old junky TV in the kitchen corner. The remote didn’t work half the time, but Mom would keep it tuned to one of those music channels. She’d hum along… sometimes sing, if she liked the song enough. Ahh—” He laughed under his breath. “I wish I remembered the words now.”  Shane said nothing at first. He reached over, brushed flour from Ilya’s knuckles, thumb lingering where the pulse beat steady and warm. “I think you are a lot like her, you know.”  Ilya’s head snapped up, startled, eyes meeting Shane’s. His brows knit