In [1]:
data = "My name is Bhaskar Rai. It is my pleasure to take part in Infosys Springboard Internship. And I am working on Text Summarization Project."

In [2]:
# functions from NLTK (Natural Language Toolkit)
from nltk.tokenize import word_tokenize, sent_tokenize  # Used for splitting the text into words and sentences
from nltk.corpus import stopwords  # Stopwords are common words (e.g., 'is', 'the') that don't add much meaning

In [3]:
# Define a function that will summarize the text
def solve(text):

  # Load English stopwords (common words that are not important for summarization)
  stopwords1 = set(stopwords.words("english"))

  # Tokenize the text into words
  words = word_tokenize(text)

  # Create a dictionary to store the frequency of each word
  freqTable = {}

  # Iterate through each word in the text
  for word in words:

    # Convert the word to lowercase to avoid case sensitivity
    word = word.lower()

    # Skip the word if it is a stopword (common word like 'is', 'the')
    if word in stopwords1:
      continue

    # If the word is already in the frequency table, increase its count
    if word in freqTable:
      freqTable[word] += 1
    else:
      # If the word is not in the table, add it and set its count to 1
      freqTable[word] = 1

  # Tokenize the text into sentences
  sentences = sent_tokenize(text)

  # Create a dictionary to store the value (importance score) of each sentence
  sentenceValue = {}

  # For each sentence in the text
  for sentence in sentences:

    # For each word and its frequency from the freqTable
    for word, freq in freqTable.items():

      # If the word is found in the sentence (convert to lowercase for case-insensitive match)
      if word in sentence.lower():

        # Add the frequency of the word to the sentence's value
        if sentence in sentenceValue:
          sentenceValue[sentence] += freq
        else:
          # If it's the first time encountering this sentence, initialize its value with the word's frequency
          sentenceValue[sentence] = freq

  # Sum up all the sentence values to calculate the total score
  sumValues = 0
  for sentence in sentenceValue:
    sumValues += sentenceValue[sentence]

  # Calculate the average sentence value to decide which sentences are important
  average = int(sumValues / len(sentenceValue))

  # Create a summary by selecting sentences with a score higher than 1.2 times the average value
  summary = ''
  for sentence in sentences:
    if (sentence in sentenceValue) and (sentenceValue[sentence] > (1.2 * average)):
      # Add high-scoring sentences to the summary
      summary += " " + sentence

  # Return the final summary
  return summary

In [5]:
import nltk
nltk.download('popular')

[nltk_data] Downloading collection 'popular'
[nltk_data]    | 
[nltk_data]    | Downloading package cmudict to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/cmudict.zip.
[nltk_data]    | Downloading package gazetteers to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gazetteers.zip.
[nltk_data]    | Downloading package genesis to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/genesis.zip.
[nltk_data]    | Downloading package gutenberg to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/gutenberg.zip.
[nltk_data]    | Downloading package inaugural to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/inaugural.zip.
[nltk_data]    | Downloading package movie_reviews to
[nltk_data]    |     /root/nltk_data...
[nltk_data]    |   Unzipping corpora/movie_reviews.zip.
[nltk_data]    | Downloading package names to /root/nltk_data...
[nltk_data]    |   Unzipping corpora/names.zip.
[nltk_data]    | Downloading package shakespeare to /root/nltk_data...
[nlt

True

In [6]:
# Call the solve function with the text data
summary = solve(data)

# Print the summary generated by the function
print(summary)

 It is my pleasure to take part in Infosys Springboard Internship.
