In [3]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
import re

def preprocess_text(text):
    # Convert to lowercase
    text = text.lower()

    # Remove special characters
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Tokenize the text into sentences
    sentences = sent_tokenize(text)

    # Tokenize each sentence into words
    words = [word_tokenize(sentence) for sentence in sentences]

    # Remove stop words
    stop_words = set(stopwords.words('english'))
    filtered_words = [[word for word in sentence if word not in stop_words] for sentence in words]

    # Perform stemming
    stemmer = PorterStemmer()
    stemmed_words = [[stemmer.stem(word) for word in sentence] for sentence in filtered_words]

    return stemmed_words

def calculate_sentence_scores(sentences, important_words):
    sentence_scores = {}
    for sentence in sentences:
        word_count = len(sentence)
        important_word_count = 0
        for word in important_words:
            important_word_count += sentence.count(word)
        sentence_scores[' '.join(sentence)] = important_word_count / word_count
    return sentence_scores

def summarize_text(text, max_sentences=5):
    # Preprocess the text
    processed_text = preprocess_text(text)

    # Calculate word frequencies
    word_frequencies = {}
    for sentence in processed_text:
        for word in sentence:
            if word in word_frequencies:
                word_frequencies[word] += 1
            else:
                word_frequencies[word] = 1

    # Calculate sentence scores
    important_words = sorted(word_frequencies.items(), key=lambda x: x[1], reverse=True)[:100]
    important_words = [word[0] for word in important_words]
    sentence_scores = calculate_sentence_scores(processed_text, important_words)

    # Select top-scoring sentences
    top_sentences = sorted(sentence_scores.items(), key=lambda x: x[1], reverse=True)[:max_sentences]
    summary = ' '.join([sentence for sentence, _ in top_sentences])

    return summary

# Example usage
text = """
This is a sample text for demonstrating text summarization using NLP techniques.
The text summarization process involves several steps, including text preprocessing,
word frequency calculation, sentence scoring, and selecting the top-scoring sentences.
First, the text is preprocessed by converting it to lowercase, removing special characters,
tokenizing it into sentences and words, removing stop words, and performing stemming.
Next, word frequencies are calculated to identify the most important words in the text.
Sentence scores are then calculated based on the presence of important words in each sentence.
Finally, the top-scoring sentences are selected and combined to form the summary.
"""

summary = summarize_text(text, max_sentences=3)
print("Summary:")
print(summary)

Summary:
sampl text demonstr text summar use nlp techniqu text summar process involv sever step includ text preprocess word frequenc calcul sentenc score select topscor sentenc first text preprocess convert lowercas remov special charact token sentenc word remov stop word perform stem next word frequenc calcul identifi import word text sentenc score calcul base presenc import word sentenc final topscor sentenc select combin form summari


In [7]:
ms = '''
Mahendra Singh Dhoni ( born 7 July 1981)is an Indian professional cricketer. He is a right handed batter and 
a wicket-keeper. Widely regarded as one of the most prolific wicket-keeper-batsmen and captains, 
he represented the Indian cricket team and was the captain of the side in limited-overs formats from 2007 to 
2017 and in test cricket from 2008 to 2014. Dhoni has captained the most international matches and is the most 
successful Indian captain. He has led India to victory in the 2011 Cricket World Cup, the 2007 ICC World Twenty20 and 
the 2013 ICC Champions Trophy, the only captain to win three different limited overs tournaments. He also led the teams
that won the Asia Cup in 2010, 2016 and was a member of the title winning squad in 2018.

Born in Ranchi, Dhoni made his first class debut for Bihar in 1999. He made his debut for the Indian cricket team on 23 
December 2004 in an ODI against Bangladesh and played his first test a year later against Sri Lanka. In 2007, he became the
captain of the ODI side before taking over in all formats by 2008. Dhoni retired from test cricket in 2014, but continued
playing in limited overs cricket till 2019. He has scored 17,266 runs in international cricket including 10,000 plus runs at
an average of more than 50 in ODIs.

Dhoni plays for Chennai Super Kings in the IPL, leading them to the final on ten occasions and winning it five times 
(2010, 2011, 2018, 2021 and 2023). He has also led CSK to two Champions League T20 titles in 2010 and 2014. He is amongst the
few batsmen to have scored more than five thousand runs in the IPL, as well as being the first wicket-keeper to do so.

In 2008, Dhoni was awarded India's highest sport honor Major Dhyan Chand Khel Ratna Award by Government of India. He received
the fourth highest civilian award Padma Shri in 2009 and third highest civilian award Padma Bhushan in 2018. Dhoni holds an
honorary rank of Lieutenant Colonel in the Parachute Regiment of the Indian Territorial Army which was presented to him by 
the Indian Army in 2011. He is one of the most popular cricketers in the world.
'''

summary = summarize_text(ms, max_sentences=1)
print("Summary:")
print(summary)

Summary:
mahendra singh dhoni born juli indian profession cricket right hand batter wicketkeep wide regard one prolif wicketkeeperbatsmen captain repres indian cricket team captain side limitedov format test cricket dhoni captain intern match success indian captain led india victori cricket world cup icc world twenti icc champion trophi captain win three differ limit over tournament also led team asia cup member titl win squad born ranchi dhoni made first class debut bihar made debut indian cricket team decemb odi bangladesh play first test year later sri lanka becam captain odi side take format dhoni retir test cricket continu play limit over cricket till score run intern cricket includ plu run averag odi dhoni play chennai super king ipl lead final ten occas win five time also led csk two champion leagu titl amongst batsmen score five thousand run ipl well first wicketkeep dhoni award india highest sport honor major dhyan chand khel ratna award govern india receiv fourth highest civi