Q-4. Take any text file and perform text summarization on it without using the
Hugging Face Transformers library.


In [9]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import defaultdict
import heapq


def preprocess_text(text):
    """Split *text* into sentences and a cleaned list of content words.

    The words are lower-cased, filtered of English stopwords and of tokens
    that contain no alphanumeric character, then lemmatized with WordNet.

    Args:
        text: Raw document text.

    Returns:
        A ``(sentences, words)`` tuple: the sentences of *text* as produced
        by ``sent_tokenize``, and the cleaned word list drawn from the whole
        text.
    """
    sentences = sent_tokenize(text)

    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()

    # word_tokenize emits punctuation marks (",", ".", "[", ...) as separate
    # tokens. They are not stopwords, so without the alphanumeric check they
    # would be kept as "words" and inflate any frequency count built from
    # this list. Tokens such as "164/7" or "19.4" are deliberately retained.
    words = [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(text.lower())
        if token not in stop_words and any(ch.isalnum() for ch in token)
    ]

    return sentences, words

def calculate_sentence_scores(sentences, words):
    """Score each sentence by the summed frequency of the words it contains.

    Args:
        sentences: Sentences to score; each is lower-cased and tokenized
            with ``word_tokenize`` before lookup.
        words: Word list from which the frequency table is built.

    Returns:
        A ``defaultdict(int)`` mapping sentence index to its score. A
        sentence in which no token matches the frequency table gets no
        entry.
    """
    # Frequency of every word in the supplied word list.
    word_freq = defaultdict(int)
    for word in words:
        word_freq[word] += 1

    lemmatizer = WordNetLemmatizer()
    sentence_scores = defaultdict(int)
    for i, sentence in enumerate(sentences):
        for token in word_tokenize(sentence.lower()):
            if token not in word_freq:
                # preprocess_text lemmatizes the word list, so an inflected
                # token (e.g. a plural) only matches through its lemma.
                # Trying the raw token first keeps every match that a plain
                # lookup would find.
                token = lemmatizer.lemmatize(token)
                if token not in word_freq:
                    continue
            sentence_scores[i] += word_freq[token]

    return sentence_scores

def generate_summary(text, num_sentences=3):
    """Build an extractive summary from the highest-scoring sentences.

    Args:
        text: Raw document text.
        num_sentences: Maximum number of sentences to include.

    Returns:
        The selected sentences joined by single spaces, in the order in
        which they appear in *text*. Fewer than *num_sentences* sentences
        are returned when fewer sentences received a score.
    """
    sentences, words = preprocess_text(text)
    sentence_scores = calculate_sentence_scores(sentences, words)

    # Pick the indices of the top-scoring sentences.
    top_indices = heapq.nlargest(
        num_sentences, sentence_scores, key=sentence_scores.get
    )

    # nlargest ranks by score; sorting the indices restores document order
    # so the summary reads in the same sequence as the source text.
    return ' '.join(sentences[i] for i in sorted(top_indices))


# Read the input document and print its summary.
file_path = "IPL.txt"
# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding.
# NOTE(review): assumes IPL.txt is saved as UTF-8 — confirm.
with open(file_path, "r", encoding="utf-8") as file:
    text = file.read()


summary = generate_summary(text)
print(summary)


IPL season results
Season
	Winner
	Winning margin
	Runner-up
	Final venue
	Teams
	Player of the series
	2008
Details
	Rajasthan Royals[73]
164/7 (20 overs)
	Won by 3 wickets
	Chennai Super Kings
163/5 (20 overs)
	DY Patil Stadium, Navi Mumbai
	8[74]
	Shane Watson (RR)
	2009
Details
	Deccan Chargers[75]
143/6 (20 overs)
	Won by 6 runs
	Royal Challengers Bangalore
137/9 (20 overs)
	Wanderers Stadium, Johannesburg
	8[76]
	Adam Gilchrist (DC)
	2010
Details
	Chennai Super Kings[77]
168/5 (20 overs)
	Won by 22 runs
	Mumbai Indians
146/9 (20 overs)
	DY Patil Stadium, Navi Mumbai
	8[78]
	Sachin Tendulkar (MI)
	2011
Details
	Chennai Super Kings[79]
205/5 (20 overs)
	Won by 58 runs
	Royal Challengers Bangalore
147/8 (20 overs)
	M. A. Chidambaram Stadium, Chennai
	10[80]
	Chris Gayle (RCB)
	2012
Details
	Kolkata Knight Riders[81]
192/5 (19.4 overs)
	Won by 5 wickets
	Chennai Super Kings
190/3 (20 overs)
	9[82]
	Sunil Narine (KKR)
	2013
Details
	Mumbai Indians[83]
148/9 (20 overs)
	Won by 23 runs
