In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
from string import punctuation

In [2]:
nltk.download('punkt', quiet=True)
nltk.download('punkt_tab', quiet=True)
nltk.download('stopwords', quiet=True)

True

In [3]:
text = """AI, or artificial intelligence, is a field of computer science that creates systems capable of performing tasks that normally require human intelligence,
such as learning, problem-solving, and decision-making. These systems can analyze vast amounts of data to recognize patterns, understand language, and make predictions,
all without being explicitly programmed for every situation. AI is built using technologies like machine learning and deep learning, which allow machines to learn from
experience and adapt to new information. Human-like capabilities: AI enables machines to perform tasks such as understanding speech, analyzing data, and providing
helpful suggestions. Data-driven learning: Many AI systems are trained on large datasets to learn to perform tasks, recognizing patterns and making decisions based
on that data. Problem-solving: Instead of being programmed for every specific task, AI can solve problems autonomously by finding answers and generating solutions."""

print("Original Text:\n", text)

Original Text:
 AI, or artificial intelligence, is a field of computer science that creates systems capable of performing tasks that normally require human intelligence, 
such as learning, problem-solving, and decision-making. These systems can analyze vast amounts of data to recognize patterns, understand language, and make predictions,
all without being explicitly programmed for every situation. AI is built using technologies like machine learning and deep learning, which allow machines to learn from
experience and adapt to new information. Human-like capabilities: AI enables machines to perform tasks such as understanding speech, analyzing data, and providing 
helpful suggestions. Data-driven learning: Many AI systems are trained on large datasets to learn to perform tasks, recognizing patterns and making decisions based 
on that data. Problem-solving: Instead of being programmed for every specific task, AI can solve problems autonomously by finding answers and generating solutions.

In [4]:
# Tokenization
sentences = sent_tokenize(text)
print("\nSentence Tokenization:\n", sentences)


Sentence Tokenization:
 ['AI, or artificial intelligence, is a field of computer science that creates systems capable of performing tasks that normally require human intelligence, \nsuch as learning, problem-solving, and decision-making.', 'These systems can analyze vast amounts of data to recognize patterns, understand language, and make predictions,\nall without being explicitly programmed for every situation.', 'AI is built using technologies like machine learning and deep learning, which allow machines to learn from\nexperience and adapt to new information.', 'Human-like capabilities: AI enables machines to perform tasks such as understanding speech, analyzing data, and providing \nhelpful suggestions.', 'Data-driven learning: Many AI systems are trained on large datasets to learn to perform tasks, recognizing patterns and making decisions based \non that data.', 'Problem-solving: Instead of being programmed for every specific task, AI can solve problems autonomously by finding an

In [5]:
# Word Tokenization
words = word_tokenize(text.lower())
print("\nWord Tokenization:\n", words[:20], "...")


Word Tokenization:
 ['ai', ',', 'or', 'artificial', 'intelligence', ',', 'is', 'a', 'field', 'of', 'computer', 'science', 'that', 'creates', 'systems', 'capable', 'of', 'performing', 'tasks', 'that'] ...


In [6]:
# Stopword Removal
stop_words = set(stopwords.words("english"))
filtered_words = [word for word in words if word not in stop_words and word not in punctuation]
print("\nAfter Stopword Removal:\n", filtered_words[:20], "...")


After Stopword Removal:
 ['ai', 'artificial', 'intelligence', 'field', 'computer', 'science', 'creates', 'systems', 'capable', 'performing', 'tasks', 'normally', 'require', 'human', 'intelligence', 'learning', 'problem-solving', 'decision-making', 'systems', 'analyze'] ...


In [7]:
# Stemming
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(word) for word in filtered_words]
print("\nAfter Stemming:\n", stemmed_words[:20], "...")


After Stemming:
 ['ai', 'artifici', 'intellig', 'field', 'comput', 'scienc', 'creat', 'system', 'capabl', 'perform', 'task', 'normal', 'requir', 'human', 'intellig', 'learn', 'problem-solv', 'decision-mak', 'system', 'analyz'] ...


In [8]:
# Frequency Distribution
word_freq = {}
for word in stemmed_words:
    word_freq[word] = word_freq.get(word, 0) + 1

# Normalize frequencies
max_freq = max(word_freq.values()) if word_freq else 1
for word in list(word_freq.keys()):
    word_freq[word] = word_freq[word] / max_freq

print("\nWord Frequencies:\n", word_freq)


Word Frequencies:
 {'ai': 0.8333333333333334, 'artifici': 0.16666666666666666, 'intellig': 0.3333333333333333, 'field': 0.16666666666666666, 'comput': 0.16666666666666666, 'scienc': 0.16666666666666666, 'creat': 0.16666666666666666, 'system': 0.5, 'capabl': 0.3333333333333333, 'perform': 0.5, 'task': 0.6666666666666666, 'normal': 0.16666666666666666, 'requir': 0.16666666666666666, 'human': 0.16666666666666666, 'learn': 1.0, 'problem-solv': 0.3333333333333333, 'decision-mak': 0.16666666666666666, 'analyz': 0.3333333333333333, 'vast': 0.16666666666666666, 'amount': 0.16666666666666666, 'data': 0.5, 'recogn': 0.3333333333333333, 'pattern': 0.3333333333333333, 'understand': 0.3333333333333333, 'languag': 0.16666666666666666, 'make': 0.3333333333333333, 'predict': 0.16666666666666666, 'without': 0.16666666666666666, 'explicitli': 0.16666666666666666, 'program': 0.3333333333333333, 'everi': 0.3333333333333333, 'situat': 0.16666666666666666, 'built': 0.16666666666666666, 'use': 0.16666666666

In [9]:
# Sentence Score
sentence_scores = {}
for sent in sentences:
    for word in word_tokenize(sent.lower()):
        if word in word_freq:
            sentence_scores[sent] = sentence_scores.get(sent, 0) + word_freq[word]

print("\nSentence Scores:\n", sentence_scores)


Sentence Scores:
 {'AI, or artificial intelligence, is a field of computer science that creates systems capable of performing tasks that normally require human intelligence, \nsuch as learning, problem-solving, and decision-making.': 1.1666666666666667, 'These systems can analyze vast amounts of data to recognize patterns, understand language, and make predictions,\nall without being explicitly programmed for every situation.': 1.5, 'AI is built using technologies like machine learning and deep learning, which allow machines to learn from\nexperience and adapt to new information.': 2.833333333333333, 'Human-like capabilities: AI enables machines to perform tasks such as understanding speech, analyzing data, and providing \nhelpful suggestions.': 2.0, 'Data-driven learning: Many AI systems are trained on large datasets to learn to perform tasks, recognizing patterns and making decisions based \non that data.': 3.0, 'Problem-solving: Instead of being programmed for every specific task, 

In [10]:
# Summary
import heapq
summary_sentences = heapq.nlargest(2, sentence_scores, key=sentence_scores.get)
summary = " ".join(summary_sentences)

print("\n----- SUMMARY -----\n")
print(summary)


----- SUMMARY -----

Data-driven learning: Many AI systems are trained on large datasets to learn to perform tasks, recognizing patterns and making decisions based 
on that data. AI is built using technologies like machine learning and deep learning, which allow machines to learn from
experience and adapt to new information.
