In [None]:
# Sample document to summarize: five single-line sentences about AI.
# The literal starts and ends with a newline because of the triple-quote layout.
text = """
Artificial Intelligence is one of the most transformative technologies of our time.
It is changing the way we work, communicate, and live.
Machine learning is a subset of AI that learns patterns from data.
Natural language processing enables machines to understand human language.
Many industries are adopting AI to improve efficiency and reduce costs.
"""
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from collections import defaultdict
import string

# Fetch the NLTK data used below. Each model is requested under both its
# legacy and its current resource name, because recent NLTK releases look up
# "punkt_tab" / "averaged_perceptron_tagger_eng" while older ones look up
# "punkt" / "averaged_perceptron_tagger". Without "punkt_tab" the tokenizers
# raise LookupError on a fresh install of a recent NLTK.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("averaged_perceptron_tagger")
nltk.download("averaged_perceptron_tagger_eng")
nltk.download("stopwords")

# Split the document into sentences; their list index is the sentence id used
# by the rest of the script.
sentences = sent_tokenize(text)
# A set gives O(1) membership tests when filtering tokens.
stop_words = set(stopwords.words("english"))

# Helper: clean and extract important tokens (nouns only)
def preprocess_sentence(sentence):
    """Return the lowercase noun tokens of *sentence*.

    The sentence is lowercased and tokenized, tokens found in the
    module-level ``stop_words`` set and tokens made up solely of punctuation
    characters are dropped, and the remaining tokens are POS-tagged. Only
    tokens whose tag starts with ``"NN"`` are kept.

    Args:
        sentence: A single sentence as a string.

    Returns:
        A list of noun tokens in the order they appear in the sentence.
    """
    tokens = word_tokenize(sentence.lower())
    # Test every character rather than `tok in string.punctuation`: the latter
    # is a substring check against one long string, so multi-character tokens
    # such as "``", "''", "..." or "--" would slip through.
    content_tokens = [
        tok
        for tok in tokens
        if tok not in stop_words
        and not all(ch in string.punctuation for ch in tok)
    ]
    tagged = nltk.pos_tag(content_tokens)
    return [tok for tok, tag in tagged if tag.startswith("NN")]

# Noun lists, one per sentence, index-aligned with `sentences`.
processed_sentences = list(map(preprocess_sentence, sentences))

import networkx as nx

# Compute sentence similarity (basic Jaccard for now)
def sentence_similarity(sen1, sen2):
    """Return the Jaccard similarity of two token collections.

    Both arguments are converted to sets, so duplicates and ordering are
    ignored. The result is ``|A ∩ B| / |A ∪ B|``, or ``0`` when either
    collection is empty.

    Args:
        sen1: Iterable of hashable tokens for the first sentence.
        sen2: Iterable of hashable tokens for the second sentence.

    Returns:
        The overlap ratio, a number between 0 and 1 inclusive.
    """
    first, second = set(sen1), set(sen2)
    if first and second:
        shared = first.intersection(second)
        combined = first.union(second)
        return len(shared) / len(combined)
    # An empty side means there is nothing to compare.
    return 0

# Build the similarity graph: one node per sentence index, and one weighted
# edge for each pair of sentences whose noun sets overlap.
graph = nx.Graph()
sentence_count = len(processed_sentences)
# Register every sentence up front. add_edge() only creates the nodes it
# touches, so without this a sentence sharing no noun with any other would be
# absent from the graph (and from the PageRank result), and the "no edges"
# fallback below could never be reached.
graph.add_nodes_from(range(sentence_count))
# The graph is undirected and sentence_similarity() is symmetric, so each
# unordered pair (i, j) only needs to be visited once.
for i in range(sentence_count):
    for j in range(i + 1, sentence_count):
        sim = sentence_similarity(processed_sentences[i], processed_sentences[j])
        if sim > 0:
            graph.add_edge(i, j, weight=sim)

# Score the sentences, handling the degenerate graphs explicitly.
if graph.number_of_nodes() == 0:
    scores = {}  # No sentences, no scores
elif graph.number_of_edges() == 0:
    # No two sentences share a noun, so there is no structure to rank on;
    # give every sentence the same score.
    num_nodes = graph.number_of_nodes()
    scores = {node: 1.0 / num_nodes for node in graph.nodes()}
else:
    scores = nx.pagerank(graph)

print("Initial PageRank scores:", scores)

# Position bonus: the sentence at index k gains (1 / (k + 1)) * 0.1, so earlier
# sentences receive a slightly larger boost. Indices that have no score yet
# start from 0.0 before the bonus is added.
for position, _ in enumerate(sentences):
    bonus = (1 / (position + 1)) * 0.1
    scores[position] = scores.get(position, 0.0) + bonus

print("Scores after adding position boost:", scores)
# Order sentence indices from highest to lowest score and keep the best two.
# ranked_sentences holds (index, score) pairs, best first.
ranked_sentences = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
summary_indices = [pair[0] for pair in ranked_sentences[:2]]
# Emit the chosen sentences in document order rather than score order.
chosen_in_order = sorted(summary_indices)
summary = " ".join(sentences[k] for k in chosen_in_order)

print("📌 TextRank Summary:\n", summary)



: 