In [1]:
!pip install nltk
import nltk



In [2]:
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [4]:
def preprocess(text):
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]

    return tokens

In [5]:
def vectorize(tokens):
    vectorizer = CountVectorizer()
    token_matrix = vectorizer.fit_transform([" ".join(tokens), text])

    return token_matrix


In [6]:
def summarize(text, top_n=2):

    tokens = preprocess(text)

    token_matrix = vectorize(tokens)

    similarity = cosine_similarity(token_matrix)[0] # Calculate similarity between the tokenized text and each sentence
    print("Vector Similarity Scores:")
    for i, score in enumerate(similarity):
        print(f"Sentence {i+1}: {score}")

    top_indices = similarity.argsort()[-top_n:][::-1] # Get indices of most similar sentences
    sentences = sent_tokenize(text)
    summary = [sentences[i] for i in top_indices] # Extract most similar sentences

    return ' '.join(summary)

In [7]:
text = """
Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems, as opposed to the natural intelligence of living beings. It is a field of research in computer science that develops and studies methods and software which enable machines to perceive their environment and uses learning and intelligence to take actions that maximize their chances of achieving defined goals."""


In [8]:
summary = summarize(text)
print("\nSummary:")
print(summary)

Vector Similarity Scores:
Sentence 1: 0.9999999999999994
Sentence 2: 0.7006490497453702

Summary:

Artificial intelligence (AI), in its broadest sense, is intelligence exhibited by machines, particularly computer systems, as opposed to the natural intelligence of living beings. It is a field of research in computer science that develops and studies methods and software which enable machines to perceive their environment and uses learning and intelligence to take actions that maximize their chances of achieving defined goals.
