In [2]:
import re

import nltk

import heapq

from nltk.corpus import stopwords

from nltk.tokenize import word_tokenize, sent_tokenize


# Fetch the NLTK resources used below: the 'punkt' sentence/word tokenizer
# models and the 'stopwords' corpus.
for resource_name in ('punkt', 'stopwords'):
    nltk.download(resource_name)


# Function to clean and preprocess the text

def preprocess_text(text):
    """Return the lower-cased, letters-only, non-stopword tokens of *text*.

    The text is lower-cased, bracketed numeric references (e.g. ``[12]``),
    whitespace runs and every non-letter character are each replaced by a
    single space, the result is split into word tokens, and English
    stopwords are dropped.

    Args:
        text: The raw input string.

    Returns:
        A list of the remaining word tokens, in their original order.
    """
    cleaned = text.lower()

    # Applied in this order; each match is replaced by one space.
    substitutions = (
        r'\[[0-9]*\]',  # numbers in brackets (like [12])
        r'\s+',         # multiple spaces
        r'[^a-zA-Z]',   # special characters and digits
    )
    for pattern in substitutions:
        cleaned = re.sub(pattern, ' ', cleaned)

    tokens = word_tokenize(cleaned)

    english_stopwords = set(stopwords.words('english'))
    kept_tokens = []
    for token in tokens:
        if token in english_stopwords:
            continue
        kept_tokens.append(token)

    return kept_tokens


# Function to compute word frequency

def compute_word_frequencies(words):
    """Return each word's occurrence count divided by the highest count.

    Args:
        words: An iterable of hashable word tokens.

    Returns:
        A dict mapping every distinct word to a float in ``(0, 1]``; the
        most frequent word(s) map to ``1.0``. An empty input yields an
        empty dict.
    """
    counts = {}
    for word in words:
        counts[word] = counts.get(word, 0) + 1

    # Nothing to normalize; max() on an empty sequence would raise ValueError.
    if not counts:
        return {}

    max_frequency = max(counts.values())
    return {word: count / max_frequency for word, count in counts.items()}


# Function to score sentences based on word frequency

def score_sentences(text, word_frequencies):
    """Score every sentence of *text* by the frequencies of its words.

    A sentence's score is the sum of ``word_frequencies`` values for each of
    its lower-cased word tokens (tokens missing from the mapping contribute
    nothing), divided by the sentence's length in space-separated chunks.

    Args:
        text: The raw text to split into sentences.
        word_frequencies: A dict mapping lower-case words to numeric weights.

    Returns:
        A dict mapping each distinct sentence string to its score. A sentence
        containing no weighted word gets ``0.0`` rather than being absent.
    """
    sentence_scores = {}

    for sentence in sent_tokenize(text):
        # Length is measured by splitting on single spaces only, so newlines
        # and tabs do not separate words here. str.split(' ') always returns
        # at least one element, so the division below cannot be by zero.
        sentence_word_count = len(sentence.split(' '))

        # Summing with a default of 0 gives every sentence an entry, even
        # when none of its tokens carries a weight.
        total = sum(
            word_frequencies.get(word, 0)
            for word in word_tokenize(sentence.lower())
        )

        # Assigning (not accumulating) keeps a repeated sentence from having
        # its score added to and normalized a second time.
        sentence_scores[sentence] = total / sentence_word_count

    return sentence_scores


# Function to summarize text

def summarize_text(text, num_sentences=3):
    """Build an extractive summary from the highest-scoring sentences.

    The text is preprocessed into words, normalized word frequencies are
    computed, every sentence is scored against them, and the best
    ``num_sentences`` sentences are joined with single spaces.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to include (default 3).

    Returns:
        The selected sentences joined by spaces, ordered from highest to
        lowest score. Returns an empty string when preprocessing leaves no
        words to score (e.g. empty or stopword-only text).
    """
    words = preprocess_text(text)

    # Without any words there are no frequencies to rank sentences by.
    if not words:
        return ''

    word_frequencies = compute_word_frequencies(words)
    sentence_scores = score_sentences(text, word_frequencies)

    # nlargest iterates the dict's keys (sentences) and ranks them by score.
    summary_sentences = heapq.nlargest(
        num_sentences, sentence_scores, key=sentence_scores.get
    )

    return ' '.join(summary_sentences)


# Sample text for summarization: a multi-line string that mixes short heading
# lines (no terminating punctuation) with full sentences.

text = """

Yes, artificial intelligence (AI) can be a threat in several ways, including:
Job losses
AI can replace routine and creative jobs, such as writing news stories and producing art. 
Social manipulation
AI can be used to create echo chambers that only show content a person would like, which can polarize the public sphere. AI can also be used to create fake news and videos, known as deepfakes, which can influence elections. 
Data privacy
AI can be used to track and profile individuals, and to merge information a person has given into new data. 
Environmental harm
The production and mining of metals used in AI hardware can lead to pollution and soil erosion. 
Degrading human experience
AI can automate judgment-making tasks, which can lead to people losing the ability to make those judgments themselves. 
Cybersecurity threats
Bad actors can use AI to launch cyberattacks. 
Bias
AI can reflect human biases, which can lead to decisions influenced by data on sex, ethnicity, or age. 
However, AI is also a powerful tool that can offer promising solutions in healthcare. 

"""


# Summarize the sample text, relying on summarize_text's default sentence count

summary = summarize_text(text)


# Show the resulting summary string

print(summary)

[nltk_data] Error loading punkt: <urlopen error [Errno -3] Temporary
[nltk_data]     failure in name resolution>
[nltk_data] Error loading stopwords: <urlopen error [Errno -3]
[nltk_data]     Temporary failure in name resolution>
Cybersecurity threats
Bad actors can use AI to launch cyberattacks. 

Yes, artificial intelligence (AI) can be a threat in several ways, including:
Job losses
AI can replace routine and creative jobs, such as writing news stories and producing art. AI can also be used to create fake news and videos, known as deepfakes, which can influence elections.
