In [1]:
import re
import nltk
import heapq
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

# Download necessary data for text processing
nltk.download('punkt')
nltk.download('stopwords')

# Function to clean and preprocess the text
def preprocess_text(text):
    """
    Preprocesses the input text by converting it to lowercase, removing special characters,
    numbers, and stopwords, and then tokenizing it into words.
    """
    # Convert text to lowercase
    text = text.lower()

    # Remove numbers in brackets and extra spaces
    text = re.sub(r'\[[0-9]*\]', ' ', text)  # Example: [12]
    text = re.sub(r'\s+', ' ', text)

    # Remove special characters and keep only letters
    text = re.sub(r'[^a-zA-Z]', ' ', text)

    # Tokenize the text into words
    words = word_tokenize(text)

    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    filtered_words = [word for word in words if word not in stop_words]

    return filtered_words

# Function to compute word frequency
def compute_word_frequencies(words):
    """
    Computes the frequency of each word in the list and normalizes the frequencies.
    """
    word_frequencies = {}

    # Count the occurrences of each word
    for word in words:
        if word not in word_frequencies:
            word_frequencies[word] = 1
        else:
            word_frequencies[word] += 1

    # Normalize the frequencies by dividing by the maximum frequency
    max_frequency = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] /= max_frequency

    return word_frequencies

# Function to score sentences based on word frequency
def score_sentences(text, word_frequencies):
    """
    Scores each sentence based on the frequency of words it contains,
    normalized by the length of the sentence.
    """
    sentences = sent_tokenize(text)  # Tokenize text into sentences
    sentence_scores = {}

    for sentence in sentences:
        sentence_word_count = len(sentence.split(' '))  # Count words in the sentence
        for word in word_tokenize(sentence.lower()):
            if word in word_frequencies:
                if sentence not in sentence_scores:
                    sentence_scores[sentence] = word_frequencies[word]
                else:
                    sentence_scores[sentence] += word_frequencies[word]

        # Normalize sentence score by sentence length
        sentence_scores[sentence] /= sentence_word_count

    return sentence_scores

# Function to summarize text
def summarize_text(text, num_sentences=3):
    """
    Summarizes the given text by selecting the top sentences based on their scores.
    """
    # Step 1: Preprocess the text
    words = preprocess_text(text)

    # Step 2: Compute word frequencies
    word_frequencies = compute_word_frequencies(words)

    # Step 3: Score sentences
    sentence_scores = score_sentences(text, word_frequencies)

    # Step 4: Select the top sentences for the summary
    summary_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    summary = ' '.join(summary_sentences)

    return summary

# Sample text for summarization
text = """The Threats of Artificial Intelligence
Artificial Intelligence (AI) is one of the most revolutionary advancements in technology, with the potential to transform industries, enhance human capabilities, and address pressing global challenges. However, as we progress into an AI-driven era, it’s crucial to examine the significant threats and risks associated with its widespread adoption.

1. Job Displacement and Economic Disruption
AI's ability to automate tasks has already begun reshaping the job market. While it can lead to increased efficiency, many industries face the risk of massive job displacement. Tasks once requiring human skill, such as manufacturing, data entry, or even customer service, are now being handled by AI systems. This could widen the gap between skilled and unskilled workers, leading to economic inequality.

2. Ethical Concerns and Bias
AI algorithms are only as unbiased as the data they are trained on. If training data contains biases—whether racial, gender-based, or otherwise—AI systems will perpetuate and even amplify these biases. For example, AI in hiring systems or judicial sentencing could lead to unfair treatment of certain groups, raising serious ethical concerns.

3. Privacy and Surveillance
AI-powered tools like facial recognition and data analysis are increasingly used for surveillance, often without public consent. Governments and corporations can exploit these tools to monitor citizens, eroding privacy rights. In authoritarian regimes, AI-enhanced surveillance could stifle dissent and suppress freedoms.

4. Autonomous Weapons and Warfare
AI's potential for military applications poses grave risks. Autonomous weapons, or "killer robots," could conduct warfare with minimal human oversight, leading to unintended escalations and civilian casualties. Moreover, the proliferation of such technology could fall into the hands of non-state actors or rogue states, heightening global security threats.

5. Misinformation and Deepfakes
AI-generated content, including deepfakes, can be used to spread misinformation on an unprecedented scale. From altering political speeches to creating convincing but fake videos, AI tools have the potential to manipulate public opinion, disrupt elections, and erode trust in media institutions.

6. Lack of Accountability
As AI systems become more complex, understanding their decision-making processes becomes increasingly challenging. This "black box" problem makes it difficult to hold AI developers or users accountable for harmful outcomes. For instance, if an autonomous car causes an accident, determining responsibility can become a legal and ethical quagmire.

7. Over-Reliance on AI
Dependence on AI systems can lead to vulnerabilities. In critical industries like healthcare, finance, or defense, over-reliance on AI could result in catastrophic consequences if these systems fail, are hacked, or produce erroneous outputs.

8. Existential Risk
Prominent thinkers like Elon Musk and Stephen Hawking have warned about the existential risks of AI. Superintelligent systems, if not aligned with human values, could act in ways detrimental to humanity. While this remains speculative, the potential for AI to surpass human control cannot be ignored.

Addressing the Threats
While the threats posed by AI are significant, they are not insurmountable. Policymakers, technologists, and ethicists must work together to create robust regulations, ethical guidelines, and technological safeguards. This includes:

Ensuring transparency and fairness in AI algorithms.
Establishing global treaties to prevent misuse of AI in warfare.
Promoting public awareness and education about AI risks and benefits.
Encouraging responsible AI development aligned with societal values.
Conclusion
AI is a powerful tool that promises immense benefits, but its threats are equally significant. The future of AI depends on our ability to navigate its risks responsibly and ensure that it serves humanity’s best interests. By fostering a collaborative approach, we can harness AI’s potential while safeguarding against its perils.
"""

# Generate a summary for the sample text
summary = summarize_text(text)

# Display the summary
print("Original Text:")
print(text)
print("\nSummary:")
print(summary)


[nltk_data] Error loading punkt: <urlopen error [Errno -3] Temporary
[nltk_data]     failure in name resolution>
[nltk_data] Error loading stopwords: <urlopen error [Errno -3]
[nltk_data]     Temporary failure in name resolution>


KeyError: '1.'