In [None]:
!pip install transformers
!pip install torch
!pip install bs4
!pip install requests

In [2]:
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

In [3]:

def get_news_article(url, timeout=10):
    """Download a web page and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The text of every ``<p>`` tag found in the page, joined with single
        spaces. An empty string if the page has no ``<p>`` tags.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or a timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of summarizing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(paragraph.text for paragraph in soup.find_all('p'))

def summarize_article(article_text, max_length=150, min_length=40,
                      model_name="sshleifer/distilbart-cnn-12-6"):
    """Produce an abstractive summary of ``article_text``.

    Args:
        article_text: The text to summarize.
        max_length: Upper bound passed to the summarization pipeline for the
            length of the generated summary.
        min_length: Lower bound passed to the summarization pipeline for the
            length of the generated summary.
        model_name: Name of the model handed to the ``summarization`` pipeline.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty string
        when ``article_text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) model load entirely.
    if not article_text or not article_text.strip():
        return ''

    summarizer = pipeline("summarization", model=model_name)
    result = summarizer(
        article_text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Full news articles are usually longer than the model's maximum
        # input length; truncate the input instead of failing on it.
        truncation=True,
    )
    return result[0]['summary_text']

if __name__ == '__main__':
    # Demo run: fetch one example news article and summarize it.
    # Variable names are unchanged because they become notebook-level globals.
    url = 'https://techcrunch.com/2023/04/08/generative-ai-enterprise-software'

    article_text = get_news_article(url)
    summary = summarize_article(article_text)

    # Each heading is followed by its text on the next line.
    print("Original Article:", article_text, sep="\n")
    print("\nSummarized Article:", summary, sep="\n")

Original Article:
Over the last several months, OpenAI, and ChatGPT in particular, has shown what’s possible with a user interface built on top of a large language model that can answer questions and create code or pictures. While that alone is remarkable, we can also interact with and adjust the byproduct by having a conversation of sorts with the AI. It’s amazing really, but think about how transformative this could be by applying it to the enterprise applications you use on a daily basis. What if you could build an interface on top of your existing applications, so that instead of pointing and clicking, you could simply ask the computer to do a task for you and it would do it, based on the applications’ underlying model or your company’s internal language model. That would be a huge leap forward in computing. Before now, the biggest leap happened in 1984, when Apple introduced the graphical user interface that began a slow shift from the command line approach and eventually went mai

# Extractive text summarization using the frequency-based method

In [None]:
# install the necessary libraries

!pip install nltk
!pip install bs4
!pip install requests
!pip install tabulate

In [24]:
# import the necessary libraries
import requests
import heapq
import re
import nltk
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from tabulate import tabulate

# Tokenizer models and the stopword list used by the summarizer below.
# Recent NLTK releases load the Punkt tokenizer from the 'punkt_tab' resource,
# older ones from 'punkt'; fetch both so the cell works with either.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Oriname\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Oriname\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [25]:
# define the functions

def get_news_article(url, timeout=10):
    """Download a web page and return the text of all its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The text of every ``<p>`` tag found in the page, joined with single
        spaces. An empty string if the page has no ``<p>`` tags.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: For connection problems or a timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of summarizing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    return ' '.join(paragraph.text for paragraph in soup.find_all('p'))

def extractive_summarization(article_text, summary_length=7):
    """Build an extractive summary by scoring sentences on word frequency.

    Word counts are taken over the article with every non-letter replaced by
    a space and English stopwords removed, then divided by the highest count.
    A sentence's score is the sum of the normalized frequencies of its
    (lower-cased) tokens; the highest-scoring sentences form the summary.

    Args:
        article_text: The text to summarize.
        summary_length: Maximum number of sentences in the summary.

    Returns:
        A ``(summary, sentence_scores)`` tuple. ``summary`` is the selected
        sentences joined with single spaces, highest score first.
        ``sentence_scores`` maps each sentence that contains at least one
        counted word to its score. Returns ``('', {})`` when the text is
        empty, whitespace-only, or contains no countable words.
    """
    # Nothing to tokenize: return the empty result up front.
    if not article_text or not article_text.strip():
        return '', {}

    article_text = re.sub(r'\s+', ' ', article_text)
    # Letters-only copy used for counting words; sentences are taken from the
    # whitespace-normalized original so they keep their punctuation.
    formatted_article_text = re.sub('[^a-zA-Z]', ' ', article_text)
    formatted_article_text = re.sub(r'\s+', ' ', formatted_article_text)

    sentence_list = sent_tokenize(article_text)

    # Load the stopword list once; a set gives constant-time membership tests.
    stop_words = set(stopwords.words('english'))

    word_frequencies = {}
    for word in word_tokenize(formatted_article_text):
        token = word.lower()
        if token not in stop_words:
            word_frequencies[token] = word_frequencies.get(token, 0) + 1

    # Only stopwords / non-alphabetic content: max() below would raise.
    if not word_frequencies:
        return '', {}

    maximum_frequency = max(word_frequencies.values())
    word_frequencies = {
        token: count / maximum_frequency
        for token, count in word_frequencies.items()
    }

    sentence_scores = {}
    for sent in sentence_list:
        for word in word_tokenize(sent.lower()):
            if word in word_frequencies:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_frequencies[word]

    summary_sentences = heapq.nlargest(summary_length, sentence_scores, key=sentence_scores.get)
    summary = ' '.join(summary_sentences)
    return summary, sentence_scores




In [26]:
# test the functions
if __name__ == '__main__':
    # Demo run of the frequency-based summarizer on one example article.
    # Variable names are unchanged because they become notebook-level globals.
    url = 'https://techcrunch.com/2023/04/08/generative-ai-enterprise-software'

    article_text = get_news_article(url)
    summary, sentence_scores = extractive_summarization(article_text)

    print("Original Article:", article_text, sep="\n")

    # One (sentence, score) row per scored sentence, shown as a text table.
    print("\nSentence Scores:")
    table_data = list(sentence_scores.items())
    print(tabulate(table_data, headers=["Sentence", "Score"]))

    print("\nSummarized Article:", summary, sep="\n")

Original Article:
Over the last several months, OpenAI, and ChatGPT in particular, has shown what’s possible with a user interface built on top of a large language model that can answer questions and create code or pictures. While that alone is remarkable, we can also interact with and adjust the byproduct by having a conversation of sorts with the AI. It’s amazing really, but think about how transformative this could be by applying it to the enterprise applications you use on a daily basis. What if you could build an interface on top of your existing applications, so that instead of pointing and clicking, you could simply ask the computer to do a task for you and it would do it, based on the applications’ underlying model or your company’s internal language model. That would be a huge leap forward in computing. Before now, the biggest leap happened in 1984, when Apple introduced the graphical user interface that began a slow shift from the command line approach and eventually went mai