Run the next cell only once (it installs the required packages).


In [None]:
# Install the third-party packages that provide the `newspaper` and `sumy` imports used further down.
!pip install newspaper3k
!pip install sumy

Collecting newspaper3k
  Downloading newspaper3k-0.2.8-py3-none-any.whl (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.1/211.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Collecting cssselect>=0.9.2 (from newspaper3k)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)
Collecting feedparser>=5.2.1 (from newspaper3k)
  Downloading feedparser-6.0.10-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.1/81.1 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tldextract>=2.0.1 (from newspaper3k)
  Downloading tldextract-5.1.0-py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.7/97.7 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting feedfinder2>=0.0.4 (from newspaper3k)
  Downloading feedfinder2-0.0.4.tar.gz (3.3 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting jieba3k>=0.35.1 (from newspaper3k)
  Downloading jieba3k-0.35.1.zip

Start running here

In [None]:
import nltk
# Fetch NLTK's 'punkt' tokenizer data.
# NOTE(review): presumably required by the English tokenizer used for summarisation below — confirm.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import html
from urllib.parse import quote_plus

import newspaper
import nltk
import pandas as pd
import requests
from bs4 import BeautifulSoup
from nltk.sentiment import SentimentIntensityAnalyzer
from requests.exceptions import Timeout
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer
from textblob import TextBlob


# Download the VADER lexicon.
# NOTE(review): presumably the data SentimentIntensityAnalyzer (instantiated below) loads — confirm.
nltk.download('vader_lexicon')

def summarize_text(text, max_sentences=3):
    """Return an LSA-based extractive summary of *text* as a single string.

    Args:
        text: Plain text to summarise.
        max_sentences: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences, each converted with ``str`` and joined by
        single spaces.
    """
    # Tokenise the raw string as English and keep only the parsed document.
    document = PlaintextParser.from_string(text, Tokenizer("english")).document
    lsa = LsaSummarizer()
    chosen_sentences = lsa(document, max_sentences)
    return ' '.join(map(str, chosen_sentences))

# Initialize the sentiment analyzer once at module level; get_sentiment() reuses this shared instance.
sia = SentimentIntensityAnalyzer()

def get_sentiment(text):
    """Return the 'compound' polarity score the shared analyzer assigns to *text*."""
    return sia.polarity_scores(text)['compound']

# Column order of the DataFrame returned by web_scrape_clients.
_NEWS_COLUMNS = ["Client", "Header", "Snippet", "Journal", "Link", "Summary", "Sentiment"]

# Placeholder values for articles that cannot be summarised or scored.
_NO_SENTIMENT = "-"
_EMPTY_TEXT_SUMMARY = "Could not get the summary for this article: Subscription needed."
_ERROR_SUMMARY = "error"


def _summarize_article(link, timeout):
    """Fetch one article and return its ``(summary, sentiment)`` pair.

    Always returns a pair so that the Summary and Sentiment columns stay the
    same length as the list of links, whatever happens to a single article.
    """
    try:
        page = requests.get(link, timeout=timeout)
        article = newspaper.Article(link)
        # Hand newspaper the HTML fetched above instead of letting it download again.
        article.download(input_html=page.content)
        article.parse()
        if not article.text:
            return _EMPTY_TEXT_SUMMARY, _NO_SENTIMENT
        return summarize_text(article.text), get_sentiment(article.text)
    except Exception:
        # Best-effort boundary: a timeout, network error or parse failure on
        # one article must not abort the whole scrape, so the row is marked.
        return _ERROR_SUMMARY, _NO_SENTIMENT


def web_scrape_clients(clients, max_number_news=5, timeout=20):
    """Search Bing News for each client and summarise the top articles.

    Args:
        clients: Iterable of client names. Commas are stripped from each name
            and blank names are skipped.
        max_number_news: Maximum number of articles reported per client.
        timeout: Timeout in seconds applied to the search request and to each
            article download.

    Returns:
        A DataFrame with the columns Client, Header, Snippet, Journal, Link,
        Summary and Sentiment, one row per article. Link holds an HTML anchor
        (with the URL escaped). Summary is "error" and Sentiment is "-" for
        articles that could not be processed. An empty DataFrame with the same
        columns is returned when nothing was found for any client.
    """
    news_dfs = []

    for client in clients:
        display_name = client.replace(",", "")
        if not display_name.strip():
            continue

        # URL-encode the name so characters such as "&" or "+" survive the query string.
        url = f"https://www.bing.com/news/search?q={quote_plus(display_name)}+-msn"
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException:
            # Search failed for this client; carry on with the remaining ones.
            continue
        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.content, "html.parser")
        anchors = soup.select(".t_t a.title")
        num_news = max(0, min(len(anchors), max_number_news))
        if num_news == 0:
            continue
        anchors = anchors[:num_news]

        links = [a.get("href", "") for a in anchors]
        snippets = [s.text for s in soup.select(".snippet")][:num_news]
        # The snippet selector is independent of the title selector; pad so
        # every column has exactly num_news entries.
        snippets += [""] * (num_news - len(snippets))

        # Only the articles that are actually reported get downloaded.
        results = [_summarize_article(link, timeout) for link in links]
        safe_links = [html.escape(link) for link in links]

        news_dfs.append(pd.DataFrame(
            {
                "Client": [display_name] * num_news,
                "Header": [a.text for a in anchors],
                "Snippet": snippets,
                "Journal": [a.get("data-author", "") for a in anchors],
                "Link": [f'<a href="{link}" target="_blank">{link}</a>' for link in safe_links],
                "Summary": [summary for summary, _ in results],
                "Sentiment": [sentiment for _, sentiment in results],
            },
            columns=_NEWS_COLUMNS,
        ))

    if not news_dfs:
        # pd.concat raises on an empty list; return an empty table instead.
        return pd.DataFrame(columns=_NEWS_COLUMNS)

    return pd.concat(news_dfs, ignore_index=True)


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


After you run the following cell, it will prompt you to type one or more client names (separated by commas) and press Enter. Once the table loads, you can click the small table icon on the right to read the summaries more easily.

In [None]:
def get_user_input():
    """Prompt for a comma-separated list of client names.

    Returns:
        The names with surrounding whitespace removed, in the order entered.
        Blank entries (from empty input, a trailing comma or a doubled comma)
        are dropped so they are never sent as searches.
    """
    raw = input("Enter a list of clients separated by commas: ")
    stripped = (part.strip() for part in raw.split(","))
    return [name for name in stripped if name]

def main():
    """Ask for client names, scrape their news and render the result table.

    The Link column already contains HTML anchors and is rendered as-is.
    Every other text cell comes from scraped third-party pages, so it is
    HTML-escaped before the table is displayed with ``escape=False``.
    """
    import html
    from IPython.display import display, HTML

    clients = get_user_input()
    final_news_df = web_scrape_clients(clients, max_number_news=5)

    # Escape on a copy so final_news_df keeps the raw scraped text.
    display_df = final_news_df.copy()
    for column in display_df.columns:
        if column == "Link":
            continue
        display_df[column] = display_df[column].map(
            lambda value: html.escape(value) if isinstance(value, str) else value
        )

    # Display the DataFrame with clickable hyperlinks
    display(HTML(display_df.to_html(escape=False)))
    # To keep the results, save them with:
    #   final_news_df.to_csv('news_data.csv', index=False)

# Start the interactive workflow only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Enter a list of clients separated by commas: Hiscox, Beazley


  final_news_df["Client"] = final_news_df["Client"].str.replace("+", " ")


Unnamed: 0,Client,Header,Snippet,Journal,Link,Summary,Sentiment
0,Hiscox,Hiscox Ltd (LON:HSX) is a favorite amongst institutional investors who own 72%,"If you want to know who really controls Hiscox Ltd ( LON:HSX ), then you'll have to look at the makeup of its share registry.",YAHOO!Finance,https://finance.yahoo.com/news/hiscox-ltd-lon-hsx-favorite-093008386.html,"Institutional investors own over 50% of the company, so together than can probably strongly influence board decisions. We've identified 1 warning sign with Hiscox , and understanding them should be part of your investment process. If you would prefer discover what analysts are predicting in terms of future growth, do not miss this free report on analyst forecasts.",0.9959
1,Hiscox,Hiscox Ltd: Syndicates 33 and 6104 – results and estimates,"Syndicates 33 and 6104 - results and estimates Hamilton, Bermuda (10 November 2023) - Hiscox Ltd (LSE: HSX), the ...",Cbonds,https://cbonds.com/news/2578985/,"Full data on over 800,000 bonds and stocks worldwide Powerful bond screener Ratings from the top 3 global ratings agencies, plus over 70 local ones",0.5574
2,Hiscox,Hiscox Ltd HSX,Morningstar Quantitative Ratings for Stocks are generated using an algorithm that compares companies that are not under analyst coverage to peer companies that do receive analyst-driven ratings ...,Morningstar,https://www.morningstar.com/stocks/xlon/hsx/quote,"Companies with quantitative ratings are not formally covered by a Morningstar analyst, but are statistically matched to analyst-rated companies, allowing our models to calculate a quantitative moat, fair value, and uncertainty rating. Is it the right time to buy, sell, or hold? Start a free trial of Morningstar Investor to unlock exclusive ratings and continuous analyst coverage to help you decide if HSX is a good fit for your portfolio.",0.9628
3,Hiscox,Hiscox reports cyber-attacks continue to rise year-on-year,"UK insurer, Hiscox has released its seventh annual Cyber Readiness Report. The key finding from the report was that 53% of ...",enterprisetimes.co.uk,https://www.enterprisetimes.co.uk/2023/11/02/hiscox-reports-cyber-attacks-continue-to-rise-year-on-year/,"This website is using a security service to protect itself from online attacks. There are several actions that could trigger this block including submitting a certain word or phrase, a SQL command or malformed data. Please include what you were doing when this page came up and the Cloudflare Ray ID found at the bottom of this page.",0.745
4,Hiscox,Hiscox reports strong written premium growth,Specialist insurer Hiscox reported strong growth in group insurance contract written premiums (ICWP) on Wednesday in the ...,Sharecast,https://www.sharecast.com/news/news-and-announcements/hiscox-reports-strong-written-premium-growth--15248966.html,"The London Market segment of Hiscox saw net ICWP increase by an impressive 18.1% to reach $676.7m. Furthermore, Hiscox ILS funds delivered record performance, generating an increasing fee income for the group. Despite an active third quarter, aggregate natural catastrophe losses year-to-date remained within budget.",0.9985
5,Beazley,"Beazley cyber cat bond hits the market, $75m PoleStar Re Ltd.","Beazley, the London headquartered specialty insurance and reinsurance underwriter, has now entered the 144A catastrophe bond ...",Artemis,https://www.artemis.bm/news/beazley-cyber-cat-bond-hits-the-market-75m-polestar-re-ltd/,"Beazley had already sponsored three private cyber catastrophe bonds so far this year, but we were told that a renewal of the coverage would likely come in full 144a cat bond form, and this new PoleStar Re cyber cat bond appears to be that deal. With the AXIS Capital deal progressing and expected to be priced in the next day or so, this sends a strong signal that insurance-linked securities (ILS) investors are welcoming this new peril and ready to assess it, investing in it where appropriate for their portfolios. In addition, Gallagher Securities is structuring and offering this deal, so that’s a second broker involved as well (Aon is working on the AXIS deal), again very positive for the cyber cat bond market’s growth potential",0.0457
6,Beazley,Beazley: Holding(s) in Company,TR-1: Standard form for notification of major holdings 1. Issuer Details ISIN GB00BYQ0JC66 Issuer Name BEAZLEY PLC UK or Non-UK Issuer UK 2. Reason for Notification An acquisition or disposal of ...,Cbonds,https://cbonds.com/news/2573855/,"Full data on over 800,000 bonds and stocks worldwide Powerful bond screener Ratings from the top 3 global ratings agencies, plus over 70 local ones",0.5574
7,Beazley,Beazley publishes Q3 financials,Beazley has unveiled its financials for the nine-month period ending Sept. 30. The specialist insurance group underscored robust financial performance with insurance written premiums climbing by 9% to ...,insurancebusinessmag,https://www.insurancebusinessmag.com/asia/news/breaking-news/beazley-publishes-q3-financials-465876.aspx,"In the cyber risk segment, despite a moderate rate decrease in 2023, the current pricing levels are considered sufficient, particularly against the backdrop of the significant rate rises that have occurred since 2019. While the US mid-market shows promise for growth, competition has intensified, particularly in the SME space, leading to a more moderate growth rate in the US. However, the company has seen substantial growth in other regions where market penetration rates are lower.",0.8126
8,Beazley,Beazley takes advantage of “exceptional” property market in 9M 2023,Specialist insurer Beazley has reported insurance written premium growth of 9% to $4.3 billion for the first nine months of ...,reinsurancene,https://www.reinsurancene.ws/beazley-takes-advantage-of-exceptional-property-market-in-9m-2023/,"In Cyber Risks, premiums rose 4% to $872 million, although there’s been a moderate rate decrease during 2023. Natural catastrophe losses have so far been within the margins held in its reserves for such events. “With sustained discipline and agility in our underwriting I look forward to reporting a strong profit at year end.”",0.9873
9,Beazley,Beazley flags strong year after solid third quarter,"Specialist insurer Beazley announced a positive trading performance for the first nine months of 2023 on Tuesday, with a 9% ...",Sharecast,https://www.sharecast.com/news/market-pulse/beazley-flags-strong-year-after-solid-third-quarter--15229850.html,"It also saw growth in its cyber, marine, political risks and contingency lines of business. Cox described the insurance business as cyclical, adding that market conditions were rapidly evolving. “With sustained discipline and agility in our underwriting, I look forward to reporting a strong profit at year-end.”",0.9926
