In [1]:
import time
import pandas as pd
from serpapi import GoogleSearch
from newspaper import Article
import openai
import os
from dotenv import load_dotenv


load_dotenv()
openai_api_key = os.getenv("OPENAI_KEY")
openai.api_key = openai_api_key


In [2]:

params = {
    "api_key": os.getenv("SERP_KEY"),
    "q": "business & politics in us",
    "engine": "bing_news",
    "first": 1,
    "count": 10,
    "qft": 'interval="9"',
}

search = GoogleSearch(params)
results = search.get_dict()

bing_news_results = []

page_limit = 5
page_count = 0

while "error" not in results and page_count < page_limit:
    bing_news_results.extend(results.get("organic_results", []))
    params["first"] += params["count"]
    page_count += 1
    search = GoogleSearch(params)
    results = search.get_dict()

df_bing_news_results = pd.DataFrame(bing_news_results)

In [3]:
urls = [result.get("link") for result in bing_news_results if "link" in result]

def scrape_article(url):
    article = Article(url)
    try:
        article.download()
        article.parse()
        return article.text
    except Exception as e:
        print(f"Failed to scrape {url}: {e}")
        return None

for url in urls:
    content = scrape_article(url)
    if content:
        df_bing_news_results.loc[df_bing_news_results["link"] == url, "content"] = (content)
    time.sleep(1)

df_bing_news_results.dropna(how="any", inplace=True)

Failed to scrape https://www.reuters.com/markets/us/us-political-churn-muddies-corporate-horizon-mike-dolan-2024-07-24/: Article `download()` failed with Status code 401 for url None on URL https://www.reuters.com/markets/us/us-political-churn-muddies-corporate-horizon-mike-dolan-2024-07-24/
Failed to scrape https://www.realclearpolitics.com/articles/2024/07/23/the_most_important_42_miles_in_american_politics_151314.html: Article `download()` failed with Status code 403 for url None on URL https://www.realclearpolitics.com/articles/2024/07/23/the_most_important_42_miles_in_american_politics_151314.html




In [4]:
df_bing_news_results

Unnamed: 0,title,link,snippet,source,date,thumbnail,content
6,Milwaukee RNC an 'unprecedented' moment combin...,https://www.usatoday.com/story/news/politics/2...,"""It was part hospitality, part politics, a big...",USA Today,4d,https://www.bing.com/th?id=OVFT.6cwGZegs5QGDP5...,"For four days, deep blue Milwaukee became a br..."
11,Employees Sue American Airlines Over “Socio-Po...,https://www.motherjones.com/politics/2024/07/a...,A class-action lawsuit against American Airlin...,Mother Jones,4d,https://serpapi.com/searches/66a0cd389f55b3ae6...,This story was originally published by Inside ...
15,The small business community’s crucial role in...,https://www.dailyherald.com/20240721/business/...,"Elections often are abrasive, divisive, and un...",Daily Herald,3d,https://serpapi.com/searches/66a0cd389f55b3ae6...,"Elections often are abrasive, divisive, and un..."
17,Speak up or keep quiet: When politics and busi...,https://www.businessobserverfl.com/news/2024/j...,Business owners are often taught to not speak ...,Business Observer,5d,https://www.bing.com/th?id=OVFT.JGHhQId_6JzwlN...,Politics used to be a third rail of sorts for ...
21,MTG and Lauren Boebert are incendiary lawmaker...,https://www.businessinsider.com/mtg-boebert-he...,A political expert told Politico that the Trum...,Business Insider,9d,https://serpapi.com/searches/66a0cd393d6919f97...,"Kleinfeld, a senior fellow at the Carnegie End..."
23,How does U.S. political violence affect global...,https://www.marketplace.org/2024/07/16/us-poli...,The assassination attempt against Donald Trump...,Marketplace,7d,https://serpapi.com/searches/66a0cd393d6919f97...,The International Monetary Fund’s released its...
24,"Investors React to Election Upheaval, Reassess...",https://www.nytimes.com/2024/07/22/us/politics...,Along with the political upheaval in the Unite...,The New York Times,2d,https://serpapi.com/searches/66a0cd393d6919f97...,Investors around the world are trying to gauge...
25,The Ohio Steel Town That Shaped J.D. Vance’s L...,https://www.nytimes.com/2024/07/16/us/jd-vance...,"His memoir, “Hillbilly Elegy,” told the down-a...",The New York Times,8d,https://serpapi.com/searches/66a0cd393d6919f97...,"Middletown, Ohio, a small city of tree-lined s..."
30,"Investors React to Election Upheaval, Reassess...",https://www.nytimes.com/2024/07/22/us/politics...,Along with the political upheaval in the Unite...,The New York Times,2d,https://www.bing.com/th?id=OVFT.OvaumUCksUSoZP...,Investors around the world are trying to gauge...
32,The Ohio Steel Town That Shaped J.D. Vance’s L...,https://www.nytimes.com/2024/07/16/us/jd-vance...,"His memoir, “Hillbilly Elegy,” told the down-a...",The New York Times,7d,https://www.bing.com/th?id=OVFT.Lx-wFGEgnOenqS...,"Middletown, Ohio, a small city of tree-lined s..."


In [5]:
def sentiment_analysis(text):
    prompt = (
        'For the following industry sectors, provide the sentiment (good or bad). '
        'If it is not related to the sector, mention the sentiment as (None): '
        '1. Technology industry 2. Business industry 3. Sports industry '
        '4. Healthcare industry 5. Transportation industry 6. Film industry 7. Fashion industry. '
        'If the context is there the response should look like (Good - statement) otherwise it has to just be (None)\n\n'
        f'"{text}"'
    )

    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=150,
        n=1,
        stop=None,
        temperature=0.7
    )

    response_text = response.choices[0].message.content
    
    return response_text

In [6]:
def sector_sentiment_split(response_text):
    
    sentiments = {}
    sectors = [
        "Technology industry", "Business industry", "Sports industry",
        "Healthcare industry", "Transportation industry", "Film industry", "Fashion industry"
    ]

    for sector in sectors:
        if sector in response_text:
            start_index = response_text.find(sector) + len(sector) + 1
            end_index = response_text.find('\n', start_index)
            if end_index == -1:
                end_index = len(response_text)
            sentiment_and_reason = response_text[start_index:end_index].strip()
            if sentiment_and_reason == "None":  # Check if the string is not empty
                sentiments[sector] = None
            else:
                sentiments[sector] = sentiment_and_reason

    for sector, sentiment_and_reason in sentiments.items():
        globals()[sector] = sentiment_and_reason

    return sentiments

In [7]:
# response_text = pd.DataFrame()
for idx, row in df_bing_news_results.iterrows():
    if pd.notna(row.get("content")):
        response_text = sentiment_analysis(row["content"])
        sentiments = sector_sentiment_split(response_text)
        for sector, sentiment in sentiments.items():
            df_bing_news_results.at[idx, sector] = sentiment


df_bing_news_results.fillna(value="None", inplace=True)


In [9]:

df_bing_news_results[[
    "Technology industry", "Business industry", "Sports industry",
    "Healthcare industry", "Transportation industry", "Film industry", "Fashion industry"
]]


Unnamed: 0,Technology industry,Business industry,Sports industry,Healthcare industry,Transportation industry,Film industry,Fashion industry
6,,,,,,,
11,,,,,,,
15,,,,,,,
17,- None,- Good (Business owners are expressing their p...,- None,- None,- None,- None,- None
21,,,,,,,
23,,,,,,,
24,- Bad (Taiwan’s main stock index dropped sharp...,- Mixed (Investors around the world are trying...,,,,,
25,,,,,,,
30,- (Bad - Taiwan’s main stock index dropped sha...,- (None),- (None),- (None),- (None),- (None),- (None)
32,,,,,,,
