# Install/Import dependencies

In [None]:
!pip install google.generativeai

In [1]:
import yfinance as yf
import requests
from bs4 import BeautifulSoup
import pathlib
import textwrap
import requests
import os
import json
import pandas as pd
import numpy as np
import time

import google.generativeai as genai

from IPython.display import display
from IPython.display import Markdown
# from google.colab import userdata

import nltk
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk import tokenize
from nltk.sentiment.vader import SentimentIntensityAnalyzer

Load the Gemini Model and set up RapidAPI to pull data/news articles

In [2]:
#getting creating gemini model
def to_markdown(text):
  text = text.replace('•', '  *')
  return Markdown(textwrap.indent(text, '> ', predicate=lambda _: True))

GOOGLE_API_KEY='___'
X_RapidAPI_Key='___'

genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel('gemini-pro')

In [None]:
# RapidAPI NewsNow
url = "https://newsnow.p.rapidapi.com/newsv2"

payload = {
	"query": "Nvidia",
	"time_bounded": True,
	"from_date": "01/02/2024",
	"to_date": "03/06/2024",
	"location": "us",
	"language": "en",
	"page": 1
}
headers = {
	"content-type": "application/json",
	"X-RapidAPI-Key": X_RapidAPI_Key,
	"X-RapidAPI-Host": "newsnow.p.rapidapi.com"
}

responses = []
for i in range(1, 4):
	temp_payload = payload
	temp_payload['page'] = i
	responses.append(requests.post(url, json=temp_payload, headers=headers))

response = requests.post(url, json=payload, headers=headers)

# print(response.json())

View the format of the data collected

In [5]:
data = response.json()
news_list = None

if 'news' in data:
    news_list = data['news']
    for news_item in news_list:
        title = news_item['title']
        top_image = news_item.get('top_image', 'No top image found')
        images = news_item.get('images', [])
        about = news_item.get('short_description')
        url = news_item.get('url')
        text = news_item.get('text')

        print("Title:", title)
        print("Top Image:", top_image)
        print("Additional Images:", images)
        print("About: ", about)
        print("URL: ", url)
        # print("TXT: ", text)
        print("-------------")
else:
    print("No news found in the response.")

Title: Nvidia announces AI-powered health care 'agents' that outperform nurses — and cost $9 an hour
Top Image: https://a57.foxnews.com/static.foxbusiness.com/foxbusiness.com/content/uploads/2021/05/0/0/Nurse-iStock.jpg?ve=1&tl=1
Additional Images: ['https://a57.foxnews.com/static.foxbusiness.com/foxbusiness.com/content/uploads/2021/05/0/0/Nurse-iStock.jpg?ve=1&tl=1', 'https://static.foxnews.com/foxnews.com/content/uploads/2023/09/532x120-NEWSLETTER_BIZ-Rundown.png', 'https://a57.foxnews.com/static.foxbusiness.com/foxbusiness.com/content/uploads/2024/03/931/523/Nvidia-CEO-Jensen-Huang-Blackwell-GTC-2024.jpg?ve=1&tl=1', 'https://a57.foxnews.com/static.foxbusiness.com/foxbusiness.com/content/uploads/2021/05/931/523/Nurse-iStock.jpg?ve=1&tl=1', 'https://smetrics.foxbusiness.com/b/ss/%OMNITURE_ACCOUNT_ID%/1/H22.1--NS/1518822074763?pageName=technology:subsection:article&g=https://www.foxbusiness.com/technology/nvidia-announces-ai-powered-health-care-agents-outperform-nurses-cost-9-hour', 'h

## Analysis Method <br>
1: (outer level) feed paragraphs <br>
2: (inner level) feed sentences within a paragaph -> return polarity/compound of each sentence (make a list) <br>
3: average the polarity of sentences within the paragaph -> returns sentiment of the paragaph  <br>
4: average the polarity of paragraphs within the documnet -> returns sentiment of the document <br>
5: make threshold for selling and buying by analyzing compound/polarity ex: if 0.8 -> buy if -0.6 sell -0.3<->0.3 retain <br>


In [7]:
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\sanka\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

Store the data in an organized manner <br>
+ ```document structure -> document = {[url]: {'paragraphs': [], 'sentences': [[], [], ...]}}``` <br>
+ where ```url``` (key) is the url of the article, ```paragraphs``` (list) contains a list of all the paragraphs in the article, and ```sentences``` (list) contains a list for sentences of each paragraph. <br>
+ the index of each paragraph is same as the index of its list of sentences

In [8]:
documents = {}

for news_item in news_list:
    title = news_item['title']
    txt = news_item['text']
    url = news_item.get('url')

    # since url unique and only once, intialize directly
    documents[url] = {'paragraphs': txt.split("\n\n"), 'sentences': []}

    for paragraph in documents[url]['paragraphs']:
        documents[url]['sentences'].append(tokenize.sent_tokenize(paragraph))

View the stored structure

In [9]:
# for every article in document, viewing format (5 paragraphs, 5 sentences per paragraph for view simplicity)
for url in documents:
    print(url)
    for i, paragraph in enumerate(documents[url]['paragraphs'][:5]):
        print("\t", "Paragraph ->", paragraph)
        print("\t\t", "Sentences ->")
        for sentence in documents[url]['sentences'][i]:
            print("\t\t", sentence.strip())

    print("-"*80)


https://www.foxbusiness.com/technology/nvidia-announces-ai-powered-health-care-agents-outperform-nurses-cost-9-hour
	 Paragraph -> High-powered chipmaker Nvidia has teamed up with artificial intelligence health care company Hippocratic AI to develop generative AI "agents" that not only outperform human nurses on video calls but cost a lot less per hour.
		 Sentences ->
		 High-powered chipmaker Nvidia has teamed up with artificial intelligence health care company Hippocratic AI to develop generative AI "agents" that not only outperform human nurses on video calls but cost a lot less per hour.
	 Paragraph -> The two companies on Thursday announced their collaboration to build "empathetic health care agents" powered by Nvidia and trained on Hippocratic's health care-focused large language model (LLM) that are better able to form a human connection with patients through "super-low latency conversational reactions."
		 Sentences ->
		 The two companies on Thursday announced their collabora

In [11]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\sanka\AppData\Roaming\nltk_data...


True

# Perform additional Preprocessing for refined sentiment scoring

In [None]:
# Preprocess

# Calculate Polarity/Compound scores

In [12]:
# apply sentiment through nltk
# Initialize Vader sentiment analyzer
sid = SentimentIntensityAnalyzer()

for url in documents:
    document_sentiments = []

    for i, paragraph in enumerate(documents[url]['paragraphs']):
        paragraph_sentiments = [] # stores sentiment of every sentence in the paragraph

        for sentence in documents[url]['sentences'][i]:
            # calculate sentiment polarity of the sentence
            sentence_sentiment_score = sid.polarity_scores(sentence.strip())
            compound_score = sentence_sentiment_score['compound']
            paragraph_sentiments.append(compound_score)

        # calculate avg sentiment polarity for the paragraph
        if paragraph_sentiments:
            paragraph_sentiment = sum(paragraph_sentiments) / len(paragraph_sentiments)
            document_sentiments.append(paragraph_sentiment)

    if document_sentiments:
        document_sentiment = sum(document_sentiments) / len(document_sentiments)
        print(f"Document URL: {url}")
        print(f"Sentiment: {document_sentiment}")

Document URL: https://www.foxbusiness.com/technology/nvidia-announces-ai-powered-health-care-agents-outperform-nurses-cost-9-hour
Sentiment: 0.310884375
Document URL: https://www.aboutamazon.com/news/aws/amazon-aws-nvidia-collaboration
Sentiment: 0.3004503030303031
Document URL: https://nvidianews.nvidia.com/news/nvidia-accelerates-quantum-computing-exploration-at-australias-pawsey-supercomputing-centre
Sentiment: 0.42408333333333337
Document URL: https://nvidianews.nvidia.com/news/nvidia-announces-upcoming-events-for-financial-community-6900012
Sentiment: 0.08356363636363637
Document URL: https://www.crowdstrike.com/press-releases/crowdstrike-nvidia-generative-ai-collaboration/
Sentiment: 0.07590433333333334
Document URL: https://coe.gatech.edu/news/2024/04/georgia-tech-unveils-new-ai-makerspace-collaboration-nvidia
Sentiment: 0.30451666666666666
Document URL: https://azure.microsoft.com/en-us/blog/microsoft-and-nvidia-partnership-continues-to-deliver-on-the-promise-of-ai/
Sentiment: 