In [1]:
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import time

In [2]:
url = 'https://markets.businessinsider.com/news/nvda-stock?p=1'

page = requests.get(url)

In [3]:
soup = BeautifulSoup(page.text, 'lxml')

In [4]:
articles = soup.find_all('div', class_ ='latest-news__story')

In [5]:
articles

[<div class="latest-news__story">
 <div><div class="latest-news__meta"><span class="latest-news__source">Seeking Alpha</span> <time class="latest-news__date" datetime="2/15/2025 5:09:53 PM">18h</time></div></div>
 <div><a class="news-link" href="https://seekingalpha.com/news/4408946-trending-stocks-in-a-mixed-week-for-wall-street?utm_source=businessinsider&amp;utm_medium=referral&amp;feed_item_type=news" rel="sponsored" target="_blank">Trending stocks in a mixed week for Wall Street</a></div>
 <div class="latest-news__clear"></div>
 </div>,
 <div class="latest-news__story">
 <div><div class="latest-news__meta"><span class="latest-news__source">Seeking Alpha</span> <time class="latest-news__date" datetime="2/15/2025 2:35:46 PM">20h</time></div></div>
 <div><a class="news-link" href="https://seekingalpha.com/news/4408554-notable-analyst-calls-this-week-snap-peloton-and-skyworks-stocks-among-top-picks?utm_source=businessinsider&amp;utm_medium=referral&amp;feed_item_type=news" rel="sponsor

In [6]:
for row in articles:
    print(row.text)
    


Seeking Alpha 18h
Trending stocks in a mixed week for Wall Street



Seeking Alpha 20h
Notable analyst calls this week: Snap, Peloton and Skyworks stocks among top picks



Seeking Alpha 2d
Tudor Investment's Q4 moves include buying Intel stake, trimming Nvidia



TipRanks 2d
Nvidia (NVDA) Reduces Stake in Chipmaker Arm Holdings



TipRanks 2d
Dell Is Close to Securing a Massive Contract with Elon Musk’s xAI



Seeking Alpha 2d
Dell rises after company nears $5B AI server deal with Musk's xAI - report



TipRanks 2d
AI Daily: Dell said to be near $5B server deal with xAI



TipRanks 2d
Dell near $5B server deal with Musk’s xAI, Bloomberg reports



TipRanks 2d
Airbnb, Moderna report Q4 results: Morning Buzz



TipRanks 2d
WeRide jumps 135% to $40.41 after Nvidia disclosed stake at end of 2024



TipRanks 2d
Carnage for SOUN and SERV as WRD Jumps 135% on Nvidia Holdings Update



TipRanks 2d
Notable open interest changes for February 14th



TipRanks 2d
SoundHound, Serve Robotics, Nano

In [7]:
import pandas as pd


data = []
page_number = 1

while True:
    try:
        url = f'https://markets.businessinsider.com/news/nvda-stock?p={page_number}'

        page = requests.get(url)    

        soup = BeautifulSoup(page.text, 'lxml')
        articles = soup.find_all('div', class_ ='latest-news__story')

        if not articles:
            # Break the loop if no articles are found on the current page
            break

        for row in articles:
            datetime = row.find('time', class_= 'latest-news__date').get('datetime')
            title = row.find('a', class_= 'news-link').text
            source = row.find('span', class_= 'latest-news__source').text
            link = row.find('a',class_= 'news-link').get('href')
            data.append([datetime,title,source,link])
        
        page_number +=1
    
    except Exception as e:
        print(e)
        break



df = pd.DataFrame(data, columns = [ 'DateTime', 'Title', 'Source', 'Link'])

df.to_csv('source.csv', index=False)


In [8]:
df

Unnamed: 0,DateTime,Title,Source,Link
0,2/15/2025 5:09:53 PM,Trending stocks in a mixed week for Wall Street,Seeking Alpha,https://seekingalpha.com/news/4408946-trending...
1,2/15/2025 2:35:46 PM,"Notable analyst calls this week: Snap, Peloton...",Seeking Alpha,https://seekingalpha.com/news/4408554-notable-...
2,2/14/2025 9:59:14 PM,Tudor Investment's Q4 moves include buying Int...,Seeking Alpha,https://seekingalpha.com/news/4408850-tudor-in...
3,2/14/2025 9:25:46 PM,Nvidia (NVDA) Reduces Stake in Chipmaker Arm H...,TipRanks,/news/stocks/nvidia-nvda-reduces-stake-in-chip...
4,2/14/2025 8:21:39 PM,Dell Is Close to Securing a Massive Contract w...,TipRanks,/news/stocks/dell-is-close-to-securing-a-massi...
...,...,...,...,...
10396,9/28/2016 3:10:26 PM,BRIEF-Nvidia and TomTom develop mapping system...,Reuters,/news/stocks/brief-nvidia-and-tomtom-develop-m...
10397,1/5/2016 6:03:35 AM,Nvidia says Volvo is first customer for new au...,Reuters,http://feeds.reuters.com/~r/reuters/businessNe...
10398,6/1/2015 5:31:48 AM,Nvidia says it sees revenue from cloud computi...,Reuters,http://feeds.reuters.com/~r/reuters/businessNe...
10399,4/23/2014 11:26:52 PM,Ex-Nvidia manager settles U.S. SEC charges on ...,Reuters,http://feeds.reuters.com/~r/reuters/businessNe...


In [9]:
from transformers import pipeline

pipe = pipeline("text-classification", model="ProsusAI/finbert", framework="pt")

  from .autonotebook import tqdm as notebook_tqdm





Device set to use cpu


In [10]:
def sentiment_analysis(text):
    data = pipe(text)
    return data


In [11]:
text = df['Title']

In [12]:
sentiment = []

for i in text:
        sentiment.append(sentiment_analysis(str(i)))

In [13]:
labels = [res[0]['label'] for res in sentiment]
scores = [res[0]['score'] for res in sentiment]

In [14]:
df['Label'] = labels
df['Score'] = scores

In [15]:
def sentiment_score(label):
    if label == 'positive':
        return 1
    if label == 'neutral':
        return 0
    if label == 'negative':
        return -1


In [16]:
df['Label'] = df['Label'].apply(sentiment_score)

In [17]:
df.to_csv('sentiment_score.csv', index = False)

In [18]:
# import pandas as pd 

# df = pd.read_csv('sentiment_score.csv')

In [19]:
# df.drop('Unnamed: 0', axis= 1, inplace=True)

In [20]:
df['Date'] = pd.to_datetime(df['DateTime'])
df['Date'] = df['Date'].dt.date

  df['Date'] = pd.to_datetime(df['DateTime'])


In [21]:
df.drop('DateTime', axis = 1, inplace=True)

In [22]:
df = df.set_index(df['Date'])

In [23]:
df.drop('Date', axis = 1, inplace=True)

In [24]:
def count_positive(value):

    if value == 1:
        return 1
    else:
        return 0

def count_neg(value):

    if value == -1:
        return 1
    else:
        return 0

def count_neu(value):

    if value == 0:
        return 1
    else:
        return 0


In [25]:
df['Positive'] = df['Label'].apply(count_positive)
df['Negative'] = df['Label'].apply(count_neg)
df['Neutral'] = df['Label'].apply(count_neu)

In [26]:
df_score = df.groupby(df.index).agg({
    'Score': 'mean',
    'Positive': 'sum',
    'Negative': 'sum',
    'Neutral': 'sum'
})

In [27]:
df_score['Total_Sentiment'] = df_score.apply(lambda row: row['Positive'] - row['Negative']/(row['Positive'] + row['Negative'] + row['Neutral']), axis=1)

In [28]:
df_score

Unnamed: 0_level_0,Score,Positive,Negative,Neutral,Total_Sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-01-06,0.891770,0,0,1,0.000000
2014-04-23,0.523616,0,1,0,-1.000000
2015-06-01,0.661305,0,0,1,0.000000
2016-01-05,0.580586,1,0,0,1.000000
2016-09-28,0.868531,0,0,1,0.000000
...,...,...,...,...,...
2025-02-11,0.840833,4,2,16,3.909091
2025-02-12,0.788741,5,4,8,4.764706
2025-02-13,0.772654,10,3,9,9.863636
2025-02-14,0.801732,6,1,7,5.928571


In [29]:
import mysql.connector as connector

In [30]:
try:
    conn = connector.connect(user='root', password='123456', host='localhost', port='3306', database='my_db')
except connector.Error as er:
    print('Error code:', er.errorno)
    print('Error msg:', er.msg)

mycursor = conn.cursor()

In [31]:

mycursor.execute('SELECT * FROM stock_price')
price =  mycursor.fetchall()

In [32]:
df_price = pd.DataFrame(price, columns=['Date', 'Open',	'High',	'Low',	'Close',	'Volume',	'Dividends',	'Stock Splits'])

In [33]:
df_price.set_index('Date', drop=True)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1999-01-22,0.040122,0.044779,0.035585,0.037615,2.714690e+09,0.0,0.0
1999-01-25,0.040600,0.042033,0.037615,0.041556,5.104800e+08,0.0,0.0
1999-01-26,0.042033,0.042869,0.037734,0.038331,3.432000e+08,0.0,0.0
1999-01-27,0.038451,0.039406,0.036301,0.038212,2.443680e+08,0.0,0.0
1999-01-28,0.038212,0.038451,0.037854,0.038092,2.275200e+08,0.0,0.0
...,...,...,...,...,...,...,...
2025-02-10,130.090000,135.000000,129.960000,133.570000,2.169890e+08,0.0,0.0
2025-02-11,132.580000,134.480000,131.020000,132.800000,1.789020e+08,0.0,0.0
2025-02-12,130.020000,132.240000,129.080000,131.140000,1.602790e+08,0.0,0.0
2025-02-13,131.560000,136.500000,131.170000,135.290000,1.974300e+08,0.0,0.0


In [34]:
df_merge = df_price.merge(df_score, on = 'Date')

In [35]:
df_merge.set_index('Date', drop=True, inplace=True)

In [36]:
df_merge

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Score,Positive,Negative,Neutral,Total_Sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2014-01-06,0.373259,0.377267,0.369722,0.374438,409492000.0,0.0,0.0,0.891770,0,0,1,0.000000
2014-04-23,0.447659,0.453344,0.446949,0.452159,261580000.0,0.0,0.0,0.523616,0,1,0,-1.000000
2015-06-01,0.539048,0.547035,0.535659,0.541710,372660000.0,0.0,0.0,0.661305,0,0,1,0.000000
2016-01-05,0.804687,0.815911,0.792976,0.802491,490272000.0,0.0,0.0,0.580586,1,0,0,1.000000
2016-09-28,1.639140,1.643820,1.621680,1.642590,319788000.0,0.0,0.0,0.868531,0,0,1,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-02-10,130.090000,135.000000,129.960000,133.570000,216989000.0,0.0,0.0,0.793545,6,4,15,5.840000
2025-02-11,132.580000,134.480000,131.020000,132.800000,178902000.0,0.0,0.0,0.840833,4,2,16,3.909091
2025-02-12,130.020000,132.240000,129.080000,131.140000,160279000.0,0.0,0.0,0.788741,5,4,8,4.764706
2025-02-13,131.560000,136.500000,131.170000,135.290000,197430000.0,0.0,0.0,0.772654,10,3,9,9.863636


In [None]:
mycursor.execute("CREATE TABLE price_sentiment (Date DATE, Open FLOAT(24), High FLOAT(24), Low FLOAT(24), Close FLOAT(24), Volume FLOAT(24), Dividends FLOAT(24), Stock_Splits FLOAT(24), Score FLOAT(24), Positive	INT, Negative INT,	Neutral INT, 	Total_Sentiment FLOAT(24))")

In [38]:
sql = "INSERT INTO  price_sentiment (Date, Open, High, Low, Close, Volume, Dividends, Stock_Splits, Score,	Positive,	Negative,	Neutral,	Total_Sentiment) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"

In [39]:
tuples_list = list(df_merge.itertuples(index=True, name=None))


In [40]:
mycursor.executemany(sql, tuples_list)

In [41]:
conn.commit()

mycursor.close()
conn.close()