## Web Scaping the Budget

In [5]:
import requests
from bs4 import BeautifulSoup

In [6]:
url = 'https://www.livemint.com/economy/budget-2024-25-key-highlights-live-updates-interim-budget-agriculture-infra-fiscal-deficit-nirmala-sitharaman-11706695416199.html'

In [7]:
response = requests.get(url)
soup = BeautifulSoup(response.content,'html.parser')
text = soup.get_text()

In [8]:
text

'\n\n\n\n          \n\n \n\n\n\n\nBudget 2024 Key Highlights: India to cut aid to the Maldives by 22% | Mint\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n  \n\n\n\n\n\n  \n\n\n\n\n \n\n\n\n\n\n\n\n          Explore    Sign in e-paper Subscribe Thursday, 8 February 2024              Stocks       Mutual Funds       News                     Home Budget-2024 News Markets Premium Money Mutual Fund Industry Companies Technology Web Stories Opinion Videos      All Companies  Technology Markets Money Mutual Funds Insurance Auto  Industry  Personal Finance        Hello User  Sign in      Sign Out     My Account My Account Subscribe  My Watchlist   Newsletters   Notifications      My Reads   For You   View Less -  View More +   Data Insights   Market Dashboard   Bullion      Gold   Silver     Fuel      Petrol   Diesel     Commodities      Gold   GoldM   Aluminum   Menthaoil   Silver   SilverMIC   GoldPetal   Natural Gas   Copper   Zinc   SilverM   CrudeOil   GoldGinuea   Lead  

### Text PreProcessing

In [9]:
import re

In [10]:
text = re.sub(r'[^\w\s.]','',text)
text = re.sub(r'\n',' ',text)
text = re.sub(r'\s+',' ',text)

In [None]:
text

sentences = text.split('.')

sentences

In [17]:
sentences = [sentence.strip() for sentence in sentences if sentence.strip()]

In [18]:
sentences

['Budget 2024 Key Highlights India to cut aid to the Maldives by 22 Mint Explore Sign in epaper Subscribe Thursday 8 February 2024 Stocks Mutual Funds News Home Budget2024 News Markets Premium Money Mutual Fund Industry Companies Technology Web Stories Opinion Videos All Companies Technology Markets Money Mutual Funds Insurance Auto Industry Personal Finance Hello User Sign in Sign Out My Account My Account Subscribe My Watchlist Newsletters Notifications My Reads For You View Less View More Data Insights Market Dashboard Bullion Gold Silver Fuel Petrol Diesel Commodities Gold GoldM Aluminum Menthaoil Silver SilverMIC GoldPetal Natural Gas Copper Zinc SilverM CrudeOil GoldGinuea Lead CryptoCurrencies View Less View More Top Sections News India News World News Economy Companies IPO News Startups Company Results Top Company Leader Money Personal Finance QA Opinion Markets Stock Markets Commodity News Mark To Market IPO News Live Blog Elections 2024 Assembly Elections 2023 Industry Bankin

## using the model(FinBert)

In [19]:
from transformers import BertTokenizer,BertForSequenceClassification

In [20]:
from transformers import pipeline

In [21]:
finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone',num_labels=3)

In [22]:
tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')

In [23]:
def createSentiment(sen):
    nlp = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)
    results = nlp(sen)
    return results

In [28]:
#truncating the sentences as in FinBert the maximum input sentences tokens is 512 tokens

In [25]:
truncatedSentences = [sentence[:512] for sentence in sentences]
sentimentResult = createSentiment(truncatedSentences)


In [31]:
#counting no of positive and negative news
pos = 0
neg = 0

In [32]:
for ind, result in enumerate(sentimentResult):
    print(f"Sentence {idx + 1}: {sentences[ind]}")
    print("Sentiment:", result)
    if result['label'] == 'Positive':
        pos += 1
    elif result['label'] == 'Negative':
        neg += 1
    print()

Sentence 277: Budget 2024 Key Highlights India to cut aid to the Maldives by 22 Mint Explore Sign in epaper Subscribe Thursday 8 February 2024 Stocks Mutual Funds News Home Budget2024 News Markets Premium Money Mutual Fund Industry Companies Technology Web Stories Opinion Videos All Companies Technology Markets Money Mutual Funds Insurance Auto Industry Personal Finance Hello User Sign in Sign Out My Account My Account Subscribe My Watchlist Newsletters Notifications My Reads For You View Less View More Data Insights Market Dashboard Bullion Gold Silver Fuel Petrol Diesel Commodities Gold GoldM Aluminum Menthaoil Silver SilverMIC GoldPetal Natural Gas Copper Zinc SilverM CrudeOil GoldGinuea Lead CryptoCurrencies View Less View More Top Sections News India News World News Economy Companies IPO News Startups Company Results Top Company Leader Money Personal Finance QA Opinion Markets Stock Markets Commodity News Mark To Market IPO News Live Blog Elections 2024 Assembly Elections 2023 Ind

### No of Positive and Negative News

In [33]:
print("No. of Positive News is: ",pos)
print("No. of Negative News is: ",neg)

No. of Positive News is:  83
No. of Negative News is:  13


## Text Summarization using BART

In [27]:
#summarization
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

model.safetensors: 100%|███████████████████| 1.63G/1.63G [03:09<00:00, 8.58MB/s]
generation_config.json: 100%|██████████████████| 363/363 [00:00<00:00, 3.00MB/s]


In [30]:
print(summarizer(text[:1024], max_length=130, min_length=30, do_sample=False))

[{'summary_text': 'Budget 2024 Key Highlights India to cut aid to the Maldives by 22 Mint Explore Sign in epaper Subscribe Thursday 8 February 2024 Stocks Mutual Funds News Home Budget2024 News Markets Premium Money Mutual Fund Industry Companies Technology Web Stories Opinion Videos All Companies Technology Markets Money Mutual Funds Insurance Auto Industry Personal Finance Hello User Sign in Sign Out My Account My Account Subscribe My Watchlist Newsletters Notifications My Reads For You View Less View More Data Insights Market Dashboard Bullion Gold Silver Fuel Petrol Diesel Commodities Gold GoldM Aluminum Menthaoil Silver SilverMIC GoldPetal Natural Gas Copper Zinc SilverM CrudeOil'}]
