In [1]:
import os
from urllib.parse import quote_plus, unquote

import requests
import yfinance as yf
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from scipy.special import softmax
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import BartTokenizer, BartForConditionalGeneration

In [2]:
# Load variables from a local .env file into the process environment, then
# expose the Hugging Face token under the HF_TOKEN name as well.
load_dotenv()
api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
if api_token:
    # HUGGINGFACEHUB_API_TOKEN already holds this value (it was just read from
    # the environment), so only the HF_TOKEN alias needs to be written.
    os.environ['HF_TOKEN'] = api_token
else:
    # os.environ only accepts strings; assigning None would raise TypeError.
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; continuing without a Hugging Face token.")

In [3]:
def get_stock_sector(stock_symbol):
    """Look up the sector and industry of a ticker through yfinance.

    Args:
        stock_symbol: Ticker symbol handed to ``yf.Ticker`` (e.g. ``"AAPL"``).

    Returns:
        A ``(sector, industry)`` tuple. If the lookup works but a field is
        missing from ``Ticker.info``, the corresponding placeholder string is
        returned in its place. If the lookup raises, the error is printed and
        ``(None, None)`` is returned, so callers can always unpack two values.
    """
    try:
        info = yf.Ticker(stock_symbol).info
        sector = info.get('sector', 'Sector information not available')
        industry = info.get('industry', 'Industry information not available')
        return sector, industry
    except Exception as e:
        print("Error:", e)
        # Return a pair rather than a bare None: callers unpack the result.
        return None, None

In [4]:
# Ask for a ticker and report its sector and industry.
stock_symbol = input("Enter Stock Symbol: ")
# Tolerate a bare None result as well as a (sector, industry) pair so the
# unpacking below can never raise a TypeError.
lookup = get_stock_sector(stock_symbol)
sector, industry = lookup if lookup is not None else (None, None)
if sector or industry:
    # At least one field is known; substitute a notice for whichever is missing.
    print("Stock:", stock_symbol)
    print("Sector:", sector if sector else "Failed to retrieve sector information.")
    print('Industry:', industry if industry else "Falied to retrive industry information")
else:
    print("Failed to retrieve sector information.")

Stock: AAPL
Sector: Technology
Industry: Consumer Electronics


In [5]:
def scraping_article(url, timeout=10):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The text of every ``<p>`` tag, in document order, joined by single
        spaces.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the server answers with an HTTP error status.
    """
    headers = {
        # NOTE(review): this is a placeholder value; some sites may reject it.
        'User-Agent': 'Your User Agent String',
    }
    # Without a timeout requests may wait forever on an unresponsive host.
    r = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of returning the text of an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    # A plain join suffices: splitting the joined string on ' ' and joining it
    # again with ' ' would reproduce exactly the same string.
    return ' '.join(paragraph.text for paragraph in soup.find_all('p'))

In [6]:
def find_url(keyword, timeout=10):
    """Search Google News for ``keyword`` and collect the result links.

    Args:
        keyword: Free-text search term. It is URL-encoded before being placed
            in the query string, so characters such as ``&`` or ``#`` are safe.
        timeout: Seconds to wait for the search page before giving up.

    Returns:
        A list of article URLs, in page order, taken from anchors whose
        ``href`` is a ``/url?q=...`` redirect. The list is empty when nothing
        matches.
    """
    search_query = quote_plus(keyword)
    link = f"https://www.google.com/search?q={search_query}&tbm=nws"
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(link, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html5lib')

    redirect_prefix = '/url?q='
    links = []
    # NOTE(review): 'Gx5Zad' is presumably the CSS class of a result card in
    # Google's markup; it is not a stable API and may change without notice.
    for div_tag in soup.find_all('div', class_='Gx5Zad'):
        a_tag = div_tag.find('a')
        if not a_tag:
            continue
        href = a_tag.attrs.get('href', '')
        if not href.startswith(redirect_prefix):
            continue
        # Keep what sits between the redirect prefix and the '&sa=' tracking
        # parameter, then undo the percent-encoding applied to the target URL
        # while it was embedded as a query parameter.
        target = href.split(redirect_prefix)[1].split('&sa=')[0]
        links.append(unquote(target))
    return links

In [7]:
def to_chunks(data):
    """Split ``data`` into a list of text chunks.

    The splitter is configured with ``chunk_size=3000`` and
    ``chunk_overlap=50``; the result of its ``split_text`` call is returned
    unchanged.
    """
    splitter_options = {"chunk_size": 3000, "chunk_overlap": 50}
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(data)

In [8]:
def load_bart_model(model_name="facebook/bart-large-cnn"):
    """Load the BART tokenizer and generation model for ``model_name``.

    Returns:
        A ``(tokenizer, model)`` tuple, both obtained via ``from_pretrained``.
    """
    # Tokenizer first, model second: the same order the tuple is returned in.
    loaders = (BartTokenizer, BartForConditionalGeneration)
    return tuple(loader.from_pretrained(model_name) for loader in loaders)

In [9]:
def summarize_text(tokenizer, model, text, max_chunk_length, summary_max_length, summary_min_length=200):
    """Generate a summary of ``text`` with a seq2seq model.

    Args:
        tokenizer: Callable tokenizer that returns ``input_ids`` and offers
            ``decode``.
        model: Model exposing ``generate``.
        text: The passage to summarise.
        max_chunk_length: Upper bound, in tokens, for the encoded input. It is
            capped at ``tokenizer.model_max_length`` when the tokenizer
            reports one.
        summary_max_length: ``max_length`` passed to ``generate``.
        summary_min_length: ``min_length`` passed to ``generate``; capped at
            ``summary_max_length`` so the two never contradict each other.

    Returns:
        The decoded summary string, with special tokens removed.
    """
    # Do not encode more tokens than the tokenizer says the model accepts;
    # NOTE(review): for facebook/bart-large-cnn this limit should be 1024.
    model_limit = getattr(tokenizer, "model_max_length", None)
    if model_limit:
        max_chunk_length = min(max_chunk_length, model_limit)

    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so generation does not have to infer it.
        attention_mask=inputs.get("attention_mask"),
        max_length=summary_max_length,
        min_length=min(summary_min_length, summary_max_length),
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [10]:
def summarize_article(url, model_name="facebook/bart-large-cnn"):
    """Scrape the article at ``url`` and summarise it in two passes.

    The article text is split with ``to_chunks`` and each chunk is summarised.
    The chunk summaries are then joined, split again with ``to_chunks`` and
    summarised a second time.

    Args:
        url: Address of the article to summarise.
        model_name: Checkpoint name forwarded to ``load_bart_model``.

    Returns:
        The second-pass summaries joined by single spaces, or an empty string
        when the page produced no text chunks.
    """
    chunks = to_chunks(scraping_article(url))
    if not chunks:
        # Nothing to summarise, so skip loading the model altogether.
        return ""

    tokenizer, model = load_bart_model(model_name)

    first_pass = " ".join(
        summarize_text(tokenizer, model, chunk, 3000, 800) for chunk in chunks
    )

    # Second pass: re-split with the same splitter used for the article rather
    # than slicing at fixed character offsets, which would cut words in half.
    second_pass = [
        summarize_text(tokenizer, model, piece, 3000, 800)
        for piece in to_chunks(first_pass)
    ]
    return " ".join(second_pass)

In [11]:
# Pick a news article for the search term and summarise it.
links = find_url('elon musk')
if not links:
    raise RuntimeError("No news links were found for the search term.")
# Use the second result as before, falling back to the first when the search
# returned only one link (indexing [1] directly would raise IndexError).
url = links[1] if len(links) > 1 else links[0]
summary = summarize_article(url)
print(summary)

Dubai Police in November 2019 hinted that it could add Tesla Cybertruck to its impressive fleet of cars. The police already included Bugattis, Aston Martins, and Porsches. Elon Musk, co-founder and CEO of Tesla, soon dropped a comment on the post. “This is awesome and should come as no surprise. Dubai makes wonderful decisions,’ wrote X user Cindys, while another called it “Smart and wise choice” “Awesome. Need them in Chicago,” said a third, while a fourth said it was “Fantastic seeing the powerful Dubai Police add Cybert truck! Elon Musk’s electric truck is going across the globe and joined the most innovative police force! Bravo!” joined a fifth individual. ” Combining cutting-edge technology with sustainability sets a great example for modern policing,“ posted X user Sheikh Aaqib Satar. ‘This is a great decision.


In [12]:
def senti_model(model_name="cardiffnlp/twitter-roberta-base-sentiment"):
    """Load the tokenizer and sequence-classification model for ``model_name``.

    Returns:
        A ``(tokenizer, model)`` tuple, both obtained via ``from_pretrained``.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )

In [13]:
def find_senti(news_texts, max_length=512):
    """Score the sentiment of ``news_texts`` with the model from ``senti_model``.

    Args:
        news_texts: Text to classify.
        max_length: Maximum number of tokens fed to the classifier; longer
            input is truncated. NOTE(review): 512 is assumed to match the
            input window of the default RoBERTa checkpoint - confirm if the
            model is changed.

    Returns:
        A dict with the softmax probabilities under ``'neg'``, ``'neu'`` and
        ``'pos'`` (taken from logits 0, 1 and 2 respectively) and a
        ``'polarity'`` entry equal to the probabilities weighted by -1, 0
        and +1.
    """
    tokenizer, model = senti_model()
    # Truncate explicitly: summaries can be longer than the classifier accepts,
    # and an over-long sequence makes the forward pass fail.
    encoded = tokenizer(news_texts, return_tensors='pt', truncation=True, max_length=max_length)
    output = model(**encoded)
    scores = softmax(output[0][0].detach().numpy())

    labels = ('neg', 'neu', 'pos')
    weights = {'neg': -1, 'neu': 0, 'pos': 1}
    senti_dict = dict(zip(labels, scores))
    senti_dict['polarity'] = sum(senti_dict[label] * weights[label] for label in labels)
    return senti_dict


In [14]:
# Score the sentiment of the article summary and print the resulting dict
# (class probabilities plus the combined polarity value).
print(find_senti(summary))



{'neg': 0.0023002203, 'neu': 0.03798616, 'pos': 0.9597136, 'polarity': 0.9574133579153568}


In [15]:
# Show the URL of the article that was summarised.
print(url)

https://www.hindustantimes.com/trending/elon-musk-s-unmissable-reaction-to-dubai-police-s-addition-of-tesla-cybertruck-to-patrol-fleet-seen-it-yet-101718599827980.html
