In [19]:
import yfinance as yf
from bs4 import BeautifulSoup
import requests

In [2]:
def get_stock_sector(stock_symbol):
    """Look up the sector and industry recorded for a ticker symbol.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.Ticker`` (e.g. ``"AAPL"``).

    Returns:
        tuple: ``(sector, industry)``. When a field is missing from the
        ticker's ``info`` mapping, a human-readable placeholder string is
        returned in its place. When the lookup itself fails, the error is
        printed and ``(None, None)`` is returned.
    """
    try:
        info = yf.Ticker(stock_symbol).info
        sector = info.get('sector', 'Sector information not available')
        industry = info.get('industry', 'Industry information not available')
        return sector, industry
    except Exception as e:
        print("Error:", e)
        # Always hand back a 2-tuple: callers unpack the result as
        # `sector, industry = get_stock_sector(...)`, and a bare None would
        # turn every lookup failure into a TypeError at the call site.
        return None, None


In [3]:
# Ask for a ticker and report whatever sector/industry data could be found.
stock_symbol = input("Enter Stock Symbol: ")

# Tolerate a helper that signals failure with a bare None as well as one that
# returns a (sector, industry) pair, so unpacking can never raise TypeError.
lookup_result = get_stock_sector(stock_symbol)
sector, industry = lookup_result if lookup_result is not None else (None, None)

if sector or industry:
    # At least one field is available; substitute a message for the other.
    print("Stock:", stock_symbol)
    print("Sector:", sector or "Failed to retrieve sector information.")
    print('Industry:', industry or "Failed to retrieve industry information")
else:
    print("Failed to retrieve sector information.")

Stock: AAPL
Sector: Technology
Industry: Consumer Electronics


In [4]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def loading_data(url):
    """Load a single URL through ``UnstructuredURLLoader``.

    Args:
        url: Address of the page to load.

    Returns:
        Whatever ``UnstructuredURLLoader.load()`` produces for that one URL.
    """
    url_loader = UnstructuredURLLoader(urls=[url])
    return url_loader.load()

In [6]:
def scraping_article(url, timeout=10):
    """Download a page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host, which hangs the notebook kernel.

    Returns:
        str: The text of every ``<p>`` element, joined with single spaces.
        Empty string when the page has no paragraphs.

    Raises:
        requests.RequestException: On connection failure, timeout, or an HTTP
            error status (4xx/5xx), so an error page is never summarised as
            if it were the article.
    """
    # NOTE(review): this User-Agent is a template placeholder; some sites may
    # reject it. Confirm the intended value before relying on this scraper.
    headers = {
        'User-Agent': 'Your User Agent String',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    # The previous split(' ') / ' '.join(...) round trip returned the same
    # string unchanged, so the paragraphs are joined directly.
    return ' '.join(paragraph.text for paragraph in soup.find_all('p'))

In [8]:
def find_url(keyword, timeout=10):
    """Search Google News for a keyword and return the result links.

    Args:
        keyword: Free-text search phrase.
        timeout: Seconds to wait for the search request before giving up.

    Returns:
        list[str]: Target URLs taken from ``/url?q=...`` redirect links found
        inside ``div.Gx5Zad`` elements, in page order. Empty when no such
        links are present in the response.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    from urllib.parse import quote_plus, unquote

    # quote_plus turns spaces into '+' (as before) and also escapes characters
    # such as '&' or '#' that would otherwise corrupt the query string.
    search_query = quote_plus(keyword)
    link = f"https://www.google.com/search?q={search_query}&tbm=nws"
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(link, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html5lib')

    # NOTE(review): 'Gx5Zad' is a class name from Google's generated markup and
    # may change without notice; an empty result list is the likely symptom.
    links = []
    for div_tag in soup.find_all('div', class_='Gx5Zad'):
        a_tag = div_tag.find('a')
        href = a_tag.get('href') if a_tag else None
        if href and href.startswith('/url?q='):
            target = href.split('/url?q=')[1].split('&sa=')[0]
            # The redirect parameter is percent-encoded; decode it so the
            # returned value is a directly usable URL.
            links.append(unquote(target))
    return links
    

In [9]:
def to_chunks(data):
    """Split a text string into pieces with ``RecursiveCharacterTextSplitter``.

    Args:
        data: The text to split.

    Returns:
        The result of ``split_text`` using ``chunk_size=3000`` and
        ``chunk_overlap=50``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=50)
    return splitter.split_text(data)

In [10]:
import os
from dotenv import load_dotenv

In [11]:
# Run python-dotenv's loader first, then read the Hugging Face token from the
# process environment (None when the variable is not defined).
load_dotenv()
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')


In [12]:
# Mirror the token under both environment variable names used in this notebook.
# os.environ only accepts strings, so assigning a missing (None) token would
# raise an opaque TypeError; skip the export and say why instead.
if api_token:
    os.environ['HF_TOKEN'] = api_token
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = api_token
else:
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; continuing without a Hugging Face token.")

In [13]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import BartTokenizer, BartForConditionalGeneration


In [14]:
# def load_distilbert_model(model_name="sshleifer/distilbart-cnn-12-6"):
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
#     return tokenizer, model

def load_bart_model(model_name="facebook/bart-large-cnn"):
    """Load the BART tokenizer and conditional-generation model by name.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        tuple: ``(tokenizer, model)``, with the tokenizer loaded first.
    """
    return (
        BartTokenizer.from_pretrained(model_name),
        BartForConditionalGeneration.from_pretrained(model_name),
    )


In [15]:
def summarize_text(tokenizer, model, text, max_chunk_length, summary_max_length, min_length=200):
    """Summarise one piece of text with a seq2seq model using beam search.

    Args:
        tokenizer: Callable tokenizer that returns a mapping containing
            ``"input_ids"`` and provides ``decode``.
        model: Model object providing ``generate``.
        text: The text to summarise.
        max_chunk_length: Maximum number of input tokens; longer input is
            truncated. Capped to ``model.config.max_position_embeddings``
            when the model exposes that attribute.
        summary_max_length: Upper bound on the generated summary length.
        min_length: Lower bound on the generated summary length (default 200,
            the value previously hard-coded). Capped to
            ``summary_max_length`` so the two bounds cannot contradict.

    Returns:
        str: The decoded first generated sequence, special tokens removed.
    """
    # Feeding more tokens than the model has position embeddings for fails
    # inside the model, so never truncate to a length above that limit.
    position_limit = getattr(getattr(model, "config", None), "max_position_embeddings", None)
    if isinstance(position_limit, int) and position_limit > 0:
        max_chunk_length = min(max_chunk_length, position_limit)

    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=summary_max_length,
        min_length=min(min_length, summary_max_length),
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [16]:
def summarize_article(url, model_name="facebook/bart-large-cnn", max_chunk_length=1024, summary_max_length=800):
    """Scrape an article and condense it with two summarisation passes.

    The article text is split into chunks and each chunk is summarised. The
    joined chunk summaries are then split and summarised once more, and the
    second-pass summaries are joined into the final text.

    Args:
        url: Address of the article to summarise.
        model_name: Model identifier passed to ``load_bart_model``.
        max_chunk_length: Token limit passed to ``summarize_text`` for each
            chunk. Defaults to 1024 rather than the former 3000.
            NOTE(review): 1024 is assumed to be the input size of the default
            BART checkpoint; confirm if a different model is used.
        summary_max_length: Maximum length passed to ``summarize_text`` for
            each generated summary.

    Returns:
        str: The final summary, or an empty string when no text could be
        extracted from the page.
    """
    article_text = scraping_article(url)
    chunks = to_chunks(article_text)
    if not chunks:
        # Nothing to summarise; the result would be "" anyway, so avoid
        # loading the model at all.
        return ""

    tokenizer, model = load_bart_model(model_name)

    def _summarize_all(pieces):
        # Summarise each piece in order and join the results with spaces.
        return " ".join(
            summarize_text(tokenizer, model, piece, max_chunk_length, summary_max_length)
            for piece in pieces
        )

    first_pass = _summarize_all(chunks)
    # Second pass: re-chunk with the same splitter as the first pass instead
    # of slicing raw characters, which could cut a word in half at a boundary.
    return _summarize_all(to_chunks(first_pass))

In [17]:
# Summarise the first news result for the keyword. The search can come back
# empty (for example when the request is blocked or the page markup changes),
# so check before indexing rather than failing with IndexError.
search_results = find_url('elon musk')
if search_results:
    url = search_results[0]
    summary = summarize_article(url)
    print(summary)
else:
    print("No news articles found for the given keyword.")


Former Union minister Rajeev Chandrasekhar has pushed back against Elon Musk's comments on electronic voting machines (EVMs) Musk had suggested they should be eliminated due to the potential risk of being hacked by humans or AI, even if the risk is minimal. ChandraseKhar countered Musk’s view, stating it might apply to the US and other regions where standard computing platforms are used to build "Internet-connected voting machines" He asserted that this is not the case in India, where EVMs are custom-designed, secure, and isolated from any network or media. The entrepreneur-turned-politician also offered to give a tutorial to the SpaceX boss demonstrating the robustness of EVMs designed in India. During the recently concluded marathon Lok Sabha election, the Opposition had alleged that there was a possibility of the ruling BJP tampering with EVM to turn poll outcomes in their favour. In response, Chief Election Commissioner RajeeV Kumar assured that EVM are "100% safe"
