In [1]:
import yfinance as yf

In [2]:
def get_stock_sector(stock_symbol):
    """Look up the sector and industry of a ticker via yfinance.

    Args:
        stock_symbol: Ticker symbol handed to ``yf.Ticker`` (e.g. "AAPL").

    Returns:
        A ``(sector, industry)`` tuple. When a key is missing from the
        ticker's info mapping, the matching placeholder string is returned
        in its place. If the lookup raises, the error is printed and
        ``(None, None)`` is returned so callers can always unpack two values.
    """
    try:
        info = yf.Ticker(stock_symbol).info
        sector = info.get('sector', 'Sector information not available')
        industry = info.get('industry', 'Industry information not available')
        return sector, industry
    except Exception as e:
        # Best-effort lookup: report the problem and signal failure.
        print("Error:", e)
        # A bare None here would break `sector, industry = get_stock_sector(...)`.
        return None, None


In [3]:
# Ask for a ticker and report its sector and industry.
stock_symbol = input("Enter Stock Symbol: ")

# The lookup helper may hand back a bare None on failure; normalise before
# unpacking so this cell never dies with "cannot unpack NoneType".
lookup = get_stock_sector(stock_symbol)
sector, industry = lookup if lookup is not None else (None, None)

if sector or industry:
    # At least one field is known; substitute a message for the missing one.
    print("Stock:", stock_symbol)
    print("Sector:", sector or "Failed to retrieve sector information.")
    print('Industry:', industry or "Failed to retrieve industry information")
else:
    print("Failed to retrieve sector information.")

Stock: AAPL
Sector: Technology
Industry: Consumer Electronics


In [4]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
def loading_data(url):
    """Load the content behind a single URL.

    Args:
        url: Address of the page to fetch.

    Returns:
        The result of ``UnstructuredURLLoader.load()`` for a one-element
        URL list containing ``url``.
    """
    return UnstructuredURLLoader(urls=[url]).load()

In [6]:
def to_chunks(data, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        The chunked documents produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(data)

In [7]:
import os
from dotenv import load_dotenv

In [8]:
# Load variables via python-dotenv, then read the Hugging Face token
# from the process environment (None when the variable is not set).
load_dotenv()
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')


In [9]:
# os.environ only accepts strings; assigning None raises an opaque
# "str expected, not NoneType". Fail with an actionable message instead.
if api_token is None:
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; define it in the environment or in a .env file."
    )

# Expose the token under both variable names.
os.environ['HF_TOKEN'] = api_token
os.environ['HUGGINGFACEHUB_API_TOKEN'] = api_token

In [10]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


In [11]:
def load_distilbert_model(model_name="sshleifer/distilbart-cnn-12-6"):
    """Load the tokenizer and seq2seq model for a checkpoint.

    NOTE(review): despite the function name, the default checkpoint is a
    DistilBART model; the name is kept so existing callers keep working.

    Args:
        model_name: Checkpoint identifier passed to both ``from_pretrained``
            calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSeq2SeqLM.from_pretrained(model_name),
    )


In [12]:
def summarize_text(tokenizer, model, text, max_chunk_length=1024, summary_max_length=150, summary_min_length=50):
    """Summarise one piece of text with a seq2seq model.

    Args:
        tokenizer: Callable tokenizer that also provides ``decode``.
        model: Model object providing ``generate``.
        text: Text to summarise.
        max_chunk_length: ``max_length`` given to the tokenizer; the call
            uses ``truncation=True``.
        summary_max_length: ``max_length`` given to ``generate``.
        summary_min_length: ``min_length`` given to ``generate``. Defaults to
            50 (the value previously hard-coded) and is capped at
            ``summary_max_length`` so the two limits never contradict.

    Returns:
        The decoded first generated sequence, with special tokens skipped.
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Forward the tokenizer's mask (if it produced one) instead of
        # leaving generate() to work without it.
        attention_mask=inputs.get("attention_mask"),
        max_length=summary_max_length,
        min_length=min(summary_min_length, summary_max_length),
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [13]:
def summarize_article(url, model_name="sshleifer/distilbart-cnn-12-6"):
    """Summarise the article at ``url`` chunk by chunk.

    The page is loaded and split into chunks, each chunk's ``page_content``
    is summarised on its own, and the partial summaries are joined with
    single spaces.

    Args:
        url: Address of the article to summarise.
        model_name: Checkpoint identifier passed to ``load_distilbert_model``.

    Returns:
        The space-joined chunk summaries as one string.
    """
    # Fetch and split first, so the model is only loaded once that succeeds.
    pieces = to_chunks(loading_data(url))
    tokenizer, model = load_distilbert_model(model_name)
    return " ".join(
        summarize_text(tokenizer, model, piece.page_content) for piece in pieces
    )

In [15]:
# Example run: summarise one Yahoo Finance article and print the joined
# chunk summaries. `url` and `summary` stay as notebook-level names.
url = "https://finance.yahoo.com/news/traders-fully-price-november-fed-123939499.html"
summary = summarize_article(url)
print(summary)

 US Treasuries trimmed gains after the Federal Reserve reduced the number of projected interest-rate cuts this year to just one. The central bank is at odds with the market, leaving the central bank at odds. Bloomberg's most recent article was published by David Gergen.  Fed officials left their main policy-rate steady at a two-decade-high of 5.25% to 5.5% on Wednesday. They forecast only a single quarter-point cut by the end of the year. Still, Treasury yields rebounded slightly from their lowest levels of the session after the Fed communications.  Treasury yields remained lower by five to nine basis points in late trading after declining by at least 12 basis points. The median projection for the longer-run interest rate climbed to 2.8% from 2.6% in March. The idea that the so-called neutral rate is probably higher than pre-pandemic has been gaining currency.  Consumer prices were flat in May versus April, slowing the annual rate of inflation to 3.3% from 3.4%. The median estimates of