In [1]:
import yfinance as yf
import requests
from bs4 import BeautifulSoup

In [17]:
def get_stock_sector(stock_symbol):
    """Look up the sector and industry of a ticker via yfinance.

    Args:
        stock_symbol: Ticker symbol passed to ``yf.Ticker`` (e.g. ``"TSLA"``).

    Returns:
        A ``(sector, industry)`` tuple. When the ``info`` mapping lacks a
        key, the corresponding placeholder string is returned instead. If
        the lookup raises, the error is printed and ``(None, None)`` is
        returned, so callers can always unpack two values.
    """
    try:
        info = yf.Ticker(stock_symbol).info
        sector = info.get('sector', 'Sector information not available')
        industry = info.get('industry', 'Industry information not available')
        return sector, industry
    except Exception as e:
        print("Error:", e)
        # Return a pair (not a bare None): callers unpack the result as
        # `sector, industry = ...`, which would raise TypeError on None.
        return None, None


In [19]:
# Ask for a ticker and report its sector / industry classification.
stock_symbol = input("Enter Stock Symbol: ").strip()

# The helper may signal failure with a bare None instead of a
# (sector, industry) pair; normalise it so the unpacking cannot raise.
lookup = get_stock_sector(stock_symbol)
sector, industry = lookup if lookup else (None, None)

if sector or industry:
    # At least one field is available: print both lines, substituting a
    # failure message for whichever one is missing.
    print("Stock:", stock_symbol)
    print("Sector:", sector if sector else "Failed to retrieve sector information.")
    print('Industry:', industry if industry else "Failed to retrieve industry information")
else:
    print("Failed to retrieve sector information.")

Stock: TSLA
Sector: Consumer Cyclical
Industry: Auto Manufacturers


In [4]:
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
# def loading_data(url):
#     loader=UnstructuredURLLoader(urls=[
#     url,
#     ])
#     data=loader.load()
#     return data    

In [6]:
def scraping_article(url, timeout=30):
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the article to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The text of all paragraphs joined into one string, with every run of
        whitespace (spaces, tabs, newlines) collapsed to a single space.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            an error page is never mistaken for article content.
    """
    headers = {
        # NOTE(review): placeholder value; swap in a real browser User-Agent
        # if the target site rejects the request.
        'User-Agent': 'Your User Agent String',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    paragraph_texts = [paragraph.text for paragraph in soup.find_all('p')]

    # str.split() with no argument splits on any whitespace run, so the
    # re-join normalises spacing.
    return ' '.join(' '.join(paragraph_texts).split())

In [7]:
def to_chunks(data, chunk_size=3000, chunk_overlap=50):
    """Split a text string into overlapping chunks.

    Args:
        data: The text to split.
        chunk_size: Maximum chunk size handed to
            ``RecursiveCharacterTextSplitter`` (default 3000).
        chunk_overlap: Overlap between consecutive chunks handed to the
            splitter (default 50).

    Returns:
        A list of chunk strings; an empty list when ``data`` is empty or None.
    """
    if not data:
        # Nothing to split; avoid passing None to the splitter.
        return []
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(data)

In [8]:
import os
from dotenv import load_dotenv

In [9]:
# Load variables from a .env file, then read the Hugging Face Hub token from
# the process environment (None when the variable is not defined).
load_dotenv()
api_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')


In [10]:
# Expose the token under both environment variable names.
# os.environ only accepts str values, so assigning a missing (None) token
# would raise TypeError; skip the export and warn instead.
if api_token:
    os.environ['HF_TOKEN'] = api_token
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = api_token
else:
    print("Warning: HUGGINGFACEHUB_API_TOKEN is not set; continuing without a Hugging Face token.")

In [11]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import BartTokenizer, BartForConditionalGeneration


In [12]:
# def load_distilbert_model(model_name="sshleifer/distilbart-cnn-12-6"):
#     tokenizer = AutoTokenizer.from_pretrained(model_name)
#     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
#     return tokenizer, model

def load_distilbert_model(model_name="facebook/bart-large-cnn"):
    """Load the tokenizer and seq2seq model for ``model_name``.

    Despite the function name, the BART classes (``BartTokenizer`` and
    ``BartForConditionalGeneration``) are used for loading.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        BartTokenizer.from_pretrained(model_name),
        BartForConditionalGeneration.from_pretrained(model_name),
    )


In [13]:
def summarize_text(tokenizer, model, text, max_chunk_length, summary_max_length, summary_min_length=200):
    """Summarize one piece of text with a seq2seq model using beam search.

    Args:
        tokenizer: Callable tokenizer that returns a mapping with
            ``"input_ids"`` and provides ``decode``.
        model: Model exposing ``generate``.
        text: The text to summarize.
        max_chunk_length: Requested maximum number of input tokens. It is
            capped at ``tokenizer.model_max_length`` when the tokenizer
            reports one, so truncation keeps the input within what the model
            can encode.
        summary_max_length: ``max_length`` passed to ``generate``.
        summary_min_length: ``min_length`` passed to ``generate`` (default
            200, the previously hard-coded value); capped at
            ``summary_max_length`` so the two constraints cannot conflict.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # Callers may request more tokens than the model supports; truncating to a
    # larger limit would let over-long inputs reach the model unchanged.
    model_limit = getattr(tokenizer, "model_max_length", None)
    if isinstance(model_limit, int) and model_limit > 0:
        max_chunk_length = min(max_chunk_length, model_limit)

    min_length = max(0, min(summary_min_length, summary_max_length))

    inputs = tokenizer(text, return_tensors="pt", max_length=max_chunk_length, truncation=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=summary_max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


In [14]:
def summarize_article(url, model_name="facebook/bart-large-cnn"):
    """Scrape an article and produce a two-pass summary of it.

    Pass 1 summarizes every chunk of the scraped article; pass 2 summarizes
    the concatenation of those summaries to condense the result.

    Args:
        url: Address of the article to summarize.
        model_name: Model identifier forwarded to ``load_distilbert_model``.

    Returns:
        The final summary text; an empty string when the article yields no
        text chunks.
    """
    # BART checkpoints encode at most 1024 positions; asking the tokenizer to
    # truncate at a larger value would let over-long inputs reach the model.
    max_input_tokens = 1024
    summary_max_tokens = 800
    second_pass_chars = 3000  # character width of the second-pass slices

    chunks = to_chunks(scraping_article(url))
    if not chunks:
        # Nothing to summarize, so skip loading the model.
        return ""

    tokenizer, model = load_distilbert_model(model_name)

    first_pass = [
        summarize_text(tokenizer, model, chunk, max_input_tokens, summary_max_tokens)
        for chunk in chunks
    ]
    concatenated_summaries = " ".join(first_pass)

    # Second summarization pass: summarize the concatenated summaries,
    # sliced into fixed-width character windows.
    intermediate_chunks = [
        concatenated_summaries[start:start + second_pass_chars]
        for start in range(0, len(concatenated_summaries), second_pass_chars)
    ]
    final_summaries = [
        summarize_text(tokenizer, model, piece, max_input_tokens, summary_max_tokens)
        for piece in intermediate_chunks
    ]
    return " ".join(final_summaries)

In [15]:
# Example run: summarize one NDTV news article end to end and print the result.
url = "https://www.ndtv.com/india-news/pm-modi-to-release-rs-20-000-crore-to-farmers-during-varanasi-visit-5898394"
summary = summarize_article(url)
print(summary)

Prime Minister Narendra Modi will release over Rs 20,000 crore as the 17th instalment of the PM-KISAN scheme at an event in Varanasi on June 18. PM Modi will also grant certificates to more than 30,000 self-help groups designated as Krishi Sakhis. The scheme, launched in February 2019, transfers online an amount of Rs 6,000 annually in three equal instalments of Rs 2,000 directly into the bank accounts of farmers. The total amount transferred to the beneficiaries since the inception of the scheme will cross well over Rs 3.24 lakh crore, Minister of Agriculture and Farmers Welfare Shivraj Singh Chouhan said on Saturday. Around 2.5 crore farmers will join the event across the country through video. Several Union Ministers will also be visiting 50 Krishi Vigyan Kendras to interact with farmers and raise awareness about the various schemes of the Department. The event will also take place at the Baba Kashi Vishwanath Temple.


In [21]:
# a=scraping_article(url)
# print(a)