In [2]:
from transformers import BartTokenizer, BartForConditionalGeneration

tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-base')

In [3]:
import requests
from bs4 import BeautifulSoup

In [4]:
class LLM:
    def __init__(self, tokenizer, model):
        self.tokenizer = tokenizer
        self.model = model
    
    def summarize(self, text):
        # Tokenize the input text and generate summary
        inputs = self.tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
        summary_ids = self.model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
        summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        return summary

# Initialize the LLM instance with the tokenizer and model
llm = LLM(tokenizer, model)

In [5]:
from textblob import TextBlob

def sentiment_analysis(text):
    blob = TextBlob(text)
    sentiment_polarity = blob.sentiment.polarity
    return sentiment_polarity

def subjectivity_analysis(text):
    blob = TextBlob(text)
    subjectivity = blob.sentiment.subjectivity
    return subjectivity

def relevance_scoring(summary, stock_name, domain_keywords):
    relevance_score = sum(1 for word in domain_keywords if word in summary) + (stock_name in summary)
    return relevance_score

def urgency_scoring(summary):
    # Look for time-sensitive words or phrases in the summary
    urgency_keywords = ['breaking', 'urgent', 'immediate']
    urgency_score = sum(1 for word in urgency_keywords if word in summary)
    return urgency_score

def mention_count(summary, stock_name, domain_keywords):
    # Count how many times the stock name and domain keywords appear in the summary
    count = sum(summary.count(word) for word in domain_keywords) + summary.count(stock_name)
    return count

def biasness_analysis(summary):
    # Placeholder function for biasness analysis
    # A real implementation would require a more complex approach or a specialized model
    pass

In [6]:
from langchain_community.document_loaders import WebBaseLoader

#Initialize the WebLoader
web_loader = WebBaseLoader()

# Function to summarize news articles and extract features (as previously defined)
def summarize_and_extract_features(news_url, stock_name, domain_keywords):
    # Initialize the WebLoader with the news URL
    loader = WebBaseLoader(news_url)
    
    # Load the news article using Langchain's WebLoader
    documents = loader.load()  # This returns a list of Document objects
    
    # Extract 'page_content' from each Document object
    article_text_list = [doc.page_content for doc in documents]
    
    # Join the list of strings into a single string
    article_text = ' '.join(article_text_list)
    
    
    # Summarize the article using the LLM (assuming 'llm' is your preloaded language model)
    summary = llm.summarize(article_text)
    
    # Extract features from the summary (implement these functions based on your LLM's capabilities)
    sentiment_score = sentiment_analysis(summary)
    subjectivity_score = subjectivity_analysis(summary)
    relevance_score = relevance_scoring(summary, stock_name, domain_keywords)
    urgency_score = urgency_scoring(summary)
    volume_of_mentions = mention_count(summary, stock_name, domain_keywords)
    biasness_score = biasness_analysis(summary)

    # Return the extracted features
    return {
        'sentiment_score': sentiment_score,
        'subjectivity_score': subjectivity_score,
        'relevance_score': relevance_score,
        'urgency_score': urgency_score,
        'volume_of_mentions': volume_of_mentions,
        'biasness_score': biasness_score,
        'summary':summary
    }


# List of article URLs
article_urls = [
    "https://economictimes.indiatimes.com/markets/stocks/news/market-trading-guide-hdfc-bank-nykaa-among-8-stock-recommendations-for-tuesday/stock-ideas/slideshow/111057719.cms?from=mdr",
    "https://economictimes.indiatimes.com/markets/stocks/news/building-up-a-stake-in-hdfc-bank-could-bring-a-windfall/articleshow/111071118.cms?from=mdr",
    "https://www.livemint.com/market/stock-market-news/dividend-stocks-bank-of-india-hdfc-amc-l-t-finance-tata-investment-corporation-to-trade-ex-dividend-on-june-18-11718620002998.html",
    "https://economictimes.indiatimes.com/markets/stocks/stock-liveblog/hdfc-life-insurance-company-share-price-live-updates-18-jun-2024/liveblog/111072122.cms"
    # Add more URLs here
]

# Feature extraction using Langchain's WebLoader
for url in article_urls:
    features = summarize_and_extract_features(url,"HDFC","Finance")
    print(features)


USER_AGENT environment variable not set, consider setting it to identify your requests.


{'sentiment_score': 0.2, 'subjectivity_score': 0.2, 'relevance_score': 8, 'urgency_score': 0, 'volume_of_mentions': 70, 'biasness_score': None, 'summary': 'summarize: Market Trading Guide: HDFC Bank, Nykaa among 8 stock recommendations for Tuesday - stock ideas | The Economic TimesBenchmarks Nifty23,501.10-65.91Precious Metal Gold (MCX) (Rs/10g.)71,614.00-972.0English EditionEnglish Editionहिन्दीગુજરાતીमराठीবাংলাಕ್ನಡമലയാ�'}
{'sentiment_score': 0.2, 'subjectivity_score': 0.2, 'relevance_score': 8, 'urgency_score': 0, 'volume_of_mentions': 60, 'biasness_score': None, 'summary': 'summarize: Building up a stake in HDFC Bank could bring a windfall - The Economic Times  Benchmarks Nifty23,472.80-94.21Precious Metal Gold (MCX) (Rs/10g.)72,751.00165.0English EditionEnglish Editionहिन्दीગુજરાતીमराठीবাংলাಕ್ನಡമലയാളംத�'}
{'sentiment_score': 0.0, 'subjectivity_score': 0.0, 'relevance_score': 5, 'urgency_score': 0, 'volume_of_mentions': 6, 'biasness_score': None, 'summary': 'summarize:  \ue61a \ue65