In [5]:
# Stock Market Trend Prediction using Sentiment Analysis

In [4]:
!pip install yfinance nltk pandas requests beautifulsoup4

import time
import random
import yfinance as yf
import pandas as pd
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import requests
from bs4 import BeautifulSoup

# Ensure the VADER lexicon is installed before any analyzer is constructed.
# NLTK keeps this package as a zip archive (the analyzer's default lexicon path
# points inside "sentiment/vader_lexicon.zip"), so probe for the archive itself;
# probing the bare directory name can miss an installed copy and re-trigger
# the download on every run.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    # Not found on any NLTK data path: fetch it once.
    nltk.download('vader_lexicon')

def get_nifty50_stocks():
    """Return a fresh dict mapping company names to ticker symbols.

    The table is hard-coded and lists ten companies; each symbol carries the
    ``.NS`` suffix and is later handed to ``yf.Ticker``. Insertion order of
    the returned dict follows the order of the pairs below.
    """
    name_symbol_pairs = (
        ("Reliance Industries", "RELIANCE.NS"),
        ("HDFC Bank", "HDFCBANK.NS"),
        ("ICICI Bank", "ICICIBANK.NS"),
        ("Infosys", "INFY.NS"),
        ("Tata Consultancy Services", "TCS.NS"),
        ("Larsen & Toubro", "LT.NS"),
        ("Hindustan Unilever", "HINDUNILVR.NS"),
        ("Bharti Airtel", "BHARTIARTL.NS"),
        ("State Bank of India", "SBIN.NS"),
        ("ITC", "ITC.NS"),
    )
    return dict(name_symbol_pairs)

def get_recent_stock_data(ticker_symbol, days=5):
    """Fetch the last ``days`` days of price history for ``ticker_symbol``.

    Args:
        ticker_symbol: Symbol passed straight to ``yf.Ticker``.
        days: Number of days requested, sent as the period string ``"<days>d"``.

    Returns:
        The history object returned by yfinance, or ``None`` when it is empty
        or when any exception is raised while fetching (best-effort lookup:
        failures are deliberately reported as "no data" rather than raised).
    """
    try:
        history = yf.Ticker(ticker_symbol).history(period=f"{days}d")
        if history.empty:
            return None
        return history
    except Exception:
        return None

def get_news_sentiment(company_name, max_headlines=10, timeout=8):
    """Return the mean VADER compound score of recent news headlines.

    Queries the Google News RSS search feed for ``"<company_name> stock"`` and
    averages the ``compound`` polarity score of the item titles.

    Args:
        company_name: Free-text company name; it is URL-encoded before being
            placed in the query string.
        max_headlines: Upper bound on the number of feed items considered.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The average compound score as a float, or ``0.0`` when the request
        fails, returns an HTTP error status, or yields no titled items.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # Encode the search phrase: a raw "&" (e.g. "Larsen & Toubro") would
    # otherwise end the ``q`` parameter early and search for the wrong text.
    query = quote_plus(f"{company_name} stock")
    url = f"https://news.google.com/rss/search?q={query}&hl=en-IN&gl=IN&ceid=IN:en"
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Best effort: an unreachable or failing feed counts as neutral.
        return 0.0

    # NOTE(review): Beautiful Soup's "xml" feature relies on an XML parser
    # (lxml) being installed; it is not in the pip line of this notebook.
    soup = BeautifulSoup(response.content, "xml")
    items = soup.find_all("item")[:max_headlines]
    titles = [item.title.text for item in items if item.title]

    if not titles:
        return 0.0

    sid = SentimentIntensityAnalyzer()
    scores = [sid.polarity_scores(title)['compound'] for title in titles]
    return sum(scores) / len(scores)

def predict_trend(company_name, ticker_symbol):
    """Combine news sentiment and recent price direction into a trend label.

    The score is ``2.0 * avg_sentiment + 0.5 * price_trend``, where
    ``price_trend`` is +1, -1 or 0 depending on whether the latest close is
    above, below, or equal to the close two rows earlier.

    Args:
        company_name: Name used for the news search.
        ticker_symbol: Symbol used for the price-history lookup.

    Returns:
        A ``(prediction, final_score)`` tuple. ``prediction`` is ``"UP"`` when
        the score exceeds 0.5, ``"DOWN"`` when it is below -0.3, and
        ``"NEUTRAL / SIDEWAYS"`` otherwise.
    """
    avg_sentiment = get_news_sentiment(company_name)
    stock_data = get_recent_stock_data(ticker_symbol)

    # Stays 0 when price data is missing or too short.
    price_trend = 0
    if stock_data is not None and len(stock_data) >= 3:
        closes = stock_data['Close']
        price_change = closes.iloc[-1] - closes.iloc[-3]
        if price_change > 0:
            price_trend = 1
        elif price_change < 0:
            price_trend = -1
        # A flat (zero) or NaN change fails both comparisons and contributes
        # no directional signal instead of being counted as a decline.

    final_score = (avg_sentiment * 2.0) + (price_trend * 0.5)

    if final_score > 0.5:
        prediction = "UP"
    elif final_score < -0.3:
        prediction = "DOWN"
    else:
        prediction = "NEUTRAL / SIDEWAYS"

    return prediction, final_score

def run_batch(stocks):
    """Score every company in ``stocks`` and tabulate the outcome.

    Args:
        stocks: Mapping of company name to ticker symbol.

    Returns:
        A DataFrame with the columns ``Company``, ``Ticker``, ``Prediction``
        and ``Score``, one row per entry of ``stocks`` in iteration order.
    """
    column_names = ["Company", "Ticker", "Prediction", "Score"]
    rows = []
    for name, symbol in stocks.items():
        label, value = predict_trend(name, symbol)
        rows.append([name, symbol, label, value])
        # Randomised pause after each company to space out the requests.
        pause_seconds = random.uniform(0.8, 1.5)
        time.sleep(pause_seconds)

    return pd.DataFrame(rows, columns=column_names)

# === RUN ONCE (no input, no CLI) ===
# Build the hard-coded company/ticker table and score every entry. The batch
# makes network requests and sleeps between companies, so this cell takes a
# while to finish.
stocks = get_nifty50_stocks()
results_df = run_batch(stocks)
# Trailing bare expression: shown as the cell's output when run in a notebook.
results_df




[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Unnamed: 0,Company,Ticker,Prediction,Score
0,Reliance Industries,RELIANCE.NS,UP,1.06812
1,HDFC Bank,HDFCBANK.NS,NEUTRAL / SIDEWAYS,0.01332
2,ICICI Bank,ICICIBANK.NS,NEUTRAL / SIDEWAYS,0.0296
3,Infosys,INFY.NS,UP,0.95018
4,Tata Consultancy Services,TCS.NS,NEUTRAL / SIDEWAYS,-0.2931
5,Larsen & Toubro,LT.NS,NEUTRAL / SIDEWAYS,0.28594
6,Hindustan Unilever,HINDUNILVR.NS,NEUTRAL / SIDEWAYS,-0.28632
7,Bharti Airtel,BHARTIARTL.NS,NEUTRAL / SIDEWAYS,-0.07618
8,State Bank of India,SBIN.NS,UP,0.96828
9,ITC,ITC.NS,DOWN,-0.37802
