In [37]:
!pip install requests vaderSentiment python-dotenv sqlalchemy finnhub-python psycopg2-binary




In [38]:
import os
import requests
from datetime import datetime, timezone
from sqlalchemy import create_engine, text
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from dotenv import load_dotenv
import finnhub

# Load environment variables (python-dotenv reads a local .env file if present)
load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")
FINNHUB_API_KEY = os.getenv("FINNHUB_API_KEY")
GNEWS_API_KEY = os.getenv("GNEWS_API_KEY")

# Fail fast with an explicit exception instead of `assert`: assertions are
# stripped when Python runs with -O, and naming the missing variables makes the
# problem much quicker to diagnose. Only names are reported, never values.
_missing_env = [
    name
    for name, value in (
        ("DATABASE_URL", DATABASE_URL),
        ("FINNHUB_API_KEY", FINNHUB_API_KEY),
        ("GNEWS_API_KEY", GNEWS_API_KEY),
    )
    if not value
]
if _missing_env:
    raise RuntimeError("Missing environment variables: " + ", ".join(_missing_env))

# Setup: shared database engine, Finnhub API client and VADER analyzer
engine = create_engine(DATABASE_URL)
finnhub_client = finnhub.Client(api_key=FINNHUB_API_KEY)
vader = SentimentIntensityAnalyzer()

def analyze_sentiment(text):
    """Score a piece of text with the module-level VADER analyzer.

    Note: inside this function the parameter ``text`` shadows the ``text``
    helper imported from SQLAlchemy; that helper is not used here.

    Args:
        text: The string to analyze (callers pass a news headline).

    Returns:
        A 4-tuple ``(score, label, model, confidence)``:
        - ``score``: VADER's ``compound`` value.
        - ``label``: ``'POS'`` if score > 0.05, ``'NEG'`` if score < -0.05,
          otherwise ``'NEU'``.
        - ``model``: always the string ``'VADER'``.
        - ``confidence``: ``abs(score)``, used as a rough proxy since VADER
          itself does not report a confidence.
    """
    compound = vader.polarity_scores(text)['compound']

    if compound > 0.05:
        polarity = 'POS'
    elif compound < -0.05:
        polarity = 'NEG'
    else:
        polarity = 'NEU'

    return compound, polarity, 'VADER', abs(compound)

def insert_sentiment(data_dict):
    """Upsert a single sentiment record into the ``sentiment_data`` table.

    The row is inserted inside a transaction opened with ``engine.begin()``.
    When a row with the same ``(ticker, date, headline)`` already exists, only
    ``sentiment_score``, ``sentiment_label``, ``article_url``, ``confidence``
    and ``created_at`` are overwritten; the other columns keep their stored
    values. (The conflict target is presumably backed by a unique constraint
    on those three columns -- verify against the table definition.)

    Args:
        data_dict: Mapping that supplies every bind parameter named in the
            statement (``ticker``, ``source``, ``api_vendor``, ``date``,
            ``headline``, ``sentiment_score``, ``sentiment_label``,
            ``article_url``, ``author``, ``platform_tags``, ``model_used``,
            ``confidence``, ``created_at``).
    """
    # Build the parameterized statement first; values are bound by name at
    # execution time rather than interpolated into the SQL text.
    upsert_stmt = text("""
            INSERT INTO sentiment_data (
                ticker, source, api_vendor, date, headline, sentiment_score,
                sentiment_label, article_url, author, platform_tags,
                model_used, confidence, created_at
            )
            VALUES (
                :ticker, :source, :api_vendor, :date, :headline, :sentiment_score,
                :sentiment_label, :article_url, :author, :platform_tags,
                :model_used, :confidence, :created_at
            )
            ON CONFLICT (ticker, date, headline)
            DO UPDATE SET
                sentiment_score = EXCLUDED.sentiment_score,
                sentiment_label = EXCLUDED.sentiment_label,
                article_url = EXCLUDED.article_url,
                confidence = EXCLUDED.confidence,
                created_at = EXCLUDED.created_at;
        """)
    with engine.begin() as connection:
        connection.execute(upsert_stmt, data_dict)

def fetch_finnhub_sentiment(ticker, from_date='2024-06-01'):
    """Fetch Finnhub company news for ``ticker``, score each headline, and store it.

    Every news item with a non-empty headline is scored with
    ``analyze_sentiment`` and upserted via ``insert_sentiment``. This is a
    best-effort operation: any exception raised while fetching or storing is
    caught and reported with ``print`` instead of propagating, which also means
    an error part-way through stops processing of the remaining items.

    Args:
        ticker: Symbol to query, e.g. ``'NVDA'``.
        from_date: Start of the news window as a ``'YYYY-MM-DD'`` string.
            The window ends at today's date.

    Returns:
        None.
    """
    try:
        news = finnhub_client.company_news(
            ticker,
            _from=from_date,
            to=datetime.today().strftime('%Y-%m-%d'),
        )
        for item in news:
            headline = item.get('headline')
            if not headline:
                # Nothing to score without a headline.
                continue
            score, label, model, conf = analyze_sentiment(headline)
            data = {
                'ticker': ticker,
                'source': item.get('source', 'Finnhub'),
                'api_vendor': 'Finnhub',
                # Convert the epoch timestamp in UTC. Without an explicit tz,
                # fromtimestamp() uses the machine's local timezone, so the
                # stored date would vary with where the notebook runs and
                # could disagree with the UTC `created_at` below.
                'date': datetime.fromtimestamp(item['datetime'], tz=timezone.utc).date(),
                'headline': headline,
                'sentiment_score': score,
                'sentiment_label': label,
                'article_url': item.get('url'),
                # The publisher name is stored as the author for this vendor.
                'author': item.get('source'),
                'platform_tags': 'Finnhub',
                'model_used': model,
                'confidence': conf,
                'created_at': datetime.now(timezone.utc).isoformat()
            }
            insert_sentiment(data)
        print(f"✅ Finnhub: Inserted sentiment records for {ticker}")
    except Exception as e:
        print(f"❌ Error fetching Finnhub data for {ticker}: {e}")

def fetch_gnews_sentiment(ticker, max_articles=10, timeout=10):
    """Fetch GNews search results for ``ticker``, score each title, and store it.

    Every returned article with a non-empty title is scored with
    ``analyze_sentiment`` and upserted via ``insert_sentiment``.

    Args:
        ticker: Search term sent to GNews as the ``q`` parameter, e.g. ``'NVDA'``.
        max_articles: Value sent as the ``max`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        None.

    Raises:
        Exception: If GNews responds with a non-200 status code.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Pass the query via `params` so requests URL-encodes the values; a search
    # term containing characters such as '&' or spaces would otherwise corrupt
    # the query string.
    response = requests.get(
        "https://gnews.io/api/v4/search",
        params={
            "q": ticker,
            "token": GNEWS_API_KEY,
            "lang": "en",
            "max": max_articles,
        },
        timeout=timeout,
    )

    if response.status_code != 200:
        raise Exception(f"GNews API failed: {response.status_code} - {response.text}")

    articles = response.json().get("articles", [])
    for article in articles:
        title = article.get("title")
        if not title:
            # Mirror the Finnhub path: nothing to score without a headline.
            continue
        score, label, model, conf = analyze_sentiment(title)
        data = {
            "ticker": ticker,
            # `source` may be absent or null; fall back to the vendor name.
            "source": (article.get("source") or {}).get("name", "GNews"),
            "api_vendor": "GNews API",
            # Only the leading YYYY-MM-DD part of the timestamp is kept.
            "date": datetime.strptime(article["publishedAt"][:10], "%Y-%m-%d").date(),
            "headline": title,
            "sentiment_score": score,
            "sentiment_label": label,
            "article_url": article["url"],
            "author": article.get("author") or "Unknown",
            "platform_tags": "GNews",
            "model_used": model,
            "confidence": conf,
            "created_at": datetime.now(timezone.utc).isoformat()
        }
        insert_sentiment(data)
    print(f"✅ GNews: Inserted sentiment records for {ticker}")

# Pull and store headline sentiment from each news vendor, in order
# (Finnhub first, then GNews), for every ticker of interest.
tickers = ['NVDA']
for ticker in tickers:
    for _fetch in (fetch_finnhub_sentiment, fetch_gnews_sentiment):
        _fetch(ticker)


✅ Finnhub: Inserted sentiment records for NVDA
✅ GNews: Inserted sentiment records for NVDA
