In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import yfinance as yf
from datetime import datetime, timedelta
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
from newsapi import NewsApiClient


In [None]:
FINBERT_MODEL="ProsusAI/finbert"
tokenizer=AutoTokenizer.from_pretrained(FINBERT_MODEL)
finbert=AutoModelForSequenceClassification.from_pretrained(FINBERT_MODEL)
finbert.eval()


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
def get_finbert_sentiment(text):
    inputs=tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512
    )

    with torch.no_grad():
        outputs=finbert(**inputs)

    probs=softmax(outputs.logits.numpy()[0])
    sentiment_score=probs[2]-probs[0]  
    return float(sentiment_score)


In [None]:
NEWS_API_KEY = "d1801a0f067c43d9a6ca322e42617703"
newsapi = NewsApiClient(api_key=NEWS_API_KEY)

def get_daily_sentiment(stock_name):
    articles=newsapi.get_everything(
        q=stock_name,
        language="en",
        sort_by="relevancy",
        from_param=(datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    )["articles"]

    if len(articles) == 0:
        return 0.0

    docs=[]
    for a in articles:
        text = f"{a['title']} {a['description']} {a['content']}"
        docs.append(text)

    long_doc=" ".join(docs)
    return get_finbert_sentiment(long_doc)


In [None]:
def compute_features(df):
    df["Return"] = np.log(df["Close"] / df["Close"].shift(1))
    df["Volatility"] = df["Return"].rolling(10).std()

    delta=df["Close"].diff()
    gain=delta.clip(lower=0)
    loss=-delta.clip(upper=0)

    avg_gain=gain.rolling(14).mean()
    avg_loss=loss.rolling(14).mean()
    rs=avg_gain/avg_loss
    df["RSI"]=100 - (100/(1+rs))

    ema12 = df["Close"].ewm(span=12).mean()
    ema26 = df["Close"].ewm(span=26).mean()
    df["MACD"]=ema12 - ema26

    df=df.dropna()
    return df


In [None]:
class SentimentLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=64, num_layers=2):
        super().__init__()
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            dropout=0.2
        )
        self.fc=nn.Linear(hidden_dim, 1)

    def forward(self, x):
        h0=torch.zeros(2, x.size(0), 64)
        c0=torch.zeros(2, x.size(0), 64)
        out, _=self.lstm(x, (h0, c0))
        return self.fc(out[:, -1, :])


In [None]:
model=SentimentLSTM(input_dim=7)
model.load_state_dict(torch.load("lstm.pt"))
model.eval()


SentimentLSTM(
  (lstm): LSTM(7, 64, num_layers=2, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=64, out_features=1, bias=True)
)

In [18]:
def build_sequence(features, seq_len=60):
    return torch.tensor(features[-seq_len:], dtype=torch.float32).unsqueeze(0)


In [19]:
def get_trade_signal(lstm_prediction, sentiment_score, threshold=0.001):
    if lstm_prediction > threshold and sentiment_score > 0.2:
        return "BUY"
    elif lstm_prediction < -threshold and sentiment_score < -0.2:
        return "SELL"
    else:
        return "HOLD"


In [None]:
STOCKS={
    "AAPL": "Apple",
    "MSFT": "Microsoft",
    "GOOGL": "Google",
    "NVDA": "Nvidia",
    "TSLA": "Tesla"
}

report=[]

for ticker, name in STOCKS.items():
    print(f"Processing {ticker}...")

    df = yf.download(ticker, period="1y")
    df = compute_features(df)

    sentiment = get_daily_sentiment(name)
    df["Sentiment"] = sentiment

    base_features = df[[
    "Return",
    "Volatility",
    "RSI",
    "MACD",
    "Volume",
    "Sentiment"
]].values

# Dummy feature added since week 3 model was trained with 7 features
dummy_feature=np.zeros((base_features.shape[0], 1))

features=np.hstack([base_features, dummy_feature])


seq=build_sequence(features)
with torch.no_grad():
    pred = model(seq).item()

signal=get_trade_signal(pred, sentiment)

report.append([ticker, pred, sentiment, signal])


Processing AAPL...


[*********************100%***********************]  1 of 1 completed


Processing MSFT...


[*********************100%***********************]  1 of 1 completed


Processing GOOGL...


[*********************100%***********************]  1 of 1 completed


Processing NVDA...


[*********************100%***********************]  1 of 1 completed


Processing TSLA...


[*********************100%***********************]  1 of 1 completed


In [24]:
report_df=pd.DataFrame(
    report,
    columns=["Stock", "Predicted Return", "Sentiment", "Signal"]
)

report_df


Unnamed: 0,Stock,Predicted Return,Sentiment,Signal
0,TSLA,0.338183,0.624879,BUY
