In [1]:
# Sentiment analysis
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
import pandas as pd
from datasets import Dataset

# Check for CUDA
device = 0 if torch.cuda.is_available() else -1
print("✅ Using CUDA" if device == 0 else "⚠️ CUDA not available, using CPU")

# Load data
df = pd.read_csv("stock_data.csv")
df = df.dropna(subset=["summary", "headline"], how="all")
df["summary"] = df["summary"].fillna(df["headline"])

# Load FinBERT model and tokenizer
finbert = BertForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone", num_labels=3)
tokenizer = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")
finbert.eval()

if torch.cuda.is_available():
    finbert.cuda()

# Text prompts
prompts = [
    f"Financial Sentiment Analysis for {ticker}:\nNews: \"{text}\"\nDetermine if this news is financially positive, neutral, or negative."
    for text, ticker in zip(df["summary"], df["Ticker"])
]

# Tokenize in batches
batch_size = 32
all_probs = []

for i in range(0, len(prompts), batch_size):
    batch_prompts = prompts[i:i+batch_size]
    inputs = tokenizer(batch_prompts, padding=True, truncation=True, return_tensors="pt")

    if device == 0:
        inputs = {k: v.cuda() for k, v in inputs.items()}

    with torch.no_grad():
        outputs = finbert(**inputs)
        logits = outputs.logits
        probs = F.softmax(logits, dim=-1).cpu().numpy()

    all_probs.extend(probs)

# Add sentiment probability columns
df["FinBERT_neutral"] = [float(p[0]) for p in all_probs]
df["FinBERT_positive"] = [float(p[1]) for p in all_probs]
df["FinBERT_negative"] = [float(p[2]) for p in all_probs]

# Save the result with probability columns only
df.to_csv("stock_data.csv", index=False)

print("✅ Sentiment analysis with probabilities saved to 'sentiment_data_news_2yr.csv'.")


✅ Using CUDA


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


✅ Sentiment analysis with probabilities saved to 'sentiment_data_news_2yr.csv'.


In [3]:
import torch
import torch.nn as nn
import pandas as pd

# === 1. Define the GEN AI (Transformer) Model ===
class StockGenModel(nn.Module):
    def __init__(self, feature_dim=8):
        super().__init__()
        self.embedding = nn.Linear(feature_dim, 64)  # Embed each timestep
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=64, nhead=4),
            num_layers=2
        )
        self.fc = nn.Linear(64, 1)  # Predict 1 value (next day's close)

    def forward(self, x):
        x = self.embedding(x)  # (batch, seq_len, feature_dim) -> (batch, seq_len, 64)
        x = self.transformer(x)
        x = x.mean(dim=1)  # Global average pooling
        x = self.fc(x)
        return x.squeeze(-1)

# === 2. Load CSV ===
csv_path = "sentiment_analysis_aapl.csv"  # <<-- your CSV file path
feature_columns = ["Close_t-1", "Close_t-2", "Close_t-3", "Close_t-4", 
                   "Close_t-5", "Close_t-6", "Close_t-7", "Sentiment"]

df = pd.read_csv(csv_path)

# === 3. Map Sentiment Strings to Numbers ===
sentiment_mapping = {
    'positive': 2,
    'neutral': 1,
    'negative': 0
}
df['Sentiment'] = df['Sentiment'].map(sentiment_mapping)

# === 4. Preprocess Features ===
features = df[feature_columns].values
features = torch.tensor(features, dtype=torch.float32)  # (seq_len, feature_dim)
features = features.unsqueeze(0)  # add batch dimension => (1, seq_len, feature_dim)

# === 5. Create Model and Predict ===
model = StockGenModel(feature_dim=features.shape[-1])
model.eval()  # inference mode

with torch.no_grad():
    prediction = model(features)
    prediction_value = prediction.item()

# === 6. Print Prediction with Date ===
last_date = pd.to_datetime(df['Date'].values[-1])
predicted_date = last_date + pd.Timedelta(days=1)

print(f"Predicted Closing Price for {predicted_date.date()}: {prediction_value:.2f}")


Predicted Closing Price for 2025-02-01: 0.82




In [None]:
from huggingface_hub import login
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Smaller model: Flan-T5 Small
base_model = "google/flan-t5-base"

tokenizer = AutoTokenizer.from_pretrained(base_model)
model = AutoModelForSeq2SeqLM.from_pretrained(base_model).to(device)

model.eval()

# Prepare the input
previous_prices = df[["Close_t-1", "Close_t-2", "Close_t-3",
                      "Close_t-4", "Close_t-5", "Close_t-6", "Close_t-7"]].iloc[-1].tolist()

trend_input = f"""
You are a financial expert. Based on stock prices and a predicted value, classify the trend as Uptrend, Downtrend or Sideways.

Previous 7 closing prices: {previous_prices}
Predicted next price: {prediction_value:.2f}

Classify the stock trend strictly as one of these: Downtrend, Uptrend, Sideways.

Answer in exactly ONE WORD ONLY.
"""

# Tokenize and generate
inputs = tokenizer(trend_input, return_tensors="pt").to(device)

with torch.no_grad():  # prevent gradient tracking, saves memory
    outputs = model.generate(
        **inputs,
        max_new_tokens=3,
        temperature=0.1,
        do_sample=False,             # deterministic output
        use_cache=True               # faster generation
    )

trend = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"Predicted Trend: {trend}")




Predicted Trend: Uptrend


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import re

# Load Flan-T5 Base
model_name = "google/flan-t5-base"

device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
flan_model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

flan_model.eval()

# Prepare past prices
past_prices = df[["Close_t-1", "Close_t-2", "Close_t-3",
                  "Close_t-4", "Close_t-5", "Close_t-6", "Close_t-7"]].iloc[-1].tolist()
sentiment_label = df["Sentiment"].iloc[-1]

# Optional: Convert numeric sentiment back to string
inv_sentiment_map = {v: k for k, v in sentiment_mapping.items()}
sentiment_str = inv_sentiment_map.get(sentiment_label, "neutral")

# Prepare input prompt (more explicit and detailed)
confidence_input = (
    f"As a financial expert, you are tasked with predicting the confidence level of a stock trend continuing. "
    f"Here are the details: \n"
    f"Trend: {trend}\n"
    f"Sentiment: {sentiment_str}\n"
    f"Past 7-Day Closing Prices: {past_prices}\n"
    f"Please output a confidence score as a percentage (0-100%). "
    f"Respond only with a number followed by '%', e.g., '75%'"
)

# Tokenize and generate
inputs = tokenizer(confidence_input, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = flan_model.generate(
        **inputs,
        max_new_tokens=30,  # Allow more space for model to generate
        temperature=0.5,  # Experiment with slightly higher temperature for diversity
        top_p=0.9,  # Try top_p sampling for more structured output
        do_sample=True,
        use_cache=True
    )

generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(f"Generated Text: {generated_text}")

# Extract confidence value (check if the match is better)
confidence_match = re.search(r"(\d{1,3}(\.\d+)?)", generated_text)  # Handle decimal percentages

if confidence_match:
    confidence_value = float(confidence_match.group(1))  # Convert string to float
    confidence_percentage = confidence_value * 100  # Convert to percentage
    confidence = f"{confidence_percentage:.2f}"  # Format as a percentage with two decimals
else:
    confidence = "0"  # Default to 0% if no match is found

print(f"Confidence Score: {confidence}%")


In [33]:

import numpy as np

# === Extract last 7 days of closing prices ===
past_prices = df[["Close_t-1", "Close_t-2", "Close_t-3",
                  "Close_t-4", "Close_t-5", "Close_t-6", "Close_t-7"]].iloc[-1].values

# === Calculate volatility: standard deviation as a simple proxy ===
volatility_value = np.std(past_prices)

print(f"Volatility Estimate (std dev): {volatility_value:.4f}")


Volatility Estimate (std dev): 6.9671


In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

# Load the T5 model and tokenizer
model_name = "google/flan-t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
recommendation_model = T5ForConditionalGeneration.from_pretrained(model_name)

# Check if CUDA is available and set the device accordingly
device = "cuda" if torch.cuda.is_available() else "cpu"

# Move the model to the selected device (GPU if available)
recommendation_model = recommendation_model.to(device)

# Map sentiment integer to readable string
inv_sentiment_map = {v: k for k, v in sentiment_mapping.items()}
sentiment_str = inv_sentiment_map.get(df["Sentiment"].iloc[-1], "neutral")

# Construct a more optimized and direct prompt for the model
recommendation_input = (
    f"Given the stock trend '{trend}', confidence '{confidence}%', "
    f"and sentiment '{sentiment_str}', provide a stock trading recommendation. "
    f"Only respond with one of the following: 'buy', 'sell', or 'hold'. "
    f"Make your recommendation based on the trend, sentiment, and confidence provided."
)

# Tokenize input and move input tensors to the selected device (GPU if available)
input_ids = tokenizer(recommendation_input, return_tensors="pt").input_ids.to(device)

# Generate recommendation
output_ids = recommendation_model.generate(input_ids, max_new_tokens=20)
recommendation = tokenizer.decode(output_ids[0], skip_special_tokens=True)

print(f"📈 Stock Recommendation: {recommendation}")


📈 Stock Recommendation: sell


In [40]:
# Final Output as JSON ===
import json

result = {
    "stock": df.iloc[-1]["Ticker"] if "Ticker" in df.columns else "Invalid Ticker",
    "predicted_price": f"${prediction_value:.2f}",
    "trend": trend,
    "confidence": f"{confidence}%",
    "recommendation": recommendation,
    "volatility_estimate": f"{volatility_value:.4f}"
}

json_output = json.dumps(result, indent=4)
print(json_output)



{
    "stock": "AAPL",
    "predicted_price": "$0.82",
    "trend": "uptrend",
    "confidence": "5.00%",
    "recommendation": "sell",
    "volatility_estimate": "6.9671"
}
