In [4]:
!pip install transformers torch pandas requests
!pip install safetensors




In [8]:
import torch
import requests
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from safetensors.torch import load_file  # Import for loading .safetensors files
from google.colab import files  # Remove or modify if not using Colab
import os

###############################################################################
#                              MODEL CONFIGURATIONS                           #
###############################################################################
MODEL_CONFIGS = {
    "prajjwal1-bert-tiny": {
        "base_id": "prajjwal1/bert-tiny",
        "finetuned_path": "/content/prajjwal1_bert_tiny_model.safetensors",  # Update as per your path
        "finetuned_type": "safetensors",
        "untuned_label_map": {0: "Negative", 1: "Neutral", 2: "Positive"},  # Assumed Mapping
        "finetuned_label_map": {0: "Neutral", 1: "Bullish", 2: "Bearish"}
    },
    "cardiffnlp-twitter-roberta-base": {
        "base_id": "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "finetuned_path": "/content/roberta_model.pt",  # Update as per your path
        "finetuned_type": "pt",
        "untuned_label_map": {0: "Negative", 1: "Neutral", 2: "Positive"},
        "finetuned_label_map": {0: "Neutral", 1: "Bullish", 2: "Bearish"}
    },
    "Jedida-distilbert": {
        "base_id": "Jedida/tweet_sentiments_analysis_distilbert",
        "finetuned_path": "/content/Jedida_tweet_sentiments_analysis_distilbert_model.safetensors",  # Update as per your path
        "finetuned_type": "safetensors",
        "untuned_label_map": {0: "Negative", 1: "Neutral", 2: "Positive"},
        "finetuned_label_map": {0: "Neutral", 1: "Bullish", 2: "Bearish"}
    }
}

###############################################################################
#                           LOADING MODELS & TOKENIZERS                       #
###############################################################################
def load_model_and_tokenizer(
    base_id: str,
    finetuned_path: str = None,
    finetuned_type: str = None,
    use_finetuned: bool = False,
    device: str = "cpu",
    num_labels: int = 3
):
    """
    Loads either the untuned version of a model or its fine-tuned version,
    depending on `use_finetuned`. Handles .pt vs. .safetensors differences.

    **Important:** Always uses the original tokenizer from `base_id`.

    :param base_id: Model hub ID for the base pretrained model.
    :param finetuned_path: Local path to the fine-tuned weights.
    :param finetuned_type: "pt" or "safetensors" (needed for the loading method).
    :param use_finetuned: If True, load the fine-tuned weights. Otherwise, untuned.
    :param device: "cpu" or "cuda".
    :param num_labels: Number of sentiment classes (defaults to 3).
    :return: (tokenizer, model)
    """
    # Always load the tokenizer from the base model
    tokenizer = AutoTokenizer.from_pretrained(base_id)

    if not use_finetuned or not finetuned_path:
        # Load untuned model directly from Hugging Face
        print(f"Loading untuned model from {base_id}")
        model = AutoModelForSequenceClassification.from_pretrained(
            base_id, num_labels=num_labels
        )
    else:
        # Load fine-tuned weights
        print(f"Loading fine-tuned model from {finetuned_path}")
        model = AutoModelForSequenceClassification.from_pretrained(
            base_id, num_labels=num_labels
        )
        if finetuned_type == "pt":
            # Load the saved state_dict using torch.load
            state_dict = torch.load(finetuned_path, map_location=torch.device(device))
            model.load_state_dict(state_dict)
        elif finetuned_type == "safetensors":
            # Load the saved state_dict using safetensors
            state_dict = load_file(finetuned_path)
            model.load_state_dict(state_dict)
        else:
            raise ValueError("finetuned_type must be either 'pt' or 'safetensors'")

    model.to(device)
    return tokenizer, model

###############################################################################
#                             PREDICTION FUNCTION                             #
###############################################################################
def predict_sentiment(text, tokenizer, model, label_map, device="cpu"):
    """
    Predict the sentiment for a single text using the given tokenizer and model.
    Returns the predicted sentiment label as a string based on the provided label_map.

    :param text: The input text for sentiment analysis.
    :param tokenizer: Tokenizer associated with the model.
    :param model: The sequence classification model.
    :param label_map: Dictionary mapping label IDs to sentiment strings.
    :param device: Device to perform computation on ("cpu" or "cuda").
    :return: Predicted sentiment label as a string.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    pred_label_id = torch.argmax(logits, dim=1).item()

    return label_map.get(pred_label_id, "unknown")

###############################################################################
#                        FETCHING STOCKTWITS MESSAGES                         #
###############################################################################
def fetch_stocktwits(symbol, count=10):
    """
    Fetch recent messages from StockTwits API for a specific stock symbol.

    :param symbol: Stock symbol, e.g. "AAPL"
    :param count: Number of messages to fetch
    :return: List of dictionaries (id, text, created_at)
    """
    url = f"https://api.stocktwits.com/api/2/streams/symbol/{symbol}.json"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"
    }

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes
        data = response.json()
        messages = data.get("messages", [])
        return [
            {
                "id": msg["id"],
                "text": msg["body"],
                "created_at": msg["created_at"]
            }
            for msg in messages[:count]
        ]
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")  # e.g., 404 Not Found
    except Exception as err:
        print(f"An error occurred: {err}")
    return []

###############################################################################
#                                    MAIN                                     #
###############################################################################
def main():
    # Parameters
    symbol = "AAPL"  # Replace with your desired stock symbol
    count = 10       # Number of messages to fetch

    # GPU / CPU device setting
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # 1. Fetch messages
    messages = fetch_stocktwits(symbol, count=count)

    if not messages:
        print("No messages were retrieved.")
        return

    # 2. Prepare dataframe structure
    #    We'll add columns for each model's fine-tuned and untuned predictions
    columns = ["id", "text", "created_at"]
    df = pd.DataFrame(messages, columns=columns)

    # 3. For each model in MODEL_CONFIGS, get predictions for:
    #    (a) The untuned version
    #    (b) The fine-tuned version
    #    Then store these columns in the DataFrame.
    for key, cfg in MODEL_CONFIGS.items():
        base_id = cfg["base_id"]
        finetuned_path = cfg["finetuned_path"]
        finetuned_type = cfg["finetuned_type"]
        untuned_label_map = cfg["untuned_label_map"]
        finetuned_label_map = cfg["finetuned_label_map"]

        # -- (a) Load Untuned Model --
        print(f"\nLoading untuned model: {base_id}")
        untuned_tokenizer, untuned_model = load_model_and_tokenizer(
            base_id=base_id,
            use_finetuned=False,
            device=device
        )
        # Predict
        print(f"Predicting sentiments with untuned model: {key}")
        untuned_preds = [
            predict_sentiment(
                msg["text"], untuned_tokenizer, untuned_model, untuned_label_map, device=device
            )
            for msg in messages
        ]
        df[f"untuned_{key}"] = untuned_preds

        # -- (b) Load Fine-Tuned Model --
        print(f"Loading fine-tuned model: {finetuned_path}")
        ft_tokenizer, ft_model = load_model_and_tokenizer(
            base_id=base_id,
            finetuned_path=finetuned_path,
            finetuned_type=finetuned_type,
            use_finetuned=True,
            device=device
        )
        # Predict
        print(f"Predicting sentiments with fine-tuned model: {key}")
        finetuned_preds = [
            predict_sentiment(
                msg["text"], ft_tokenizer, ft_model, finetuned_label_map, device=device
            )
            for msg in messages
        ]
        df[f"finetuned_{key}"] = finetuned_preds

    # 4. Save results to CSV
    csv_filename = f"stocktwits_{symbol}_{count}_messages.csv"
    df.to_csv(csv_filename, index=False, encoding='utf-8-sig')  # utf-8-sig for better compatibility
    print(f"\nPredictions saved to {csv_filename}")

    # 5. (Optional) Preview CSV content
    print("\nFile content preview:")
    print(df.head())  # Display first few rows instead of the entire file

    # 6. (Optional) Download the CSV file to your local machine (Colab)
    try:
        files.download(csv_filename)
    except:
        print("Download functionality is not available in this environment.")

###############################################################################
#                                RUN SCRIPT                                   #
###############################################################################
if __name__ == "__main__":
    main()


Using device: cuda

Loading untuned model: prajjwal1/bert-tiny
Loading untuned model from prajjwal1/bert-tiny


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicting sentiments with untuned model: prajjwal1-bert-tiny
Loading fine-tuned model: /content/prajjwal1_bert_tiny_model.safetensors
Loading fine-tuned model from /content/prajjwal1_bert_tiny_model.safetensors


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-tiny and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predicting sentiments with fine-tuned model: prajjwal1-bert-tiny

Loading untuned model: cardiffnlp/twitter-roberta-base-sentiment-latest
Loading untuned model from cardiffnlp/twitter-roberta-base-sentiment-latest


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Predicting sentiments with untuned model: cardiffnlp-twitter-roberta-base
Loading fine-tuned model: /content/roberta_model.pt
Loading fine-tuned model from /content/roberta_model.pt


Some weights of the model checkpoint at cardiffnlp/twitter-roberta-base-sentiment-latest were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
  state_dict = torch.load(finetuned_path, map_location=torch.device(device))


Predicting sentiments with fine-tuned model: cardiffnlp-twitter-roberta-base

Loading untuned model: Jedida/tweet_sentiments_analysis_distilbert
Loading untuned model from Jedida/tweet_sentiments_analysis_distilbert
Predicting sentiments with untuned model: Jedida-distilbert
Loading fine-tuned model: /content/Jedida_tweet_sentiments_analysis_distilbert_model.safetensors
Loading fine-tuned model from /content/Jedida_tweet_sentiments_analysis_distilbert_model.safetensors
Predicting sentiments with fine-tuned model: Jedida-distilbert

Predictions saved to stocktwits_AAPL_10_messages.csv

File content preview:
          id                                               text  \
0  600611392             $AAPL $SPY $QQQ let the fun begin.....   
1  600611197  $AAPL Apple Stock Prediction 2025, 2026, 2030 ...   
2  600610880  $AAPL no they are not, and plton is not the ne...   
3  600610576  $AAPL $SPY $QQQ Biden&#39;s last CPA with fake...   
4  600610535  $AAPL ðŸŽ¯ðŸ’¯ðŸ‘‡Since alert - it is

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>