In [None]:
# DeepSeek-R1 Sentiment Scoring Pipeline using Ollama
# Full script version (can be used in .py file or adapted into a notebook)

import os
import pandas as pd
import numpy as np
import ollama
from tqdm import tqdm
import ast
import re


In [None]:

# === CONFIGURATION ===
# USE_GPU is handed to set_ollama_gpu_mode() by the main cell.
USE_GPU = True  # Toggle between CPU and GPU

# MODEL_NAME is the name of the output *column* that receives the scores
# (run_sentiment_on_full_df derives the reasoning column from it). It is not
# the Ollama model tag; that one is set inside get_sentiment().
MODEL_NAME = "deepseek_sentiment"

# Machine-specific defaults. They can be overridden without editing the
# notebook by exporting SENTIMENT_INPUT_PATH / SENTIMENT_SCORED_DIR before the
# kernel starts; when the variables are unset the original paths are used.
_DEFAULT_INPUT_PATH = r"/Users/seemablatif/Library/CloudStorage/GoogleDrive-seemab.latif@seecs.edu.pk/.shortcut-targets-by-id/1qHspO5MC2YvuRdtMkSxFIni55C_DBf69/FinRL/Data/Filtered_News/3K_2018_Summary_Replaced_5000_part1.csv"
_DEFAULT_SCORED_DIR = r"/Users/seemablatif/Library/CloudStorage/GoogleDrive-seemab.latif@seecs.edu.pk/.shortcut-targets-by-id/1qHspO5MC2YvuRdtMkSxFIni55C_DBf69/FinRL/Data/Input/Test_One"

INPUT_PATH = os.environ.get("SENTIMENT_INPUT_PATH", _DEFAULT_INPUT_PATH)  # CSV with the articles to score
SCORED_DIR = os.environ.get("SENTIMENT_SCORED_DIR", _DEFAULT_SCORED_DIR)  # Output directory for file
FAILED_LOG = os.path.join(SCORED_DIR, "failed_responses.csv")  # articles whose scoring failed every attempt


In [None]:

# === SET OLLAMA GPU/CPU MODE ===
def set_ollama_gpu_mode(use_gpu=True):
    """Write a ``[performance]`` section with an ``offload`` flag to ``~/.ollama/config``.

    The file is created (together with its parent directory) or overwritten
    in full; any previous content is discarded.

    Args:
        use_gpu: Flag whose lower-cased string form is written as the
            ``offload`` value (``True`` -> ``true``, ``False`` -> ``false``).

    NOTE(review): nothing in this file shows that the Ollama runtime reads
    ``~/.ollama/config`` -- confirm this setting actually has an effect.
    """
    cfg_file = os.path.expanduser("~/.ollama/config")
    cfg_dir = os.path.dirname(cfg_file)
    os.makedirs(cfg_dir, exist_ok=True)

    offload_value = str(use_gpu).lower()
    with open(cfg_file, "w") as handle:
        handle.write(f"[performance]\noffload = {offload_value}\n")

    mode_label = "GPU" if use_gpu else "CPU"
    print(f"Ollama GPU mode set to: {mode_label}")


In [None]:

# === LLM FUNCTION ===
def _parse_sentiment_reply(content):
    """Pull a validated ``(score, reason, raw_text)`` triple out of a model reply.

    Raises:
        ValueError: if the reply contains no ``{...}`` span, or no span parses
            to a dict with an integer ``score`` in 1..5 and a string ``reason``.
    """
    import json  # local import: json is not among the file's top-level imports

    # Reasoning models can prepend a <think>...</think> section that may itself
    # contain braces; only the text after the last closing tag is the answer.
    if "</think>" in content:
        content = content.rsplit("</think>", 1)[1]

    lazy = re.search(r'\{.*?\}', content, re.DOTALL)
    if not lazy:
        raise ValueError("No dictionary found in response")

    # The shortest match is tried first (original behaviour). The first-'{' to
    # last-'}' span is a fallback for reasons that themselves contain '}'.
    candidates = [lazy.group(0)]
    greedy = re.search(r'\{.*\}', content, re.DOTALL)
    if greedy and greedy.group(0) != candidates[0]:
        candidates.append(greedy.group(0))

    for candidate in candidates:
        try:
            result_dict = json.loads(candidate)
        except ValueError:
            # Not strict JSON (e.g. single quotes): fall back to a Python literal.
            try:
                result_dict = ast.literal_eval(candidate)
            except (ValueError, SyntaxError, TypeError):
                continue
        if not isinstance(result_dict, dict):
            continue
        score = result_dict.get("score")
        reason = result_dict.get("reason")
        # bool is a subclass of int, so exclude it explicitly.
        score_ok = isinstance(score, int) and not isinstance(score, bool) and 1 <= score <= 5
        if score_ok and isinstance(reason, str):
            return score, reason, candidate

    raise ValueError("Invalid score or reason format")


def get_sentiment(symbol, *articles, retries=2):
    """Score each article from 1 (strongly negative) to 5 (strongly positive).

    Every usable article is sent to the ``deepseek-r1`` model through
    ``ollama.chat``; the reply must contain a ``{"score": int, "reason": str}``
    dictionary. An article is attempted up to ``retries + 1`` times.

    Args:
        symbol: Label written to the ``Symbol`` column of the failed-response
            log. It is not inserted into the prompt.
        *articles: Article texts. Entries equal to ``0`` or null (per
            ``pd.notnull``) are dropped before scoring, so the result can be
            shorter than the input.
        retries: Number of additional attempts after the first failure.

    Returns:
        ``(scores, reasons)``: two equal-length tuples, one entry per usable
        article in input order. Articles that failed every attempt carry the
        placeholder ``"null"`` in both tuples. When no article is usable the
        result is ``([], [])``.

    Side effects:
        Prints each accepted response and each failed attempt. Articles that
        exhausted their attempts are appended to ``FAILED_LOG`` (CSV).
    """
    articles = [text for text in articles if text != 0 and pd.notnull(text)]
    if not articles:
        return [], []

    results = []
    failed = []

    few_shots = """
Calculate the sentiment score for the given input:
Consider the stock symbol ‘APPL’ and the news article  ‘Below is Validea's guru fundamental report for APPLE INC (AAPL)...’
Output
{"score": 4, "reason": "The news highlights that Apple Inc. (AAPL)..."}

Calculate the sentiment score for the given input:
Consider the stock symbol ‘EBAY’ and the news article  ‘Fool.com contributor Parkev Tatevosian reveals his top dividend stocks...’
Output
{"score": 3, "reason": "The news article discusses dividend stock recommendations..."}

Calculate the sentiment score for the given input:
Consider the stock symbol ‘AAPL’ and the news article  ‘In a letter to the Department of Justice, Senator Ron Wyden said...’
Output
{"score": 1, "reason": "The news highlights potential privacy concerns..."}
"""

    for article in articles:
        # The prompt depends only on the article, so build it once instead of
        # once per retry.
        prompt = f"""
Forget all previous instructions.

You are a financial expert with extensive experience in stock recommendation and market based news sentiment analysis. You will receive summarized news for a specific stock and its stock symbol.
Your task is to analyze the overall news in the context of the stock’s potential short-term movement, and assign a sentiment score from 1 to 5, based on the expected directional impact on the company’s stock price. The scoring criteria lies in one of the following score bands:
Scoring Criteria:
Score 5 – Strongly Positive: The news is likely to significantly increase investor confidence and drive the stock price up.
Score 4 – Somewhat Positive: The news is moderately positive, potentially causing a small upward price movement.
Score 3 – Neutral: The news is balanced or has no clear market impact.
Score 2 – Somewhat Negative: The news may cause a small decline in stock price.
Score 1 – Strongly Negative: The news is likely to significantly decrease investor confidence and drive the stock price down.

Below are a few examples for reference:
{few_shots}

IMPORTANT:
Only return a Python dictionary in the following format (no commentary or additional explanation):

python
{{"score": <int>, "reason": "<your explanation>"}}
Here is the article:
\"\"\"{article}\"\"\"
"""
        user_message = {"role": "user", "content": prompt.strip()}

        for attempt in range(retries + 1):
            try:
                response = ollama.chat(model="deepseek-r1", messages=[user_message])
                content = response['message']['content'].strip()

                score, reason, parsed = _parse_sentiment_reply(content)
                print(f"Response: {parsed}")
                results.append((score, reason))
                break

            except Exception as e:
                # Deliberately broad: any model, transport or parsing error
                # just consumes one attempt.
                print(f"Attempt {attempt + 1} failed: {e}")
                if attempt == retries:
                    failed.append(article)
                    results.append(("null", "null"))

    if failed:
        log_df = pd.DataFrame({"Symbol": symbol, "Failed_Article": failed})
        # Write the header only when the log file is being created.
        if os.path.exists(FAILED_LOG):
            log_df.to_csv(FAILED_LOG, mode='a', header=False, index=False)
        else:
            log_df.to_csv(FAILED_LOG, index=False)

    scores, reasons = zip(*results) if results else ([], [])
    return scores, reasons


In [None]:

# === RUN ON FULL CSV ===
def run_sentiment_on_full_df(df, output_dir, model_used=MODEL_NAME, batch_size=10):
    """Score every article in ``df`` and checkpoint the result to CSV after each batch.

    If ``<output_dir>/_dataset_scored_part1.csv`` already exists, it is loaded
    and scoring resumes from it: rows whose score column holds a value are
    skipped, and only the remaining rows are sent to ``get_sentiment``. Rows
    stored with the ``"null"`` placeholder are read back as missing by
    ``pd.read_csv`` and are therefore retried.

    Args:
        df: DataFrame with an ``Article`` column. Rows with a null article
            are dropped.
        output_dir: Directory of the checkpoint/result CSV (created if absent).
        model_used: Name of the score column; the reasoning column is the same
            name with ``"sentiment"`` replaced by ``"reasoning"``.
        batch_size: Number of rows handled between two checkpoint writes.

    Returns:
        None. The scored data is written to the CSV file.
    """
    # Renumber rows 0..n-1 after filtering: batches below are positional, and
    # the .at[] writes are label-based, so labels must equal positions.
    df = df[pd.notnull(df['Article'])].reset_index(drop=True)

    if df.empty:
        print("No valid articles in the dataset.")
        return

    print(f"Scoring full dataset ({len(df)} rows)")

    sentiment_col = model_used
    reason_col = model_used.replace("sentiment", "reasoning")
    if reason_col == sentiment_col:
        # model_used did not contain "sentiment"; keep the two columns distinct.
        reason_col = f"{model_used}_reasoning"

    os.makedirs(output_dir, exist_ok=True)
    out_file = os.path.join(output_dir, "_dataset_scored_part1.csv")
    if os.path.exists(out_file):
        scored_df = pd.read_csv(out_file)
        for col in (sentiment_col, reason_col):
            if col not in scored_df.columns:
                scored_df[col] = "null"
            # read_csv may infer a float dtype (numbers + NaN); the column must
            # be able to hold ints, strings and the "null" placeholder.
            scored_df[col] = scored_df[col].astype(object)
        done_mask = scored_df[sentiment_col].notnull() & (scored_df[sentiment_col].astype(str) != "null")
        processed_indices = set(scored_df.index[done_mask])
        print(f"Resuming from previously processed {len(processed_indices)} rows...")
    else:
        scored_df = df.copy()
        scored_df[sentiment_col] = "null"
        scored_df[reason_col] = "null"
        processed_indices = set()

    total_rows = len(scored_df)
    for i in tqdm(range(0, total_rows, batch_size), desc="Scoring dataset", unit="batch"):
        batch_indices = list(range(i, min(i + batch_size, total_rows)))

        # Keep only rows that still need a score and that get_sentiment will
        # not silently drop (it discards 0 / null articles); this keeps the
        # returned scores aligned one-to-one with the row indices.
        pending = []
        for idx in batch_indices:
            if idx in processed_indices:
                continue
            text = scored_df.at[idx, 'Article']
            if text != 0 and pd.notnull(text):
                pending.append(idx)
        if not pending:
            continue

        articles = [scored_df.at[idx, 'Article'] for idx in pending]
        scores, reasons = get_sentiment("ALL", *articles)

        for idx, score, reason in zip(pending, scores, reasons):
            scored_df.at[idx, sentiment_col] = score
            scored_df.at[idx, reason_col] = reason

        try:
            scored_df.to_csv(out_file, index=False)
        except Exception as e:
            # Best effort: a failed checkpoint must not abort the scoring run.
            print(f"Failed to save CSV in batch: {e}")
        else:
            print(f"Saved progress after batch ending at row {batch_indices[-1]}")

    print(f"\nCompleted scoring full dataset. Final CSV saved at: {out_file}")


In [None]:

# === MAIN EXECUTION ===

if __name__ == "__main__":
    # Apply the GPU/CPU setting, then make sure the output directory exists.
    set_ollama_gpu_mode(USE_GPU)
    os.makedirs(SCORED_DIR, exist_ok=True)

    # Load the articles and normalise every header to "Capitalized" form
    # (first letter upper-case, rest lower-case) so the 'Article' column
    # lookup in the scoring function matches.
    df = pd.read_csv(INPUT_PATH).rename(columns=str.capitalize)

    run_sentiment_on_full_df(df, output_dir=SCORED_DIR)