In [None]:
import pandas as pd
from tqdm import tqdm

# Read the cleaned NYT front-page articles into a DataFrame.
df = pd.read_csv("nyt_frontpage_all_clean.csv")

# --- Optional: keep only rows whose section is on the allow-list below ---
# Matching is exact (Series.isin), so spelling variants are listed explicitly.
financial_sections = [
    "Business Day",
    "Business",
    "Economy",
    "Economic",
    "Finance",
    "Financial",
    "Markets",
    "Market",
    "Technology",
    "Tech",
    "Politics",
    "Political",
    "U.S.",
    "World",
    "International",
    "Global",
    "Energy",
    "Oil",
    "Federal Reserve",
    "Treasury",
    "Trade",
    "Commerce",
]
# .copy() gives an independent frame, so later column additions do not touch df.
df_fin = df.loc[df["section_name"].isin(financial_sections)].copy()

# --- FinBERT Sentiment Only ---
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the FinBERT tone classifier and its matching tokenizer from the
# Hugging Face hub (downloaded on first use, then served from the local cache).
tokenizer = AutoTokenizer.from_pretrained("yiyanghkust/finbert-tone")
model = AutoModelForSequenceClassification.from_pretrained("yiyanghkust/finbert-tone")

# Class names indexed by logit position. They are taken from the checkpoint's
# own config rather than hard-coded: this model's documented order is
# 0 = neutral, 1 = positive, 2 = negative, so a hand-written
# ["negative", "neutral", "positive"] list mislabels every prediction.
# Lowercased so the names match the keys used when mapping labels to numbers.
labels = [model.config.id2label[i].lower() for i in range(model.config.num_labels)]

# Register `progress_apply` on pandas objects so long applies show a tqdm bar.
tqdm.pandas()

def finbert_sentiment(text):
    """Return the sentiment label the FinBERT model assigns to ``text``.

    The text is tokenized with the module-level ``tokenizer`` (truncated to at
    most 128 tokens), run through the module-level ``model`` without gradient
    tracking, and the highest-probability class is looked up in the
    module-level ``labels`` list.

    Args:
        text: The string to classify.

    Returns:
        The entry of ``labels`` at the index of the most probable class.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits
    class_probs = torch.softmax(logits, dim=-1)
    # One input sequence per call, so the flat argmax is the class index.
    best_idx = class_probs.argmax().item()
    return labels[best_idx]

# Get string label.
# Rows with a missing text are skipped rather than cast with astype(str):
# the cast would turn NaN into the literal string "nan" and score that.
# Assigning the shorter result aligns on the index, so skipped rows keep a
# missing label instead of a made-up one.
df_fin["finbert_label"] = (
    df_fin["fulltext_clean"].dropna().astype(str).progress_apply(finbert_sentiment)
)

# Map to numeric: negative -> -1, neutral -> 0, positive -> 1
label_map = {"negative": -1, "neutral": 0, "positive": 1}
# Nullable Int64 keeps the scores as integers (-1/0/1) in the CSV even when
# some rows have no label; a plain float column would write -1.0/0.0/1.0.
df_fin["finbert_numeric"] = df_fin["finbert_label"].map(label_map).astype("Int64")

# --- Save result ---
df_fin.to_csv("nyt_frontpage_finbert_sentiment_numeric.csv", index=False)
print("✅ FinBERT sentiment labels and numeric scores added, CSV saved!")



100%|██████████| 68756/68756 [00:16<00:00, 4290.94it/s]
100%|██████████| 68756/68756 [00:16<00:00, 4153.11it/s]


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

  0%|          | 13/68756 [00:00<1:27:08, 13.15it/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

100%|██████████| 68756/68756 [1:15:12<00:00, 15.24it/s]


✅ Finansal section'lara sentiment skoru eklendi ve CSV kaydedildi!
