# Label Quality Analysis: Twitter Financial Sentiment

This notebook performs comprehensive label quality analysis on the Twitter Financial News Sentiment dataset (Zeroshot, 2023).

**Focus**: Identifying ambiguous cases, noisy labels, and borderline classifications in social-media text.

**Prerequisites**: Train a model first, before running this notebook.

In [None]:
# Setup
import sys
import os

# Get project root
def find_project_root(start_dir, marker="src"):
    """Return the closest directory at or above ``start_dir`` containing ``marker``.

    Args:
        start_dir: Directory to start searching from.
        marker: Name of a subdirectory that identifies the project root.

    Returns:
        Absolute path of the first matching directory, or ``None`` if the
        filesystem root is reached without a match.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        if os.path.isdir(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # dirname() of the filesystem root is itself
            return None
        candidate = parent


launch_dir = os.getcwd()
if os.path.basename(launch_dir) == "notebooks":
    # Started from <root>/notebooks: the root is one level up; work from there.
    PROJECT_ROOT = os.path.dirname(launch_dir)
    os.chdir(PROJECT_ROOT)
else:
    # Re-running this cell after the chdir above (or starting Jupyter at the
    # root) leaves cwd == root, so blindly going two levels up would point
    # outside the project. Locate the root by its src/ directory instead and
    # only fall back to the two-levels-up guess if no ancestor has one.
    PROJECT_ROOT = find_project_root(launch_dir) or os.path.dirname(
        os.path.dirname(launch_dir)
    )

src_path = os.path.join(PROJECT_ROOT, "src")
if src_path not in sys.path:
    sys.path.insert(0, src_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from dataset_loader import load_dataset
from preprocess import preprocess_batch
from label_quality import (
    detect_misclassifications,
    detect_ambiguous_predictions,
    detect_noisy_labels,
    analyze_neutral_ambiguous_zone,
    analyze_borderline_cases,
    quantify_dataset_ambiguity
)

%matplotlib inline
plt.style.use("seaborn-v0_8")
sns.set_palette("husl")

print("✓ Setup complete")
print(f"Project root: {PROJECT_ROOT}")