<a href="https://colab.research.google.com/github/stepanjaburek/workingpaper_czech_psp_speeches/blob/main/Sentiment(PolDEBATE).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from transformers import pipeline
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
# Hugging Face model id handed to the zero-shot classification pipeline.
model_name = "mlburnham/Political_DEBATE_large_v1.0"
# Hypothesis sentence passed to the classifier as `hypothesis_template`;
# the `{}` placeholder is where each candidate class gets inserted.
hypothesis_template = "The author of this text has {} sentiment about left-wing politics"
# Candidate labels scored against every text.
classes = ["Negative", "Neutral", "Positive"]

In [None]:
# Pick the device ordinal for the pipeline: GPU 0 when torch can see CUDA,
# otherwise -1.
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# Zero-shot classification pipeline backed by the checkpoint in `model_name`.
classifier = pipeline(
    "zero-shot-classification",
    model=model_name,
    device=device,
)


def analyze_sentiments(df, classifier, classes, hypothesis, batch_size=16):
    """Classify every text in ``df['translated_text']`` in batches.

    Args:
        df: DataFrame with a ``translated_text`` column holding the texts.
        classifier: Callable invoked as ``classifier(texts, classes,
            hypothesis_template=..., multi_label=False, batch_size=...)``.
            For each text it must yield a mapping with ``'labels'`` and
            ``'scores'`` sequences of equal order; the first entry of each
            is taken as the prediction.
        classes: Candidate labels forwarded to ``classifier``.
        hypothesis: Template string forwarded as ``hypothesis_template``.
        batch_size: Number of texts sent to ``classifier`` per call.

    Returns:
        A DataFrame with one row per input text, in input order, on a fresh
        default index. Columns are ``label`` and ``score`` (the first label
        and its score) plus one ``<label>_score`` column per returned label.
        For an empty input the same columns are present with zero rows.
    """
    # Materialise the column once; list slicing is always positional, no
    # matter what index `df` carries.
    texts = df['translated_text'].tolist()

    rows = []
    for start in tqdm(range(0, len(texts), batch_size)):
        batch_output = classifier(
            texts[start:start + batch_size],
            classes,
            hypothesis_template=hypothesis,
            multi_label=False,
            batch_size=batch_size,
        )

        # NOTE(review): some zero-shot pipeline versions appear to return a
        # bare dict instead of a one-element list when the batch holds a
        # single text (e.g. the last, short batch); normalise so the loop
        # below never iterates over dict keys.
        if isinstance(batch_output, dict):
            batch_output = [batch_output]

        for item in batch_output:
            rows.append({
                'label': item['labels'][0],
                'score': item['scores'][0],
                **{f'{label}_score': score for label, score in zip(item['labels'], item['scores'])},
            })

    if not rows:
        # Keep the expected columns so callers can still index `label` etc.
        return pd.DataFrame(
            columns=['label', 'score', *(f'{label}_score' for label in classes)]
        )

    return pd.DataFrame(rows)

# Load the translated speeches and classify each row's `translated_text`.
df = pd.read_csv("/content/translated_green.csv")
results = analyze_sentiments(
    df,
    classifier,
    classes,
    hypothesis_template,
)

# Put the prediction columns next to the input columns and write one CSV
# (without the index column).
pd.concat(
    [df, results],
    axis=1,
).to_csv(
    'sentiment_results.csv',
    index=False,
)

# Print how many rows were assigned to each predicted label.
print(
    "\nSentiment Distribution:",
    results['label'].value_counts(),
    sep="\n",
)