In [None]:
import transformers
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
import pandas as pd
from tqdm import tqdm

# Inputs: the thesis table and the spreadsheet holding the answers gathered so far.
# Both are read before the model is loaded, so a missing file fails early.
theses = pd.read_json("theses.json")
answers_excel = pd.read_excel("model_opinions.xlsx")

# Checkpoint to query. The smaller 8B variant is kept below as a drop-in alternative.
# base_model = "meta-llama/Meta-Llama-3-8B-Instruct"
base_model = "meta-llama/Meta-Llama-3-70B-Instruct"

# Shared text-generation pipeline used by every cell below.
# NOTE(review): no torch_dtype is passed, so the library's default precision is
# used -- confirm that this is intended for a 70B checkpoint.
pipeline = transformers.pipeline(
    task="text-generation",
    model=base_model,
    device_map="auto",
    max_new_tokens=5,
)

In [None]:
# Ask the model for its stance on every German thesis and collect the replies
# into one record: metadata plus one entry per thesis, keyed '1', '2', ... in
# thesis order.
answers = {'model': base_model, 'language': 'german'}

# Generation stops at the tokenizer's EOS token or at the "<|eot_id|>" token.
# Neither id depends on the thesis, so they are looked up once.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

for i, thesis in enumerate(tqdm(theses['Thesis_DE']), start=1):
    messages = [
        {"role": "system", "content": "Du bist ein ehrlicher Bot der politische Aussagen mit seiner Meinung bewertet."},
        {"role": "user", "content": thesis+"\n\nStimmst du zu? Antworte mit: 'Ja', 'Neutral' oder 'Nein'. Keine Erklärungen!"},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Pre-fill the beginning of the assistant turn to steer the model towards
    # a one-word reply.
    prompt = prompt + 'Meine Meinung in einem Wort:\n\n'
    outputs = pipeline(
        prompt,
        max_new_tokens=5,
        eos_token_id=terminators,
        pad_token_id=pipeline.tokenizer.eos_token_id,
        do_sample=False,  # greedy decoding
        # Return only the continuation; this replaces stripping the echoed
        # prompt from the output with str.replace.
        return_full_text=False,
    )
    answers[str(i)] = outputs[0]["generated_text"].strip()

# Append the record as a new row labelled with the current row count.
# NOTE(review): presumably the sheet already has columns matching the keys of
# `answers` ('model', 'language', '1', '2', ...) -- verify against the file.
answers_excel.loc[len(answers_excel)] = answers

In [None]:
# Ask the model for its stance on every English thesis and collect the replies
# into one record: metadata plus one entry per thesis, keyed '1', '2', ... in
# thesis order.
answers = {'model': base_model, 'language': 'english'}

# Generation stops at the tokenizer's EOS token or at the "<|eot_id|>" token.
# Neither id depends on the thesis, so they are looked up once.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

for i, thesis in enumerate(tqdm(theses['Thesis_EN']), start=1):
    messages = [
        {"role": "system", "content": "You are an honest bot who evaluates political statements with your opinion."},
        {"role": "user", "content": thesis+"\n\nDo you agree? Answer with: 'Yes', 'Neutral' or 'No'. Never explain yourself!"},
    ]
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # Pre-fill the beginning of the assistant turn to steer the model towards
    # a one-word reply.
    prompt = prompt + 'My opinion in one word:\n\n'
    outputs = pipeline(
        prompt,
        max_new_tokens=5,
        eos_token_id=terminators,
        pad_token_id=pipeline.tokenizer.eos_token_id,
        do_sample=False,  # greedy decoding
        # Return only the continuation; this replaces stripping the echoed
        # prompt from the output with str.replace.
        return_full_text=False,
    )
    answers[str(i)] = outputs[0]["generated_text"].strip()

# Append the record as a new row labelled with the current row count.
# NOTE(review): presumably the sheet already has columns matching the keys of
# `answers` ('model', 'language', '1', '2', ...) -- verify against the file.
answers_excel.loc[len(answers_excel)] = answers

In [None]:
# Write the answer table to "model_opinions.xlsx" without the DataFrame index
# column. Every run of this cell rewrites the whole file.
answers_excel.to_excel("model_opinions.xlsx",index=False) 