In [None]:
import os

# Load every sample text (file names containing "nayte") from the data
# directory into `contents` as (file name, full text) tuples.
data_dir = "data/lintulehtio"
contents = []
# os.listdir returns entries in arbitrary order; sorting keeps the sample
# order (and therefore every downstream table) stable between runs.
for fname in sorted(os.listdir(data_dir)):
    if "nayte" not in fname:
        continue
    path = os.path.join(data_dir, fname)
    # Skip sub-directories whose name happens to match; open() would raise.
    if not os.path.isfile(path):
        continue
    # Explicit encoding so the Finnish characters do not depend on the
    # platform default. NOTE(review): assumes the samples are stored as
    # UTF-8 - confirm against the data files.
    with open(path, 'r', encoding="utf-8") as f:
        contents.append((fname, f.read()))

# temporarily decrease amount of texts
# contents = contents[:2]

# Echo each loaded sample so the reader can check what was read.
for fname, text in contents:
    print(f"{fname}:\n")
    print(f"{text}\n")

In [None]:
# Emotion labels (in Finnish) that every text sample is checked against.
emotions = [
    "ilo",        # joy
    "viha",       # anger
    "suru",       # sorrow
    "hämmästys",  # surprise
    "pelko",      # fear
    "inho",       # disgust
]

In [None]:
import json
import requests
from pprint import pprint

from llm import generate

# JSON schema passed to `generate` as `output_format`: an object with a single
# required boolean field, "emotion_present".
output_format = {
    "type": "object",
    "properties": {
        "emotion_present": {
            "type": "boolean"
        }
    },
    "required": [
      "emotion_present"
    ]
}

# Settings that do not change between requests are defined once, outside the
# loops, so they are easy to find and tune.
# model = "mistral-large"
# model = "deepseek-r1:70b"
model = "llama3.3:70b"
system_prompt = "Olet laadullisen tutkimuksen avustaja. Saat tekstinäytteen sekä yhden ihmisen perustunteista. Lue teksti huolella ja päätä esiintyykö tunne tekstinäytteessä."
n_iter = 10  # number of repeated queries per (sample, emotion) pair

# One record per successful query: {"fname", "emotion", "iter", "result"}.
results = []
for fname, text in contents:
    for emotion in emotions:
        # The prompt depends only on the sample and the emotion, not on the
        # repetition index, so build it once per pair.
        user_prompt = f"Tunne: {emotion} \n\n Tekstinäyte: \n\n {text}"
        for idx in range(n_iter):
            # A fresh message list per call, so nothing is shared between
            # requests.
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ]
            # The repetition index doubles as the seed, so each of the n_iter
            # repetitions uses a different seed.
            # NOTE(review): n_context=4096 presumably caps the context window;
            # confirm that the longest sample fits.
            result = generate(model, messages, seed=idx, n_context=4096, output_format=output_format)

            # A single unusable reply (invalid JSON, missing key) must not
            # abort the whole run and discard the remaining work. Report it
            # and skip it; percentages computed later are then taken over the
            # usable replies only.
            try:
                present = json.loads(result['message']['content'])['emotion_present']
            except (json.JSONDecodeError, KeyError, TypeError) as err:
                print(f"Skipping {fname} / {emotion} / iter {idx}: unusable model reply ({err!r})")
                continue

            results.append({
                "fname": fname,
                "emotion": emotion,
                "iter": idx,
                "result": present
            })

In [None]:
%pip install -q jinja2
import pandas as pd

# Group the individual verdicts: file name -> emotion -> list of results,
# keeping the order in which files and emotions first appear in `results`.
verdicts_by_file = {}
for record in results:
    per_emotion = verdicts_by_file.setdefault(record['fname'], {})
    per_emotion.setdefault(record['emotion'], []).append(record['result'])

# Turn every list of verdicts into the percentage of True answers.
transformed_data = {
    sample_name: {
        emotion_name: (votes.count(True) / len(votes)) * 100
        for emotion_name, votes in per_emotion.items()
    }
    for sample_name, per_emotion in verdicts_by_file.items()
}

# One row per file, one column per emotion.
df = pd.DataFrame.from_dict(transformed_data, orient='index')

# 'total' column: mean over the emotions of each file.
# 'total' row: mean of every column (including the new 'total' column).
df['total'] = df.mean(axis=1)
df.loc['total'] = df.mean()

# Keep two decimals.
df = df.round(2)

# Styling function: highlight cells whose value reaches the threshold
def color_high_values(val, threshold=80):
    """Return the CSS declaration for a single table cell.

    Args:
        val: Numeric cell value (a percentage in this notebook).
        threshold: Smallest value that gets highlighted. Defaults to 80,
            the cut-off this notebook has always used.

    Returns:
        A light-green ``background-color`` declaration when
        ``val >= threshold``, otherwise an empty string (no styling).
        NaN compares false against any threshold and is left unstyled.
    """
    if val >= threshold:
        return 'background-color: rgba(144, 238, 144, 0.3)'
    return ''

# Build the styled view step by step: colour the cells element-wise with
# color_high_values, then render every number with two decimals.
styled_df = df.style.map(color_high_values)
styled_df = styled_df.format("{:.2f}")

# Last expression of the cell: shows the styled table as the cell output.
styled_df