In [None]:
import os
from dotenv import load_dotenv
from openai import OpenAI
import pandas as pd
import re
from sklearn.utils import resample
import json

In [None]:
# Load variables from a local .env file (if one exists) into the process
# environment. `load_dotenv` was imported but never called, so a key stored
# only in .env would not have been visible to os.getenv below.
load_dotenv()

# OpenAI API client, authenticated with the key read from the environment
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

## Data sampling

In [None]:
# Read the remaining pool of articles (everything not used for prompt selection
# or the prompting experiment). Expected columns: text, label.
# The path is a placeholder and must be filled in before running.
data = pd.read_csv(filepath_or_buffer="")

### Generate confidence scores

In [None]:
# System prompt (Danish) for the confidence-score run: asks the model to act as
# an average Danish news consumer, assign one of the categories
# Positiv / Negativ / Neutral, and add a confidence score from 0.00 to 1.00,
# answering in JSON.
# NOTE(review): this is a plain string (never passed through str.format), so the
# doubled braces "{{ ... }}" reach the model literally — confirm that is intended.
sentiment_v1_prompt_confidence = """
Du er en gennemsnitlig dansk nyhedsforbruger. Du får en overskrift og underoverskrift på en artikel, og skal tildele den en kategori svarende til det sentiment den fremkalder.
Kategorier: ”Positiv”: Fremkalder en overordnet positiv sentiment. ”Negativ”: Fremkalder en overordnet negativ sentiment. ”Neutral”: Fremkalder hverken en positiv eller negativ sentiment
Giv også en confidence score med to decimaler fra 0.00 til 1.00, der repræsenterer hvor sikker du er i din vurdering af sentiment, hvor 0 er meget usikker og 1 er meget sikker.
Giv et præcist svar i json: {{sentiment: ”kategori”, "confidence": "score"}}.
"""

In [None]:
def zeroshot_sentiment_annotation(text, prompt):
    """Annotate a single article with GPT-4o using a zero-shot prompt.

    `prompt` is sent as the system message and `text` is embedded in the user
    message. The request asks for a JSON answer holding a sentiment string and
    a confidence number between 0.0 and 1.0.

    Parameters
    ----------
    text : article text to classify.
    prompt : system prompt describing the task.

    Returns
    -------
    The raw content of the first returned choice (a JSON string), or None when
    any exception is raised; the error is printed in that case.
    """
    try:
        # JSON schema describing the expected answer object
        answer_schema = {
            "type": "object",
            "properties": {
                "sentiment": {
                    "description": "Sentiment of the article",
                    "type": "string",
                },
                "confidence": {
                    "description": "Confidence score for the sentiment from 0.00 to 1.00",
                    "type": "number",
                    "minimum": 0.0,
                    "maximum": 1.0,
                },
            },
            "additionalProperties": False,
        }

        system_message = {"role": "system", "content": f"{prompt}"}
        user_message = {
            "role": "user",
            "content": f"Artikel: {text} \nArtiklen fremkalder dette sentiment:",
        }

        completion = client.chat.completions.create(
            model="gpt-4o-2024-08-06",
            messages=[system_message, user_message],
            temperature=0,
            response_format={
                "type": "json_schema",
                "json_schema": {"name": "topic_schema", "schema": answer_schema},
            },
        )

        # Only the text of the first choice is of interest
        return completion.choices[0].message.content
    except Exception as err:
        print(f"Error: {err}")
        return None
    

# Annotate every article with the confidence prompt. Keyword arguments given to
# Series.apply are forwarded to the function, so no lambda wrapper is needed.
data["llm_annotation"] = data["text"].apply(
    zeroshot_sentiment_annotation, prompt=sentiment_v1_prompt_confidence
)

# Write the annotated frame to CSV without the index column
data.to_csv("", index=False)

In [None]:
# extract confidence scores
def extract_confidence(value):
    """Extract the confidence score from a raw LLM annotation as an integer percentage.

    The annotation is searched for a ``"confidence": <number>`` entry; the
    number may be quoted or unquoted and may be an integer (``1``) or a decimal
    (``0.85``). Scores on the prompted 0.00-1.00 scale are converted to whole
    percentages (0-100), which is the scale used by the confidence bins in the
    selective-sampling step. Truncating the raw score with ``int()`` would
    collapse every score to 0 or 1.

    Parameters
    ----------
    value : raw annotation (typically a JSON string); converted with ``str``.

    Returns
    -------
    int in 0-100 for scores on the 0-1 scale, or None if no score is found.
    Scores greater than 1 are assumed to already be percentages and are only
    rounded — NOTE(review): confirm this matches the model's actual output.
    """
    match = re.search(r'"confidence":\s*"?(\d+(?:\.\d+)?)"?', str(value))
    if match is None:
        return None  # no confidence entry in the annotation

    score = float(match.group(1))
    if score <= 1.0:
        score *= 100  # 0.00-1.00 scale -> percentage
    # round() rather than int(): e.g. 0.29 * 100 == 28.999999999999996
    return int(round(score))

# Derive one confidence score per row from the raw LLM annotation
data["confidence_scores"] = data["llm_annotation"].map(extract_confidence)

In [None]:
# map labels to integers
def map_sentiment_to_int(annotation):
    """Map a sentiment annotation to its integer code.

    Codes: 0 = negative, 1 = neutral, 2 = positive.

    The LLM output is not consistent, so string annotations are searched
    (case-insensitively) for the category name instead of being compared
    exactly. "negativ" is checked first, then "neutral", then "positiv".

    Parameters
    ----------
    annotation : a label or raw LLM answer. Strings are searched for a
        category name. Numbers that already equal a valid code (0, 1, 2) are
        returned unchanged, so re-running the mapping on an already-mapped
        column is harmless. Anything else (None, NaN, other types) has no
        category.

    Returns
    -------
    0, 1 or 2, or None when no category can be determined.
    """
    # bool is a subclass of int; never treat it as a label code
    if isinstance(annotation, bool):
        return None

    # Already-encoded values pass through; NaN and other numbers map to None
    if isinstance(annotation, (int, float)):
        return int(annotation) if annotation in (0, 1, 2) else None

    # Failed API calls produce None; `"x" in None` would raise TypeError
    if not isinstance(annotation, str):
        return None

    lowered = annotation.lower()
    if "negativ" in lowered:
        return 0
    if "neutral" in lowered:
        return 1
    if "positiv" in lowered:
        return 2
    return None  # In case of unexpected values

# Encode both the gold labels and the LLM annotations as integer codes
for column in ("label", "llm_annotation"):
    data[column] = data[column].map(map_sentiment_to_int)

### Random sampling

In [None]:
# Draw 300 rows uniformly at random. A fixed seed (the same value the selective
# sampler uses) makes the sample reproducible across kernel restarts.
# NOTE(review): this overwrites `data`, which the selective-sampling cell also
# reads — presumably only one of the two sampling cells is meant to be run.
data = data.sample(n=300, random_state=42)

### Selective sampling

In [None]:
# Define bins and labels for confidence intervals.
# The bin edges are on a 0-100 scale while the labels read as 0-1 fractions, so
# `confidence_scores` must hold percentages for the bins to be meaningful.
bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
labels = ["0-0.1", "0.1-0.2", "0.2-0.3", "0.3-0.4", "0.4-0.5", "0.5-0.6", "0.6-0.7", "0.7-0.8", "0.8-0.9", "0.9-1.0"]
data["confidence_interval"] = pd.cut(data["confidence_scores"], bins=bins, labels=labels, include_lowest=True)

# Determine if LLM annotation is correct
data["is_correct"] = data["llm_annotation"] == data["label"]

# Share of the full dataset in each (confidence interval, correctness) cell.
# observed=True keeps only combinations that actually occur; empty cells would
# be skipped below anyway, and this makes the result independent of the pandas
# default for categorical group keys.
full_distribution = (
    data.groupby(["confidence_interval", "is_correct"], observed=True).size() / len(data)
)

# Calculate target sample sizes based on this distribution.
# Each cell is rounded separately, so the sizes need not add up to exactly
# `total_samples`.
total_samples = 300
sample_sizes = (full_distribution * total_samples).round().astype(int)

# Collect the per-cell samples and concatenate once at the end
sampled_groups = []
for (interval, correct), size in sample_sizes.items():
    # Only sample if the size is greater than zero
    if size > 0:
        # Get the subset of data for this interval and correctness
        group_data = data[(data["confidence_interval"] == interval) & (data["is_correct"] == correct)]

        # Sample with replacement only if more samples are needed than the group holds
        replace = size > len(group_data)

        sampled_groups.append(
            resample(
                group_data,
                n_samples=size,
                random_state=42,
                replace=replace
            )
        )

# ignore_index=True gives the sampled frame a fresh 0..n-1 index
if sampled_groups:
    sampled_df = pd.concat(sampled_groups, ignore_index=True)
else:
    # Nothing to sample: keep the column layout so the CSV still has a header
    sampled_df = pd.DataFrame(columns=data.columns)

# Save to CSV without the index column, like the other CSV exports in this notebook
sampled_df.to_csv("", index=False)

## Format fine-tuning data

In [None]:
# Read the sampled training examples back from CSV (placeholder path)
sampled_data = pd.read_csv(filepath_or_buffer="")

In [None]:
# Create the fine-tuning dataset in chat format: one system/user/assistant
# example per sampled article.
# NOTE(review): the original comment mentions both the best-performing and the
# simplest prompt, but only the single system prompt below is used here.

# System prompt shared by every training example (kept verbatim)
finetune_system_prompt = """Du er en gennemsnitlig dansk nyhedsforbruger. Du får en overskrift og underoverskrift på en artikel, og skal tildele den en kategori svarende til det sentiment den fremkalder. Kategorier: ”Positiv”: Fremkalder en overordnet positiv sentiment. ”Negativ”: Fremkalder en overordnet negativ sentiment. ”Neutral”: Fremkalder hverken en positiv eller negativ sentiment. Giv et præcist svar i json: {{sentiment: ”kategori”}}."""

# Inverse of the integer encoding produced by map_sentiment_to_int. The prompt
# asks the model for a category name, so integer-coded labels are turned back
# into names for the assistant turn; labels that are already strings are kept.
# NOTE(review): confirm that category names are the intended training target.
code_to_category = {0: "Negativ", 1: "Neutral", 2: "Positiv"}

messages = []
# Text and label are taken from the same row of `sampled_data`. Looking the
# label up in `data` by the index of `sampled_data` pairs texts with labels of
# unrelated rows, because the two frames do not share an index.
for text, raw_label in zip(sampled_data["text"], sampled_data["label"]):
    sentiment = code_to_category.get(raw_label, raw_label)
    label = json.dumps({"sentiment": sentiment})
    article = {
        "messages": [
            {"role": "system", "content": finetune_system_prompt},
            {"role": "user", "content": f"Artikel: {text} \nArtiklen fremkalder dette sentiment:"},
            {"role": "assistant", "content": label}
        ],
    }

    messages.append(article)

In [None]:
# Write the fine-tuning examples as JSON Lines: one JSON object per line,
# keeping non-ASCII characters unescaped
with open("", "w", encoding="utf-8") as out_file:
    out_file.writelines(
        json.dumps(example, ensure_ascii=False) + "\n" for example in messages
    )

## Inference with fine-tuned model

In [None]:
# Read the held-out evaluation set from CSV (placeholder path)
evaluation_data = pd.read_csv(filepath_or_buffer="")

In [None]:
# System prompts (Danish) used at inference time.
# NOTE(review): both are plain strings (never passed through str.format), so the
# doubled braces "{{ ... }}" reach the model literally — confirm that is intended.

# v1: describes what each of the three categories means
sentiment_v1_prompt = """
Du er en gennemsnitlig dansk nyhedsforbruger. Du får en overskrift og underoverskrift på en artikel, og skal tildele den en kategori svarende til det sentiment den fremkalder.
Kategorier: ”Positiv”: Fremkalder en overordnet positiv sentiment. ”Negativ”: Fremkalder en overordnet negativ sentiment. ”Neutral”: Fremkalder hverken en positiv eller negativ sentiment
Giv et præcist svar i json: {{sentiment: ”kategori”}}.
"""

# v0: only lists the category names, without descriptions
sentiment_v0_prompt = """
Du er en gennemsnitlig dansk nyhedsforbruger. Du får en overskrift og underoverskrift på en artikel, og skal tildele den en kategori svarende til det sentiment den fremkalder. 
Kategorier: ”Positiv”, ”Negativ”, ”Neutral”. 
Giv et præcist svar i json: {{sentiment: ”kategori”}}.
"""

In [None]:
def zeroshot_sentiment_annotation(text, prompt):
    """Annotate a single article with the fine-tuned model.

    This definition has the same name as the GPT-4o helper defined earlier in
    the notebook and replaces it once this cell has run. Unlike that helper, it
    requests only a sentiment string (no confidence score).

    Parameters
    ----------
    text : article text to classify; embedded in the user message.
    prompt : system prompt describing the task.

    Returns
    -------
    The raw content of the first returned choice (a JSON string), or None when
    any exception is raised; the error is printed in that case.
    """
    try:
        response = client.chat.completions.create(
            model="", # specify fine-tuned model
            messages=[
                {
                    "role": "system", 
                    "content": f"{prompt}"
                },
                {
                    "role": "user", 
                    "content": f"Artikel: {text} \nArtiklen fremkalder dette sentiment:"
                }
            ],
            temperature=0,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "topic_schema",
                    "schema": {
                        "type": "object",
                        "properties": {
                            "sentiment": {
                                "description": "Sentiment of the article",
                                "type": "string"
                            }
                        },
                        # Belongs to the object schema itself; nested inside
                        # "properties" it would declare a property named
                        # "additionalProperties" instead of forbidding extras.
                        "additionalProperties": False
                    }
                }
            }
        )

        # Extract response
        sentiment = response.choices[0].message.content
        return sentiment
    except Exception as e:
        print(f"Error: {e}")
        return None
    
# Annotate every evaluation article with the simple (v0) prompt. Keyword
# arguments given to Series.apply are forwarded to the function.
evaluation_data["llm_annotation"] = evaluation_data["text"].apply(
    zeroshot_sentiment_annotation, prompt=sentiment_v0_prompt
)

# Write the annotated evaluation frame to CSV without the index column
evaluation_data.to_csv("", index=False)