In [1]:
# Parameters cell

# Model
MODEL = "davinci-002"

# Conditioners
CONDITIONERS = 1

# Sample rate
SAMPLE_SIZE = -1
SAMPLE_COUNT = -1

SAMPLE_MULTIPLIER = 1

# Corpus
CORPUS = "converted_climate_change_gpt-4-turbo-preview_gpt-4-turbo-preview_2_2"

# Narrative set
NARRATIVE_SET = "climate"

# Which helpful/unhelpful sets to use, if any
NARRATIVE_SUFFIX = ""

# The name of the experiment (i.e. where to save the results)
EXPERIMENT_NAME = "climate_synthetic_test_df"

# DEBUG = True
DEBUG = False

In [2]:
# # pip install
# %pip install openai dill seaborn


In [3]:
import os
from analyze import experiment_set, NarrativeContext

ctx = NarrativeContext(
    model=MODEL,
    conditioners=CONDITIONERS,
    # sample_rate=SAMPLE_SIZE
)

# Create the results directory
RESULTS_DIR = os.path.join("./corpus_results/new_method", EXPERIMENT_NAME)
if not os.path.exists(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)

In [4]:
# Multiprocessing with joblib
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
import yaml
import pickle
from analyze import (
    join_many_strings,
    get_logits,
    likelihood_delta,
    stratified_n_tuple_sampling,
)
import itertools
from tqdm.auto import tqdm
from joblib import Parallel, delayed
import random


def load_experiments(name):
    path = os.path.join("../narrative_sets", name)

    with open(os.path.join(path, "helpful" + NARRATIVE_SUFFIX + ".txt"), "r") as f:
        helpful = f.read().splitlines()

    with open(os.path.join(path, "unhelpful" + NARRATIVE_SUFFIX + ".txt"), "r") as f:
        unhelpful = f.read().splitlines()

    # Neutral set might not exist, if so just use an empty list
    if not os.path.exists(os.path.join(path, "neutral" + NARRATIVE_SUFFIX + ".txt")):
        neutral = []
    else:
        with open(os.path.join(path, "neutral" + NARRATIVE_SUFFIX + ".txt"), "r") as f:
            neutral = f.read().splitlines()

    # # Drop texts less than 10 words and more than 50 words.
    # helpful = [text for text in helpful if len(text.split()) >= 10 and len(text.split()) <= 50]
    # unhelpful = [text for text in unhelpful if len(text.split()) >= 10 and len(text.split()) <= 50]
    # neutral = [text for text in neutral if len(text.split()) >= 10 and len(text.split()) <= 50]

    # Drop empty texts
    helpful = [text for text in helpful if len(text) > 0]
    unhelpful = [text for text in unhelpful if len(text) > 0]
    neutral = [text for text in neutral if len(text) > 0]

    return helpful, unhelpful, neutral


print("*** Experiment {} ***".format(NARRATIVE_SET))
helpful, unhelpful, neutral = load_experiments(NARRATIVE_SET)

print("\tHelpful: {}".format(len(helpful)))
print("\tUnhelpful: {}".format(len(unhelpful)))
print("\tNeutral: {}".format(len(neutral)))


def align_text_to_conditioners(label, narrative_sets, text, ctx, sample_size):
    # No batching
    deltas = []
    for conditioners in narrative_sets:
        conditioner_string = join_many_strings(conditioners)
        deltas.append(
            {
                "conditioners": conditioners,
                "likelihood_delta": likelihood_delta(
                    conditioner_string, text["text"], ctx
                ),
            }
        )

    return {"text": text, "likelihood_deltas": deltas}


def align(label, narrative_set, texts, sample_size):
    print("*** Aligning {} texts to {} ***".format(len(texts), label))
    results = []
    for text in tqdm(texts):
        results.append(
            align_text_to_conditioners(
                label,
                pairs[(tuple(narrative_set), text["text"])],
                text,
                ctx,
                sample_size,
            )
        )

    return results


# Load texts from corpora/{CORPUS}.json
with open(os.path.join("../corpora", f"{CORPUS}.json"), "r") as f:
    corpus_data = yaml.safe_load(f)

print("Loaded {} texts from corpus".format(len(corpus_data)))
print("Total word count:", sum([len(text["text"].split()) for text in corpus_data]))

# Use SAMPLE_COUNT
if SAMPLE_COUNT != -1 and len(corpus_data) > SAMPLE_COUNT:
    corpus_data = random.sample(corpus_data, SAMPLE_COUNT)


# Precompute narrative set -- text pairs
pairs = {}
to_precompute = set()
for narrative_set in [helpful, unhelpful, neutral]:
    for text in corpus_data:
        # # Pick a random narrative set
        # narrative_sets = list(itertools.permutations(narrative_set, CONDITIONERS))
        # narrative_sets = random.sample(narrative_sets, min(len(narrative_sets), len(narrative_set)))
        narrative_sets = []
        for _ in range(SAMPLE_MULTIPLIER):
            narrative_sets_iter = stratified_n_tuple_sampling(
                list(narrative_set), CONDITIONERS
            )
            narrative_sets.extend(narrative_sets_iter)

        # Append to the list
        pairs[(tuple(narrative_set), text["text"])] = narrative_sets

        # Add to the set of things to precompute
        for conditioners in narrative_sets:
            conditioner_string = join_many_strings(conditioners)
            to_precompute.add(join_many_strings([conditioner_string, text["text"]]))
            to_precompute.add(conditioner_string)
            to_precompute.add(text["text"])

# Get the total word count of the precompute set
print(
    "Total word count of precompute set:",
    sum([len(text.split()) for text in to_precompute]),
)
# break

# Precompute the logits
print("--- Precomputing logits ---")
get_logits(list(to_precompute), ctx.model)
print("--- Done precomputing logits ---")

# Run experiments
results = {
    "helpful": align("helpful", helpful, corpus_data, None),
    "unhelpful": align("unhelpful", unhelpful, corpus_data, None),
    # "neutral": align("neutral", neutral, corpus_data, SAMPLE_SIZE)
}

# Save results with pickle
with open(
    os.path.join(
        "corpus_results/new_method/",
        EXPERIMENT_NAME,
        f"{NARRATIVE_SET}_{CORPUS}_{MODEL}_{CONDITIONERS}_{SAMPLE_MULTIPLIER}.pkl",
    ),
    "wb",
) as f:
    pickle.dump(results, f)

*** Experiment climate ***
	Helpful: 5
	Unhelpful: 5
	Neutral: 0
Loaded 16 texts from corpus
Total word count: 301
Total word count of precompute set: 7748
--- Precomputing logits ---


  0%|          | 0/10 [00:00<?, ?it/s]

--- Done precomputing logits ---
*** Aligning 16 texts to helpful ***


  0%|          | 0/16 [00:00<?, ?it/s]

*** Aligning 16 texts to unhelpful ***


  0%|          | 0/16 [00:00<?, ?it/s]

In [5]:
import json
import yaml

# Load data from pkl
with open(
    os.path.join(
        "corpus_results/new_method/",
        EXPERIMENT_NAME,
        f"{NARRATIVE_SET}_{CORPUS}_{MODEL}_{CONDITIONERS}_{SAMPLE_MULTIPLIER}.pkl",
    ),
    "rb",
) as f:
    results = pickle.load(f)

# For each narrative set in (helpful, unhelpful), find the texts with the highest and lowest likelihood delta (relative to the other set
# of narratives) and print them.
helpful = results["helpful"]
unhelpful = results["unhelpful"]


def compute_deltas(result_set):
    # Compute generally for helpful/unhelpful/neutral
    deltas = {}
    for text in result_set:
        text_deltas = [
            delta["likelihood_delta"]["likelihood_delta"]
            for delta in text["likelihood_deltas"]
        ]
        deltas[text["text"]["text"]] = np.mean(text_deltas)
        # Get the signed max abs value
        # deltas[text["text"]["text"]] = max(text_deltas, key=abs)
        # deltas[text["text"]["text"]] = max(text_deltas)
        print(text)
    return deltas


helpful_deltas = compute_deltas(helpful)
unhelpful_deltas = compute_deltas(unhelpful)

helpful_unhelpful_diffs = []
for text in helpful:
    helpful_unhelpful_diffs.append(
        {
            "text": text["text"]["text"],
            "helpful": helpful_deltas[text["text"]["text"]],
            "unhelpful": unhelpful_deltas[text["text"]["text"]],
            # "helpful_deltas": [delta["likelihood_delta"]["likelihood_delta"] for delta in text["likelihood_deltas"]],
            # "unhelpful_deltas": [delta["likelihood_delta"]["likelihood_delta"] for delta in text["likelihood_deltas"]],
            "speaker": text["text"]["speakername"],
            "affiliation": (
                text["text"]["affiliation"] if "affiliation" in text["text"] else None
            ),
            "date": text["text"]["date"],
            "diff": helpful_deltas[text["text"]["text"]]
            - unhelpful_deltas[text["text"]["text"]],
        }
    )

helpful_unhelpful_diffs = sorted(helpful_unhelpful_diffs, key=lambda x: x["diff"])

print("Most unhelpful:")
for text in helpful_unhelpful_diffs[:10]:
    print(f"[{text['diff']}] {text['text']}")
print()
print("Most helpful:")
for text in helpful_unhelpful_diffs[-10:]:
    print(f"[{text['diff']}] {text['text']}")
print()

outpath = os.path.join(
    "corpus_results/new_method/",
    EXPERIMENT_NAME,
    f"{NARRATIVE_SET}_{CORPUS}_{MODEL}_{CONDITIONERS}_{SAMPLE_MULTIPLIER}",
)
# Save results with JSON
with open(outpath + ".json", "w") as f:
    json.dump(helpful_unhelpful_diffs, f, indent=4)

# Save in CSV format
df = pd.DataFrame(helpful_unhelpful_diffs)
df.to_csv(outpath + ".csv", index=False)

{'text': {'date': '2024-01-08', 'speakername': 'a', 'text': 'We must invest in solar and wind energy now to prevent catastrophic climate change impacts.'}, 'likelihood_deltas': [{'conditioners': ['An overwhelming majority of climate scientists, over 97%, agree that climate change is real and primarily caused by human activities, particularly the emission of greenhouse gases.'], 'likelihood_delta': {'p_xy': -86.85718282300002, 'p_x': -47.04408815560001, 'p_y': -45.16461943, 'likelihood_delta': 5.351524762599993}}, {'conditioners': ['Excessive concentrations of carbon dioxide in the atmosphere, while necessary for plant life, act as a potent greenhouse gas that traps heat and contributes to global warming.'], 'likelihood_delta': {'p_xy': -101.41709930699999, 'p_x': -55.799907336000004, 'p_y': -45.16461943, 'likelihood_delta': -0.4525725409999808}}, {'conditioners': ['Renewable energy technologies are advancing and becoming more efficient, offering a promising alternative to fossil fuels 