In [136]:
import sys
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv

import random
from typing import List, Any, Dict, Tuple
import copy
from datetime import datetime
from collections import Counter, defaultdict
from scipy import stats
from tabulate import tabulate

sys.path.append("../")

from src.scripts import run_fake_data_test
from src.helpers.visualisation import barplot_distribution, plot_confusion_matrix, tabulate_annotation_pair_summary, analyze_pair_annotations
from src.helpers.io import read_json


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [150]:
# Annotation field names, grouped by which side of a turn they describe.
# The `*_prev` lists hold unprefixed names; the `*_new` lists hold names
# prefixed with `prompt_` / `turn_` / `response_`. Only the `*_new` lists are
# read by display_info_for_turn; the `*_prev` lists are not referenced by the
# other cells visible in this notebook.

# --- unprefixed names ---
prompt_fields_prev = [
    "multi_turn_relationship", "media_format", "topic",
    "function_purpose", "anthropomorphization", "restricted_flags",
]
response_fields_prev = [
    "answer_form", "self_disclosure", "topic_response",
    "media_format_response", "restricted_flags_response",
]

# --- prefixed names ---
prompt_fields_new = [
    "prompt_multi_turn_relationship", "prompt_media_format",
    "prompt_interaction_features", "prompt_function_purpose",
    "turn_topic", "turn_sensitive_use_flags",
]
response_fields_new = [
    "response_answer_form", "response_media_format",
    "response_interaction_features",
]

In [151]:
# Build the dataset object that later cells in this notebook read as the
# global `dset`; "../data/" is the directory handed to the analysis script.
dset = run_fake_data_test.run_automatic_analysis_v0("../data/")

Loading conversations from ../data/sample120.json
Loaded 120 conversations.
Updated file: combined.json
Added conversation IDs to 1 files
Split records into two folders:
  - ../data/labelstudio_outputs_split1_v2/: Contains 0 records with unique conversation IDs
  - ../data/labelstudio_outputs_split2_v2/: Contains 83 records with duplicate conversation IDs

gpt4o-json

prompt-multi_turn_relationship: 2 / 597 failed due to invalid annotations.
prompt-interaction_features: 1 / 597 failed due to invalid annotations.
turn-sensitive_use_flags: 0 / 597 failed due to invalid annotations.
turn-topic: 14 / 597 failed due to invalid annotations.
response-interaction_features: 0 / 597 failed due to invalid annotations.
prompt-function_purpose: 4 / 597 failed due to invalid annotations.
prompt-media_format: 19 / 597 failed due to invalid annotations.
response-media_format: 5 / 597 failed due to invalid annotations.
response-answer_form: 1 / 597 failed due to invalid annotations.

gpt4o-free

prompt

In [152]:
def run_interrater_comparison(
    dataset,
    task_name,
    annotation_source_1,
    annotation_source_2,
):
    """Compare two annotation sources on one task and emit plots, a CSV and a printed summary.

    All queries go through ``dataset`` (the argument), never through a
    notebook-level global, so the function can be used with any dataset object.

    Args:
        dataset: Object providing ``get_annotation_distribution(...)`` and
            ``get_joint_distribution(...)``.
        task_name: Annotation task to compare; also used as the x-axis label
            and as the last component of the output directory.
        annotation_source_1: First annotation source.
        annotation_source_2: Second annotation source.

    Returns:
        The third element returned by ``dataset.get_joint_distribution``
        (``paired_values``).

    Side effects:
        Creates ``../data/annotation_analysis_v0/<source_1>--<source_2>/<task_name>``
        and passes ``barchart.png``, ``multilabel_barchart.png`` and
        ``confusion_matrix.png`` inside it as ``output_path`` to the plotting
        helpers; writes ``pair_frequencies.csv`` there via ``df.to_csv``; prints
        the agreement metrics and a tabulated summary of the pair frequencies.
    """
    sources = (annotation_source_1, annotation_source_2)

    # Per-source distributions, first with the default value handling and then
    # with annotation_as_list_type=True. Call order matches source_1, source_2.
    plain_1, plain_2 = [
        dataset.get_annotation_distribution(
            name=task_name, level="message", annotation_source=source
        )
        for source in sources
    ]
    listed_1, listed_2 = [
        dataset.get_annotation_distribution(
            name=task_name,
            level="message",
            annotation_source=source,
            annotation_as_list_type=True,
        )
        for source in sources
    ]

    outdir = f"../data/annotation_analysis_v0/{annotation_source_1}--{annotation_source_2}/{task_name}"
    os.makedirs(outdir, exist_ok=True)

    # NOTE(review): the series labels are fixed to "Split1"/"Split2" even when
    # the sources are not splits (e.g. two model variants) — confirm intended.
    barplot_distribution(
        {"Split1": plain_1, "Split2": plain_2}, normalize=True,
        xlabel=task_name, ylabel="Proportion", title="",
        output_path=f"{outdir}/barchart.png", order="descending")

    barplot_distribution(
        {"Split1": listed_1, "Split2": listed_2}, normalize=True,
        xlabel=task_name, ylabel="Proportion", title="",
        output_path=f"{outdir}/multilabel_barchart.png", order="descending")

    info_to_plot_cm, agreement_metrics, paired_values = dataset.get_joint_distribution(
        annotations1=(task_name, annotation_source_1),
        annotations2=(task_name, annotation_source_2),
        level="message",
        compute_disagreement=True,
        verbose=True
    )

    plot_confusion_matrix(
        info_to_plot_cm, normalize=True, xlabel="", ylabel="", title="Confusion Matrix",
        output_path=f"{outdir}/confusion_matrix.png")

    df = analyze_pair_annotations(paired_values)
    df.to_csv(f"{outdir}/pair_frequencies.csv", index=False, quoting=csv.QUOTE_NONNUMERIC)

    print()
    print(f"-----------------{task_name}-----------------")
    print(agreement_metrics)
    print(tabulate_annotation_pair_summary(df, 20))
    print(len(df))
    print()
    return paired_values


In [153]:
# task_annotations = {}
# for feature in prompt_fields_new:
#     task_annotations[feature] = run_interrater_comparison(dset, feature, "gpt4o_json_full", "gpt4o_free_full")
#     # break
# for feature in response_fields_new:
#     task_annotations[feature] = run_interrater_comparison(dset, feature, "gpt4o_json_full", "gpt4o_free_full")
#     # break

In [154]:
# Flatten the sample file into one id per turn, "<conversation_id>-<turn>",
# in file order (conversations, then turns within each conversation).
orig_sample = read_json("../data/sample120.json")
ex_ids = [
    datum["conversation_id"] + "-" + str(turn["turn"])
    for datum in orig_sample["data"]
    for turn in datum["conversation"]
]

In [155]:
# automatic_variants = [
#     "gpt4o_json_full",
#     # "gpt4o_free_full",
#     "gpto3mini_json_full",
#     # "gpto3mini_free_full",
# ]
# focus_keys = [(model_key, field_name) for model_key in automatic_variants for field_name in prompt_fields_new]
# focus_keys.extend([(model_key, field_name) for model_key in automatic_variants for field_name in response_fields_new])
# focus_keys.extend([(split_key, field_name) for split_key in ["split1", "split2"] for field_name in prompt_fields_new])
# focus_keys.extend([(split_key, field_name) for split_key in ["split1", "split2"] for field_name in response_fields_new])
# focus_metadatas = dset.extract_conversation_metadata_by_ids(
#     ex_ids,
#     annotation_keys=focus_keys,
#     level="message",
# )

In [156]:
def display_info_for_turn(
    ex_idx_turn,
    dataset=None,
):
    """Print every annotation recorded for one message, then its content.

    Args:
        ex_idx_turn: Message id of the form ``"<conversation_id>-<turn>"``.
            The turn is taken from the text after the LAST hyphen, so
            conversation ids that themselves contain hyphens are handled.
        dataset: Object providing ``id_lookup(id, level="message")``. Defaults
            to the notebook-level ``dset`` when omitted.

    Prints, in order: a header with conversation id, turn and role; for each
    task listed in ``prompt_fields_new`` or ``response_fields_new``, the value
    from every source (for sources whose name contains ``"split"`` the
    annotation's ``"annotator"`` is shown instead of the source name); the
    message content; and, when ``turn > 0``, the content of the message at
    ``turn - 1`` in the same conversation.

    Raises:
        ValueError: if the text after the last hyphen is not an integer.
    """
    if dataset is None:
        dataset = dset  # notebook-level dataset

    # rpartition keeps any hyphens inside the conversation id intact.
    ex_idx, _, turn_text = ex_idx_turn.rpartition("-")
    turn = int(turn_text)

    message = dataset.id_lookup(ex_idx_turn, level="message")[ex_idx_turn].to_dict()
    role = message['role']

    # Both prompt and response fields are shown regardless of the role.
    relevant_tasks = set(prompt_fields_new + response_fields_new)
    task_to_source_to_vals = defaultdict(dict)
    for key, annotation in message["metadata"].items():
        # Keys are "<source>-<task>"; partition never raises, and a key without
        # a separator yields an empty task, which is filtered out below.
        source, _, task = key.partition("-")
        if task in relevant_tasks:
            task_to_source_to_vals[task][source] = annotation

    print(f"IDX: {ex_idx} | Turn: {turn} | Role: {role}")
    print("-------------------------------------------")
    for task, source_vals in task_to_source_to_vals.items():
        print()
        print(f"TASK: {task}")
        for source, val in source_vals.items():
            src_info = val["annotator"] if "split" in source else source
            print(f"{src_info}:   {val['value']}")

    print("\n****** Message Content:******")
    print(message["content"])
    print()

    if turn > 0:
        prev_id = ex_idx + "-" + str(turn - 1)
        print("\n****** Previous Turn Message Content:******")
        prev_message = dataset.id_lookup(prev_id, level="message")[prev_id].to_dict()
        print(prev_message["content"])


In [193]:
# Position in `ex_ids` of the turn whose annotations and content are printed.
ANNOTATION_TURN = 86
display_info_for_turn(ex_ids[ANNOTATION_TURN])

IDX: wildchat_c51fa354400c0d43dd649b3b164d92be | Turn: 0 | Role: user
-------------------------------------------

TASK: prompt_media_format
niloofar:   ['Likely retrieved / pasted content', 'Math / symbols', 'URLs', 'Natural language', 'Code']
victor:   ['Natural language', 'Code']
gpt4o_json:   ['Natural language', 'Code', 'Likely retrieved/pasted content']
gpt4o_free:   ['Natural language', 'Code', 'Likely retrieved/pasted content']
gpto3mini_json:   ['Natural language', 'Code', 'Likely retrieved/pasted content']
gpto3mini_free:   ['Natural language', 'Code', 'Likely retrieved/pasted content']

TASK: prompt_function_purpose
niloofar:   ['Advice, guidance, & recommendations: Instructions / how-to', 'Reasoning: Other general problem solving', 'Content generation: Code']
victor:   ['Reasoning: Other general problem solving']
gpt4o_free:   ['Advice, Guidance, & Recommendations (Instructions / How-to)', 'Information analysis (Other content analysis / description)']
gpto3mini_json:   ['Ad

In [41]:
# Position in `ex_ids` of the turn to load as a raw message.
# NOTE(review): this rebinds ANNOTATION_TURN, and the execution counts in this
# part of the notebook are out of order — confirm which value is intended
# when the notebook is run top to bottom on a fresh kernel.
ANNOTATION_TURN = 8

In [42]:
# Look up the message for ex_ids[ANNOTATION_TURN] and convert it with to_dict().
message = dset.id_lookup(ex_ids[ANNOTATION_TURN], level="message")[ex_ids[ANNOTATION_TURN]].to_dict()

In [49]:
# message

In [133]:
# Message id to inspect, in the same "<conversation_id>-<turn>" form as the
# entries of `ex_ids`.
EG_ID = "wildchat_40fe9070a5268327e0278d00a7bd1396-2"

In [134]:
# Look up the message for EG_ID and convert it with to_dict().
# NOTE(review): rebinds `message`, which an earlier cell also assigns.
message = dset.id_lookup(EG_ID, level="message")[EG_ID].to_dict()

In [135]:
# Display the most recently assigned `message` as the cell output.
message

{'turn': 2,
 'role': 'user',
 'content': '1. bool Collider::checkCollision(Collider& other, float push)\n{\n\tsf::Vector2f otherPostion = other.GetPostion();\n\tsf::Vector2f otherHalfSize = other.GetHalfSize();\n\n\tsf::Vector2f thisPostion = GetPostion();\n\tsf::Vector2f thisHalfSize = GetHalfSize();\n\n\tfloat deltaX = otherPostion.x - thisPostion.x;\n\tfloat deltaY = otherPostion.y - thisPostion.y;\n\n\tfloat intersectX = abs(deltaX) - (otherHalfSize.x + thisHalfSize.x);\n\tfloat intersectY = abs(deltaY) - (otherHalfSize.y + thisHalfSize.y);\n\t\n\tif (intersectX < 0.0f && intersectY < 0.0f)\n\t{\n\t\tpush = std::min(std::max(push, 0.0f), 1.0f);\n\n\t\tif (intersectX > intersectY)\n\t\t{\n\t\t\tif (deltaX > 0.0f)\n\t\t\t{\n\t\t\t\tMove(intersectX * (1.0f - push), 0.0f);\n\t\t\t\tother.Move(-intersectX * (push), 0.0f);\n\t\t\t}\n\t\t\telse\n\t\t\t{\n\t\t\t\tMove(-intersectX * (1.0f - push), 0.0f);\n\t\t\t\tother.Move(intersectX * (push), 0.0f);\n\t\t\t}\n\t\t}\n\t\telse\n\t\t{\n\t\t\

In [109]:
# display_info_for_turn("wildchat_f1675170ab5361f56211e19bacbe1945-1")

In [11]:
# dset.data[3].conversation[2].metadata