In [None]:
import plotly.graph_objects as go
import plotly.figure_factory as ff
import pandas as pd
import string
import numpy as np

In [None]:
import os
import sys
# Put `<parent of the working directory>/src` on the import path.
# Built with os.path so it works with any path separator: the previous
# split('\\') / join('\\') form only worked on Windows and collapsed to the
# literal '\src' wherever os.getcwd() contains no backslashes.
# NOTE(review): the import below is `from src....`, which resolves against the
# directory that *contains* `src` - confirm which of the two must be on sys.path.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

from src.visualization.PairResultVisualizer import PairResultVisualizer

In [48]:
def count_words(sentence):
    """
    Count the whitespace-separated words in ``sentence``, ignoring punctuation.

    Every character listed in ``string.punctuation`` (ASCII punctuation only)
    is deleted before splitting, so punctuation-only tokens are not counted
    and a hyphenated token such as "well-known" counts as one word.

    Parameters
    ----------
    sentence : str
        Text to measure.

    Returns
    -------
    int
        Number of words. 0 for empty or punctuation-only text, and for
        inputs that are not text (e.g. ``None`` or a float NaN).
    """
    try:
        stripped = sentence.translate(str.maketrans('', '', string.punctuation))
        return len(stripped.split())
    except (AttributeError, TypeError):
        # Non-text input: no .translate/.split (AttributeError) or a translate
        # that rejects the table (TypeError, e.g. bytes). Anything else -
        # KeyboardInterrupt included - is deliberately left to propagate
        # instead of being hidden by a bare `except:`.
        return 0

In [None]:
def plot_length_distribution_comparison(df_0s, df_1s, column, name, element, show_stats=False):
    """
    Plot overlaid word-count histograms for zero-shot and one-shot predictions.

    Parameters
    ----------
    df_0s, df_1s : pandas.DataFrame
        Frames with the zero-shot and one-shot outputs respectively.
    column : str
        Column holding the text to measure; null entries are skipped and the
        remaining ones are measured with ``count_words``.
    name, element : str
        Combined into the title as "<name> - <element> length distribution".
    show_stats : bool, default False
        When True, a second title line with the mean / max / min word count
        of each series is appended. Off by default, which keeps the plain
        title this function has always rendered.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    def _summarize(tag, lens):
        # mean/max/min are undefined for an empty series; say so instead of raising.
        if not lens:
            return f"{tag} -> [no data]"
        return f"{tag} -> [Mean: {int(round(np.mean(lens)))}, Max: {max(lens)}, Min: {min(lens)}]"

    # (legend label, short tag for the stats line, bar colour, source frame)
    series = (
        ("Zero-Shot", "0s", "#D0006F", df_0s),
        ("One-Shot", "1s", "#24135F", df_1s),
    )

    fig = go.Figure()
    summaries = []
    for legend, tag, colour, frame in series:
        lens = [count_words(sentence) for sentence in frame[column].dropna()]
        # histnorm='probability' makes bar heights sum to 1 per series, so
        # series of different sizes stay comparable.
        fig.add_trace(go.Histogram(x=lens, name=legend, marker_color=colour, histnorm='probability'))
        if show_stats:
            summaries.append(_summarize(tag, lens))

    title = f"{name} - {element} length distribution"
    if show_stats:
        # Plotly titles accept <br> for a line break.
        title += "<br>" + "    ".join(summaries)

    fig.update_layout(
        title=title,
        xaxis_title="Length",
        yaxis_title="Frequency"
    )

    fig.show()

In [None]:
def plot_length_distribution_comparison_with_ref(df_ref, df_0s, df_1s, column, name, element, show_stats=False):
    """
    Plot overlaid word-count histograms for a reference set and for zero-shot
    and one-shot predictions.

    Parameters
    ----------
    df_ref : pandas.DataFrame
        Reference frame; its trace is labelled "Human-written".
    df_0s, df_1s : pandas.DataFrame
        Frames with the zero-shot and one-shot outputs respectively.
    column : str
        Column holding the text to measure; null entries are skipped and the
        remaining ones are measured with ``count_words``.
    name, element : str
        Combined into the title as "<name> - <element> length distribution".
    show_stats : bool, default False
        When True, a second title line with the mean / max / min word count
        of each series is appended. Off by default, which keeps the plain
        title this function has always rendered.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    def _summarize(tag, lens):
        # mean/max/min are undefined for an empty series; say so instead of raising.
        if not lens:
            return f"{tag} -> [no data]"
        return f"{tag} -> [Mean: {int(round(np.mean(lens)))}, Max: {max(lens)}, Min: {min(lens)}]"

    # (legend label, short tag for the stats line, bar colour, source frame)
    series = (
        ("Human-written", "GT", "#89A9EE", df_ref),
        ("Zero-Shot", "0s", "#D0006F", df_0s),
        ("One-Shot", "1s", "#24135F", df_1s),
    )

    fig = go.Figure()
    summaries = []
    for legend, tag, colour, frame in series:
        lens = [count_words(sentence) for sentence in frame[column].dropna()]
        # histnorm='probability' makes bar heights sum to 1 per series, so
        # series of different sizes stay comparable.
        fig.add_trace(go.Histogram(x=lens, name=legend, marker_color=colour, histnorm='probability'))
        if show_stats:
            summaries.append(_summarize(tag, lens))

    title = f"{name} - {element} length distribution"
    if show_stats:
        # Plotly titles accept <br> for a line break.
        title += "<br>" + "    ".join(summaries)

    fig.update_layout(
        title=title,
        xaxis_title="Length",
        yaxis_title="Frequency"
    )

    fig.show()

In [None]:
import json

# The experiment definitions are read from
# <parent of the working directory>/src/visualization/experiment_pairs.json.
config_path = os.path.join(os.path.abspath(os.path.join(os.getcwd(), os.pardir)), "src", "visualization", "experiment_pairs.json")
# JSON text is UTF-8; an explicit encoding avoids depending on the platform default.
with open(config_path, "r", encoding="utf-8") as f:
    experiment_confs = json.load(f)
# No explicit f.close(): leaving the `with` block has already closed the file.

# NOTE: later cells remove entries from this list in place, so re-run this
# cell before re-running them.
pairs = experiment_confs["pairs"]

In [None]:
# Absolute path of the directory one level above the current working directory.
# NOTE(review): presumably the notebook runs from a direct child of the
# project root - confirm.
_parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
PROJECT_ROOT = os.path.abspath(_parent_of_cwd)

In [None]:
# Seed the GPT-4 visualizer with the first configured pair, then drop that
# pair from the shared list so it is not handled again.
# NOTE(review): the first pair is taken without checking its id - presumably
# the config lists a GPT-4 pair first; confirm.
# NOTE: this cell mutates `pairs` in place, so it is not safe to re-run
# without reloading the config first.
gpt_4_preds = PairResultVisualizer(pairs[0], project_root=PROJECT_ROOT)
del pairs[0]

# Attach every remaining pair whose id mentions GPT-4. load_data is invoked
# after each add_pair, exactly as before; whether one final call would be
# enough depends on PairResultVisualizer, which is not visible here.
pairs_to_remove = []
for pair in pairs:
    if "GPT-4" not in pair["id"]:
        continue
    gpt_4_preds.add_pair(pair)
    gpt_4_preds.load_data(project_root=PROJECT_ROOT)
    pairs_to_remove.append(pair)

# Removal happens in a second pass: deleting from a list while iterating over
# it would skip elements.
for pair in pairs_to_remove:
    pairs.remove(pair)



In [None]:
# Every pair still in `pairs` goes into the GPT-3.5 visualizer. This relies on
# the GPT-4 cell above having already removed its own pairs from the list.
# NOTE: `pairs` loses its first element here, so the cell is not re-runnable
# without reloading the config.
gpt_35_preds = PairResultVisualizer(pairs[0], project_root=PROJECT_ROOT)
del pairs[0]

for pair in pairs:
    # load_data follows each add_pair, preserving the original call sequence.
    gpt_35_preds.add_pair(pair)
    gpt_35_preds.load_data(project_root=PROJECT_ROOT)

## **Questions**

In [None]:
# GPT-3.5-Turbo question lengths against the ground-truth questions,
# one figure per task.
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_35_preds.ground_truth_questions,
    df_0s=gpt_35_preds.zero_shot_full_questions,
    df_1s=gpt_35_preds.one_shot_full_questions,
    column="NAME",
    name="GPT-3.5-Turbo [Task 1]",
    element="Question",
)
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_35_preds.ground_truth_questions,
    df_0s=gpt_35_preds.zero_shot_questions,
    df_1s=gpt_35_preds.one_shot_questions,
    column="NAME",
    name="GPT-3.5-Turbo [Task 2]",
    element="Question",
)

In [None]:
# GPT-4-Turbo question lengths against the ground-truth questions,
# one figure per task.
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_4_preds.ground_truth_questions,
    df_0s=gpt_4_preds.zero_shot_full_questions,
    df_1s=gpt_4_preds.one_shot_full_questions,
    column="NAME",
    name="GPT-4-Turbo [Task 1]",
    element="Question",
)
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_4_preds.ground_truth_questions,
    df_0s=gpt_4_preds.zero_shot_questions,
    df_1s=gpt_4_preds.one_shot_questions,
    column="NAME",
    name="GPT-4-Turbo [Task 2]",
    element="Question",
)

## **Answers**

In [None]:
# GPT-3.5-Turbo answer lengths against the ground-truth answers,
# one figure per task.
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_35_preds.ground_truth_answers,
    df_0s=gpt_35_preds.zero_shot_full_answers,
    df_1s=gpt_35_preds.one_shot_full_answers,
    column="ANSWER",
    name="GPT-3.5-Turbo [Task 1]",
    element="Answer",
)
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_35_preds.ground_truth_answers,
    df_0s=gpt_35_preds.zero_shot_answers,
    df_1s=gpt_35_preds.one_shot_answers,
    column="ANSWER",
    name="GPT-3.5-Turbo [Task 2]",
    element="Answer",
)

In [None]:
# GPT-4-Turbo answer lengths against the ground-truth answers,
# one figure per task.
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_4_preds.ground_truth_answers,
    df_0s=gpt_4_preds.zero_shot_full_answers,
    df_1s=gpt_4_preds.one_shot_full_answers,
    column="ANSWER",
    name="GPT-4-Turbo [Task 1]",
    element="Answer",
)
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_4_preds.ground_truth_answers,
    df_0s=gpt_4_preds.zero_shot_answers,
    df_1s=gpt_4_preds.one_shot_answers,
    column="ANSWER",
    name="GPT-4-Turbo [Task 2]",
    element="Answer",
)

## **Distribution (mean, variance, max, min)**

In [50]:
def print_distribution(df, model, tech, task, column):
    """
    Print one ``&``-separated summary row of word-count statistics.

    The non-null entries of ``df[column]`` are measured with ``count_words``;
    the mean, variance, maximum and minimum of those counts are each rounded
    to the nearest integer and printed after the three label fields.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the text column.
    model, tech, task : str
        Label fields copied verbatim into the row.
    column : str
        Name of the text column to measure.
    """
    texts = df.loc[df[column].notna(), column]
    word_counts = [count_words(text) for text in texts]

    # Reducers run in the same order as the printed columns.
    rounded = []
    for reducer in (np.mean, np.var, np.max, np.min):
        rounded.append(int(round(reducer(word_counts))))
    mean, var, max_l, min_l = rounded

    print(f"{model} & {tech} & {task} & {mean} & {var} & {max_l} & {min_l} ")

In [52]:
# Question-length rows: ground truth first, then each model / technique / task.
print_distribution(gpt_35_preds.ground_truth_questions, model="Human-written", tech="", task="", column="NAME")
print_distribution(gpt_35_preds.zero_shot_full_questions, model="GPT-3.5-Turbo", tech="Zero-Shot", task="1", column="NAME")
print_distribution(gpt_35_preds.one_shot_full_questions, model="GPT-3.5-Turbo", tech="One-Shot", task="1", column="NAME")
print_distribution(gpt_35_preds.zero_shot_questions, model="GPT-3.5-Turbo", tech="Zero-Shot", task="2", column="NAME")
print_distribution(gpt_35_preds.one_shot_questions, model="GPT-3.5-Turbo", tech="One-Shot", task="2", column="NAME")
print_distribution(gpt_4_preds.zero_shot_full_questions, model="GPT-4-Turbo", tech="Zero-Shot", task="1", column="NAME")
print_distribution(gpt_4_preds.one_shot_full_questions, model="GPT-4-Turbo", tech="One-Shot", task="1", column="NAME")
print_distribution(gpt_4_preds.zero_shot_questions, model="GPT-4-Turbo", tech="Zero-Shot", task="2", column="NAME")
print_distribution(gpt_4_preds.one_shot_questions, model="GPT-4-Turbo", tech="One-Shot", task="2", column="NAME")

Human-written &  &  & 13 & 30 & 34 & 1 
GPT-3.5-Turbo & Zero-Shot & 1 & 12 & 12 & 27 & 4 
GPT-3.5-Turbo & One-Shot & 1 & 13 & 23 & 36 & 0 
GPT-3.5-Turbo & Zero-Shot & 2 & 12 & 15 & 59 & 4 
GPT-3.5-Turbo & One-Shot & 2 & 13 & 24 & 37 & 0 
GPT-4-Turbo & Zero-Shot & 1 & 12 & 11 & 25 & 0 
GPT-4-Turbo & One-Shot & 1 & 12 & 11 & 33 & 0 
GPT-4-Turbo & Zero-Shot & 2 & 12 & 11 & 29 & 0 
GPT-4-Turbo & One-Shot & 2 & 12 & 12 & 31 & 0 


In [51]:
# Answer-length rows: ground truth first, then each model / technique / task.
print_distribution(gpt_35_preds.ground_truth_answers, model="Human-written", tech="", task="", column="ANSWER")
print_distribution(gpt_35_preds.zero_shot_full_answers, model="GPT-3.5-Turbo", tech="Zero-Shot", task="1", column="ANSWER")
print_distribution(gpt_35_preds.one_shot_full_answers, model="GPT-3.5-Turbo", tech="One-Shot", task="1", column="ANSWER")
print_distribution(gpt_35_preds.zero_shot_answers, model="GPT-3.5-Turbo", tech="Zero-Shot", task="2", column="ANSWER")
print_distribution(gpt_35_preds.one_shot_answers, model="GPT-3.5-Turbo", tech="One-Shot", task="2", column="ANSWER")

print_distribution(gpt_4_preds.zero_shot_full_answers, model="GPT-4-Turbo", tech="Zero-Shot", task="1", column="ANSWER")
print_distribution(gpt_4_preds.one_shot_full_answers, model="GPT-4-Turbo", tech="One-Shot", task="1", column="ANSWER")
print_distribution(gpt_4_preds.zero_shot_answers, model="GPT-4-Turbo", tech="Zero-Shot", task="2", column="ANSWER")
print_distribution(gpt_4_preds.one_shot_answers, model="GPT-4-Turbo", tech="One-Shot", task="2", column="ANSWER")

Human-written &  &  & 2 & 2 & 26 & 1 
GPT-3.5-Turbo & Zero-Shot & 1 & 1 & 1 & 12 & 0 
GPT-3.5-Turbo & One-Shot & 1 & 2 & 2 & 14 & 0 
GPT-3.5-Turbo & Zero-Shot & 2 & 1 & 1 & 10 & 0 
GPT-3.5-Turbo & One-Shot & 2 & 2 & 2 & 20 & 0 
GPT-4-Turbo & Zero-Shot & 1 & 2 & 1 & 14 & 0 
GPT-4-Turbo & One-Shot & 1 & 2 & 1 & 20 & 0 
GPT-4-Turbo & Zero-Shot & 2 & 2 & 2 & 13 & 0 
GPT-4-Turbo & One-Shot & 2 & 2 & 1 & 12 & 0 
