In [1]:
import plotly.graph_objects as go
import plotly.figure_factory as ff
import pandas as pd

In [2]:
import os
import sys
# Make `<parent of the working directory>/src` importable.
# Built with os.path so the entry is valid on every OS; splitting and joining on a
# literal backslash only produced a usable path on Windows.
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), 'src'))

from src.visualization.PairResultVisualizer import PairResultVisualizer

In [3]:
def count_words(sentence):
    """
    Counts the number of words in a sentence.

    A word is a maximal run of non-whitespace characters, so repeated spaces,
    tabs, newlines and leading/trailing whitespace never produce empty "words".

    Args:
        sentence (str): Text to measure.

    Returns:
        int: Number of whitespace-separated words; 0 for an empty or
            whitespace-only string.
    """
    # str.split() without an argument collapses whitespace runs, whereas
    # split(" ") yields an empty token per extra space and [""] for "".
    return len(sentence.split())

In [4]:
def plot_length_distribution_comparison(df_0s, df_1s, name):
    """
        Shows one figure with two histograms of text length in words: one for
        the zero-shot texts and one for the one-shot texts.

        The texts are read from the 'NAME' entry of each input and measured
        with count_words; `name` becomes the prefix of the figure title.
    """
    zero_shot_counts = list(map(count_words, df_0s['NAME']))
    one_shot_counts = list(map(count_words, df_1s['NAME']))

    # (legend label, word counts, bar colour) for each histogram, in drawing order.
    series = (
        ("Zero-Shot", zero_shot_counts, "#D0006F"),
        ("One-Shot", one_shot_counts, "#24135F"),
    )

    figure = go.Figure()
    for label, counts, colour in series:
        figure.add_trace(go.Histogram(x=counts, name=label, marker_color=colour))

    layout_options = dict(
        title=f"{name} - Question length distribution",
        xaxis_title="Length",
        yaxis_title="Frequency",
    )
    figure.update_layout(**layout_options)

    figure.show()

In [5]:
import json

# Experiment pair definitions live at
# <parent of the working directory>/src/visualization/experiment_pairs.json.
config_path = os.path.join(
    os.path.abspath(os.path.join(os.getcwd(), os.pardir)),
    "src", "visualization", "experiment_pairs.json",
)
# The with-block closes the file on exit, so no explicit close() is needed afterwards.
with open(config_path, "r") as f:
    experiment_confs = json.load(f)

In [6]:
# The "pairs" entry of the loaded config; the cells below pick entries from it by position.
pairs = experiment_confs["pairs"]

In [7]:
# Build the visualizer for the config entry at index 4, then load its data
# relative to the parent of the current working directory.
gpt_35_0t_0fp = PairResultVisualizer(pairs[4])
gpt_35_0t_0fp.load_data(
    project_root=os.path.abspath(
        os.path.join(os.getcwd(), os.pardir)
    ),
)

In [8]:
# Title the figure with the id of the config entry this visualizer was built from
# (pairs[4]); pairs[0] is a different entry and would mislabel the plot.
plot_length_distribution_comparison(
    gpt_35_0t_0fp.zero_shot_questions,
    gpt_35_0t_0fp.one_shot_questions,
    pairs[4]["id"],
)

In [9]:
# Same comparison on the *_full_questions attributes. The title uses the id of
# pairs[4], the config entry this visualizer was built from, rather than pairs[0].
plot_length_distribution_comparison(
    gpt_35_0t_0fp.zero_shot_full_questions,
    gpt_35_0t_0fp.one_shot_full_questions,
    pairs[4]["id"],
)

In [10]:
# Build the visualizer for the config entry at index 0, then load its data
# relative to the parent of the current working directory.
gpt_4_0t_0fp = PairResultVisualizer(pairs[0])
gpt_4_0t_0fp.load_data(
    project_root=os.path.abspath(
        os.path.join(os.getcwd(), os.pardir)
    ),
)

In [11]:
# Zero-shot vs one-shot question lengths, titled with the id of pairs[0].
plot_length_distribution_comparison(
    df_0s=gpt_4_0t_0fp.zero_shot_questions,
    df_1s=gpt_4_0t_0fp.one_shot_questions,
    name=pairs[0]["id"],
)

In [12]:
import plotly.figure_factory as ff

def plot_density_distribution_comparison_with_ref(df_ref, df_0s, df_1s, name):
    """
    Plots the density curves of text length (in words) for the reference,
    zero-shot and one-shot texts in a single figure.

    Args:
        df_ref: Table-like object whose 'NAME' entry yields the reference strings.
        df_0s: Same, for the zero-shot strings.
        df_1s: Same, for the one-shot strings.
        name: Prefix of the figure title.

    Returns:
        None. The figure is displayed with fig.show().
    """
    # (legend label, source, curve colour, translucent fill colour) per group.
    groups = [
        ("Reference", df_ref, "#088DA5", 'rgba(8, 141, 165, 0.2)'),
        ("Zero-Shot", df_0s, "#D0006F", 'rgba(208, 0, 111, 0.2)'),
        ("One-Shot", df_1s, "#372082", 'rgba(55, 32, 130, 0.2)'),
    ]

    group_labels = [label for label, _, _, _ in groups]
    colors = [color for _, _, color, _ in groups]
    # Length of a text = number of whitespace-separated words.
    hist_data = [
        [len(sentence.split()) for sentence in frame['NAME']]
        for _, frame, _, _ in groups
    ]

    # show_hist=False drops the histogram bars and keeps the curves.
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, colors=colors)

    # Add filled area under the curve
    for label, _, _, fill_color in groups:
        fig.update_traces(
            fill='tozeroy',
            fillcolor=fill_color,
            selector=dict(type='scatter', name=label),
        )

    fig.update_layout(
        title=f"{name} - Question length density distribution",
        xaxis_title="Length",
        # The distplot curves are densities, not raw counts.
        yaxis_title="Density"
    )

    fig.show()


In [13]:
def plot_length_distribution_comparison_with_ref(df_ref, df_0s, df_1s, name):
    """
    Plots the word-count histograms of the reference, zero-shot and one-shot
    texts in a single figure.

    Args:
        df_ref: Table-like object whose 'NAME' entry yields the reference strings.
        df_0s: Same, for the zero-shot strings.
        df_1s: Same, for the one-shot strings.
        name: Prefix of the figure title.

    Returns:
        None. The figure is displayed with fig.show().
    """
    # (legend label, source, bar colour): exactly one histogram per group, each
    # fed from its own data. Previously the reference counts were computed but
    # never plotted, and the zero-shot counts were drawn under the "Reference" label.
    groups = [
        ("Reference", df_ref, "#68E1EA"),
        ("Zero-Shot", df_0s, "#4A01A0"),
        ("One-Shot", df_1s, "#CB1973"),
    ]

    fig = go.Figure()
    for label, frame, color in groups:
        # Count whitespace-separated words the same way for every group so the
        # three distributions are directly comparable.
        lengths = [len(sentence.split()) for sentence in frame['NAME']]
        fig.add_trace(go.Histogram(x=lengths, name=label, marker_color=color))

    fig.update_layout(
        title=f"{name} - Question length distribution",
        xaxis_title="Length",
        yaxis_title="Frequency"
    )

    fig.show()

In [14]:
# Histogram comparison of reference, zero-shot and one-shot question lengths,
# titled with the id of pairs[0].
plot_length_distribution_comparison_with_ref(
    df_ref=gpt_4_0t_0fp.ground_truth_questions,
    df_0s=gpt_4_0t_0fp.zero_shot_questions,
    df_1s=gpt_4_0t_0fp.one_shot_questions,
    name=pairs[0]["id"],
)

In [15]:
# Density-curve comparison of reference, zero-shot and one-shot question lengths,
# titled with the id of pairs[0].
plot_density_distribution_comparison_with_ref(
    df_ref=gpt_4_0t_0fp.ground_truth_questions,
    df_0s=gpt_4_0t_0fp.zero_shot_questions,
    df_1s=gpt_4_0t_0fp.one_shot_questions,
    name=pairs[0]["id"],
)