In [1]:
import os
import sys
import os
from collections import defaultdict
import numpy as np

# Resolve project locations relative to the notebook's working directory.
_cwd = os.getcwd()

# Put the parent directory on the import path (once) -- presumably so the
# project-local imports that follow (`Util`, `eval`) can be resolved.
src_path = os.path.abspath(os.path.join(_cwd, os.pardir))
if src_path not in sys.path:
    sys.path.append(src_path)

# The data directory sits two levels above the working directory.
DATA_BASEPATH = os.path.abspath(os.path.join(_cwd, os.pardir, os.pardir, "Data"))

from Util.posttree import PostTree
from eval import EvalSimilarity

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Evaluate generated comment trees against the original Reddit comment tree.
#
# For every post in POST_IDS this cell:
#   1. rebuilds the original comment tree from the arcticshift CSV files,
#   2. loads each of the N_COPIES generated trees for that post,
#   3. runs EvalSimilarity on (original, generated) and walks the result with
#      `similarity.bfs`, which receives the per-depth `levels` / `counts`
#      accumulators and a per-copy results file path,
#   4. writes the per-depth averages (pooled over all copies) to
#      `final_averages_results.txt` inside the post's result folder.
POST_IDS = ['18vx8ip', '18wom5l']
TEST_TYPE = 'zero_shot'  # [few_shot, zero_shot, instruct]
N_COPIES = 10  # generated trees per post: copy_0.json ... copy_{N_COPIES-1}.json

# The source CSVs are the same for every post, so resolve them once.
POST_PATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/processed/r_books_posts.csv'))
ORG_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/cleaned/r_books_comments_cleaned_and_pruned.csv'))

for POST_ID in POST_IDS:
    # reconstruct original tree
    post_tree = PostTree(
        POST_PATH,
        POST_ID
    )
    post_tree.create_comment_tree(ORG_COMMENTPATH)

    # Running totals over all copies, keyed by depth:
    # all_levels is divided by all_counts below to form the pooled average.
    all_levels = defaultdict(float)
    all_counts = defaultdict(int)
    # depth -> list with one mean score per generated copy; this is what the
    # per-depth standard deviation is computed from.
    copy_averages = defaultdict(list)

    post_folder = os.path.join(DATA_BASEPATH, "eval_results", TEST_TYPE, post_tree.post_id)
    os.makedirs(post_folder, exist_ok=True)
    averages_file = os.path.join(post_folder, "final_averages_results.txt")

    # Drop stale results up front so an interrupted run does not leave an
    # outdated summary behind.
    if os.path.exists(averages_file):
        os.remove(averages_file)

    for i in range(N_COPIES):
        GEN_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, f'posttrees/{TEST_TYPE}/{POST_ID}/copy_{i}.json'))

        gen_posttree = PostTree.load_from_json(GEN_COMMENTPATH)

        # Per-copy accumulators handed to similarity.bfs.
        levels = defaultdict(float)
        counts = defaultdict(int)

        similarity = EvalSimilarity(post_tree, gen_posttree)

        # Remove the previous per-copy file first.
        # NOTE(review): presumably bfs appends to this file -- confirm.
        bfs_file = os.path.join(post_folder, f"bfs_results_copy_{i}.txt")
        if os.path.exists(bfs_file):
            os.remove(bfs_file)

        seen = set()

        similarity_tree = similarity.compare_comments()

        # Visit each root once, even if it shows up repeatedly in the result.
        for root_node in similarity_tree:
            if root_node.comment_id not in seen:
                similarity.bfs(root_node, levels, counts, bfs_file)
                seen.add(root_node.comment_id)

        # accumulate levels/counts into the all_levels and all_counts
        for depth in levels.keys():
            all_levels[depth] += levels[depth]
            all_counts[depth] += counts[depth]
            if counts[depth] > 0:
                copy_averages[depth].append(levels[depth] / counts[depth])

    with open(averages_file, "w", encoding="utf-8") as file:
        header = f"Final Average Similarity Scores by Depth (across all {N_COPIES} generated trees):"
        file.write(header + "\n")
        print("\n" + header)
        averages = []
        for depth in sorted(all_levels.keys()):
            if all_counts[depth] > 0:
                # Previously np.std was applied to the scalar running sum,
                # which is always 0. Use the spread of the per-copy means.
                # NOTE(review): individual node scores are not available in
                # this cell, so this is the std across copies, not across
                # nodes -- confirm this is the intended statistic.
                standard_dev = np.std(copy_averages[depth])
                average = all_levels[depth] / all_counts[depth]
                averages.append(average)
                line = f"Depth {depth}: Average Similarity Score = {average:.2f}, Level Standard Deviation = {standard_dev:.2f}"
                print(line)
                file.write(line + "\n")
            else:
                # If for some reason no counts, just skip or note it
                line = f"Depth {depth}: No data."
                print(line)
                file.write(line + "\n")
        # np.std([]) warns and returns nan; produce nan directly when no depth
        # had any data.
        std_ss_across_levels = np.std(averages) if averages else float("nan")
        line = f"Standard Deviation of Semantic Simularity Across All Levels of the Post: {std_ss_across_levels}"
        print(line)
        file.write(line + "\n")


['The average novel is about 8 hours in audio and most people read faster than that. Remember most novels are under 500 pages. It all depends on the book. Some are light and you fly through them even if they are huge. Other 250 page books take forever to read.', 'The average novel is about 8 hours in audio and most people read faster than that. Remember most novels are under 500 pages. It all depends on the book. Some are light and you fly through them even if they are huge. Other 250 page books take forever to read.']
['How does a novel take 15 hours to read? A reasonable pace is something like 80-100 pages an hour with a 500 page book that is 5-6 hours. Most novels are in the 300 something page area so that is 3-4 hours pure reading time.', 'How does a novel take 15 hours to read? A reasonable pace is something like 80-100 pages an hour with a 500 page book that is 5-6 hours. Most novels are in the 300 something page area so that is 3-4 hours pure reading time.']
["Depending on your 

KeyboardInterrupt: 