In [1]:
import os
import sys
import os
from collections import defaultdict
import numpy as np

# Resolve directories relative to the notebook's working directory.
_cwd = os.getcwd()

# The parent directory holds the project-local packages imported below
# (Util, eval); put it on sys.path once so those imports can resolve.
src_path = os.path.abspath(os.path.join(_cwd, os.pardir))
if not any(entry == src_path for entry in sys.path):
    sys.path.append(src_path)

# Data lives in a "Data" directory two levels above the working directory.
DATA_BASEPATH = os.path.abspath(os.path.join(_cwd, os.pardir, os.pardir, "Data"))

from Util.posttree import PostTree
from eval import EvalSimilarity

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
POST_IDS = ['18w4y9z', '1au5njg', '1atcjtj']
TEST_TYPE = 'instruct'  # [few_shot, zero_shot, instruct]
N_COPIES = 10  # generated trees per post: copy_0.json ... copy_{N_COPIES-1}.json

# The source CSVs are the same for every post, so resolve them once.
POST_PATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/processed/r_books_posts.csv'))
ORG_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/cleaned/r_books_comments_cleaned_and_pruned.csv'))

# For each post: compare the original comment tree against every generated
# copy, write one BFS result file per copy, then write per-depth averages
# (pooled over all copies) to final_averages_results.txt.
for POST_ID in POST_IDS:
    # reconstruct original tree
    post_tree = PostTree(
        POST_PATH,
        POST_ID
    )
    post_tree.create_comment_tree(ORG_COMMENTPATH)

    # Pooled totals over all copies: depth -> summed score / number of scores.
    all_levels = defaultdict(float)
    all_counts = defaultdict(int)
    # depth -> list of per-copy mean scores; used for the standard deviation.
    copy_means = defaultdict(list)

    post_folder = os.path.join(DATA_BASEPATH, "eval_results", TEST_TYPE, post_tree.post_id)
    os.makedirs(post_folder, exist_ok=True)
    averages_file = os.path.join(post_folder, "final_averages_results.txt")

    if os.path.exists(averages_file):
        os.remove(averages_file)

    for i in range(N_COPIES):
        GEN_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, f'posttrees/{TEST_TYPE}/{POST_ID}/copy_{i}.json'))

        gen_posttree = PostTree.load_from_json(GEN_COMMENTPATH)

        # Filled in by similarity.bfs; treated below as per-depth score sums
        # and per-depth score counts (NOTE(review): confirm against
        # EvalSimilarity.bfs).
        levels = defaultdict(float)
        counts = defaultdict(int)

        similarity = EvalSimilarity(post_tree, gen_posttree)

        # Start each run from a clean file (presumably bfs appends to it --
        # verify against EvalSimilarity.bfs).
        bfs_file = os.path.join(post_folder, f"bfs_results_copy_{i}.txt")
        if os.path.exists(bfs_file):
            os.remove(bfs_file)

        seen = set()

        similarity_tree = similarity.compare_comments()

        # Traverse each distinct root once, even if it appears repeatedly.
        for root_node in similarity_tree:
            if root_node.comment_id not in seen:
                similarity.bfs(root_node, levels, counts, bfs_file)
                seen.add(root_node.comment_id)

        # accumulate levels/counts into the all_levels and all_counts
        for depth in levels.keys():
            all_levels[depth] += levels[depth]
            all_counts[depth] += counts[depth]
            # Keep this copy's mean so the spread across copies can be
            # measured; the pooled sum alone is a single number whose
            # standard deviation is always 0.
            if counts[depth] > 0:
                copy_means[depth].append(levels[depth] / counts[depth])

    with open(averages_file, "w") as file:
        header = f"Final Average Similarity Scores by Depth (across all {N_COPIES} generated trees):"
        file.write(header + "\n")
        print("\n" + header)
        averages = []
        for depth in sorted(all_levels.keys()):
            if all_counts[depth] > 0:
                # Spread of the per-copy mean scores at this depth.
                standard_dev = np.std(copy_means[depth])
                average = all_levels[depth] / all_counts[depth]
                averages.append(average)
                line = f"Depth {depth}: Average Similarity Score = {average:.2f}, Level Standard Deviation = {standard_dev:.2f}"
                print(line)
                file.write(line + "\n")
            else:
                # If for some reason no counts, just skip or note it
                line = f"Depth {depth}: No data."
                print(line)
                file.write(line + "\n")
        # np.std([]) returns nan with a RuntimeWarning; write nan directly
        # when no depth had data.
        std_ss_across_levels = np.std(averages) if averages else float("nan")
        file.write(f"Standard Deviation of Semantic Simularity Across All Levels of the Post: {std_ss_across_levels}")


['Eragon? It tried to adapt the story but it utterly failed.', 'For me: The Martian by Andy Weir. It was one of those books that I picked because I was so interested in the subject. I like space, and the storyline was interesting so I watched the movie as soon as it was out. Then I read the book and was greatly disappointed. For some reason the movie made a great ending out of it, where I was left with a "what can they do next" or "wait and see what they do then..." The book ended on like a low tone of "that\'s it? I wonder what they will do now?" If I knew how terrible the book would be then I wouldn\'t have read it. Watching the movie didn\'t turn me off the book (it was okay), but it didn\'t sell the book very well either.']
['The Golden Compass', "This question assumes you haven't read all of the series in most cases. I can think of plenty of series that I've loved as a whole and found only the first book boring by comparison."]
['Neverending Story', 'The Princess Bride']
['The Shi