In [1]:
import os
import sys

src_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
if src_path not in sys.path:
    sys.path.append(src_path)
DATA_BASEPATH = os.path.abspath(os.path.join(os.getcwd(), "..","../Data"))

from Util.posttree import PostTree
from eval import EvalSimilarity


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
POST_PATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/processed/r_books_posts.csv'))
POST_ID = '18vuw2v'
ORG_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, 'arcticshift/cleaned/r_books_comments_cleaned_and_pruned.csv'))
GEN_COMMENTPATH = os.path.abspath(os.path.join(DATA_BASEPATH, f'posttrees/{POST_ID}.json'))

In [3]:
post_tree = PostTree(
    POST_PATH,
    POST_ID
    )
post_tree.create_comment_tree(ORG_COMMENTPATH)
gen_posttree = PostTree.load_from_json(GEN_COMMENTPATH)

In [4]:
import os
from collections import deque, defaultdict

def bfs(root, levels, counts, bfs_file):
    """
    Perform BFS and calculate the cumulative similarity scores and counts at each level.

    Parameters:
        root: The root node of the tree.
        levels: Dictionary to store cumulative similarity scores at each depth.
        counts: Dictionary to store the count of nodes at each depth.
        bfs_file: File to write BFS traversal results.
    """
    with open(bfs_file, "a") as file:  # Append to BFS results file
        queue = deque([(root, 0)])  # Initialize the queue with the root and its depth

        while queue:
            current, depth = queue.popleft()

            # Update the levels and counts dictionaries
            levels[depth] += current.similarity_score
            counts[depth] += 1

            indent = "    " * depth
            line = (f"{indent}Parent ID: {current.parent_comment_id}, "
                    f"Comment ID: {current.comment_id}, "
                    f"Similarity: {current.similarity_score:.2f}")
            print(line)
            file.write(line + "\n")  # Write BFS traversal result to the file

            for child in current.children:
                queue.append((child, depth + 1))

Parent ID: None, Comment ID: kftmvg3, Similarity: 0.41
    Parent ID: kftmvg3, Comment ID: kftr3qa, Similarity: 0.14
    Parent ID: kftmvg3, Comment ID: kfudn71, Similarity: 0.39
    Parent ID: kftmvg3, Comment ID: kfuiqc5, Similarity: 0.07
    Parent ID: kftmvg3, Comment ID: kfukcl0, Similarity: 0.13
    Parent ID: kftmvg3, Comment ID: kfutxpg, Similarity: 0.26
    Parent ID: kftmvg3, Comment ID: kfvf19w, Similarity: 0.36
Parent ID: None, Comment ID: kftnl1l, Similarity: 0.45
    Parent ID: kftnl1l, Comment ID: kftr5s6, Similarity: 0.28
    Parent ID: kftnl1l, Comment ID: kfudrw7, Similarity: 0.47
        Parent ID: kfudrw7, Comment ID: kfulqaj, Similarity: 0.03
Parent ID: None, Comment ID: kftr41b, Similarity: 0.53
    Parent ID: kftr41b, Comment ID: kftraq6, Similarity: 0.06
    Parent ID: kftr41b, Comment ID: kfu17b0, Similarity: 0.28
        Parent ID: kftraq6, Comment ID: kftrfio, Similarity: 0.65
        Parent ID: kfu17b0, Comment ID: kfu1hus, Similarity: 0.20
        Parent ID

In [5]:
levels = defaultdict(float)
counts = defaultdict(int)

similarity = EvalSimilarity(post_tree, gen_posttree)

post_folder = f"/home/ujx4ab/ondemand/CBM_Final_Project/Data/eval_results/zero_shot/{post_tree.post_id}"
os.makedirs(post_folder, exist_ok=True)  # Create a folder named after the post_id

bfs_file = os.path.join(post_folder, "bfs_results.txt")
averages_file = os.path.join(post_folder, "averages_results.txt")

# Track visited nodes to prevent duplication
seen = set()
for node in similarity.compare_comments():
    if node.comment_id not in seen:
        bfs(node, levels, counts, bfs_file)
        seen.add(node.comment_id)

with open(averages_file, "w") as file:
    file.write("Average Similarity Scores by Depth:\n")
    print("\nAverage Similarity Scores by Depth:")
    for depth in sorted(levels.keys()):
        average = levels[depth] / counts[depth]
        line = f"Depth {depth}: Average Similarity Score = {average:.2f}"
        print(line)
        file.write(line + "\n")

Parent ID: None, Comment ID: kftmvg3, Similarity: 0.41
    Parent ID: kftmvg3, Comment ID: kftr3qa, Similarity: 0.14
    Parent ID: kftmvg3, Comment ID: kfudn71, Similarity: 0.39
    Parent ID: kftmvg3, Comment ID: kfuiqc5, Similarity: 0.07
    Parent ID: kftmvg3, Comment ID: kfukcl0, Similarity: 0.13
    Parent ID: kftmvg3, Comment ID: kfutxpg, Similarity: 0.26
    Parent ID: kftmvg3, Comment ID: kfvf19w, Similarity: 0.36
Parent ID: None, Comment ID: kftnl1l, Similarity: 0.45
    Parent ID: kftnl1l, Comment ID: kftr5s6, Similarity: 0.28
    Parent ID: kftnl1l, Comment ID: kfudrw7, Similarity: 0.47
        Parent ID: kfudrw7, Comment ID: kfulqaj, Similarity: 0.03
Parent ID: None, Comment ID: kftr41b, Similarity: 0.53
    Parent ID: kftr41b, Comment ID: kftraq6, Similarity: 0.06
    Parent ID: kftr41b, Comment ID: kfu17b0, Similarity: 0.28
        Parent ID: kftraq6, Comment ID: kftrfio, Similarity: 0.65
        Parent ID: kfu17b0, Comment ID: kfu1hus, Similarity: 0.20
        Parent ID

In [6]:
Average Similarity Scores by Depth:
Depth 0: Average Similarity Score = 0.38
Depth 1: Average Similarity Score = 0.26
Depth 2: Average Similarity Score = 0.27
Depth 3: Average Similarity Score = 0.23
Depth 4: Average Similarity Score = 0.20
Depth 5: Average Similarity Score = 0.20
Depth 6: Average Similarity Score = 0.33

SyntaxError: invalid syntax (817343598.py, line 1)