In [13]:
from typing import List, Dict
import tiktoken
import json
import pandas as pd

def get_context_length(entry: Dict, encoding_name: str = "cl100k_base") -> int:
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(entry["context_str"]))
    return num_tokens


def get_source_types(entry: Dict) -> List[str]:
    context = entry["context"]
    source_types = [x["index"] for x in context]
    return source_types


def get_df_stats(model_name: str, config_str: str):

    data_file = f"../data/results/{config_str}/all_dependencies_all_{model_name}.json"
    baseline_file = "../data/results/all_dependencies.csv"

    with open(data_file, "r", encoding="utf-8") as src:
        data = json.load(src)

    df = pd.read_csv(baseline_file)

    response_rating = []
    classification = []
    rationale = []
    plan = []
    uncertainty = []
    context_length = []
    source_types = []
    
    for entry, (index, row) in zip(data, df.iterrows()):


        assert entry["index"] == index
        
        if "responses" in entry:
            response = entry["responses"][0]
        else:
            response = entry["response"]

        try:
            if "responses" in entry:
                response = entry["responses"][0]
            else:
                response = entry["response"]
            response_dict = json.loads(response)
            isDependency = response_dict["isDependency"]
        except json.JSONDecodeError:
            pass
        except KeyError:
            return


        context_length.append(get_context_length(entry=entry))
        source_types.append(get_source_types(entry=entry))

        rating = row["final_rating"]
        
        if str(rating).lower() == "borderline":
            if isDependency:
                response_rating.append(str(isDependency))
                classification.append("TP")
                plan.append(response_dict["plan"])
                rationale.append(response_dict["rationale"])
                uncertainty.append(response_dict["uncertainty"])
            if not isDependency:
                response_rating.append(str(isDependency))
                classification.append("TN")
                plan.append(response_dict["plan"])
                rationale.append(response_dict["rationale"])
                uncertainty.append(response_dict["uncertainty"])

        # FP: The LLM validates a dependency as correct, but the dependency is actually incorrect
        if isDependency and str(rating).lower() == "false":
            response_rating.append(str(isDependency))
            classification.append("FP")
            plan.append(response_dict["plan"])
            rationale.append(response_dict["rationale"])
            uncertainty.append(response_dict["uncertainty"])


        # FN: The LLM validates a dependency as incorrect, but the dependency is actually correct
        if not isDependency and  str(rating).lower() == "true":
            response_rating.append(str(isDependency))
            classification.append("FN")
            plan.append(response_dict["plan"])
            rationale.append(response_dict["rationale"])
            uncertainty.append(response_dict["uncertainty"])

        # TP: The LLM validates a dependency as correct and the dependency is correct
        if isDependency and str(rating).lower() == "true":
            response_rating.append(str(isDependency))
            classification.append("TP")
            plan.append(response_dict["plan"])
            rationale.append(response_dict["rationale"])
            uncertainty.append(response_dict["uncertainty"])

        # TN: The LLM validates a dependency as incorrect and the dependency is incorrect
        if not isDependency and str(rating).lower() == "false":
            response_rating.append(str(isDependency))
            classification.append("TN")
            plan.append(response_dict["plan"])
            rationale.append(response_dict["rationale"])
            uncertainty.append(response_dict["uncertainty"])

    df[f"response_rating"] = response_rating
    df["classification"] = classification
    df["plan"] = plan
    df["rationale"] = rationale
    df["uncertainty"] = uncertainty
    df["context_length"] = context_length
    df["source_types"] = source_types

    return df


model_names = ["gpt-3.5-turbo-0125", "gpt-4o-2024-05-13", "llama3:8b", "llama3:70b"]
config_str = "config5"

for name in model_names:
    df = get_df_stats(model_name=name, config_str=config_str)
    df.to_csv(f"../data/analysis/{config_str}/{name}.csv", index=False)

AttributeError: 'NoneType' object has no attribute 'to_csv'