In [55]:
from typing import List, Dict
import tiktoken
import json
import pandas as pd

def get_context_length(entry: Dict, encoding_name: str = "cl100k_base") -> int:
    """Count the tokens in ``entry["context_str"]``.

    Args:
        entry: Mapping that provides the text under the ``"context_str"`` key.
        encoding_name: Name of the tiktoken encoding used to tokenize the text.

    Returns:
        The length of the token sequence produced by the encoding.
    """
    tokenizer = tiktoken.get_encoding(encoding_name)
    tokens = tokenizer.encode(entry["context_str"])
    return len(tokens)


def get_source_types(entry: Dict) -> List[str]:
    """Collect the ``"index"`` value of every item in ``entry["context"]``.

    Args:
        entry: Mapping whose ``"context"`` value is an iterable of mappings,
            each providing an ``"index"`` key.

    Returns:
        The ``"index"`` values in the order the context items appear.
    """
    return [item["index"] for item in entry["context"]]


def get_df_stats(model_name: str, config_str: str) -> None:
    """Print how many model responses are unparsable or lack ``isDependency``.

    Loads ``../data/results/{config_str}/all_dependencies_all_{model_name}.json``
    and ``../data/results/all_dependencies.csv`` (both relative to the current
    working directory), walks them in lockstep and prints two totals:
    responses that are not valid JSON, and responses that parse but do not
    provide an ``isDependency`` key.

    Args:
        model_name: Model identifier used in the results file name.
        config_str: Name of the results sub-directory to read from.

    Returns:
        None. The counts are only printed.

    Raises:
        AssertionError: If an entry's ``"index"`` differs from the index label
            of the baseline row at the same position.
    """
    data_file = f"../data/results/{config_str}/all_dependencies_all_{model_name}.json"
    baseline_file = "../data/results/all_dependencies.csv"

    with open(data_file, "r", encoding="utf-8") as src:
        data = json.load(src)

    df = pd.read_csv(baseline_file)

    invalid_json_count = 0
    key_error_count = 0

    # Only the index labels of the baseline are needed, so iterate the index
    # directly instead of materializing every row. ``zip`` stops at the
    # shorter of the two inputs.
    for entry, index in zip(data, df.index):
        # Raised explicitly so the alignment check is not stripped under -O.
        if entry["index"] != index:
            raise AssertionError(
                f"entry index {entry['index']!r} does not match baseline index {index!r}"
            )

        # Results store either a list under "responses" (first item is used)
        # or a single string under "response".
        if "responses" in entry:
            response = entry["responses"][0]
        else:
            response = entry["response"]

        try:
            response_dict = json.loads(response, strict=False)
        except json.JSONDecodeError as error:
            print("Entry: ", entry["index"], error)
            invalid_json_count += 1
            continue

        if not response_dict:
            print("empty")

        # Valid JSON is not necessarily an object: ``null``, numbers, strings
        # and arrays cannot carry the key, so they count as missing it rather
        # than raising TypeError or doing a substring/element test.
        if not isinstance(response_dict, dict) or "isDependency" not in response_dict:
            key_error_count += 1

    print("Invalid JSON count: ", invalid_json_count)
    print("KeyError count: ", key_error_count)


# Results sub-directory and the models whose result files exist for it.
config_str = "config2"
model_names = [
    "gpt-3.5-turbo-0125",
    "gpt-4o-2024-05-13",
    "llama3:8b",
    "llama3:70b",
]

# NOTE: get_df_stats has no return statement, so the per-model CSV export
# below would fail on ``df.to_csv`` as written; it is kept disabled.
#for name in model_names:
#    df = get_df_stats(model_name=name, config_str=config_str)
#    df.to_csv(f"../data/analysis/{config_str}/{name}.csv", index=False)
#df.to_csv(f"../data/analysis/{config_str}/llama3:70b.csv", index=False)

# Report the response-quality counts for a single model.
get_df_stats(config_str=config_str, model_name="llama3:8b")

Entry:  177 Expecting ':' delimiter: line 71 column 1 (char 1069)
Entry:  210 Expecting ':' delimiter: line 6 column 34 (char 762)
Invalid JSON count:  2
KeyError count:  88
