In [3]:
from typing import Dict, List
from generator import GeneratorEngine
from generator import GeneratorFactory
from prompt_settings import AdvancedCfgNetPromptSettings
from dotenv import load_dotenv
from data import Dependency
from util import get_projet_description, get_most_similar_shots, load_shots
from collections import Counter
import pandas as pd
import backoff
import json
from tqdm import tqdm


def transform(row: pd.Series) -> Dependency:
    """Build a ``Dependency`` from a single row of the failures table.

    Every field is copied from the equally named column of ``row``. The two
    type columns (``option_type`` and ``dependent_option_type``) are reduced
    to the text after their last ``"."`` before being passed on.

    Args:
        row: Row providing the ``project`` column and the ``option_*`` /
            ``dependent_option_*`` columns listed below.

    Returns:
        The ``Dependency`` constructed from the row's values.
    """
    # Columns are read in the same order as the Dependency keyword arguments.
    columns = (
        "project",
        "option_name",
        "option_value",
        "option_type",
        "option_file",
        "option_technology",
        "dependent_option_name",
        "dependent_option_value",
        "dependent_option_type",
        "dependent_option_file",
        "dependent_option_technology",
    )
    # Only these columns are shortened to their last dot-separated segment.
    type_columns = {"option_type", "dependent_option_type"}

    fields = {}
    for column in columns:
        cell = row[column]
        if column in type_columns:
            cell = cell.split(".")[-1]
        fields[column] = cell

    return Dependency(**fields)


@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def generate(generator: GeneratorEngine, messages: List) -> str:
    """Query ``generator`` and return its raw response once it validates.

    A response is accepted when it is non-empty, parses as JSON, and the
    parsed value is a JSON object containing the key ``"isDependency"``.
    Every validation failure raises ``Exception``; the ``backoff`` decorator
    retries on any ``Exception`` with exponential waits, up to
    ``max_tries=3`` attempts.

    Args:
        generator: Engine whose ``generate(messages=...)`` produces the
            response text.
        messages: Chat messages forwarded unchanged to the engine.

    Returns:
        The unparsed response text (not the decoded dictionary).

    Raises:
        Exception: If the response is empty, is not valid JSON, or is not a
            JSON object with an ``"isDependency"`` key.
    """
    response = generator.generate(messages=messages)

    if not response:
        raise Exception("Response is empty.")

    try:
        response_dict = json.loads(response, strict=False)
    except json.JSONDecodeError as error:
        # Keep the decoder error attached as the cause for debugging.
        raise Exception("Response format not serializable.") from error

    # `in` on a decoded JSON string is a substring test and on a JSON array an
    # element test, so insist on an object before looking for the key.
    if not isinstance(response_dict, dict) or "isDependency" not in response_dict:
        raise Exception("KeyError: isDependency")

    return response


def run_generation(df: pd.DataFrame, model_name: str) -> List[str]:
    """Re-run generation for every row of ``df`` and persist the outcome.

    For each row a system and a user prompt are assembled from the row's
    dependency, its ``context_str`` column, the project description and the
    most similar shots, and sent to the generator created for ``model_name``
    with temperature ``0.0``.

    Side effects:
        * ``df`` is modified in place: the columns ``responses``,
          ``shot_info`` and ``project_info`` are added (or overwritten).
        * The resulting frame is written to
          ``../data/analysis/failures_{model_name}.csv`` without the index.

    Args:
        df: Rows to process; must provide ``project``, ``context_str`` and the
            columns read by ``transform``.
        model_name: Model identifier handed to the generator factory and used
            in the output file name.

    Returns:
        One response per row, in row order. Rows whose generation still fails
        after the retries performed by ``generate`` hold the string ``"None"``.
    """
    prompt_settings = AdvancedCfgNetPromptSettings
    generator = GeneratorFactory().get_generator(
        model_name=model_name,
        temperature=0.0
    )
    shots = load_shots()

    results = []
    project_info = []
    shot_info = []

    for row_label, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        dependency = transform(row=row)

        project_str = get_projet_description(project_name=row["project"])
        context_str = row["context_str"]
        task_str = prompt_settings.get_task_str(dependency=dependency)
        shots_str = "\n\n".join(get_most_similar_shots(shots, dependency))
        format_str = prompt_settings.get_format_prompt()

        system_prompt = prompt_settings.get_system_str(
            dependency=dependency,
            project_str=project_str
        )

        user_prompt = prompt_settings.advanced_query_prompt.format(
            context_str=context_str,
            shot_str=shots_str,
            task_str=task_str,
            format_str=format_str
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            response = generate(
                generator=generator,
                messages=messages
            )
        except Exception as error:
            # Best effort: one failing row must not abort the whole run, but
            # the failure is reported instead of being dropped silently.
            # tqdm.write prints without corrupting the progress bar.
            tqdm.write(f"Row {row_label}: generation failed ({error!r}); storing 'None'.")
            response = "None"

        shot_info.append(shots_str)
        project_info.append(project_str)
        results.append(response)

    df["responses"] = results
    df["shot_info"] = shot_info
    df["project_info"] = project_info

    df.to_csv(f"../data/analysis/failures_{model_name}.csv", index=False)

    # Honour the declared return type; the original fell through and returned None.
    return results

In [4]:
# Re-run generation for the annotated failures of one model under "config2".
model_name = "gpt-4o-2024-05-13"
df_failures = pd.read_csv("../data/analysis/failures_annotated.csv")

# Select with a single combined mask and take an explicit copy:
# run_generation adds columns to the frame it receives, and that frame should
# be an independent object rather than a slice of the full annotated table.
is_target = (df_failures["config"] == "config2") & (df_failures["llm"] == model_name)
df_failures = df_failures[is_target].copy()

print(f"Num Failures for {model_name}: {len(df_failures)}")

# Load environment variables from the .env file before run_generation
# creates the generator.
env_file = "../.env"
load_dotenv(dotenv_path=env_file)

# Trailing semicolon keeps any return value out of the cell output.
run_generation(df=df_failures, model_name=model_name);

Num Failures for gpt-4o-2024-05-13: 116


Processing rows:   0%|          | 0/116 [00:00<?, ?it/s]

Processing rows:   1%|          | 1/116 [00:03<07:22,  3.85s/it]

Processing rows:   2%|▏         | 2/116 [00:08<08:15,  4.35s/it]

Processing rows:   3%|▎         | 3/116 [00:13<08:21,  4.44s/it]

Processing rows:   3%|▎         | 4/116 [00:17<07:57,  4.26s/it]

Processing rows:   4%|▍         | 5/116 [00:22<08:42,  4.71s/it]

Processing rows:   5%|▌         | 6/116 [00:28<09:33,  5.22s/it]

Processing rows:   6%|▌         | 7/116 [00:32<08:51,  4.88s/it]

Processing rows:   7%|▋         | 8/116 [00:38<08:53,  4.94s/it]

Processing rows:   8%|▊         | 9/116 [00:42<08:35,  4.82s/it]

Processing rows:   9%|▊         | 10/116 [00:47<08:32,  4.83s/it]

Processing rows:   9%|▉         | 11/116 [00:51<08:07,  4.65s/it]

Processing rows:  10%|█         | 12/116 [00:56<07:55,  4.58s/it]

Processing rows:  11%|█         | 13/116 [01:00<07:52,  4.58s/it]

Processing rows:  12%|█▏        | 14/116 [01:04<07:31,  4.43s/it]

Processing rows:  13%|█▎        | 15/116 [01:08<07:08,  4.24s/it]

Processing rows:  14%|█▍        | 16/116 [01:12<06:44,  4.04s/it]

Processing rows:  15%|█▍        | 17/116 [01:15<06:28,  3.93s/it]

Processing rows:  16%|█▌        | 18/116 [01:20<06:36,  4.04s/it]

Processing rows:  16%|█▋        | 19/116 [01:23<06:21,  3.93s/it]

Processing rows:  17%|█▋        | 20/116 [01:27<06:24,  4.00s/it]

Processing rows:  18%|█▊        | 21/116 [01:32<06:30,  4.11s/it]

Processing rows:  19%|█▉        | 22/116 [01:37<06:46,  4.32s/it]

Processing rows:  20%|█▉        | 23/116 [01:40<06:27,  4.17s/it]

Processing rows:  21%|██        | 24/116 [01:43<05:46,  3.77s/it]

Processing rows:  22%|██▏       | 25/116 [01:48<06:15,  4.13s/it]

Processing rows:  22%|██▏       | 26/116 [01:52<06:03,  4.04s/it]

Processing rows:  23%|██▎       | 27/116 [01:56<05:42,  3.85s/it]

Processing rows:  24%|██▍       | 28/116 [02:02<06:51,  4.68s/it]

Processing rows:  25%|██▌       | 29/116 [02:05<05:58,  4.12s/it]

Processing rows:  26%|██▌       | 30/116 [02:09<05:54,  4.12s/it]

Processing rows:  27%|██▋       | 31/116 [02:12<05:26,  3.85s/it]

Processing rows:  28%|██▊       | 32/116 [02:16<05:26,  3.89s/it]

Processing rows:  28%|██▊       | 33/116 [02:20<05:13,  3.78s/it]

Processing rows:  29%|██▉       | 34/116 [02:24<05:09,  3.77s/it]

Processing rows:  30%|███       | 35/116 [02:28<05:16,  3.90s/it]

Processing rows:  31%|███       | 36/116 [02:31<05:08,  3.86s/it]

Processing rows:  32%|███▏      | 37/116 [02:36<05:16,  4.01s/it]

Processing rows:  33%|███▎      | 38/116 [02:40<05:11,  3.99s/it]

Processing rows:  34%|███▎      | 39/116 [02:43<04:54,  3.82s/it]

Processing rows:  34%|███▍      | 40/116 [02:46<04:22,  3.46s/it]

Processing rows:  35%|███▌      | 41/116 [02:49<04:10,  3.33s/it]

Processing rows:  36%|███▌      | 42/116 [02:53<04:22,  3.55s/it]

Processing rows:  37%|███▋      | 43/116 [02:56<04:09,  3.42s/it]

Processing rows:  38%|███▊      | 44/116 [02:59<03:57,  3.30s/it]

Processing rows:  39%|███▉      | 45/116 [03:02<03:46,  3.19s/it]

Processing rows:  40%|███▉      | 46/116 [03:06<03:55,  3.36s/it]

Processing rows:  41%|████      | 47/116 [03:09<03:47,  3.29s/it]

Processing rows:  41%|████▏     | 48/116 [03:13<04:03,  3.58s/it]

Processing rows:  42%|████▏     | 49/116 [03:17<04:09,  3.72s/it]

Processing rows:  43%|████▎     | 50/116 [03:20<03:52,  3.52s/it]

Processing rows:  44%|████▍     | 51/116 [03:23<03:34,  3.30s/it]

Processing rows:  45%|████▍     | 52/116 [03:27<03:48,  3.58s/it]

Processing rows:  46%|████▌     | 53/116 [03:32<04:02,  3.85s/it]

Processing rows:  47%|████▋     | 54/116 [03:35<03:48,  3.68s/it]

Processing rows:  47%|████▋     | 55/116 [03:41<04:31,  4.46s/it]

Processing rows:  48%|████▊     | 56/116 [03:49<05:18,  5.31s/it]

Processing rows:  49%|████▉     | 57/116 [03:52<04:43,  4.81s/it]

Processing rows:  50%|█████     | 58/116 [03:56<04:24,  4.56s/it]

Processing rows:  51%|█████     | 59/116 [03:59<03:56,  4.16s/it]

Processing rows:  52%|█████▏    | 60/116 [04:03<03:49,  4.10s/it]

Processing rows:  53%|█████▎    | 61/116 [04:07<03:37,  3.96s/it]

Processing rows:  53%|█████▎    | 62/116 [04:11<03:32,  3.94s/it]

Processing rows:  54%|█████▍    | 63/116 [04:15<03:29,  3.96s/it]

Processing rows:  55%|█████▌    | 64/116 [04:20<03:41,  4.25s/it]

Processing rows:  56%|█████▌    | 65/116 [04:24<03:38,  4.28s/it]

Processing rows:  57%|█████▋    | 66/116 [04:29<03:44,  4.48s/it]

Processing rows:  58%|█████▊    | 67/116 [04:33<03:36,  4.41s/it]

Processing rows:  59%|█████▊    | 68/116 [04:38<03:34,  4.46s/it]

Processing rows:  59%|█████▉    | 69/116 [04:41<03:12,  4.11s/it]

Processing rows:  60%|██████    | 70/116 [04:45<03:00,  3.93s/it]

Processing rows:  61%|██████    | 71/116 [04:48<02:50,  3.79s/it]

Processing rows:  62%|██████▏   | 72/116 [04:52<02:48,  3.82s/it]

Processing rows:  63%|██████▎   | 73/116 [04:56<02:40,  3.74s/it]

Processing rows:  64%|██████▍   | 74/116 [04:59<02:27,  3.50s/it]

Processing rows:  65%|██████▍   | 75/116 [05:03<02:35,  3.80s/it]

Processing rows:  66%|██████▌   | 76/116 [05:07<02:27,  3.69s/it]

Processing rows:  66%|██████▋   | 77/116 [05:09<02:13,  3.42s/it]

Processing rows:  67%|██████▋   | 78/116 [05:13<02:10,  3.43s/it]

Processing rows:  68%|██████▊   | 79/116 [05:17<02:18,  3.75s/it]

Processing rows:  69%|██████▉   | 80/116 [05:22<02:20,  3.89s/it]

Processing rows:  70%|██████▉   | 81/116 [05:26<02:19,  4.00s/it]

Processing rows:  71%|███████   | 82/116 [05:30<02:16,  4.02s/it]

Processing rows:  72%|███████▏  | 83/116 [05:34<02:14,  4.07s/it]

Processing rows:  72%|███████▏  | 84/116 [05:37<02:03,  3.86s/it]

Processing rows:  73%|███████▎  | 85/116 [05:40<01:48,  3.50s/it]

Processing rows:  74%|███████▍  | 86/116 [05:44<01:46,  3.55s/it]

Processing rows:  75%|███████▌  | 87/116 [05:47<01:39,  3.43s/it]

Processing rows:  76%|███████▌  | 88/116 [05:53<01:59,  4.26s/it]

Processing rows:  77%|███████▋  | 89/116 [05:57<01:50,  4.11s/it]

Processing rows:  78%|███████▊  | 90/116 [06:00<01:40,  3.87s/it]

Processing rows:  78%|███████▊  | 91/116 [06:04<01:39,  3.97s/it]

Processing rows:  79%|███████▉  | 92/116 [06:09<01:36,  4.04s/it]

Processing rows:  80%|████████  | 93/116 [06:12<01:27,  3.81s/it]

Processing rows:  81%|████████  | 94/116 [06:16<01:28,  4.03s/it]

Processing rows:  82%|████████▏ | 95/116 [06:20<01:23,  3.97s/it]

Processing rows:  83%|████████▎ | 96/116 [06:25<01:21,  4.09s/it]

Processing rows:  84%|████████▎ | 97/116 [06:29<01:17,  4.10s/it]

Processing rows:  84%|████████▍ | 98/116 [06:32<01:11,  3.95s/it]

Processing rows:  85%|████████▌ | 99/116 [06:38<01:15,  4.47s/it]

Processing rows:  86%|████████▌ | 100/116 [06:43<01:15,  4.69s/it]

Processing rows:  87%|████████▋ | 101/116 [06:48<01:10,  4.72s/it]

Processing rows:  88%|████████▊ | 102/116 [06:54<01:13,  5.23s/it]

Processing rows:  89%|████████▉ | 103/116 [06:59<01:03,  4.90s/it]

Processing rows:  90%|████████▉ | 104/116 [07:02<00:53,  4.45s/it]

Processing rows:  91%|█████████ | 105/116 [07:06<00:46,  4.21s/it]

Processing rows:  91%|█████████▏| 106/116 [07:14<00:54,  5.49s/it]

Processing rows:  92%|█████████▏| 107/116 [07:19<00:46,  5.19s/it]

Processing rows:  93%|█████████▎| 108/116 [07:23<00:39,  4.94s/it]

Processing rows:  94%|█████████▍| 109/116 [07:30<00:38,  5.48s/it]

Processing rows:  95%|█████████▍| 110/116 [07:36<00:34,  5.68s/it]

Processing rows:  96%|█████████▌| 111/116 [07:40<00:26,  5.27s/it]

Processing rows:  97%|█████████▋| 112/116 [07:44<00:18,  4.73s/it]

Processing rows:  97%|█████████▋| 113/116 [07:47<00:13,  4.44s/it]

Processing rows:  98%|█████████▊| 114/116 [07:51<00:08,  4.21s/it]

Processing rows:  99%|█████████▉| 115/116 [07:55<00:04,  4.04s/it]

Processing rows: 100%|██████████| 116/116 [07:58<00:00,  4.13s/it]
