In [1]:
from typing import Dict, List
from generator import GeneratorEngine
from generator import GeneratorFactory
from prompt_settings import AdvancedCfgNetPromptSettings
from dotenv import load_dotenv
from data import Dependency
from util import get_projet_description, get_most_similar_shots, load_shots
from collections import Counter
import pandas as pd
import backoff
import json
from tqdm import tqdm


def transform(row: pd.Series) -> Dependency:
    """
    Build a `Dependency` from one row of the failures table.

    All fields are copied from the identically named columns of `row`. The two
    type columns (`option_type`, `dependent_option_type`) are reduced to the
    segment after their last dot.

    Args:
        row: A row providing the `project`, `option_*` and
            `dependent_option_*` columns read below.

    Returns:
        The populated `Dependency`.
    """

    def _last_segment(dotted: str) -> str:
        # Everything after the final "."; the whole string if there is no dot.
        return dotted.rpartition(".")[2]

    # Dict literals evaluate top to bottom, so columns are read in field order.
    fields = {
        "project": row["project"],
        "option_name": row["option_name"],
        "option_value": row["option_value"],
        "option_type": _last_segment(row["option_type"]),
        "option_file": row["option_file"],
        "option_technology": row["option_technology"],
        "dependent_option_name": row["dependent_option_name"],
        "dependent_option_value": row["dependent_option_value"],
        "dependent_option_type": _last_segment(row["dependent_option_type"]),
        "dependent_option_file": row["dependent_option_file"],
        "dependent_option_technology": row["dependent_option_technology"],
    }
    return Dependency(**fields)


@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def generate(generator: GeneratorEngine, messages: List) -> str:
    """
    Ask `generator` for a response and validate it before returning it.

    The response is accepted only if it is non-empty, parses as JSON and the
    parsed value is a JSON object containing the key `isDependency`. Any
    failure raises an `Exception`, which makes the `backoff` decorator retry
    the whole call with exponential waits, up to three tries in total.

    Args:
        generator: Engine whose `generate(messages=...)` method is called.
        messages: Chat messages passed through to the generator unchanged.

    Returns:
        The raw (unparsed) response string.

    Raises:
        Exception: If the response is empty, is not valid JSON, or is not a
            JSON object with an `isDependency` key (after the final retry).
    """
    response = generator.generate(messages=messages)

    if not response:
        raise Exception("Response is empty.")

    try:
        # strict=False tolerates control characters inside JSON strings.
        response_dict = json.loads(response, strict=False)
    except json.JSONDecodeError as error:
        # Keep the parser error as the cause so the position of the problem survives.
        raise Exception("Response format not serializable.") from error

    # Require a JSON object: on a bare JSON string `in` would be a substring
    # test and on a list an element test, both of which could pass wrongly.
    if not isinstance(response_dict, dict) or "isDependency" not in response_dict:
        raise Exception("KeyError: isDependency")

    return response


def run_generation(df: pd.DataFrame, model_name: str) -> List[str]:
    """
    Query `model_name` once per row of `df` and persist the raw responses.

    For every row a system prompt and a user prompt are assembled from the
    project description, the row's `context_str`, the most similar shots and
    the task/format strings of `AdvancedCfgNetPromptSettings`, and sent through
    `generate`. A row whose generation still fails after the retries of
    `generate` is reported and recorded with the sentinel string "None".

    The responses are stored in a new `responses` column of `df` (the frame is
    modified in place) and the frame is written to
    `../data/analysis/failures_{model_name}.csv`.

    Args:
        df: Rows to process; needs the columns read by `transform` plus
            `project` and `context_str`.
        model_name: Model identifier handed to `GeneratorFactory` and used in
            the output file name.

    Returns:
        The responses, one per row of `df`, in row order.
    """
    prompt_settings = AdvancedCfgNetPromptSettings
    generator = GeneratorFactory().get_generator(
        model_name=model_name,
        temperature=0.0
    )
    shots = load_shots()

    results = []

    for row_index, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        dependency = transform(row=row)

        project_str = get_projet_description(project_name=row["project"])
        context_str = row["context_str"]
        task_str = prompt_settings.get_task_str(dependency=dependency)
        shots_str = "\n\n".join(get_most_similar_shots(shots, dependency))
        format_str = prompt_settings.get_format_prompt()

        system_prompt = prompt_settings.get_system_str(
            dependency=dependency,
            project_str=project_str
        )

        user_prompt = prompt_settings.advanced_query_prompt.format(
            context_str=context_str,
            shot_str=shots_str,
            task_str=task_str,
            format_str=format_str
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            response = generate(
                generator=generator,
                messages=messages
            )
        except Exception as error:
            # Best effort: keep going, but say which row failed and why.
            # tqdm.write prints without breaking the progress bar.
            tqdm.write(f"Row {row_index}: generation failed ({error!r}); storing 'None'.")
            response = "None"

        results.append(response)

    # Store the per-row responses. Assigning the loop variable here would copy
    # the last response into every row and fail on an empty frame.
    df["responses"] = results

    df.to_csv(f"../data/analysis/failures_{model_name}.csv", index=False)

    return results

In [5]:
# Model whose annotated failures are re-run; alternative: "gpt-4o-2024-05-13".
model_name = "gpt-3.5-turbo-0125"

# Keep only the annotated failures of this model under configuration "config2".
df_failures = pd.read_csv("../data/analysis/failures_annotated.csv")
is_selected = (df_failures["config"] == "config2") & (df_failures["llm"] == model_name)
df_failures = df_failures[is_selected]

print(f"Num Failures for {model_name}: {len(df_failures)}")

# Load environment variables from the .env file before the generator is created.
env_file = "../.env"
load_dotenv(dotenv_path=env_file)

# The trailing ';' keeps any return value out of the cell output.
run_generation(df=df_failures, model_name=model_name);

Num Failures for gpt-3.5-turbo-0125: 185


Processing rows:   0%|          | 0/185 [00:00<?, ?it/s]

Processing rows:   1%|          | 1/185 [00:03<09:58,  3.25s/it]

Processing rows:   1%|          | 2/185 [00:07<10:50,  3.55s/it]

Processing rows:   2%|▏         | 3/185 [00:09<09:56,  3.28s/it]

Processing rows:   2%|▏         | 4/185 [00:14<11:17,  3.74s/it]

Processing rows:   3%|▎         | 5/185 [00:18<11:30,  3.84s/it]

Processing rows:   3%|▎         | 6/185 [00:21<10:41,  3.58s/it]

Processing rows:   4%|▍         | 7/185 [00:24<10:18,  3.48s/it]

Processing rows:   4%|▍         | 8/185 [00:28<10:02,  3.40s/it]

Processing rows:   5%|▍         | 9/185 [00:30<09:29,  3.24s/it]

Processing rows:   5%|▌         | 10/185 [00:33<08:53,  3.05s/it]

Processing rows:   6%|▌         | 11/185 [00:38<10:12,  3.52s/it]

Processing rows:   6%|▋         | 12/185 [00:42<10:47,  3.74s/it]

Processing rows:   7%|▋         | 13/185 [00:45<10:36,  3.70s/it]

Processing rows:   8%|▊         | 14/185 [00:49<10:25,  3.66s/it]

Processing rows:   8%|▊         | 15/185 [00:52<10:12,  3.60s/it]

Processing rows:   9%|▊         | 16/185 [00:56<09:50,  3.50s/it]

Processing rows:   9%|▉         | 17/185 [00:59<09:14,  3.30s/it]

Processing rows:  10%|▉         | 18/185 [01:02<09:31,  3.42s/it]

Processing rows:  10%|█         | 19/185 [01:06<10:03,  3.64s/it]

Processing rows:  11%|█         | 20/185 [01:11<10:22,  3.77s/it]

Processing rows:  11%|█▏        | 21/185 [01:15<10:38,  3.89s/it]

Processing rows:  12%|█▏        | 22/185 [01:18<10:14,  3.77s/it]

Processing rows:  12%|█▏        | 23/185 [01:22<10:23,  3.85s/it]

Processing rows:  13%|█▎        | 24/185 [01:27<11:09,  4.16s/it]

Processing rows:  14%|█▎        | 25/185 [01:31<11:12,  4.20s/it]

Processing rows:  14%|█▍        | 26/185 [01:35<10:45,  4.06s/it]

Processing rows:  15%|█▍        | 27/185 [01:38<10:05,  3.83s/it]

Processing rows:  15%|█▌        | 28/185 [01:43<10:26,  3.99s/it]

Processing rows:  16%|█▌        | 29/185 [01:47<10:14,  3.94s/it]

Processing rows:  16%|█▌        | 30/185 [01:51<10:26,  4.04s/it]

Processing rows:  17%|█▋        | 31/185 [01:55<10:18,  4.02s/it]

Processing rows:  17%|█▋        | 32/185 [01:58<09:29,  3.72s/it]

Processing rows:  18%|█▊        | 33/185 [02:02<09:22,  3.70s/it]

Processing rows:  18%|█▊        | 34/185 [02:05<09:18,  3.70s/it]

Processing rows:  19%|█▉        | 35/185 [02:09<09:34,  3.83s/it]

Processing rows:  19%|█▉        | 36/185 [02:14<09:50,  3.97s/it]

Processing rows:  20%|██        | 37/185 [02:18<10:11,  4.13s/it]

Processing rows:  21%|██        | 38/185 [02:21<09:21,  3.82s/it]

Processing rows:  21%|██        | 39/185 [02:25<09:07,  3.75s/it]

Processing rows:  22%|██▏       | 40/185 [02:28<08:32,  3.54s/it]

Processing rows:  22%|██▏       | 41/185 [02:31<08:05,  3.37s/it]

Processing rows:  23%|██▎       | 42/185 [02:34<08:03,  3.38s/it]

Processing rows:  23%|██▎       | 43/185 [02:37<07:25,  3.14s/it]

Processing rows:  24%|██▍       | 44/185 [02:39<06:56,  2.96s/it]

Processing rows:  24%|██▍       | 45/185 [02:43<07:18,  3.13s/it]

Processing rows:  25%|██▍       | 46/185 [02:46<07:33,  3.26s/it]

Processing rows:  25%|██▌       | 47/185 [02:50<07:50,  3.41s/it]

Processing rows:  26%|██▌       | 48/185 [02:56<09:12,  4.03s/it]

Processing rows:  26%|██▋       | 49/185 [02:59<08:21,  3.69s/it]

Processing rows:  27%|██▋       | 50/185 [03:02<08:16,  3.68s/it]

Processing rows:  28%|██▊       | 51/185 [03:06<08:19,  3.73s/it]

Processing rows:  28%|██▊       | 52/185 [03:10<08:30,  3.84s/it]

Processing rows:  29%|██▊       | 53/185 [03:14<08:18,  3.78s/it]

Processing rows:  29%|██▉       | 54/185 [03:18<08:16,  3.79s/it]

Processing rows:  30%|██▉       | 55/185 [03:21<08:06,  3.74s/it]

Processing rows:  30%|███       | 56/185 [03:23<07:03,  3.28s/it]

Processing rows:  31%|███       | 57/185 [03:27<06:58,  3.27s/it]

Processing rows:  31%|███▏      | 58/185 [03:31<07:30,  3.55s/it]

Processing rows:  32%|███▏      | 59/185 [03:34<07:15,  3.45s/it]

Processing rows:  32%|███▏      | 60/185 [03:38<07:41,  3.69s/it]

Processing rows:  33%|███▎      | 61/185 [03:41<07:11,  3.48s/it]

Processing rows:  34%|███▎      | 62/185 [03:44<06:50,  3.34s/it]

Processing rows:  34%|███▍      | 63/185 [03:48<06:41,  3.29s/it]

Processing rows:  35%|███▍      | 64/185 [03:52<07:05,  3.52s/it]

Processing rows:  35%|███▌      | 65/185 [03:55<07:14,  3.62s/it]

Processing rows:  36%|███▌      | 66/185 [04:00<07:34,  3.82s/it]

Processing rows:  36%|███▌      | 67/185 [04:03<07:15,  3.69s/it]

Processing rows:  37%|███▋      | 68/185 [04:07<07:18,  3.75s/it]

Processing rows:  37%|███▋      | 69/185 [04:11<07:27,  3.86s/it]

Processing rows:  38%|███▊      | 70/185 [04:14<06:57,  3.63s/it]

Processing rows:  38%|███▊      | 71/185 [04:17<06:13,  3.28s/it]

Processing rows:  39%|███▉      | 72/185 [04:20<06:19,  3.36s/it]

Processing rows:  39%|███▉      | 73/185 [04:25<06:49,  3.66s/it]

Processing rows:  40%|████      | 74/185 [04:30<07:35,  4.10s/it]

Processing rows:  41%|████      | 75/185 [04:33<07:08,  3.89s/it]

Processing rows:  41%|████      | 76/185 [04:36<06:43,  3.70s/it]

Processing rows:  42%|████▏     | 77/185 [04:41<06:56,  3.85s/it]

Processing rows:  42%|████▏     | 78/185 [04:45<07:04,  3.97s/it]

Processing rows:  43%|████▎     | 79/185 [04:48<06:37,  3.75s/it]

Processing rows:  43%|████▎     | 80/185 [04:52<06:26,  3.68s/it]

Processing rows:  44%|████▍     | 81/185 [04:55<06:03,  3.50s/it]

Processing rows:  44%|████▍     | 82/185 [04:58<05:56,  3.46s/it]

Processing rows:  45%|████▍     | 83/185 [05:02<06:03,  3.57s/it]

Processing rows:  45%|████▌     | 84/185 [05:07<06:51,  4.07s/it]

Processing rows:  46%|████▌     | 85/185 [05:10<06:19,  3.80s/it]

Processing rows:  46%|████▋     | 86/185 [05:14<06:10,  3.75s/it]

Processing rows:  47%|████▋     | 87/185 [05:18<06:24,  3.92s/it]

Processing rows:  48%|████▊     | 88/185 [05:26<07:59,  4.94s/it]

Processing rows:  48%|████▊     | 89/185 [05:30<07:26,  4.66s/it]

Processing rows:  49%|████▊     | 90/185 [05:33<06:44,  4.25s/it]

Processing rows:  49%|████▉     | 91/185 [05:37<06:29,  4.14s/it]

Processing rows:  50%|████▉     | 92/185 [05:40<06:08,  3.97s/it]

Processing rows:  50%|█████     | 93/185 [05:44<05:47,  3.77s/it]

Processing rows:  51%|█████     | 94/185 [05:49<06:23,  4.21s/it]

Processing rows:  51%|█████▏    | 95/185 [05:52<05:51,  3.90s/it]

Processing rows:  52%|█████▏    | 96/185 [05:56<05:48,  3.91s/it]

Processing rows:  52%|█████▏    | 97/185 [06:00<05:42,  3.89s/it]

Processing rows:  53%|█████▎    | 98/185 [06:04<05:39,  3.90s/it]

Processing rows:  54%|█████▎    | 99/185 [06:08<05:33,  3.88s/it]

Processing rows:  54%|█████▍    | 100/185 [06:10<05:02,  3.56s/it]

Processing rows:  55%|█████▍    | 101/185 [06:13<04:47,  3.42s/it]

Processing rows:  55%|█████▌    | 102/185 [06:16<04:28,  3.23s/it]

Processing rows:  56%|█████▌    | 103/185 [06:20<04:25,  3.24s/it]

Processing rows:  56%|█████▌    | 104/185 [06:23<04:23,  3.25s/it]

Processing rows:  57%|█████▋    | 105/185 [06:26<04:29,  3.36s/it]

Processing rows:  57%|█████▋    | 106/185 [06:29<04:10,  3.17s/it]

Processing rows:  58%|█████▊    | 107/185 [06:32<04:11,  3.23s/it]

Processing rows:  58%|█████▊    | 108/185 [06:35<04:01,  3.14s/it]

Processing rows:  59%|█████▉    | 109/185 [06:40<04:22,  3.45s/it]

Processing rows:  59%|█████▉    | 110/185 [06:43<04:20,  3.48s/it]

Processing rows:  60%|██████    | 111/185 [06:46<04:07,  3.35s/it]

Processing rows:  61%|██████    | 112/185 [06:49<04:02,  3.32s/it]

Processing rows:  61%|██████    | 113/185 [06:54<04:21,  3.63s/it]

Processing rows:  62%|██████▏   | 114/185 [06:57<04:13,  3.57s/it]

Processing rows:  62%|██████▏   | 115/185 [07:00<03:57,  3.39s/it]

Processing rows:  63%|██████▎   | 116/185 [07:04<03:57,  3.44s/it]

Processing rows:  63%|██████▎   | 117/185 [07:07<03:43,  3.28s/it]

Processing rows:  64%|██████▍   | 118/185 [07:10<03:34,  3.20s/it]

Processing rows:  64%|██████▍   | 119/185 [07:13<03:29,  3.18s/it]

Processing rows:  65%|██████▍   | 120/185 [07:17<03:53,  3.60s/it]

Processing rows:  65%|██████▌   | 121/185 [07:22<04:02,  3.78s/it]

Processing rows:  66%|██████▌   | 122/185 [07:26<04:01,  3.83s/it]

Processing rows:  66%|██████▋   | 123/185 [07:28<03:38,  3.53s/it]

Processing rows:  67%|██████▋   | 124/185 [07:32<03:45,  3.69s/it]

Processing rows:  68%|██████▊   | 125/185 [07:36<03:36,  3.61s/it]

Processing rows:  68%|██████▊   | 126/185 [07:39<03:31,  3.59s/it]

Processing rows:  69%|██████▊   | 127/185 [07:43<03:33,  3.68s/it]

Processing rows:  69%|██████▉   | 128/185 [07:47<03:27,  3.64s/it]

Processing rows:  70%|██████▉   | 129/185 [07:50<03:10,  3.40s/it]

Processing rows:  70%|███████   | 130/185 [07:54<03:24,  3.71s/it]

Processing rows:  71%|███████   | 131/185 [07:58<03:24,  3.78s/it]

Processing rows:  71%|███████▏  | 132/185 [08:02<03:21,  3.81s/it]

Processing rows:  72%|███████▏  | 133/185 [08:06<03:17,  3.81s/it]

Processing rows:  72%|███████▏  | 134/185 [08:10<03:23,  4.00s/it]

Processing rows:  73%|███████▎  | 135/185 [08:14<03:11,  3.83s/it]

Processing rows:  74%|███████▎  | 136/185 [08:17<02:57,  3.62s/it]

Processing rows:  74%|███████▍  | 137/185 [08:20<02:47,  3.49s/it]

Processing rows:  75%|███████▍  | 138/185 [08:22<02:30,  3.21s/it]

Processing rows:  75%|███████▌  | 139/185 [08:26<02:30,  3.27s/it]

Processing rows:  76%|███████▌  | 140/185 [08:30<02:39,  3.53s/it]

Processing rows:  76%|███████▌  | 141/185 [08:34<02:41,  3.68s/it]

Processing rows:  77%|███████▋  | 142/185 [08:37<02:30,  3.50s/it]

Processing rows:  77%|███████▋  | 143/185 [08:41<02:33,  3.64s/it]

Processing rows:  78%|███████▊  | 144/185 [08:47<02:56,  4.31s/it]

Processing rows:  78%|███████▊  | 145/185 [08:52<03:03,  4.59s/it]

Processing rows:  79%|███████▉  | 146/185 [08:56<02:50,  4.36s/it]

Processing rows:  79%|███████▉  | 147/185 [09:00<02:44,  4.34s/it]

Processing rows:  80%|████████  | 148/185 [09:05<02:43,  4.43s/it]

Processing rows:  81%|████████  | 149/185 [09:09<02:38,  4.41s/it]

Processing rows:  81%|████████  | 150/185 [09:14<02:32,  4.37s/it]

Processing rows:  82%|████████▏ | 151/185 [09:17<02:19,  4.11s/it]

Processing rows:  82%|████████▏ | 152/185 [09:21<02:16,  4.15s/it]

Processing rows:  83%|████████▎ | 153/185 [09:25<02:08,  4.03s/it]

Processing rows:  83%|████████▎ | 154/185 [09:30<02:15,  4.36s/it]

Processing rows:  84%|████████▍ | 155/185 [09:36<02:20,  4.67s/it]

Processing rows:  84%|████████▍ | 156/185 [09:39<02:04,  4.30s/it]

Processing rows:  85%|████████▍ | 157/185 [09:44<02:03,  4.41s/it]

Processing rows:  85%|████████▌ | 158/185 [09:47<01:47,  3.97s/it]

Processing rows:  86%|████████▌ | 159/185 [09:51<01:44,  4.01s/it]

Processing rows:  86%|████████▋ | 160/185 [09:54<01:35,  3.80s/it]

Processing rows:  87%|████████▋ | 161/185 [09:57<01:27,  3.65s/it]

Processing rows:  88%|████████▊ | 162/185 [10:04<01:41,  4.43s/it]

Processing rows:  88%|████████▊ | 163/185 [10:07<01:30,  4.10s/it]

Processing rows:  89%|████████▊ | 164/185 [10:11<01:28,  4.19s/it]

Processing rows:  89%|████████▉ | 165/185 [10:16<01:24,  4.23s/it]

Processing rows:  90%|████████▉ | 166/185 [10:19<01:12,  3.80s/it]

Processing rows:  90%|█████████ | 167/185 [10:22<01:07,  3.76s/it]

Processing rows:  91%|█████████ | 168/185 [10:27<01:08,  4.00s/it]

Processing rows:  91%|█████████▏| 169/185 [10:32<01:10,  4.40s/it]

Processing rows:  92%|█████████▏| 170/185 [10:35<01:01,  4.10s/it]

Processing rows:  92%|█████████▏| 171/185 [10:40<01:01,  4.36s/it]

Processing rows:  93%|█████████▎| 172/185 [10:44<00:53,  4.10s/it]

Processing rows:  94%|█████████▎| 173/185 [10:48<00:49,  4.11s/it]

Processing rows:  94%|█████████▍| 174/185 [10:52<00:44,  4.06s/it]

Processing rows:  95%|█████████▍| 175/185 [10:55<00:38,  3.80s/it]

Processing rows:  95%|█████████▌| 176/185 [10:59<00:33,  3.70s/it]

Processing rows:  96%|█████████▌| 177/185 [11:03<00:30,  3.77s/it]

Processing rows:  96%|█████████▌| 178/185 [11:06<00:24,  3.55s/it]

Processing rows:  97%|█████████▋| 179/185 [11:09<00:21,  3.62s/it]

Processing rows:  97%|█████████▋| 180/185 [11:15<00:20,  4.12s/it]

Processing rows:  98%|█████████▊| 181/185 [11:18<00:15,  3.97s/it]

Processing rows:  98%|█████████▊| 182/185 [11:22<00:11,  3.74s/it]

Processing rows:  99%|█████████▉| 183/185 [11:25<00:07,  3.65s/it]

Processing rows:  99%|█████████▉| 184/185 [11:30<00:04,  4.07s/it]

Processing rows: 100%|██████████| 185/185 [11:34<00:00,  3.75s/it]
