In [6]:
from typing import Dict, List
from generator import GeneratorEngine
from generator import GeneratorFactory
from prompt_settings import AdvancedCfgNetPromptSettings
from dotenv import load_dotenv
from data import Dependency
from util import get_projet_description, get_most_similar_shots, load_shots
from collections import Counter
import pandas as pd
import backoff
import json
from tqdm import tqdm


def transform(row: pd.Series) -> Dependency:
    """Build a ``Dependency`` from one row of the failures table.

    Every column is passed to ``Dependency`` under a keyword of the same
    name. The two ``*_type`` columns are shortened to the text after their
    last ``"."``; all other values are forwarded unchanged.

    Args:
        row: Row providing the ``project``, ``option_*`` and
            ``dependent_option_*`` columns listed below.

    Returns:
        The ``Dependency`` constructed from the row's values.
    """
    # Columns whose dotted value is reduced to its final segment.
    dotted_columns = ("option_type", "dependent_option_type")

    # Read in the same order as the keyword arguments are passed.
    ordered_columns = (
        "project",
        "option_name",
        "option_value",
        "option_type",
        "option_file",
        "option_technology",
        "dependent_option_name",
        "dependent_option_value",
        "dependent_option_type",
        "dependent_option_file",
        "dependent_option_technology",
    )

    fields = {}
    for column in ordered_columns:
        value = row[column]
        if column in dotted_columns:
            value = value.split(".")[-1]
        fields[column] = value

    return Dependency(**fields)


@backoff.on_exception(backoff.expo, Exception, max_tries=3)
def generate(generator: GeneratorEngine, messages: List) -> str:
    """Query the generator and return its raw response once it validates.

    A response is accepted only if it is non-empty, parses as JSON
    (``strict=False``), and decodes to a JSON object that has an
    ``"isDependency"`` key. Any raised exception makes the ``backoff``
    decorator retry with exponential waits, for at most three attempts in
    total; the exception of the last attempt propagates to the caller.

    Args:
        generator: Engine whose ``generate(messages=...)`` produces the
            response text.
        messages: Chat messages forwarded unchanged to the generator.

    Returns:
        The unparsed response text (not the decoded object).

    Raises:
        Exception: If the response is empty, is not valid JSON, or is not a
            JSON object containing ``"isDependency"``.
    """
    response = generator.generate(messages=messages)

    if not response:
        raise Exception("Response is empty.")

    try:
        parsed = json.loads(response, strict=False)
    except json.JSONDecodeError as exc:
        # Chain the decode error so the parse position is not lost.
        raise Exception("Response format not serializable.") from exc

    # Require a JSON object: on a JSON string `in` is a substring test and on
    # a JSON list a membership test, so either could pass the key check
    # without actually being a keyed answer.
    if not isinstance(parsed, dict) or "isDependency" not in parsed:
        raise Exception("KeyError: isDependency")

    return response


def run_generation(df: pd.DataFrame, model_name: str) -> None:
    """Query the model once per row of ``df`` and save the answers as CSV.

    For every row a ``Dependency`` is built via ``transform``, a system and a
    user prompt are assembled from ``AdvancedCfgNetPromptSettings``, and the
    pair is sent through ``generate``. The response, the shot text and the
    project description of each row are collected and stored in the new
    columns ``responses``, ``shot_info`` and ``project_info``.

    Side effects:
        * ``df`` is modified in place (the three columns above are added).
        * The frame is written to
          ``../data/analysis/failures_{model_name}.csv`` without the index.
          ``model_name`` is inserted into the file name verbatim.

    Args:
        df: Rows to process. Must provide the columns read by ``transform``
            plus ``project`` and ``context_str``.
        model_name: Name passed to ``GeneratorFactory().get_generator`` and
            used in the output file name.

    Returns:
        None. Results are available through ``df`` and the CSV file.
    """
    prompt_settings = AdvancedCfgNetPromptSettings
    generator = GeneratorFactory().get_generator(
        model_name=model_name,
        temperature=0.0
    )
    shots = load_shots()

    results = []
    project_info = []
    shot_info = []

    for row_label, row in tqdm(df.iterrows(), total=len(df), desc="Processing rows"):
        dependency = transform(row=row)

        project_str = get_projet_description(project_name=row["project"])
        context_str = row["context_str"]
        task_str = prompt_settings.get_task_str(dependency=dependency)
        shots_str = "\n\n".join(get_most_similar_shots(shots, dependency))
        format_str = prompt_settings.get_format_prompt()

        system_prompt = prompt_settings.get_system_str(
            dependency=dependency,
            project_str=project_str
        )

        user_prompt = prompt_settings.advanced_query_prompt.format(
            context_str=context_str,
            shot_str=shots_str,
            task_str=task_str,
            format_str=format_str
        )

        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            response = generate(
                generator=generator,
                messages=messages
            )
        except Exception as error:
            # Best effort: one failing row must not abort the whole run, so
            # the "None" placeholder is stored. Report the failure instead of
            # dropping it silently; tqdm.write keeps the progress bar intact.
            tqdm.write(f"Generation failed for row {row_label}: {error!r}")
            response = "None"

        shot_info.append(shots_str)
        project_info.append(project_str)
        results.append(response)

    # One entry per row was appended above, so the lists align with df.
    df["responses"] = results
    df["shot_info"] = shot_info
    df["project_info"] = project_info

    df.to_csv(f"../data/analysis/failures_{model_name}.csv", index=False)

In [8]:
# Re-run the generation for the annotated failures of a single model.
model_name = "llama3:70b"
env_file = "../.env"

# Keep only the "config2" rows recorded for the selected model.
df_failures = pd.read_csv("../data/analysis/failures_annotated.csv").loc[
    lambda frame: (frame["config"] == "config2") & (frame["llm"] == model_name)
]

print(f"Num Failures for {model_name}: {len(df_failures)}")

# Load the environment file before the generator is created.
load_dotenv(dotenv_path=env_file)
run_generation(df=df_failures, model_name=model_name)

Num Failures for llama3:70b: 152


Processing rows:   0%|          | 0/152 [00:00<?, ?it/s]

Processing rows:   1%|          | 1/152 [00:45<1:55:20, 45.83s/it]

Processing rows:   1%|▏         | 2/152 [01:20<1:38:18, 39.32s/it]

Processing rows:   2%|▏         | 3/152 [02:03<1:41:43, 40.97s/it]

Processing rows:   3%|▎         | 4/152 [02:48<1:45:07, 42.62s/it]

Processing rows:   3%|▎         | 5/152 [03:35<1:48:12, 44.16s/it]

Processing rows:   4%|▍         | 6/152 [04:20<1:48:09, 44.45s/it]

Processing rows:   5%|▍         | 7/152 [05:06<1:48:20, 44.83s/it]

Processing rows:   5%|▌         | 8/152 [05:48<1:45:57, 44.15s/it]

Processing rows:   6%|▌         | 9/152 [06:39<1:50:06, 46.20s/it]

Processing rows:   7%|▋         | 10/152 [07:35<1:56:20, 49.16s/it]

Processing rows:   7%|▋         | 11/152 [08:16<1:49:58, 46.80s/it]

Processing rows:   8%|▊         | 12/152 [09:08<1:52:40, 48.29s/it]

Processing rows:   9%|▊         | 13/152 [09:48<1:45:49, 45.68s/it]

Processing rows:   9%|▉         | 14/152 [10:41<1:50:10, 47.90s/it]

Processing rows:  10%|▉         | 15/152 [11:27<1:48:09, 47.37s/it]

Processing rows:  11%|█         | 16/152 [12:03<1:39:50, 44.05s/it]

Processing rows:  11%|█         | 17/152 [12:55<1:44:35, 46.48s/it]

Processing rows:  12%|█▏        | 18/152 [13:52<1:50:29, 49.47s/it]

Processing rows:  12%|█▎        | 19/152 [14:43<1:50:52, 50.02s/it]

Processing rows:  13%|█▎        | 20/152 [15:28<1:46:38, 48.47s/it]

Processing rows:  14%|█▍        | 21/152 [16:17<1:46:13, 48.65s/it]

Processing rows:  14%|█▍        | 22/152 [17:01<1:42:18, 47.22s/it]

Processing rows:  15%|█▌        | 23/152 [17:48<1:41:30, 47.21s/it]

Processing rows:  16%|█▌        | 24/152 [18:30<1:37:23, 45.65s/it]

Processing rows:  16%|█▋        | 25/152 [19:16<1:36:40, 45.67s/it]

Processing rows:  17%|█▋        | 26/152 [20:02<1:36:08, 45.78s/it]

Processing rows:  18%|█▊        | 27/152 [20:52<1:38:11, 47.13s/it]

Processing rows:  18%|█▊        | 28/152 [21:41<1:38:42, 47.76s/it]

Processing rows:  19%|█▉        | 29/152 [22:22<1:33:45, 45.74s/it]

Processing rows:  20%|█▉        | 30/152 [23:16<1:37:41, 48.05s/it]

Processing rows:  20%|██        | 31/152 [24:06<1:38:17, 48.74s/it]

Processing rows:  21%|██        | 32/152 [24:49<1:33:56, 46.97s/it]

Processing rows:  22%|██▏       | 33/152 [25:33<1:31:21, 46.07s/it]

Processing rows:  22%|██▏       | 34/152 [26:12<1:26:25, 43.95s/it]

Processing rows:  23%|██▎       | 35/152 [27:00<1:27:55, 45.09s/it]

Processing rows:  24%|██▎       | 36/152 [27:43<1:25:57, 44.46s/it]

Processing rows:  24%|██▍       | 37/152 [28:35<1:29:44, 46.82s/it]

Processing rows:  25%|██▌       | 38/152 [29:24<1:30:14, 47.49s/it]

Processing rows:  26%|██▌       | 39/152 [30:08<1:27:12, 46.31s/it]

Processing rows:  26%|██▋       | 40/152 [31:04<1:31:58, 49.27s/it]

Processing rows:  27%|██▋       | 41/152 [31:45<1:26:31, 46.77s/it]

Processing rows:  28%|██▊       | 42/152 [32:34<1:27:05, 47.51s/it]

Processing rows:  28%|██▊       | 43/152 [33:19<1:25:12, 46.90s/it]

Processing rows:  29%|██▉       | 44/152 [34:00<1:20:50, 44.91s/it]

Processing rows:  30%|██▉       | 45/152 [34:39<1:16:58, 43.16s/it]

Processing rows:  30%|███       | 46/152 [35:22<1:16:13, 43.15s/it]

Processing rows:  31%|███       | 47/152 [36:13<1:19:30, 45.44s/it]

Processing rows:  32%|███▏      | 48/152 [36:58<1:18:51, 45.49s/it]

Processing rows:  32%|███▏      | 49/152 [37:51<1:21:58, 47.75s/it]

Processing rows:  33%|███▎      | 50/152 [38:31<1:16:47, 45.17s/it]

Processing rows:  34%|███▎      | 51/152 [39:12<1:14:10, 44.06s/it]

Processing rows:  34%|███▍      | 52/152 [39:56<1:13:15, 43.96s/it]

Processing rows:  35%|███▍      | 53/152 [40:41<1:13:03, 44.28s/it]

Processing rows:  36%|███▌      | 54/152 [41:27<1:13:08, 44.78s/it]

Processing rows:  36%|███▌      | 55/152 [42:13<1:13:02, 45.18s/it]

Processing rows:  37%|███▋      | 56/152 [42:55<1:10:59, 44.37s/it]

Processing rows:  38%|███▊      | 57/152 [43:38<1:09:17, 43.77s/it]

Processing rows:  38%|███▊      | 58/152 [44:26<1:10:47, 45.19s/it]

Processing rows:  39%|███▉      | 59/152 [45:12<1:10:29, 45.48s/it]

Processing rows:  39%|███▉      | 60/152 [45:52<1:07:13, 43.84s/it]

Processing rows:  40%|████      | 61/152 [46:31<1:04:10, 42.32s/it]

Processing rows:  41%|████      | 62/152 [47:18<1:05:30, 43.68s/it]

Processing rows:  41%|████▏     | 63/152 [47:57<1:02:44, 42.30s/it]

Processing rows:  42%|████▏     | 64/152 [48:41<1:02:50, 42.85s/it]

Processing rows:  43%|████▎     | 65/152 [49:25<1:02:22, 43.01s/it]

Processing rows:  43%|████▎     | 66/152 [50:04<59:57, 41.83s/it]  

Processing rows:  44%|████▍     | 67/152 [50:43<58:23, 41.22s/it]

Processing rows:  45%|████▍     | 68/152 [51:37<1:02:48, 44.86s/it]

Processing rows:  45%|████▌     | 69/152 [52:23<1:02:46, 45.38s/it]

Processing rows:  46%|████▌     | 70/152 [53:08<1:01:32, 45.04s/it]

Processing rows:  47%|████▋     | 71/152 [53:54<1:01:26, 45.51s/it]

Processing rows:  47%|████▋     | 72/152 [54:43<1:02:01, 46.52s/it]

Processing rows:  48%|████▊     | 73/152 [55:23<58:40, 44.56s/it]  

Processing rows:  49%|████▊     | 74/152 [56:07<57:49, 44.48s/it]

Processing rows:  49%|████▉     | 75/152 [56:48<55:41, 43.40s/it]

Processing rows:  50%|█████     | 76/152 [57:35<56:08, 44.32s/it]

Processing rows:  51%|█████     | 77/152 [58:16<54:11, 43.35s/it]

Processing rows:  51%|█████▏    | 78/152 [59:03<54:55, 44.54s/it]

Processing rows:  52%|█████▏    | 79/152 [59:44<52:43, 43.33s/it]

Processing rows:  53%|█████▎    | 80/152 [1:00:30<52:58, 44.14s/it]

Processing rows:  53%|█████▎    | 81/152 [1:01:12<51:26, 43.47s/it]

Processing rows:  54%|█████▍    | 82/152 [1:02:00<52:34, 45.06s/it]

Processing rows:  55%|█████▍    | 83/152 [1:02:43<50:58, 44.33s/it]

Processing rows:  55%|█████▌    | 84/152 [1:03:30<51:05, 45.08s/it]

Processing rows:  56%|█████▌    | 85/152 [1:04:17<50:57, 45.64s/it]

Processing rows:  57%|█████▋    | 86/152 [1:04:55<47:45, 43.42s/it]

Processing rows:  57%|█████▋    | 87/152 [1:05:42<48:06, 44.40s/it]

Processing rows:  58%|█████▊    | 88/152 [1:06:27<47:36, 44.63s/it]

Processing rows:  59%|█████▊    | 89/152 [1:07:17<48:27, 46.14s/it]

Processing rows:  59%|█████▉    | 90/152 [1:07:59<46:33, 45.06s/it]

Processing rows:  60%|█████▉    | 91/152 [1:08:48<46:53, 46.12s/it]

Processing rows:  61%|██████    | 92/152 [1:09:27<44:09, 44.15s/it]

Processing rows:  61%|██████    | 93/152 [1:10:09<42:41, 43.41s/it]

Processing rows:  62%|██████▏   | 94/152 [1:10:56<43:04, 44.57s/it]

Processing rows:  62%|██████▎   | 95/152 [1:11:44<43:20, 45.62s/it]

Processing rows:  63%|██████▎   | 96/152 [1:12:32<43:13, 46.31s/it]

Processing rows:  64%|██████▍   | 97/152 [1:13:16<41:54, 45.72s/it]

Processing rows:  64%|██████▍   | 98/152 [1:13:55<39:14, 43.60s/it]

Processing rows:  65%|██████▌   | 99/152 [1:14:39<38:27, 43.54s/it]

Processing rows:  66%|██████▌   | 100/152 [1:15:20<37:11, 42.91s/it]

Processing rows:  66%|██████▋   | 101/152 [1:16:06<37:10, 43.73s/it]

Processing rows:  67%|██████▋   | 102/152 [1:16:50<36:36, 43.93s/it]

Processing rows:  68%|██████▊   | 103/152 [1:17:34<35:51, 43.90s/it]

Processing rows:  68%|██████▊   | 104/152 [1:18:21<35:52, 44.84s/it]

Processing rows:  69%|██████▉   | 105/152 [1:19:05<34:59, 44.68s/it]

Processing rows:  70%|██████▉   | 106/152 [1:19:50<34:16, 44.70s/it]

Processing rows:  70%|███████   | 107/152 [1:20:43<35:28, 47.29s/it]

Processing rows:  71%|███████   | 108/152 [1:21:31<34:48, 47.46s/it]

Processing rows:  72%|███████▏  | 109/152 [1:22:13<32:52, 45.87s/it]

Processing rows:  72%|███████▏  | 110/152 [1:23:01<32:33, 46.51s/it]

Processing rows:  73%|███████▎  | 111/152 [1:23:52<32:41, 47.83s/it]

Processing rows:  74%|███████▎  | 112/152 [1:24:37<31:19, 47.00s/it]

Processing rows:  74%|███████▍  | 113/152 [1:25:26<30:49, 47.42s/it]

Processing rows:  75%|███████▌  | 114/152 [1:26:13<30:06, 47.53s/it]

Processing rows:  76%|███████▌  | 115/152 [1:27:04<29:47, 48.32s/it]

Processing rows:  76%|███████▋  | 116/152 [1:27:49<28:24, 47.36s/it]

Processing rows:  77%|███████▋  | 117/152 [1:28:32<26:51, 46.03s/it]

Processing rows:  78%|███████▊  | 118/152 [1:29:17<25:59, 45.88s/it]

Processing rows:  78%|███████▊  | 119/152 [1:29:59<24:34, 44.70s/it]

Processing rows:  79%|███████▉  | 120/152 [1:30:40<23:15, 43.60s/it]

Processing rows:  80%|███████▉  | 121/152 [1:31:24<22:36, 43.75s/it]

Processing rows:  80%|████████  | 122/152 [1:32:06<21:32, 43.09s/it]

Processing rows:  81%|████████  | 123/152 [1:32:47<20:34, 42.58s/it]

Processing rows:  82%|████████▏ | 124/152 [1:33:29<19:46, 42.36s/it]

Processing rows:  82%|████████▏ | 125/152 [1:34:15<19:34, 43.49s/it]

Processing rows:  83%|████████▎ | 126/152 [1:34:58<18:47, 43.36s/it]

Processing rows:  84%|████████▎ | 127/152 [1:35:46<18:37, 44.71s/it]

Processing rows:  84%|████████▍ | 128/152 [1:36:26<17:20, 43.37s/it]

Processing rows:  85%|████████▍ | 129/152 [1:37:14<17:08, 44.74s/it]

Processing rows:  86%|████████▌ | 130/152 [1:37:53<15:44, 42.91s/it]

Processing rows:  86%|████████▌ | 131/152 [1:38:35<14:58, 42.79s/it]

Processing rows:  87%|████████▋ | 132/152 [1:39:26<15:04, 45.24s/it]

Processing rows:  88%|████████▊ | 133/152 [1:40:17<14:50, 46.86s/it]

Processing rows:  88%|████████▊ | 134/152 [1:41:08<14:23, 47.98s/it]

Processing rows:  89%|████████▉ | 135/152 [1:41:56<13:39, 48.18s/it]

Processing rows:  89%|████████▉ | 136/152 [1:42:35<12:07, 45.47s/it]

Processing rows:  90%|█████████ | 137/152 [1:43:28<11:54, 47.65s/it]

Processing rows:  91%|█████████ | 138/152 [1:44:28<11:59, 51.38s/it]

Processing rows:  91%|█████████▏| 139/152 [1:45:10<10:29, 48.44s/it]

Processing rows:  92%|█████████▏| 140/152 [1:45:50<09:11, 45.94s/it]

Processing rows:  93%|█████████▎| 141/152 [1:46:48<09:05, 49.57s/it]

Processing rows:  93%|█████████▎| 142/152 [1:47:35<08:06, 48.67s/it]

Processing rows:  94%|█████████▍| 143/152 [1:48:27<07:27, 49.77s/it]

Processing rows:  95%|█████████▍| 144/152 [1:49:15<06:34, 49.37s/it]

Processing rows:  95%|█████████▌| 145/152 [1:50:07<05:49, 49.92s/it]

Processing rows:  96%|█████████▌| 146/152 [1:50:57<05:00, 50.02s/it]

Processing rows:  97%|█████████▋| 147/152 [1:51:46<04:08, 49.77s/it]

Processing rows:  97%|█████████▋| 148/152 [1:52:35<03:18, 49.64s/it]

Processing rows:  98%|█████████▊| 149/152 [1:53:26<02:29, 49.83s/it]

Processing rows:  99%|█████████▊| 150/152 [1:54:14<01:38, 49.30s/it]

Processing rows:  99%|█████████▉| 151/152 [1:54:56<00:47, 47.28s/it]

Processing rows: 100%|██████████| 152/152 [1:55:43<00:00, 45.68s/it]
