In [1]:
import pandas as pd
import json
import re
import csv
import time
from tqdm import tqdm
from ollama import chat


# Source dataset. lines=True makes pandas parse the file as JSON Lines,
# i.e. one JSON record per line of the file.
input_file = 'merged_200_papers.json'
df = pd.read_json(path_or_buf=input_file, lines=True)

# Columns that hold the LLM judgements. Each one is "llm_" + the key the model
# is asked to return, so the list is derived from the bare names.
llm_fields = [
    f"llm_{criterion}"
    for criterion in (
        "Comprehensiveness",
        "Vagueness",
        "Objectivity",
        "Fairness",
        "Actionability",
        "Constructiveness",
        "Relevance Alignment",
        "Clarity and Readability",
        "Usage of Technical Terms",
        "Factuality",
        "Overall Quality",
        "overall_score_100",
        "Sentiment Polarity",
        "Politeness",
    )
]


# Make sure every LLM output column exists and can store mixed value types.
for field in llm_fields:
    if field not in df.columns:
        df[field] = pd.NA
    # The score columns receive both integers and text labels (e.g. "factual",
    # "polite"). A column that was loaded as all-null may have a numeric dtype,
    # and newer pandas versions deprecate silently upcasting such a column when
    # a string is assigned into it, so use object dtype up front.
    df[field] = df[field].astype(object)

# Regex that captures the JSON object enclosed in
# <review_assessment>...</review_assessment>. re.S (DOTALL) lets "." match
# newlines, so a JSON object spread over several lines is captured as group 1.
pattern = re.compile(
    r"<review_assessment>\s*(\{.*?\})\s*</review_assessment>",
    flags=re.S,
)

# Prompt template for the review-quality judge. It is filled with
# str.format(title=..., abstract=..., review_text=...); literal JSON braces in
# the example output are doubled ({{ }}) so that format() emits single braces.
# Each criterion appears exactly once in the example JSON.
template = """# REVIEW-QUALITY JUDGE

## 0 — ROLE

You are **ReviewInspector-LLM**, a rigorous, impartial meta-reviewer.
Your goal is to assess the quality of a single peer-review against a predefined set of criteria and to provide precise, structured evaluations.

## 1 — INPUTS

Title: {title}
Abstract: {abstract}
Review: {review_text}

## 2 — EVALUATION CRITERIA

Return **only** the scale value or label at right (no rationale text).

| #  | Criterion                    | Allowed scale / label                       | Description                                                                |
| -- | ---------------------------- | ------------------------------------------- | -------------------------------------------------------------------------- |
| 1  | **Comprehensiveness**        | integer **0-5**                             | Extent to which the review covers all key aspects of the paper.            |
| 2  | **Usage of Technical Terms** | integer **0-5**                             | Appropriateness and frequency of domain-specific vocabulary.               |
| 3  | **Factuality**               | **factual / partially factual / unfactual** | Accuracy of the statements made in the review.                             |
| 4  | **Sentiment Polarity**       | **negative / neutral / positive**           | Overall sentiment conveyed by the reviewer.                                |
| 5  | **Politeness**               | **polite / neutral / impolite**             | Tone and manner of the review language.                                    |
| 6  | **Vagueness**                | **none / low / moderate / high / extreme**  | Degree of ambiguity or lack of specificity in the review.                  |
| 7  | **Objectivity**              | integer **0-5**                             | Presence of unbiased, evidence-based commentary.                           |
| 8  | **Fairness**                 | integer **0-5**                             | Perceived impartiality and balance in judgments.                           |
| 9  | **Actionability**            | integer **0-5**                             | Helpfulness of the review in suggesting clear next steps.                  |
| 10 | **Constructiveness**         | integer **0-5**                             | Degree to which the review offers improvements rather than just criticism. |
| 11 | **Relevance Alignment**      | integer **0-5**                             | How well the review relates to the content and scope of the paper.         |
| 12 | **Clarity and Readability**  | integer **0-5**                             | Ease of understanding the review, including grammar and structure.         |
| 13 | **Overall Quality**          | integer **0-100**                           | Holistic evaluation of the review's usefulness and professionalism.        |

## 3 — SCORING GUIDELINES

For 0-5 scales:

* 5 = Outstanding
* 4 = Strong
* 3 = Adequate
* 2 = Weak
* 1 = Very weak
* 0 = Absent/irrelevant

## 4 — ANALYSIS & COMPUTATION (silent)

1. Read and understand the review in the context of the paper title and abstract.
2. Extract quantitative and qualitative signals (e.g., term usage, factual consistency, tone, clarity).
3. Map observations to the corresponding scoring scales.

## 5 — OUTPUT FORMAT (strict)  
Return **exactly one** JSON block wrapped in the tag below — **no comments or extra text**.

```json
<review_assessment>
{{
  "paper_title": "{title}",
  "criteria": {{
    "Comprehensiveness":       ...,
    "Usage of Technical Terms":   ...,
    "Factuality":    ...,
    "Sentiment Polarity":      ...,
    "Politeness":  ...,
    "Vagueness":          ...,
    "Objectivity":             ...,
    "Fairness":         ...,
    "Actionability":        ...,
    "Constructiveness":    ...,
    "Relevance Alignment":    ...,
    "Clarity and Readability":    ...,
    "Overall Quality":     ...
  }},
  "overall_score_100": ...
}}
</review_assessment>
```
"""

# Show the loaded frame as the cell output to eyeball the rows and the llm_* columns.
df

Unnamed: 0,paper_id,title,abstract,review_text,authors,reviewer,review_date,review_rating,review_confidence,review_soundness,...,llm_Fairness,llm_Actionability,llm_Constructiveness,llm_Relevance Alignment,llm_Clarity and Readability,llm_Usage of Technical Terms,llm_Factuality,llm_Overall Quality,llm_Sentiment Polarity,llm_Politeness
0,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,**Summary:** \nThis paper presents an open-sou...,"['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_EGJf,1701662567826,6.0,3.0,3.0,...,,,,,,,,,,
1,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,This paper proposes a comprehensive library fo...,"['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_DWom,1699636125239,6.0,3.0,3.0,...,,,,,,,,,,
2,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,"This author introduces LyCORIS, an open source...","['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_PnHf,1699636125143,6.0,4.0,4.0,...,,,,,,,,,,
3,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,"The authors propose LyCORIS, an open-source li...","['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_ekPo,1699636125075,8.0,4.0,3.0,...,,,,,,,,,,
4,0,$\nu$-ensembles: Improving deep ensemble calib...,We present a method to improve the calibration...,"This paper introduces ν-ensembles, a novel dee...","['~Konstantinos_Pitas1', '~Julyan_Arbel1']",Reviewer_HFRa,1699636992453,3.0,4.0,2.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,118,Module Extraction for Efficient Object Query o...,The extraction of logically-independent fragme...,The submission addresses the problem of partit...,,Anonymous,03/May/2014,,,,...,,,,,,,,,,
657,61,EARTh: an Environmental Application Reference ...,The paper aims at providing a description of E...,This revision addresses my concerns. I am part...,,Natasha Noy,22/Jul/2013,,,,...,,,,,,,,,,
658,76,Facilitating Data Discovery by Connecting Rela...,"In this study, we investigate two approaches t...",The paper presents and compares RDF/XML (in th...,,Anonymous,15/Jun/2013,,,,...,,,,,,,,,,
659,76,Facilitating Data Discovery by Connecting Rela...,"In this study, we investigate two approaches t...",This paper investigates two different approach...,,Ghislain Hachey,17/Jun/2013,,,,...,,,,,,,,,,


In [2]:
# Score every review with the LLM and store the result in the llm_* columns of df.

# Generation settings passed to the Ollama chat call. A zero temperature and a
# fixed seed are meant to keep runs repeatable.
MODEL_NAME = "qwen3:8b"
MAX_ATTEMPTS = 5  # tries per row before the row is reported and left unscored
temperature = 0
seed = 42


def _parse_assessment(content):
    """Turn one model reply into column updates for a single row.

    Args:
        content: Raw text of the model reply.

    Returns:
        ``(updates, ignored)`` where ``updates`` maps ``llm_*`` column names to
        values and ``ignored`` lists criterion keys that do not correspond to
        any column in ``llm_fields``.

    Raises:
        ValueError: no <review_assessment> JSON block was found, the JSON is
            invalid, ``criteria`` / ``overall_score_100`` is missing, or a
            criterion value is not a scalar.
    """
    match = pattern.search(content)
    if not match:
        raise ValueError("No JSON block found")

    parsed = json.loads(match.group(1))  # JSONDecodeError is a ValueError
    criteria = parsed.get("criteria")
    if not isinstance(criteria, dict):
        raise ValueError("Response has no 'criteria' object")
    if "overall_score_100" not in parsed:
        raise ValueError("Response has no 'overall_score_100' value")

    expected = set(llm_fields)
    updates = {}
    ignored = []
    for key, val in criteria.items():
        column = f"llm_{key}"
        if column not in expected:
            # Keys invented by the model must not create new DataFrame columns.
            ignored.append(key)
            continue
        if isinstance(val, (dict, list)):
            raise ValueError(f"Non-scalar value for criterion {key!r}")
        updates[column] = val
    # Written last so the top-level score wins over any same-named criterion.
    updates["llm_overall_score_100"] = parsed["overall_score_100"]
    return updates, ignored


# NOTE(review): scores are only kept in `df` in memory. Persist them to a
# separate output file once the loop finishes; writing CSV to `input_file`
# would replace the JSON Lines input that pd.read_json(..., lines=True) reads.
for idx, row in tqdm(df.iterrows(), total=len(df), desc="Scoring with LLM"):
    # Rows whose LLM columns are all filled are skipped, so the cell can be re-run.
    if all(pd.notna(row.get(field, pd.NA)) for field in llm_fields):
        continue

    prompt = template.format(
        title=row['title'],
        abstract=row['abstract'],
        review_text=row['review_text'],
    )

    for attempt in range(MAX_ATTEMPTS):
        try:
            response = chat(
                MODEL_NAME,
                messages=[{'role': 'user', 'content': prompt}],
                options={'temperature': temperature, 'seed': seed},
            )
            # Validate the whole reply first so a bad reply never leaves a
            # half-written row behind.
            updates, ignored = _parse_assessment(response['message']['content'])
            print(updates["llm_overall_score_100"])
            if ignored:
                print(f"⚠️ Row {idx}: ignored unexpected criteria {ignored}")
            for column, val in updates.items():
                df.at[idx, column] = val
            break

        except Exception as e:
            # Best effort: log the failure and try the same row again.
            print(f"❌ Error at row {idx}, attempt {attempt + 1}: {e}")
    else:
        # Runs only when no attempt reached `break`.
        print(f"❌ Giving up on row {idx} after {MAX_ATTEMPTS} attempts")

Scoring with LLM:   0%|          | 0/661 [00:00<?, ?it/s]

Scoring with LLM:   0%|          | 1/661 [00:42<7:45:08, 42.29s/it]

55


Scoring with LLM:   0%|          | 2/661 [01:43<9:43:40, 53.14s/it]

75


❌ Error at row 2, attempt 1: 'overall_score_100'


Scoring with LLM:   0%|          | 3/661 [02:47<10:40:22, 58.39s/it]

75


Scoring with LLM:   1%|          | 4/661 [03:23<8:59:50, 49.30s/it] 

85


Scoring with LLM:   1%|          | 5/661 [10:38<34:20:51, 188.49s/it]

92


Scoring with LLM:   1%|          | 6/661 [11:13<24:48:01, 136.31s/it]

95


Scoring with LLM:   1%|          | 7/661 [11:54<19:06:59, 105.23s/it]

85


Scoring with LLM:   1%|          | 8/661 [12:36<15:25:57, 85.08s/it] 

95


Scoring with LLM:   1%|▏         | 9/661 [13:17<12:54:43, 71.29s/it]

75


Scoring with LLM:   2%|▏         | 10/661 [13:59<11:16:07, 62.32s/it]

90


❌ Error at row 10, attempt 1: No JSON block found


Scoring with LLM:   2%|▏         | 11/661 [15:45<13:39:22, 75.63s/it]

90


Scoring with LLM:   2%|▏         | 12/661 [16:27<11:47:42, 65.43s/it]

65


Scoring with LLM:   2%|▏         | 13/661 [17:08<10:26:33, 58.01s/it]

88


Scoring with LLM:   2%|▏         | 14/661 [17:44<9:13:04, 51.29s/it] 

88


Scoring with LLM:   2%|▏         | 15/661 [18:28<8:50:35, 49.28s/it]

55


Scoring with LLM:   2%|▏         | 16/661 [19:03<8:03:08, 44.94s/it]

55


Scoring with LLM:   3%|▎         | 17/661 [19:50<8:06:23, 45.32s/it]

85


Scoring with LLM:   3%|▎         | 18/661 [20:34<8:04:07, 45.18s/it]

100


Scoring with LLM:   3%|▎         | 19/661 [21:11<7:36:28, 42.66s/it]

82


Scoring with LLM:   3%|▎         | 20/661 [21:45<7:07:58, 40.06s/it]

80


Scoring with LLM:   3%|▎         | 21/661 [22:20<6:50:38, 38.50s/it]

85


Scoring with LLM:   3%|▎         | 22/661 [22:58<6:47:06, 38.23s/it]

95


Scoring with LLM:   3%|▎         | 23/661 [23:37<6:51:20, 38.68s/it]

85


❌ Error at row 23, attempt 1: 'overall_score_100'


Scoring with LLM:   4%|▎         | 24/661 [24:52<8:46:07, 49.56s/it]

88


Scoring with LLM:   4%|▍         | 25/661 [25:34<8:18:59, 47.07s/it]

75


Scoring with LLM:   4%|▍         | 26/661 [26:11<7:47:29, 44.17s/it]

92


Scoring with LLM:   4%|▍         | 27/661 [26:49<7:27:03, 42.31s/it]

85


Scoring with LLM:   4%|▍         | 28/661 [27:33<7:32:17, 42.87s/it]

75


Scoring with LLM:   4%|▍         | 29/661 [28:08<7:05:45, 40.42s/it]

90


Scoring with LLM:   5%|▍         | 30/661 [29:09<8:10:26, 46.63s/it]

85


❌ Error at row 30, attempt 1: 'overall_score_100'


❌ Error at row 30, attempt 2: 'overall_score_100'


Scoring with LLM:   5%|▍         | 31/661 [30:52<11:06:53, 63.51s/it]

75


Scoring with LLM:   5%|▍         | 32/661 [31:19<9:12:28, 52.70s/it] 

80


Scoring with LLM:   5%|▍         | 33/661 [31:36<7:19:04, 41.95s/it]

88


Scoring with LLM:   5%|▌         | 34/661 [31:56<6:07:41, 35.19s/it]

75


Scoring with LLM:   5%|▌         | 35/661 [32:38<6:29:04, 37.29s/it]

95


Scoring with LLM:   5%|▌         | 36/661 [33:02<5:48:38, 33.47s/it]

75


Scoring with LLM:   6%|▌         | 37/661 [33:33<5:37:53, 32.49s/it]

85


Scoring with LLM:   6%|▌         | 38/661 [33:52<4:57:38, 28.67s/it]

88


Scoring with LLM:   6%|▌         | 39/661 [34:11<4:25:23, 25.60s/it]

85


Scoring with LLM:   6%|▌         | 40/661 [34:42<4:43:12, 27.36s/it]

85


Scoring with LLM:   6%|▌         | 41/661 [35:04<4:25:59, 25.74s/it]

88


Scoring with LLM:   6%|▋         | 42/661 [35:30<4:24:20, 25.62s/it]

95


Scoring with LLM:   7%|▋         | 43/661 [35:51<4:10:11, 24.29s/it]

75


Scoring with LLM:   7%|▋         | 44/661 [36:23<4:34:41, 26.71s/it]

55


Scoring with LLM:   7%|▋         | 45/661 [36:54<4:48:11, 28.07s/it]

85


Scoring with LLM:   7%|▋         | 46/661 [37:15<4:23:55, 25.75s/it]

85


Scoring with LLM:   7%|▋         | 47/661 [37:38<4:17:22, 25.15s/it]

85


Scoring with LLM:   7%|▋         | 48/661 [38:15<4:52:34, 28.64s/it]

75


Scoring with LLM:   7%|▋         | 49/661 [38:46<4:58:08, 29.23s/it]

85


Scoring with LLM:   8%|▊         | 50/661 [39:07<4:33:21, 26.84s/it]

65


Scoring with LLM:   8%|▊         | 51/661 [39:37<4:42:52, 27.82s/it]

85


Scoring with LLM:   8%|▊         | 52/661 [39:54<4:09:38, 24.59s/it]

65


Scoring with LLM:   8%|▊         | 53/661 [40:12<3:49:10, 22.62s/it]

65


Scoring with LLM:   8%|▊         | 54/661 [40:36<3:50:58, 22.83s/it]

85


Scoring with LLM:   8%|▊         | 55/661 [41:06<4:13:59, 25.15s/it]

75


Scoring with LLM:   8%|▊         | 56/661 [41:34<4:21:07, 25.90s/it]

85


Scoring with LLM:   9%|▊         | 57/661 [42:09<4:50:02, 28.81s/it]

75


Scoring with LLM:   9%|▉         | 58/661 [42:36<4:43:15, 28.19s/it]

75


❌ Error at row 58, attempt 1: 'overall_score_100'


Scoring with LLM:   9%|▉         | 59/661 [43:38<6:25:28, 38.42s/it]

95


Scoring with LLM:   9%|▉         | 60/661 [44:26<6:51:16, 41.06s/it]

85


Scoring with LLM:   9%|▉         | 61/661 [44:43<5:40:34, 34.06s/it]

85


Scoring with LLM:   9%|▉         | 62/661 [45:19<5:44:31, 34.51s/it]

100


Scoring with LLM:  10%|▉         | 63/661 [45:43<5:12:44, 31.38s/it]

75


Scoring with LLM:  10%|▉         | 64/661 [46:06<4:46:37, 28.81s/it]

85


Scoring with LLM:  10%|▉         | 65/661 [46:40<5:02:08, 30.42s/it]

65


Scoring with LLM:  10%|▉         | 66/661 [46:58<4:25:00, 26.72s/it]

85


❌ Error at row 66, attempt 1: 'overall_score_100'


Scoring with LLM:  10%|█         | 67/661 [47:53<5:47:37, 35.11s/it]

80


Scoring with LLM:  10%|█         | 68/661 [48:16<5:12:33, 31.62s/it]

85


Scoring with LLM:  10%|█         | 69/661 [48:41<4:50:25, 29.43s/it]

85


Scoring with LLM:  11%|█         | 70/661 [49:06<4:38:27, 28.27s/it]

85


Scoring with LLM:  11%|█         | 71/661 [49:30<4:24:49, 26.93s/it]

88


Scoring with LLM:  11%|█         | 72/661 [50:06<4:50:22, 29.58s/it]

55


Scoring with LLM:  11%|█         | 73/661 [50:37<4:55:46, 30.18s/it]

95


Scoring with LLM:  11%|█         | 74/661 [50:59<4:29:43, 27.57s/it]

95


Scoring with LLM:  11%|█▏        | 75/661 [51:32<4:46:36, 29.35s/it]

85


❌ Error at row 75, attempt 1: 'overall_score_100'


Scoring with LLM:  11%|█▏        | 76/661 [52:25<5:54:52, 36.40s/it]

88


Scoring with LLM:  12%|█▏        | 77/661 [52:58<5:42:52, 35.23s/it]

95


Scoring with LLM:  12%|█▏        | 78/661 [53:35<5:47:46, 35.79s/it]

55


Scoring with LLM:  12%|█▏        | 79/661 [53:49<4:44:28, 29.33s/it]

85


Scoring with LLM:  12%|█▏        | 80/661 [54:06<4:09:27, 25.76s/it]

80


Scoring with LLM:  12%|█▏        | 81/661 [54:31<4:05:42, 25.42s/it]

95


❌ Error at row 81, attempt 1: 'overall_score_100'


Scoring with LLM:  12%|█▏        | 82/661 [55:37<6:02:30, 37.57s/it]

82


Scoring with LLM:  13%|█▎        | 83/661 [56:08<5:42:59, 35.60s/it]

65


Scoring with LLM:  13%|█▎        | 84/661 [56:39<5:28:53, 34.20s/it]

80


Scoring with LLM:  13%|█▎        | 85/661 [57:10<5:18:46, 33.20s/it]

75


Scoring with LLM:  13%|█▎        | 86/661 [57:51<5:40:25, 35.52s/it]

65


Scoring with LLM:  13%|█▎        | 87/661 [58:14<5:03:31, 31.73s/it]

90


Scoring with LLM:  13%|█▎        | 88/661 [58:42<4:52:38, 30.64s/it]

75


Scoring with LLM:  13%|█▎        | 89/661 [59:17<5:06:42, 32.17s/it]

45


Scoring with LLM:  14%|█▎        | 90/661 [59:47<4:57:37, 31.27s/it]

85


Scoring with LLM:  14%|█▍        | 91/661 [1:00:06<4:22:43, 27.65s/it]

85


Scoring with LLM:  14%|█▍        | 92/661 [1:00:29<4:08:24, 26.19s/it]

85


Scoring with LLM:  14%|█▍        | 93/661 [1:00:59<4:19:40, 27.43s/it]

90


Scoring with LLM:  14%|█▍        | 94/661 [1:01:35<4:44:18, 30.09s/it]

95


Scoring with LLM:  14%|█▍        | 95/661 [1:01:57<4:19:36, 27.52s/it]

85


Scoring with LLM:  15%|█▍        | 96/661 [1:02:18<4:02:53, 25.79s/it]

65


Scoring with LLM:  15%|█▍        | 97/661 [1:02:54<4:29:15, 28.64s/it]

90


Scoring with LLM:  15%|█▍        | 98/661 [1:03:18<4:15:07, 27.19s/it]

78


Scoring with LLM:  15%|█▍        | 99/661 [1:03:47<4:21:51, 27.96s/it]

85


Scoring with LLM:  15%|█▌        | 100/661 [1:04:23<4:43:41, 30.34s/it]

75


Scoring with LLM:  15%|█▌        | 101/661 [1:04:56<4:48:49, 30.95s/it]

95


Scoring with LLM:  15%|█▌        | 102/661 [1:05:11<4:05:19, 26.33s/it]

85


Scoring with LLM:  16%|█▌        | 103/661 [1:05:33<3:51:26, 24.89s/it]

85


Scoring with LLM:  16%|█▌        | 104/661 [1:06:01<4:01:41, 26.03s/it]

85


Scoring with LLM:  16%|█▌        | 105/661 [1:06:17<3:32:37, 22.95s/it]

85


Scoring with LLM:  16%|█▌        | 106/661 [1:06:52<4:04:12, 26.40s/it]

95


Scoring with LLM:  16%|█▌        | 107/661 [1:07:21<4:11:46, 27.27s/it]

88


Scoring with LLM:  16%|█▋        | 108/661 [1:07:39<3:47:21, 24.67s/it]

80


Scoring with LLM:  16%|█▋        | 109/661 [1:08:00<3:35:36, 23.44s/it]

75


Scoring with LLM:  17%|█▋        | 110/661 [1:08:23<3:34:25, 23.35s/it]

88


Scoring with LLM:  17%|█▋        | 111/661 [1:08:48<3:38:43, 23.86s/it]

80


❌ Error at row 111, attempt 1: 'overall_score_100'


Scoring with LLM:  17%|█▋        | 112/661 [1:09:51<5:25:04, 35.53s/it]

92


❌ Error at row 112, attempt 1: 'overall_score_100'


Scoring with LLM:  17%|█▋        | 113/661 [1:11:01<6:59:31, 45.93s/it]

80


Scoring with LLM:  17%|█▋        | 114/661 [1:11:36<6:29:27, 42.72s/it]

75


Scoring with LLM:  17%|█▋        | 115/661 [1:11:55<5:24:10, 35.62s/it]

88


Scoring with LLM:  18%|█▊        | 116/661 [1:12:26<5:08:33, 33.97s/it]

88


Scoring with LLM:  18%|█▊        | 117/661 [1:12:56<4:59:08, 32.99s/it]

55


Scoring with LLM:  18%|█▊        | 118/661 [1:13:32<5:05:52, 33.80s/it]

85


Scoring with LLM:  18%|█▊        | 119/661 [1:14:19<5:41:49, 37.84s/it]

45


Scoring with LLM:  18%|█▊        | 120/661 [1:14:53<5:31:22, 36.75s/it]

70


Scoring with LLM:  18%|█▊        | 121/661 [1:15:24<5:13:04, 34.79s/it]

88


Scoring with LLM:  18%|█▊        | 122/661 [1:15:59<5:13:47, 34.93s/it]

65


Scoring with LLM:  19%|█▊        | 123/661 [1:16:35<5:15:30, 35.19s/it]

85


Scoring with LLM:  19%|█▉        | 124/661 [1:17:09<5:12:19, 34.90s/it]

90


Scoring with LLM:  19%|█▉        | 125/661 [1:17:53<5:35:14, 37.53s/it]

90


Scoring with LLM:  19%|█▉        | 126/661 [1:18:25<5:21:39, 36.07s/it]

75


Scoring with LLM:  19%|█▉        | 127/661 [1:18:58<5:11:11, 34.97s/it]

88


Scoring with LLM:  19%|█▉        | 128/661 [1:19:35<5:16:17, 35.61s/it]

92


Scoring with LLM:  20%|█▉        | 129/661 [1:20:09<5:13:04, 35.31s/it]

75


Scoring with LLM:  20%|█▉        | 130/661 [1:20:49<5:24:36, 36.68s/it]

55


Scoring with LLM:  20%|█▉        | 131/661 [1:21:26<5:23:45, 36.65s/it]

85


Scoring with LLM:  20%|█▉        | 132/661 [1:22:02<5:21:40, 36.49s/it]

85


Scoring with LLM:  20%|██        | 133/661 [1:22:53<5:59:32, 40.86s/it]

90


Scoring with LLM:  20%|██        | 134/661 [1:23:24<5:33:09, 37.93s/it]

85


Scoring with LLM:  20%|██        | 135/661 [1:23:55<5:14:27, 35.87s/it]

75


Scoring with LLM:  21%|██        | 136/661 [1:24:30<5:10:48, 35.52s/it]

82


Scoring with LLM:  21%|██        | 137/661 [1:25:02<5:01:35, 34.53s/it]

75


Scoring with LLM:  21%|██        | 138/661 [1:25:54<5:47:06, 39.82s/it]

85


Scoring with LLM:  21%|██        | 139/661 [1:26:30<5:37:02, 38.74s/it]

65


Scoring with LLM:  21%|██        | 140/661 [1:27:07<5:30:41, 38.08s/it]

95


Scoring with LLM:  21%|██▏       | 141/661 [1:27:44<5:28:01, 37.85s/it]

85


Scoring with LLM:  21%|██▏       | 142/661 [1:28:27<5:39:35, 39.26s/it]

85


Scoring with LLM:  22%|██▏       | 143/661 [1:29:02<5:28:17, 38.03s/it]

88


Scoring with LLM:  22%|██▏       | 144/661 [1:29:36<5:17:08, 36.81s/it]

85


Scoring with LLM:  22%|██▏       | 145/661 [1:30:14<5:19:29, 37.15s/it]

88


Scoring with LLM:  22%|██▏       | 146/661 [1:30:48<5:10:20, 36.16s/it]

88


Scoring with LLM:  22%|██▏       | 147/661 [1:31:22<5:04:31, 35.55s/it]

85


Scoring with LLM:  22%|██▏       | 148/661 [1:31:55<4:58:09, 34.87s/it]

85


❌ Error at row 148, attempt 1: 'overall_score_100'


Scoring with LLM:  23%|██▎       | 149/661 [1:33:19<7:02:42, 49.54s/it]

85


Scoring with LLM:  23%|██▎       | 150/661 [1:33:55<6:27:37, 45.51s/it]

65


Scoring with LLM:  23%|██▎       | 151/661 [1:34:53<6:57:21, 49.10s/it]

100


Scoring with LLM:  23%|██▎       | 152/661 [1:35:31<6:28:28, 45.79s/it]

85


Scoring with LLM:  23%|██▎       | 153/661 [1:36:08<6:07:37, 43.42s/it]

80


Scoring with LLM:  23%|██▎       | 154/661 [1:36:48<5:56:40, 42.21s/it]

88


Scoring with LLM:  23%|██▎       | 155/661 [1:37:28<5:49:53, 41.49s/it]

88


Scoring with LLM:  24%|██▎       | 156/661 [1:38:04<5:36:19, 39.96s/it]

88


Scoring with LLM:  24%|██▍       | 157/661 [1:38:50<5:51:36, 41.86s/it]

85


Scoring with LLM:  24%|██▍       | 158/661 [1:39:22<5:24:24, 38.70s/it]

70


Scoring with LLM:  24%|██▍       | 159/661 [1:39:42<4:38:12, 33.25s/it]

85


Scoring with LLM:  24%|██▍       | 160/661 [1:40:02<4:04:08, 29.24s/it]

80


Scoring with LLM:  24%|██▍       | 161/661 [1:40:39<4:23:16, 31.59s/it]

85


Scoring with LLM:  25%|██▍       | 162/661 [1:41:12<4:24:53, 31.85s/it]

75


Scoring with LLM:  25%|██▍       | 163/661 [1:41:29<3:49:16, 27.62s/it]

85


Scoring with LLM:  25%|██▍       | 164/661 [1:41:46<3:22:06, 24.40s/it]

72


Scoring with LLM:  25%|██▍       | 165/661 [1:42:14<3:30:17, 25.44s/it]

88


Scoring with LLM:  25%|██▌       | 166/661 [1:42:46<3:46:06, 27.41s/it]

95


Scoring with LLM:  25%|██▌       | 167/661 [1:43:11<3:39:40, 26.68s/it]

45


Scoring with LLM:  25%|██▌       | 168/661 [1:43:42<3:50:48, 28.09s/it]

88


Scoring with LLM:  26%|██▌       | 169/661 [1:44:01<3:26:19, 25.16s/it]

95


Scoring with LLM:  26%|██▌       | 170/661 [1:44:21<3:12:43, 23.55s/it]

20


Scoring with LLM:  26%|██▌       | 171/661 [1:44:52<3:32:34, 26.03s/it]

65


Scoring with LLM:  26%|██▌       | 172/661 [1:45:11<3:14:00, 23.81s/it]

88


Scoring with LLM:  26%|██▌       | 173/661 [1:45:39<3:23:19, 25.00s/it]

100


Scoring with LLM:  26%|██▋       | 174/661 [1:46:01<3:15:08, 24.04s/it]

75


Scoring with LLM:  26%|██▋       | 175/661 [1:46:34<3:38:33, 26.98s/it]

85


Scoring with LLM:  27%|██▋       | 176/661 [1:47:00<3:34:01, 26.48s/it]

88


Scoring with LLM:  27%|██▋       | 177/661 [1:47:29<3:39:15, 27.18s/it]

75


Scoring with LLM:  27%|██▋       | 178/661 [1:47:57<3:41:21, 27.50s/it]

70


Scoring with LLM:  27%|██▋       | 179/661 [1:48:41<4:21:12, 32.52s/it]

85


Scoring with LLM:  27%|██▋       | 180/661 [1:49:01<3:49:48, 28.67s/it]

85


Scoring with LLM:  27%|██▋       | 181/661 [1:49:32<3:54:22, 29.30s/it]

65


Scoring with LLM:  28%|██▊       | 182/661 [1:50:02<3:55:58, 29.56s/it]

85


Scoring with LLM:  28%|██▊       | 183/661 [1:50:37<4:09:06, 31.27s/it]

85


Scoring with LLM:  28%|██▊       | 184/661 [1:51:11<4:14:28, 32.01s/it]

88


Scoring with LLM:  28%|██▊       | 185/661 [1:51:48<4:26:29, 33.59s/it]

80


Scoring with LLM:  28%|██▊       | 186/661 [1:52:21<4:25:14, 33.50s/it]

70


Scoring with LLM:  28%|██▊       | 187/661 [1:53:03<4:44:32, 36.02s/it]

95


Scoring with LLM:  28%|██▊       | 188/661 [1:53:35<4:33:23, 34.68s/it]

85


Scoring with LLM:  29%|██▊       | 189/661 [1:54:14<4:42:35, 35.92s/it]

85


Scoring with LLM:  29%|██▊       | 190/661 [1:54:52<4:47:30, 36.63s/it]

85


Scoring with LLM:  29%|██▉       | 191/661 [1:55:26<4:40:54, 35.86s/it]

85


Scoring with LLM:  29%|██▉       | 192/661 [1:55:58<4:30:47, 34.64s/it]

88


Scoring with LLM:  29%|██▉       | 193/661 [1:56:32<4:30:01, 34.62s/it]

85


Scoring with LLM:  29%|██▉       | 194/661 [1:57:07<4:30:32, 34.76s/it]

60


Scoring with LLM:  30%|██▉       | 195/661 [1:57:41<4:27:22, 34.43s/it]

75


Scoring with LLM:  30%|██▉       | 196/661 [1:58:32<5:04:38, 39.31s/it]

85


Scoring with LLM:  30%|██▉       | 197/661 [1:59:11<5:04:04, 39.32s/it]

85


Scoring with LLM:  30%|██▉       | 198/661 [1:59:53<5:09:58, 40.17s/it]

85


Scoring with LLM:  30%|███       | 199/661 [2:00:27<4:55:05, 38.32s/it]

85


Scoring with LLM:  30%|███       | 200/661 [2:01:06<4:56:00, 38.53s/it]

85


Scoring with LLM:  30%|███       | 201/661 [2:01:37<4:36:36, 36.08s/it]

88


Scoring with LLM:  31%|███       | 202/661 [2:02:29<5:13:55, 41.04s/it]

95


Scoring with LLM:  31%|███       | 203/661 [2:03:17<5:27:52, 42.95s/it]

85


Scoring with LLM:  31%|███       | 204/661 [2:04:09<5:48:40, 45.78s/it]

85


Scoring with LLM:  31%|███       | 205/661 [2:04:50<5:37:51, 44.46s/it]

65


❌ Error at row 205, attempt 1: 'overall_score_100'


Scoring with LLM:  31%|███       | 206/661 [2:06:05<6:45:30, 53.47s/it]

85


Scoring with LLM:  31%|███▏      | 207/661 [2:07:09<7:08:16, 56.60s/it]

85


Scoring with LLM:  31%|███▏      | 208/661 [2:07:52<6:37:23, 52.63s/it]

90


Scoring with LLM:  32%|███▏      | 209/661 [2:08:33<6:10:32, 49.19s/it]

75


Scoring with LLM:  32%|███▏      | 210/661 [2:09:10<5:41:02, 45.37s/it]

85


Scoring with LLM:  32%|███▏      | 211/661 [2:09:48<5:23:19, 43.11s/it]

85


Scoring with LLM:  32%|███▏      | 212/661 [2:10:20<4:58:29, 39.89s/it]

88


Scoring with LLM:  32%|███▏      | 213/661 [2:10:53<4:43:16, 37.94s/it]

65


Scoring with LLM:  32%|███▏      | 214/661 [2:11:30<4:39:27, 37.51s/it]

95


Scoring with LLM:  33%|███▎      | 215/661 [2:12:12<4:50:01, 39.02s/it]

85


Scoring with LLM:  33%|███▎      | 216/661 [2:12:48<4:42:09, 38.04s/it]

95


Scoring with LLM:  33%|███▎      | 217/661 [2:13:19<4:25:33, 35.89s/it]

75


Scoring with LLM:  33%|███▎      | 218/661 [2:13:50<4:13:19, 34.31s/it]

85


Scoring with LLM:  33%|███▎      | 219/661 [2:14:20<4:04:57, 33.25s/it]

85


Scoring with LLM:  33%|███▎      | 220/661 [2:14:53<4:02:11, 32.95s/it]

85


Scoring with LLM:  33%|███▎      | 221/661 [2:15:15<3:38:53, 29.85s/it]

75


Scoring with LLM:  34%|███▎      | 222/661 [2:15:33<3:12:50, 26.36s/it]

65


Scoring with LLM:  34%|███▎      | 223/661 [2:15:57<3:05:44, 25.44s/it]

65


Scoring with LLM:  34%|███▍      | 224/661 [2:16:24<3:09:50, 26.07s/it]

82


Scoring with LLM:  34%|███▍      | 225/661 [2:16:45<2:57:48, 24.47s/it]

95


Scoring with LLM:  34%|███▍      | 226/661 [2:17:22<3:24:44, 28.24s/it]

85


Scoring with LLM:  34%|███▍      | 227/661 [2:17:54<3:33:08, 29.47s/it]

88


Scoring with LLM:  34%|███▍      | 228/661 [2:18:29<3:44:07, 31.06s/it]

85


Scoring with LLM:  35%|███▍      | 229/661 [2:19:08<4:00:50, 33.45s/it]

65


Scoring with LLM:  35%|███▍      | 230/661 [2:19:52<4:21:34, 36.41s/it]

100


Scoring with LLM:  35%|███▍      | 231/661 [2:20:08<3:38:09, 30.44s/it]

85


Scoring with LLM:  35%|███▌      | 232/661 [2:20:44<3:48:47, 32.00s/it]

55


Scoring with LLM:  35%|███▌      | 233/661 [2:21:08<3:32:35, 29.80s/it]

70


Scoring with LLM:  35%|███▌      | 234/661 [2:21:34<3:23:51, 28.65s/it]

85


Scoring with LLM:  36%|███▌      | 235/661 [2:22:01<3:19:40, 28.12s/it]

75


Scoring with LLM:  36%|███▌      | 236/661 [2:22:23<3:06:20, 26.31s/it]

85


Scoring with LLM:  36%|███▌      | 237/661 [2:22:49<3:03:47, 26.01s/it]

90


Scoring with LLM:  36%|███▌      | 238/661 [2:23:13<2:59:12, 25.42s/it]

85


Scoring with LLM:  36%|███▌      | 239/661 [2:23:37<2:56:34, 25.11s/it]

88


Scoring with LLM:  36%|███▋      | 240/661 [2:23:57<2:44:58, 23.51s/it]

85


Scoring with LLM:  36%|███▋      | 241/661 [2:24:30<3:04:32, 26.36s/it]

65


Scoring with LLM:  37%|███▋      | 242/661 [2:24:51<2:52:36, 24.72s/it]

55


Scoring with LLM:  37%|███▋      | 243/661 [2:25:20<3:01:11, 26.01s/it]

85


Scoring with LLM:  37%|███▋      | 244/661 [2:25:47<3:02:33, 26.27s/it]

85


Scoring with LLM:  37%|███▋      | 245/661 [2:26:10<2:55:45, 25.35s/it]

85


Scoring with LLM:  37%|███▋      | 246/661 [2:26:39<3:02:42, 26.41s/it]

85


Scoring with LLM:  37%|███▋      | 247/661 [2:27:06<3:03:10, 26.55s/it]

85


Scoring with LLM:  38%|███▊      | 248/661 [2:27:28<2:55:07, 25.44s/it]

75


Scoring with LLM:  38%|███▊      | 249/661 [2:28:02<3:11:00, 27.82s/it]

85


Scoring with LLM:  38%|███▊      | 250/661 [2:28:19<2:49:14, 24.71s/it]

75


Scoring with LLM:  38%|███▊      | 251/661 [2:28:49<2:58:49, 26.17s/it]

85


Scoring with LLM:  38%|███▊      | 252/661 [2:29:16<3:00:27, 26.47s/it]

70


Scoring with LLM:  38%|███▊      | 253/661 [2:29:44<3:03:06, 26.93s/it]

88


Scoring with LLM:  38%|███▊      | 254/661 [2:30:04<2:48:29, 24.84s/it]

80


Scoring with LLM:  39%|███▊      | 255/661 [2:30:29<2:47:52, 24.81s/it]

70


Scoring with LLM:  39%|███▊      | 256/661 [2:30:53<2:45:44, 24.56s/it]

85


Scoring with LLM:  39%|███▉      | 257/661 [2:31:16<2:42:20, 24.11s/it]

75


Scoring with LLM:  39%|███▉      | 258/661 [2:31:39<2:40:01, 23.82s/it]

95


Scoring with LLM:  39%|███▉      | 259/661 [2:32:12<2:57:49, 26.54s/it]

85


Scoring with LLM:  39%|███▉      | 260/661 [2:32:33<2:47:08, 25.01s/it]

85


Scoring with LLM:  39%|███▉      | 261/661 [2:32:54<2:38:50, 23.83s/it]

85


Scoring with LLM:  40%|███▉      | 262/661 [2:33:26<2:55:04, 26.33s/it]

95


Scoring with LLM:  40%|███▉      | 263/661 [2:33:44<2:36:38, 23.61s/it]

85


Scoring with LLM:  40%|███▉      | 264/661 [2:34:06<2:33:13, 23.16s/it]

85


Scoring with LLM:  40%|████      | 265/661 [2:34:36<2:46:03, 25.16s/it]

85


Scoring with LLM:  40%|████      | 266/661 [2:35:20<3:23:55, 30.98s/it]

75


Scoring with LLM:  40%|████      | 267/661 [2:35:39<2:59:32, 27.34s/it]

85


Scoring with LLM:  41%|████      | 268/661 [2:36:17<3:19:37, 30.48s/it]

70


Scoring with LLM:  41%|████      | 269/661 [2:36:54<3:32:03, 32.46s/it]

90


Scoring with LLM:  41%|████      | 270/661 [2:37:23<3:24:18, 31.35s/it]

85


Scoring with LLM:  41%|████      | 271/661 [2:37:44<3:04:26, 28.38s/it]

35


Scoring with LLM:  41%|████      | 272/661 [2:38:07<2:52:56, 26.67s/it]

85


Scoring with LLM:  41%|████▏     | 273/661 [2:38:35<2:56:08, 27.24s/it]

85


Scoring with LLM:  41%|████▏     | 274/661 [2:39:00<2:50:21, 26.41s/it]

75


Scoring with LLM:  42%|████▏     | 275/661 [2:39:32<3:01:28, 28.21s/it]

80


Scoring with LLM:  42%|████▏     | 276/661 [2:40:05<3:09:35, 29.55s/it]

55


Scoring with LLM:  42%|████▏     | 277/661 [2:40:26<2:53:19, 27.08s/it]

88


Scoring with LLM:  42%|████▏     | 278/661 [2:40:53<2:51:31, 26.87s/it]

85


Scoring with LLM:  42%|████▏     | 279/661 [2:41:10<2:33:42, 24.14s/it]

80


Scoring with LLM:  42%|████▏     | 280/661 [2:41:45<2:52:24, 27.15s/it]

85


Scoring with LLM:  43%|████▎     | 281/661 [2:42:18<3:03:49, 29.03s/it]

85


Scoring with LLM:  43%|████▎     | 282/661 [2:42:40<2:49:57, 26.91s/it]

88


Scoring with LLM:  43%|████▎     | 283/661 [2:43:08<2:50:49, 27.11s/it]

85


Scoring with LLM:  43%|████▎     | 284/661 [2:43:34<2:49:01, 26.90s/it]

85


Scoring with LLM:  43%|████▎     | 285/661 [2:43:55<2:38:28, 25.29s/it]

85


Scoring with LLM:  43%|████▎     | 286/661 [2:44:19<2:33:58, 24.64s/it]

75


Scoring with LLM:  43%|████▎     | 287/661 [2:44:52<2:50:54, 27.42s/it]

90


Scoring with LLM:  44%|████▎     | 288/661 [2:45:16<2:43:21, 26.28s/it]

85


Scoring with LLM:  44%|████▎     | 289/661 [2:45:48<2:52:51, 27.88s/it]

85


Scoring with LLM:  44%|████▍     | 290/661 [2:46:10<2:41:19, 26.09s/it]

85


Scoring with LLM:  44%|████▍     | 291/661 [2:46:37<2:42:37, 26.37s/it]

75


Scoring with LLM:  44%|████▍     | 292/661 [2:47:06<2:46:56, 27.14s/it]

85


Scoring with LLM:  44%|████▍     | 293/661 [2:47:29<2:39:51, 26.06s/it]

85


Scoring with LLM:  44%|████▍     | 294/661 [2:47:59<2:45:52, 27.12s/it]

92


Scoring with LLM:  45%|████▍     | 295/661 [2:48:19<2:32:16, 24.96s/it]

100


Scoring with LLM:  45%|████▍     | 296/661 [2:48:45<2:34:13, 25.35s/it]

95


Scoring with LLM:  45%|████▍     | 297/661 [2:49:21<2:53:40, 28.63s/it]

85


Scoring with LLM:  45%|████▌     | 298/661 [2:49:58<3:08:16, 31.12s/it]

85


Scoring with LLM:  45%|████▌     | 299/661 [2:50:25<3:00:46, 29.96s/it]

75


Scoring with LLM:  45%|████▌     | 300/661 [2:50:54<2:58:41, 29.70s/it]

95


Scoring with LLM:  46%|████▌     | 301/661 [2:51:16<2:42:41, 27.12s/it]

85


Scoring with LLM:  46%|████▌     | 302/661 [2:51:57<3:08:03, 31.43s/it]

90


Scoring with LLM:  46%|████▌     | 303/661 [2:52:27<3:04:41, 30.96s/it]

95


❌ Error at row 303, attempt 1: 'overall_score_100'


Scoring with LLM:  46%|████▌     | 304/661 [2:53:20<3:44:24, 37.71s/it]

75


Scoring with LLM:  46%|████▌     | 305/661 [2:53:42<3:14:49, 32.83s/it]

85


Scoring with LLM:  46%|████▋     | 306/661 [2:54:14<3:13:00, 32.62s/it]

85


Scoring with LLM:  46%|████▋     | 307/661 [2:55:06<3:47:39, 38.59s/it]

95


Scoring with LLM:  47%|████▋     | 308/661 [2:55:49<3:53:22, 39.67s/it]

95


Scoring with LLM:  47%|████▋     | 309/661 [2:56:25<3:46:29, 38.61s/it]

75


Scoring with LLM:  47%|████▋     | 310/661 [2:57:04<3:47:19, 38.86s/it]

80


Scoring with LLM:  47%|████▋     | 311/661 [2:57:39<3:40:10, 37.74s/it]

88


❌ Error at row 311, attempt 1: 'overall_score_100'


Scoring with LLM:  47%|████▋     | 312/661 [2:58:51<4:37:56, 47.78s/it]

85


❌ Error at row 312, attempt 1: 'overall_score_100'


Scoring with LLM:  47%|████▋     | 313/661 [3:00:46<6:35:23, 68.17s/it]

85


Scoring with LLM:  48%|████▊     | 314/661 [3:01:34<5:58:37, 62.01s/it]

85


Scoring with LLM:  48%|████▊     | 315/661 [3:02:19<5:27:26, 56.78s/it]

65


Scoring with LLM:  48%|████▊     | 316/661 [3:02:54<4:50:25, 50.51s/it]

90


Scoring with LLM:  48%|████▊     | 317/661 [3:03:30<4:23:53, 46.03s/it]

85


Scoring with LLM:  48%|████▊     | 318/661 [3:04:11<4:15:16, 44.65s/it]

85


Scoring with LLM:  48%|████▊     | 319/661 [3:04:43<3:52:46, 40.84s/it]

88


Scoring with LLM:  48%|████▊     | 320/661 [3:05:18<3:41:15, 38.93s/it]

65


Scoring with LLM:  49%|████▊     | 321/661 [3:06:03<3:51:35, 40.87s/it]

85


Scoring with LLM:  49%|████▊     | 322/661 [3:06:34<3:33:40, 37.82s/it]

80


Scoring with LLM:  49%|████▉     | 323/661 [3:07:15<3:37:53, 38.68s/it]

75


Scoring with LLM:  49%|████▉     | 324/661 [3:08:15<4:13:09, 45.07s/it]

80


Scoring with LLM:  49%|████▉     | 325/661 [3:08:53<4:01:28, 43.12s/it]

65


Scoring with LLM:  49%|████▉     | 326/661 [3:09:33<3:55:15, 42.14s/it]

75


Scoring with LLM:  49%|████▉     | 327/661 [3:10:10<3:46:13, 40.64s/it]

95


Scoring with LLM:  50%|████▉     | 328/661 [3:10:45<3:35:50, 38.89s/it]

90


Scoring with LLM:  50%|████▉     | 329/661 [3:11:16<3:21:25, 36.40s/it]

25


Scoring with LLM:  50%|████▉     | 330/661 [3:11:57<3:29:10, 37.92s/it]

85


Scoring with LLM:  50%|█████     | 331/661 [3:12:35<3:29:21, 38.07s/it]

85


Scoring with LLM:  50%|█████     | 332/661 [3:13:17<3:33:46, 38.99s/it]

85


Scoring with LLM:  50%|█████     | 333/661 [3:13:55<3:32:35, 38.89s/it]

85


❌ Error at row 333, attempt 1: 'overall_score_100'


Scoring with LLM:  51%|█████     | 334/661 [3:14:59<4:13:09, 46.45s/it]

88


Scoring with LLM:  51%|█████     | 335/661 [3:15:49<4:18:14, 47.53s/it]

85


❌ Error at row 335, attempt 1: 'overall_score_100'


Scoring with LLM:  51%|█████     | 336/661 [3:16:57<4:50:08, 53.57s/it]

75


Scoring with LLM:  51%|█████     | 337/661 [3:17:33<4:21:23, 48.41s/it]

95


Scoring with LLM:  51%|█████     | 338/661 [3:18:06<3:55:34, 43.76s/it]

85


Scoring with LLM:  51%|█████▏    | 339/661 [3:18:54<4:01:34, 45.01s/it]

72


Scoring with LLM:  51%|█████▏    | 340/661 [3:19:33<3:51:10, 43.21s/it]

95


Scoring with LLM:  52%|█████▏    | 341/661 [3:20:12<3:43:21, 41.88s/it]

92


Scoring with LLM:  52%|█████▏    | 342/661 [3:20:45<3:28:54, 39.29s/it]

95


Scoring with LLM:  52%|█████▏    | 343/661 [3:21:23<3:25:47, 38.83s/it]

85


Scoring with LLM:  52%|█████▏    | 344/661 [3:22:00<3:22:21, 38.30s/it]

85


Scoring with LLM:  52%|█████▏    | 345/661 [3:22:31<3:10:31, 36.18s/it]

85


Scoring with LLM:  52%|█████▏    | 346/661 [3:22:56<2:52:01, 32.77s/it]

85


Scoring with LLM:  52%|█████▏    | 347/661 [3:23:23<2:42:52, 31.12s/it]

88


Scoring with LLM:  53%|█████▎    | 348/661 [3:24:00<2:50:56, 32.77s/it]

75


Scoring with LLM:  53%|█████▎    | 349/661 [3:24:31<2:47:12, 32.16s/it]

75


Scoring with LLM:  53%|█████▎    | 350/661 [3:24:48<2:23:58, 27.78s/it]

75


Scoring with LLM:  53%|█████▎    | 351/661 [3:25:11<2:16:16, 26.37s/it]

85


Scoring with LLM:  53%|█████▎    | 352/661 [3:25:39<2:18:09, 26.83s/it]

95


Scoring with LLM:  53%|█████▎    | 353/661 [3:26:03<2:13:32, 26.01s/it]

85


Scoring with LLM:  54%|█████▎    | 354/661 [3:26:30<2:14:38, 26.31s/it]

65


Scoring with LLM:  54%|█████▎    | 355/661 [3:26:52<2:07:01, 24.91s/it]

80


Scoring with LLM:  54%|█████▍    | 356/661 [3:27:12<1:58:19, 23.28s/it]

80


Scoring with LLM:  54%|█████▍    | 357/661 [3:27:44<2:12:08, 26.08s/it]

85


❌ Error at row 357, attempt 1: 'overall_score_100'


Scoring with LLM:  54%|█████▍    | 358/661 [3:28:49<3:10:28, 37.72s/it]

55


Scoring with LLM:  54%|█████▍    | 359/661 [3:29:23<3:03:45, 36.51s/it]

85


Scoring with LLM:  54%|█████▍    | 360/661 [3:29:42<2:37:20, 31.36s/it]

65


Scoring with LLM:  55%|█████▍    | 361/661 [3:30:21<2:48:32, 33.71s/it]

90


Scoring with LLM:  55%|█████▍    | 362/661 [3:30:52<2:44:01, 32.91s/it]

75


Scoring with LLM:  55%|█████▍    | 363/661 [3:31:10<2:20:43, 28.33s/it]

88


Scoring with LLM:  55%|█████▌    | 364/661 [3:31:28<2:04:35, 25.17s/it]

85


Scoring with LLM:  55%|█████▌    | 365/661 [3:31:48<1:56:29, 23.61s/it]

85


Scoring with LLM:  55%|█████▌    | 366/661 [3:32:24<2:15:00, 27.46s/it]

75


Scoring with LLM:  56%|█████▌    | 367/661 [3:32:46<2:06:01, 25.72s/it]

85


Scoring with LLM:  56%|█████▌    | 368/661 [3:33:19<2:16:03, 27.86s/it]

95


Scoring with LLM:  56%|█████▌    | 369/661 [3:33:40<2:05:20, 25.75s/it]

80


Scoring with LLM:  56%|█████▌    | 370/661 [3:34:09<2:10:56, 27.00s/it]

85


Scoring with LLM:  56%|█████▌    | 371/661 [3:34:27<1:56:22, 24.08s/it]

65


Scoring with LLM:  56%|█████▋    | 372/661 [3:35:01<2:10:21, 27.06s/it]

85


Scoring with LLM:  56%|█████▋    | 373/661 [3:35:21<1:59:29, 24.90s/it]

85


Scoring with LLM:  57%|█████▋    | 374/661 [3:35:44<1:56:25, 24.34s/it]

88


Scoring with LLM:  57%|█████▋    | 375/661 [3:36:25<2:20:39, 29.51s/it]

85


Scoring with LLM:  57%|█████▋    | 376/661 [3:37:00<2:27:43, 31.10s/it]

75


Scoring with LLM:  57%|█████▋    | 377/661 [3:37:28<2:22:26, 30.09s/it]

85


Scoring with LLM:  57%|█████▋    | 378/661 [3:37:50<2:10:43, 27.71s/it]

65


Scoring with LLM:  57%|█████▋    | 379/661 [3:38:15<2:05:55, 26.79s/it]

70


Scoring with LLM:  57%|█████▋    | 380/661 [3:38:54<2:23:53, 30.72s/it]

90


Scoring with LLM:  58%|█████▊    | 381/661 [3:39:15<2:08:55, 27.63s/it]

75


Scoring with LLM:  58%|█████▊    | 382/661 [3:39:43<2:08:41, 27.68s/it]

85


Scoring with LLM:  58%|█████▊    | 383/661 [3:40:14<2:13:18, 28.77s/it]

75


❌ Error at row 383, attempt 1: 'overall_score_100'


Scoring with LLM:  58%|█████▊    | 384/661 [3:41:04<2:42:59, 35.31s/it]

82


❌ Error at row 384, attempt 1: 'overall_score_100'


Scoring with LLM:  58%|█████▊    | 385/661 [3:42:12<3:26:45, 44.95s/it]

85


Scoring with LLM:  58%|█████▊    | 386/661 [3:42:46<3:11:19, 41.74s/it]

85


Scoring with LLM:  59%|█████▊    | 387/661 [3:43:03<2:36:01, 34.17s/it]

88


Scoring with LLM:  59%|█████▊    | 388/661 [3:43:23<2:16:15, 29.95s/it]

85


Scoring with LLM:  59%|█████▉    | 389/661 [3:43:59<2:24:18, 31.83s/it]

85


Scoring with LLM:  59%|█████▉    | 390/661 [3:44:33<2:26:48, 32.50s/it]

85


Scoring with LLM:  59%|█████▉    | 391/661 [3:45:06<2:26:32, 32.57s/it]

65


Scoring with LLM:  59%|█████▉    | 392/661 [3:45:28<2:12:35, 29.58s/it]

70


Scoring with LLM:  59%|█████▉    | 393/661 [3:45:53<2:05:13, 28.04s/it]

75


❌ Error at row 393, attempt 1: 'overall_score_100'


Scoring with LLM:  60%|█████▉    | 394/661 [3:46:56<2:51:25, 38.52s/it]

80


Scoring with LLM:  60%|█████▉    | 395/661 [3:47:22<2:33:48, 34.70s/it]

75


Scoring with LLM:  60%|█████▉    | 396/661 [3:48:09<2:50:23, 38.58s/it]

85


Scoring with LLM:  60%|██████    | 397/661 [3:48:46<2:46:44, 37.90s/it]

80


Scoring with LLM:  60%|██████    | 398/661 [3:49:16<2:36:38, 35.74s/it]

55


❌ Error at row 398, attempt 1: No JSON block found


Scoring with LLM:  60%|██████    | 399/661 [4:16:23<37:20:46, 513.16s/it]

85


Scoring with LLM:  61%|██████    | 400/661 [4:16:44<26:29:29, 365.40s/it]

85


Scoring with LLM:  61%|██████    | 401/661 [4:17:15<19:08:42, 265.09s/it]

85


Scoring with LLM:  61%|██████    | 402/661 [4:17:36<13:47:38, 191.73s/it]

85


Scoring with LLM:  61%|██████    | 403/661 [4:18:19<10:32:59, 147.21s/it]

95


Scoring with LLM:  61%|██████    | 404/661 [4:18:49<7:59:41, 111.99s/it] 

95


Scoring with LLM:  61%|██████▏   | 405/661 [4:19:10<6:01:41, 84.77s/it] 

85


Scoring with LLM:  61%|██████▏   | 406/661 [4:19:36<4:45:33, 67.19s/it]

85


Scoring with LLM:  62%|██████▏   | 407/661 [4:19:55<3:42:25, 52.54s/it]

55


Scoring with LLM:  62%|██████▏   | 408/661 [4:20:20<3:07:19, 44.43s/it]

75


Scoring with LLM:  62%|██████▏   | 409/661 [4:20:42<2:38:35, 37.76s/it]

88


Scoring with LLM:  62%|██████▏   | 410/661 [4:21:09<2:23:33, 34.32s/it]

100


Scoring with LLM:  62%|██████▏   | 411/661 [4:21:34<2:11:34, 31.58s/it]

85


Scoring with LLM:  62%|██████▏   | 412/661 [4:22:08<2:13:54, 32.27s/it]

95


Scoring with LLM:  62%|██████▏   | 413/661 [4:22:28<1:58:22, 28.64s/it]

85


Scoring with LLM:  63%|██████▎   | 414/661 [4:23:02<2:05:01, 30.37s/it]

75


Scoring with LLM:  63%|██████▎   | 415/661 [4:23:25<1:55:14, 28.11s/it]

95


Scoring with LLM:  63%|██████▎   | 416/661 [4:23:53<1:54:37, 28.07s/it]

85


Scoring with LLM:  63%|██████▎   | 417/661 [4:24:07<1:37:18, 23.93s/it]

85


Scoring with LLM:  63%|██████▎   | 418/661 [4:24:25<1:29:51, 22.19s/it]

75


Scoring with LLM:  63%|██████▎   | 419/661 [4:24:55<1:38:05, 24.32s/it]

95


Scoring with LLM:  64%|██████▎   | 420/661 [4:25:10<1:27:26, 21.77s/it]

85


Scoring with LLM:  64%|██████▎   | 421/661 [4:25:35<1:30:45, 22.69s/it]

90


Scoring with LLM:  64%|██████▍   | 422/661 [4:26:01<1:33:41, 23.52s/it]

95


Scoring with LLM:  64%|██████▍   | 423/661 [4:26:42<1:54:40, 28.91s/it]

75


Scoring with LLM:  64%|██████▍   | 424/661 [4:27:22<2:06:42, 32.08s/it]

85


Scoring with LLM:  64%|██████▍   | 425/661 [4:27:59<2:11:48, 33.51s/it]

70


Scoring with LLM:  64%|██████▍   | 426/661 [4:28:16<1:52:37, 28.75s/it]

95


Scoring with LLM:  65%|██████▍   | 427/661 [4:28:38<1:43:32, 26.55s/it]

75


Scoring with LLM:  65%|██████▍   | 428/661 [4:29:05<1:43:43, 26.71s/it]

65


Scoring with LLM:  65%|██████▍   | 429/661 [4:29:29<1:40:43, 26.05s/it]

95


Scoring with LLM:  65%|██████▌   | 430/661 [4:30:00<1:45:26, 27.39s/it]

95


Scoring with LLM:  65%|██████▌   | 431/661 [4:30:24<1:41:19, 26.43s/it]

88


Scoring with LLM:  65%|██████▌   | 432/661 [4:30:46<1:35:59, 25.15s/it]

85


Scoring with LLM:  66%|██████▌   | 433/661 [4:31:15<1:39:25, 26.16s/it]

95


Scoring with LLM:  66%|██████▌   | 434/661 [4:31:34<1:31:18, 24.13s/it]

85


Scoring with LLM:  66%|██████▌   | 435/661 [4:31:54<1:25:51, 22.79s/it]

85


Scoring with LLM:  66%|██████▌   | 436/661 [4:32:25<1:35:24, 25.44s/it]

90


Scoring with LLM:  66%|██████▌   | 437/661 [4:32:44<1:26:54, 23.28s/it]

70


Scoring with LLM:  66%|██████▋   | 438/661 [4:33:02<1:20:37, 21.69s/it]

95


Scoring with LLM:  66%|██████▋   | 439/661 [4:33:18<1:13:55, 19.98s/it]

85


Scoring with LLM:  67%|██████▋   | 440/661 [4:33:51<1:28:59, 24.16s/it]

45


Scoring with LLM:  67%|██████▋   | 441/661 [4:34:25<1:39:07, 27.03s/it]

90


Scoring with LLM:  67%|██████▋   | 442/661 [4:34:41<1:26:55, 23.81s/it]

95


Scoring with LLM:  67%|██████▋   | 443/661 [4:35:03<1:23:54, 23.09s/it]

95


Scoring with LLM:  67%|██████▋   | 444/661 [4:35:38<1:36:35, 26.71s/it]

92


Scoring with LLM:  67%|██████▋   | 445/661 [4:36:08<1:40:00, 27.78s/it]

75


❌ Error at row 445, attempt 1: 'overall_score_100'


Scoring with LLM:  67%|██████▋   | 446/661 [4:37:10<2:16:24, 38.07s/it]

85


Scoring with LLM:  68%|██████▊   | 447/661 [4:37:37<2:03:22, 34.59s/it]

55


Scoring with LLM:  68%|██████▊   | 448/661 [4:38:00<1:50:22, 31.09s/it]

85


❌ Error at row 448, attempt 1: No JSON block found


Scoring with LLM:  68%|██████▊   | 449/661 [4:39:43<3:06:05, 52.67s/it]

75


Scoring with LLM:  68%|██████▊   | 450/661 [4:40:15<2:43:21, 46.45s/it]

45


❌ Error at row 450, attempt 1: 'overall_score_100'


Scoring with LLM:  68%|██████▊   | 451/661 [4:41:33<3:16:25, 56.12s/it]

75


Scoring with LLM:  68%|██████▊   | 452/661 [4:41:57<2:41:00, 46.22s/it]

85


Scoring with LLM:  69%|██████▊   | 453/661 [4:42:27<2:23:27, 41.38s/it]

90


Scoring with LLM:  69%|██████▊   | 454/661 [4:42:40<1:53:35, 32.93s/it]

85


Scoring with LLM:  69%|██████▉   | 455/661 [4:42:53<1:32:52, 27.05s/it]

75


Scoring with LLM:  69%|██████▉   | 456/661 [4:43:18<1:29:48, 26.28s/it]

88


❌ Error at row 456, attempt 1: 'overall_score_100'


❌ Error at row 456, attempt 2: 'overall_score_100'


Scoring with LLM:  69%|██████▉   | 457/661 [4:44:39<2:25:58, 42.93s/it]

65


Scoring with LLM:  69%|██████▉   | 458/661 [4:44:59<2:01:00, 35.76s/it]

80


Scoring with LLM:  69%|██████▉   | 459/661 [4:45:18<1:44:20, 30.99s/it]

85


Scoring with LLM:  70%|██████▉   | 460/661 [4:45:55<1:49:18, 32.63s/it]

85


Scoring with LLM:  70%|██████▉   | 461/661 [4:46:34<1:55:02, 34.51s/it]

65


Scoring with LLM:  70%|██████▉   | 462/661 [4:47:05<1:50:53, 33.44s/it]

65


Scoring with LLM:  70%|███████   | 463/661 [4:47:25<1:37:39, 29.59s/it]

45


Scoring with LLM:  70%|███████   | 464/661 [4:47:50<1:32:27, 28.16s/it]

65


Scoring with LLM:  70%|███████   | 465/661 [4:48:19<1:32:38, 28.36s/it]

95


Scoring with LLM:  70%|███████   | 466/661 [4:48:39<1:24:14, 25.92s/it]

88


Scoring with LLM:  71%|███████   | 467/661 [4:49:01<1:19:23, 24.55s/it]

80


❌ Error at row 467, attempt 1: 'overall_score_100'


❌ Error at row 467, attempt 2: No JSON block found


Scoring with LLM:  71%|███████   | 468/661 [4:51:14<3:03:51, 57.16s/it]

75


Scoring with LLM:  71%|███████   | 469/661 [4:51:59<2:51:19, 53.54s/it]

85


Scoring with LLM:  71%|███████   | 470/661 [4:52:30<2:28:55, 46.78s/it]

85


Scoring with LLM:  71%|███████▏  | 471/661 [4:52:47<2:00:23, 38.02s/it]

65


Scoring with LLM:  71%|███████▏  | 472/661 [4:53:11<1:45:46, 33.58s/it]

75


Scoring with LLM:  72%|███████▏  | 473/661 [4:53:47<1:48:12, 34.53s/it]

75


Scoring with LLM:  72%|███████▏  | 474/661 [4:54:24<1:49:18, 35.07s/it]

80


Scoring with LLM:  72%|███████▏  | 475/661 [4:54:48<1:38:37, 31.81s/it]

85


Scoring with LLM:  72%|███████▏  | 476/661 [4:55:17<1:35:52, 31.09s/it]

92


Scoring with LLM:  72%|███████▏  | 477/661 [4:55:40<1:27:42, 28.60s/it]

80


Scoring with LLM:  72%|███████▏  | 478/661 [4:56:25<1:41:43, 33.35s/it]

95


❌ Error at row 478, attempt 1: 'overall_score_100'


Scoring with LLM:  72%|███████▏  | 479/661 [4:57:19<2:00:38, 39.77s/it]

85


Scoring with LLM:  73%|███████▎  | 480/661 [4:57:59<1:59:44, 39.69s/it]

95


Scoring with LLM:  73%|███████▎  | 481/661 [4:58:33<1:53:47, 37.93s/it]

88


Scoring with LLM:  73%|███████▎  | 482/661 [4:58:53<1:37:00, 32.52s/it]

80


Scoring with LLM:  73%|███████▎  | 483/661 [4:59:33<1:43:25, 34.86s/it]

75


Scoring with LLM:  73%|███████▎  | 484/661 [5:00:05<1:40:23, 34.03s/it]

95


Scoring with LLM:  73%|███████▎  | 485/661 [5:00:28<1:29:48, 30.61s/it]

92


Scoring with LLM:  74%|███████▎  | 486/661 [5:01:02<1:32:22, 31.67s/it]

85


Scoring with LLM:  74%|███████▎  | 487/661 [5:01:37<1:35:04, 32.79s/it]

65


Scoring with LLM:  74%|███████▍  | 488/661 [5:02:01<1:26:32, 30.01s/it]

80


Scoring with LLM:  74%|███████▍  | 489/661 [5:02:27<1:22:39, 28.83s/it]

85


❌ Error at row 489, attempt 1: No JSON block found


❌ Error at row 489, attempt 2: No JSON block found


Scoring with LLM:  74%|███████▍  | 490/661 [5:30:06<24:36:14, 517.98s/it]

95


Scoring with LLM:  74%|███████▍  | 491/661 [5:30:44<17:39:57, 374.10s/it]

95


Scoring with LLM:  74%|███████▍  | 492/661 [5:31:14<12:42:46, 270.81s/it]

95


❌ Error at row 492, attempt 1: No JSON block found


Scoring with LLM:  75%|███████▍  | 493/661 [5:36:54<13:36:16, 291.52s/it]

95


❌ Error at row 493, attempt 1: 'overall_score_100'


Scoring with LLM:  75%|███████▍  | 494/661 [5:37:43<10:09:07, 218.85s/it]

100


Scoring with LLM:  75%|███████▍  | 495/661 [5:38:09<7:25:00, 160.85s/it] 

95


Scoring with LLM:  75%|███████▌  | 496/661 [5:38:43<5:38:04, 122.94s/it]

85


Scoring with LLM:  75%|███████▌  | 497/661 [5:39:15<4:21:05, 95.52s/it] 

75


Scoring with LLM:  75%|███████▌  | 498/661 [5:39:43<3:24:25, 75.25s/it]

95


Scoring with LLM:  75%|███████▌  | 499/661 [5:40:15<2:48:34, 62.43s/it]

70


Scoring with LLM:  76%|███████▌  | 500/661 [5:40:35<2:13:11, 49.64s/it]

95


Scoring with LLM:  76%|███████▌  | 501/661 [5:41:10<2:00:49, 45.31s/it]

85


Scoring with LLM:  76%|███████▌  | 502/661 [5:41:35<1:43:22, 39.01s/it]

88


Scoring with LLM:  76%|███████▌  | 503/661 [5:42:02<1:33:45, 35.61s/it]

95


Scoring with LLM:  76%|███████▌  | 504/661 [5:42:26<1:23:56, 32.08s/it]

20


Scoring with LLM:  76%|███████▋  | 505/661 [5:43:10<1:32:29, 35.57s/it]

0


Scoring with LLM:  77%|███████▋  | 506/661 [5:43:49<1:34:46, 36.69s/it]

30


Scoring with LLM:  77%|███████▋  | 507/661 [5:44:15<1:25:32, 33.33s/it]

20


Scoring with LLM:  77%|███████▋  | 508/661 [5:44:45<1:22:20, 32.29s/it]

0


Scoring with LLM:  77%|███████▋  | 509/661 [5:45:13<1:18:33, 31.01s/it]

10


Scoring with LLM:  77%|███████▋  | 510/661 [5:45:41<1:16:14, 30.30s/it]

85


Scoring with LLM:  77%|███████▋  | 511/661 [5:46:12<1:16:15, 30.50s/it]

88


Scoring with LLM:  77%|███████▋  | 512/661 [5:46:37<1:11:09, 28.65s/it]

0


Scoring with LLM:  78%|███████▊  | 513/661 [5:47:04<1:10:05, 28.41s/it]

20


Scoring with LLM:  78%|███████▊  | 514/661 [5:47:31<1:08:23, 27.91s/it]

20


Scoring with LLM:  78%|███████▊  | 515/661 [5:48:05<1:12:04, 29.62s/it]

0


Scoring with LLM:  78%|███████▊  | 516/661 [5:48:22<1:02:29, 25.86s/it]

0


Scoring with LLM:  78%|███████▊  | 517/661 [5:49:05<1:14:11, 30.91s/it]

0


Scoring with LLM:  78%|███████▊  | 518/661 [5:49:33<1:11:58, 30.20s/it]

95


Scoring with LLM:  79%|███████▊  | 519/661 [5:49:56<1:06:06, 27.93s/it]

95


Scoring with LLM:  79%|███████▊  | 520/661 [5:50:22<1:04:48, 27.58s/it]

100


Scoring with LLM:  79%|███████▉  | 521/661 [5:50:57<1:09:14, 29.68s/it]

85


Scoring with LLM:  79%|███████▉  | 522/661 [5:51:29<1:10:29, 30.43s/it]

75


Scoring with LLM:  79%|███████▉  | 523/661 [5:51:50<1:03:00, 27.39s/it]

65


Scoring with LLM:  79%|███████▉  | 524/661 [5:52:18<1:03:31, 27.82s/it]

95


❌ Error at row 524, attempt 1: 'overall_score_100'


Scoring with LLM:  79%|███████▉  | 525/661 [5:53:23<1:28:24, 39.01s/it]

100


Scoring with LLM:  80%|███████▉  | 526/661 [5:53:51<1:20:12, 35.65s/it]

75


Scoring with LLM:  80%|███████▉  | 527/661 [5:54:14<1:10:50, 31.72s/it]

80


Scoring with LLM:  80%|███████▉  | 528/661 [5:55:09<1:25:59, 38.79s/it]

95


Scoring with LLM:  80%|████████  | 529/661 [5:55:42<1:21:21, 36.98s/it]

45


Scoring with LLM:  80%|████████  | 530/661 [5:56:01<1:09:10, 31.68s/it]

75


Scoring with LLM:  80%|████████  | 531/661 [5:56:35<1:09:45, 32.19s/it]

35


❌ Error at row 531, attempt 1: 'overall_score_100'


❌ Error at row 531, attempt 2: 'overall_score_100'


Scoring with LLM:  80%|████████  | 532/661 [5:58:10<1:50:10, 51.25s/it]

10


Scoring with LLM:  81%|████████  | 533/661 [5:58:33<1:30:49, 42.57s/it]

10


Scoring with LLM:  81%|████████  | 534/661 [5:59:04<1:23:10, 39.30s/it]

65


Scoring with LLM:  81%|████████  | 535/661 [5:59:36<1:17:45, 37.03s/it]

45


Scoring with LLM:  81%|████████  | 536/661 [6:00:06<1:12:27, 34.78s/it]

80


Scoring with LLM:  81%|████████  | 537/661 [6:00:24<1:01:33, 29.79s/it]

85


Scoring with LLM:  81%|████████▏ | 538/661 [6:00:59<1:04:20, 31.39s/it]

95


❌ Error at row 538, attempt 1: 'overall_score_100'


❌ Error at row 538, attempt 2: 'overall_score_100'


Scoring with LLM:  82%|████████▏ | 539/661 [6:02:08<1:27:09, 42.87s/it]

55


Scoring with LLM:  82%|████████▏ | 540/661 [6:02:31<1:13:50, 36.62s/it]

85


Scoring with LLM:  82%|████████▏ | 541/661 [6:03:01<1:09:16, 34.64s/it]

35


❌ Error at row 541, attempt 1: 'overall_score_100'


Scoring with LLM:  82%|████████▏ | 542/661 [6:04:28<1:40:01, 50.43s/it]

20


Scoring with LLM:  82%|████████▏ | 543/661 [6:05:08<1:33:24, 47.49s/it]

85


Scoring with LLM:  82%|████████▏ | 544/661 [6:05:56<1:32:29, 47.43s/it]

20


Scoring with LLM:  82%|████████▏ | 545/661 [6:06:47<1:33:51, 48.55s/it]

20


Scoring with LLM:  83%|████████▎ | 546/661 [6:07:29<1:29:20, 46.61s/it]

88


❌ Error at row 546, attempt 1: 'overall_score_100'


Scoring with LLM:  83%|████████▎ | 547/661 [6:08:54<1:50:30, 58.16s/it]

65


❌ Error at row 547, attempt 1: 'overall_score_100'


Scoring with LLM:  83%|████████▎ | 548/661 [6:10:06<1:57:01, 62.14s/it]

20


Scoring with LLM:  83%|████████▎ | 549/661 [6:10:53<1:47:44, 57.72s/it]

80


Scoring with LLM:  83%|████████▎ | 550/661 [6:11:34<1:37:29, 52.70s/it]

30


Scoring with LLM:  83%|████████▎ | 551/661 [6:12:11<1:28:03, 48.04s/it]

85


Scoring with LLM:  84%|████████▎ | 552/661 [6:12:46<1:19:51, 43.96s/it]

70


❌ Error at row 552, attempt 1: 'overall_score_100'


❌ Error at row 552, attempt 2: 'overall_score_100'


❌ Error at row 552, attempt 3: 'overall_score_100'


Scoring with LLM:  84%|████████▎ | 553/661 [6:15:43<2:31:18, 84.06s/it]

88


Scoring with LLM:  84%|████████▍ | 554/661 [6:16:20<2:04:41, 69.92s/it]

20


Scoring with LLM:  84%|████████▍ | 555/661 [6:16:58<1:46:50, 60.48s/it]

20


❌ Error at row 555, attempt 1: 'overall_score_100'


Scoring with LLM:  84%|████████▍ | 556/661 [6:18:11<1:52:10, 64.10s/it]

82


Scoring with LLM:  84%|████████▍ | 557/661 [6:18:56<1:41:21, 58.48s/it]

65


Scoring with LLM:  84%|████████▍ | 558/661 [6:19:34<1:29:34, 52.18s/it]

85


❌ Error at row 558, attempt 1: 'overall_score_100'


❌ Error at row 558, attempt 2: 'overall_score_100'


❌ Error at row 558, attempt 3: 'overall_score_100'


Scoring with LLM:  85%|████████▍ | 559/661 [6:21:57<2:15:06, 79.47s/it]

85


Scoring with LLM:  85%|████████▍ | 560/661 [6:22:42<1:56:32, 69.23s/it]

100


❌ Error at row 560, attempt 1: 'overall_score_100'


Scoring with LLM:  85%|████████▍ | 561/661 [6:24:21<2:10:09, 78.09s/it]

60


❌ Error at row 561, attempt 1: 'overall_score_100'


Scoring with LLM:  85%|████████▌ | 562/661 [6:25:56<2:16:56, 83.00s/it]

40


Scoring with LLM:  85%|████████▌ | 563/661 [6:26:37<1:55:15, 70.56s/it]

30


Scoring with LLM:  85%|████████▌ | 564/661 [6:27:09<1:35:24, 59.01s/it]

20


Scoring with LLM:  85%|████████▌ | 565/661 [6:27:53<1:27:07, 54.46s/it]

80


Scoring with LLM:  86%|████████▌ | 566/661 [6:28:30<1:17:50, 49.17s/it]

80


Scoring with LLM:  86%|████████▌ | 567/661 [6:28:49<1:02:51, 40.12s/it]

70


Scoring with LLM:  86%|████████▌ | 568/661 [6:29:34<1:04:29, 41.60s/it]

40


Scoring with LLM:  86%|████████▌ | 569/661 [6:29:57<55:21, 36.11s/it]  

15


❌ Error at row 569, attempt 1: 'overall_score_100'


Scoring with LLM:  86%|████████▌ | 570/661 [6:30:53<1:03:53, 42.12s/it]

25


Scoring with LLM:  86%|████████▋ | 571/661 [6:31:20<56:18, 37.54s/it]  

65


Scoring with LLM:  87%|████████▋ | 572/661 [6:31:53<53:21, 35.97s/it]

100


Scoring with LLM:  87%|████████▋ | 573/661 [6:32:14<46:31, 31.72s/it]

95


Scoring with LLM:  87%|████████▋ | 574/661 [6:32:47<46:25, 32.02s/it]

85


Scoring with LLM:  87%|████████▋ | 575/661 [6:33:14<43:54, 30.63s/it]

85


Scoring with LLM:  87%|████████▋ | 576/661 [6:33:51<45:41, 32.26s/it]

95


Scoring with LLM:  87%|████████▋ | 577/661 [6:34:18<43:10, 30.84s/it]

85


Scoring with LLM:  87%|████████▋ | 578/661 [6:34:41<39:23, 28.47s/it]

75


Scoring with LLM:  88%|████████▊ | 579/661 [6:35:14<40:54, 29.93s/it]

95


Scoring with LLM:  88%|████████▊ | 580/661 [6:35:38<37:42, 27.93s/it]

85


Scoring with LLM:  88%|████████▊ | 581/661 [6:36:10<39:03, 29.30s/it]

88


Scoring with LLM:  88%|████████▊ | 582/661 [6:36:34<36:30, 27.72s/it]

20


Scoring with LLM:  88%|████████▊ | 583/661 [6:37:23<44:11, 34.00s/it]

90


Scoring with LLM:  88%|████████▊ | 584/661 [6:37:51<41:31, 32.36s/it]

50


Scoring with LLM:  89%|████████▊ | 585/661 [6:38:18<38:42, 30.56s/it]

65


Scoring with LLM:  89%|████████▊ | 586/661 [6:38:46<37:17, 29.84s/it]

95


Scoring with LLM:  89%|████████▉ | 587/661 [6:39:02<31:50, 25.82s/it]

85


Scoring with LLM:  89%|████████▉ | 588/661 [6:39:21<28:56, 23.79s/it]

95


Scoring with LLM:  89%|████████▉ | 589/661 [6:39:52<31:12, 26.00s/it]

85


Scoring with LLM:  89%|████████▉ | 590/661 [6:40:18<30:28, 25.76s/it]

75


Scoring with LLM:  89%|████████▉ | 591/661 [6:40:53<33:25, 28.66s/it]

90


❌ Error at row 591, attempt 1: 'overall_score_100'


Scoring with LLM:  90%|████████▉ | 592/661 [6:41:51<43:02, 37.43s/it]

85


Scoring with LLM:  90%|████████▉ | 593/661 [6:42:19<39:06, 34.51s/it]

65


❌ Error at row 593, attempt 1: No JSON block found


Scoring with LLM:  90%|████████▉ | 594/661 [7:03:13<7:27:17, 400.57s/it]

95


Scoring with LLM:  90%|█████████ | 595/661 [7:03:39<5:16:50, 288.03s/it]

95


Scoring with LLM:  90%|█████████ | 596/661 [7:04:02<3:46:01, 208.63s/it]

85


❌ Error at row 596, attempt 1: 'overall_score_100'


Scoring with LLM:  90%|█████████ | 597/661 [7:04:43<2:48:52, 158.32s/it]

95


Scoring with LLM:  90%|█████████ | 598/661 [7:05:15<2:06:19, 120.30s/it]

85


Scoring with LLM:  91%|█████████ | 599/661 [7:05:40<1:34:58, 91.91s/it] 

95


❌ Error at row 599, attempt 1: 'overall_score_100'


❌ Error at row 599, attempt 2: 'overall_score_100'


Scoring with LLM:  91%|█████████ | 600/661 [7:07:26<1:37:44, 96.13s/it]

45


Scoring with LLM:  91%|█████████ | 601/661 [7:07:46<1:13:15, 73.26s/it]

45


❌ Error at row 601, attempt 1: 'overall_score_100'


Scoring with LLM:  91%|█████████ | 602/661 [7:08:47<1:08:14, 69.40s/it]

80


❌ Error at row 602, attempt 1: 'overall_score_100'


Scoring with LLM:  91%|█████████ | 603/661 [7:09:48<1:04:50, 67.08s/it]

100


❌ Error at row 603, attempt 1: 'overall_score_100'


Scoring with LLM:  91%|█████████▏| 604/661 [7:10:41<59:29, 62.63s/it]  

88


Scoring with LLM:  92%|█████████▏| 605/661 [7:11:03<47:18, 50.69s/it]

88


Scoring with LLM:  92%|█████████▏| 606/661 [7:11:38<42:02, 45.87s/it]

75


Scoring with LLM:  92%|█████████▏| 607/661 [7:12:01<35:00, 38.89s/it]

90


Scoring with LLM:  92%|█████████▏| 608/661 [7:12:24<30:13, 34.22s/it]

85


Scoring with LLM:  92%|█████████▏| 609/661 [7:12:50<27:25, 31.64s/it]

90


Scoring with LLM:  92%|█████████▏| 610/661 [7:13:15<25:19, 29.80s/it]

35


Scoring with LLM:  92%|█████████▏| 611/661 [7:13:46<25:09, 30.19s/it]

45


Scoring with LLM:  93%|█████████▎| 612/661 [7:14:05<21:55, 26.85s/it]

95


Scoring with LLM:  93%|█████████▎| 613/661 [7:14:45<24:30, 30.63s/it]

75


Scoring with LLM:  93%|█████████▎| 614/661 [7:15:13<23:32, 30.06s/it]

45


Scoring with LLM:  93%|█████████▎| 615/661 [7:15:38<21:43, 28.33s/it]

15


Scoring with LLM:  93%|█████████▎| 616/661 [7:16:01<20:11, 26.92s/it]

20


Scoring with LLM:  93%|█████████▎| 617/661 [7:16:25<19:05, 26.04s/it]

80


Scoring with LLM:  93%|█████████▎| 618/661 [7:16:54<19:17, 26.93s/it]

65


Scoring with LLM:  94%|█████████▎| 619/661 [7:17:23<19:10, 27.38s/it]

85


Scoring with LLM:  94%|█████████▍| 620/661 [7:17:55<19:40, 28.78s/it]

10


Scoring with LLM:  94%|█████████▍| 621/661 [7:18:20<18:25, 27.64s/it]

60


Scoring with LLM:  94%|█████████▍| 622/661 [7:18:43<17:00, 26.17s/it]

75


❌ Error at row 622, attempt 1: 'overall_score_100'


Scoring with LLM:  94%|█████████▍| 623/661 [7:19:38<22:10, 35.02s/it]

10


Scoring with LLM:  94%|█████████▍| 624/661 [7:20:05<20:03, 32.53s/it]

88


Scoring with LLM:  95%|█████████▍| 625/661 [7:20:33<18:45, 31.26s/it]

85


Scoring with LLM:  95%|█████████▍| 626/661 [7:20:59<17:15, 29.59s/it]

85


Scoring with LLM:  95%|█████████▍| 627/661 [7:21:18<14:58, 26.43s/it]

75


❌ Error at row 627, attempt 1: 'overall_score_100'


Scoring with LLM:  95%|█████████▌| 628/661 [7:22:36<23:04, 41.95s/it]

25


Scoring with LLM:  95%|█████████▌| 629/661 [7:22:58<19:12, 36.03s/it]

90


Scoring with LLM:  95%|█████████▌| 630/661 [7:23:45<20:16, 39.23s/it]

20


Scoring with LLM:  95%|█████████▌| 631/661 [7:24:07<17:02, 34.07s/it]

65


Scoring with LLM:  96%|█████████▌| 632/661 [7:24:40<16:21, 33.85s/it]

85


Scoring with LLM:  96%|█████████▌| 633/661 [7:25:00<13:51, 29.69s/it]

85


Scoring with LLM:  96%|█████████▌| 634/661 [7:25:30<13:17, 29.52s/it]

85


Scoring with LLM:  96%|█████████▌| 635/661 [7:25:47<11:17, 26.05s/it]

82


Scoring with LLM:  96%|█████████▌| 636/661 [7:26:18<11:26, 27.46s/it]

95


Scoring with LLM:  96%|█████████▋| 637/661 [7:26:49<11:22, 28.42s/it]

75


Scoring with LLM:  97%|█████████▋| 638/661 [7:27:22<11:23, 29.71s/it]

65


Scoring with LLM:  97%|█████████▋| 639/661 [7:27:47<10:22, 28.29s/it]

85


Scoring with LLM:  97%|█████████▋| 640/661 [7:28:17<10:04, 28.79s/it]

95


Scoring with LLM:  97%|█████████▋| 641/661 [7:28:35<08:33, 25.65s/it]

85


Scoring with LLM:  97%|█████████▋| 642/661 [7:29:02<08:18, 26.22s/it]

85


Scoring with LLM:  97%|█████████▋| 643/661 [7:29:32<08:12, 27.37s/it]

85


Scoring with LLM:  97%|█████████▋| 644/661 [7:30:03<08:01, 28.33s/it]

75


❌ Error at row 644, attempt 1: 'overall_score_100'


Scoring with LLM:  98%|█████████▊| 645/661 [7:31:32<12:25, 46.62s/it]

85


Scoring with LLM:  98%|█████████▊| 646/661 [7:31:58<10:03, 40.25s/it]

85


Scoring with LLM:  98%|█████████▊| 647/661 [7:32:22<08:16, 35.44s/it]

85


Scoring with LLM:  98%|█████████▊| 648/661 [7:32:55<07:29, 34.60s/it]

85


Scoring with LLM:  98%|█████████▊| 649/661 [7:33:17<06:10, 30.84s/it]

75


Scoring with LLM:  98%|█████████▊| 650/661 [7:33:42<05:20, 29.17s/it]

85


Scoring with LLM:  98%|█████████▊| 651/661 [7:34:07<04:39, 27.95s/it]

88


Scoring with LLM:  99%|█████████▊| 652/661 [7:34:29<03:54, 26.04s/it]

85


Scoring with LLM:  99%|█████████▉| 653/661 [7:34:53<03:23, 25.47s/it]

85


Scoring with LLM:  99%|█████████▉| 654/661 [7:35:22<03:06, 26.57s/it]

65


Scoring with LLM:  99%|█████████▉| 655/661 [7:35:51<02:44, 27.47s/it]

85


Scoring with LLM:  99%|█████████▉| 656/661 [7:36:15<02:12, 26.42s/it]

85


Scoring with LLM:  99%|█████████▉| 657/661 [7:36:45<01:49, 27.41s/it]

85


Scoring with LLM: 100%|█████████▉| 658/661 [7:37:04<01:14, 24.89s/it]

80


Scoring with LLM: 100%|█████████▉| 659/661 [7:37:34<00:52, 26.30s/it]

65


Scoring with LLM: 100%|█████████▉| 660/661 [7:37:56<00:25, 25.11s/it]

65


❌ Error at row 660, attempt 1: 'overall_score_100'


Scoring with LLM: 100%|██████████| 661/661 [7:39:15<00:00, 41.31s/it]

Scoring with LLM: 100%|██████████| 661/661 [7:39:15<00:00, 41.69s/it]

85





In [3]:
# Persist the scored frame to disk as one JSON array of row objects
# (orient='records' without lines=True), whereas the input file was loaded as
# JSON Lines (lines=True).
# NOTE(review): the destination is a hard-coded absolute path on a single
# machine; consider deriving it from a configurable data directory.
df.to_json(
    path_or_buf='/home/ali/Review_Quality_Benchmark/data/processed/HA_ALL_qwen.json',
    orient='records',
)

In [4]:
# Show the scored frame as the cell's output (bare last expression, so the
# notebook renders its own truncated preview of rows and columns).
df

Unnamed: 0,paper_id,title,abstract,review_text,authors,reviewer,review_date,review_rating,review_confidence,review_soundness,...,llm_Fairness,llm_Actionability,llm_Constructiveness,llm_Relevance Alignment,llm_Clarity and Readability,llm_Usage of Technical Terms,llm_Factuality,llm_Overall Quality,llm_Sentiment Polarity,llm_Politeness
0,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,**Summary:** \nThis paper presents an open-sou...,"['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_EGJf,1701662567826,6.0,3.0,3.0,...,3,4,3,3,5,3,partially factual,55,neutral,polite
1,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,This paper proposes a comprehensive library fo...,"['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_DWom,1699636125239,6.0,3.0,3.0,...,4,4,4,5,4,4,5,75,neutral,4
2,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,"This author introduces LyCORIS, an open source...","['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_PnHf,1699636125143,6.0,4.0,4.0,...,4,4,4,5,4,5,partially factual,75,positive,polite
3,123,Navigating Text-To-Image Customization: From L...,Text-to-image generative models have garnered ...,"The authors propose LyCORIS, an open-source li...","['~SHIH-YING_YEH1', '~Yu-Guan_Hsieh1', '~Zhido...",Reviewer_ekPo,1699636125075,8.0,4.0,3.0,...,5,5,5,5,4,5,partially factual,85,neutral,polite
4,0,$\nu$-ensembles: Improving deep ensemble calib...,We present a method to improve the calibration...,"This paper introduces ν-ensembles, a novel dee...","['~Konstantinos_Pitas1', '~Julyan_Arbel1']",Reviewer_HFRa,1699636992453,3.0,4.0,2.0,...,5,5,5,5,5,5,factual,92,neutral,polite
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656,118,Module Extraction for Efficient Object Query o...,The extraction of logically-independent fragme...,The submission addresses the problem of partit...,,Anonymous,03/May/2014,,,,...,4,4,3,5,4,5,partially factual,85,negative,polite
657,61,EARTh: an Environmental Application Reference ...,The paper aims at providing a description of E...,This revision addresses my concerns. I am part...,,Natasha Noy,22/Jul/2013,,,,...,5,2,3,5,5,5,factual,80,positive,polite
658,76,Facilitating Data Discovery by Connecting Rela...,"In this study, we investigate two approaches t...",The paper presents and compares RDF/XML (in th...,,Anonymous,15/Jun/2013,,,,...,4,3,3,4,4,5,factual,65,negative,polite
659,76,Facilitating Data Discovery by Connecting Rela...,"In this study, we investigate two approaches t...",This paper investigates two different approach...,,Ghislain Hachey,17/Jun/2013,,,,...,4,4,4,5,3,5,partially factual,65,neutral,polite


In [5]:
# Count the rows, over the WHOLE frame (no row subset is taken), in which every
# "llm_"-prefixed column holds a non-null value, then print that count followed
# by the frame's overall shape for comparison.
llm_columns = [col for col in df.columns if col.startswith("llm_")]
# dropna() with its defaults removes any row that has at least one missing
# value among the selected columns, so the remainder is the fully scored rows.
non_null_count = len(df[llm_columns].dropna())
print(non_null_count)
print(df.shape)

241
(661, 58)
