In [4]:
!pip install ace_tools

Collecting ace_tools
  Downloading ace_tools-0.0-py3-none-any.whl.metadata (300 bytes)
Downloading ace_tools-0.0-py3-none-any.whl (1.1 kB)
Installing collected packages: ace_tools
Successfully installed ace_tools-0.0


In [16]:
# Устанавливаем библиотеку для красивого вывода (если ещё не установлено)
!pip install tabulate

import json
import pandas as pd
from tabulate import tabulate



In [18]:
# 1. Загружаем JSON-файл
with open('combined_sample50_judge.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [20]:
# 2. Преобразуем в DataFrame
df = pd.json_normalize(data)

In [24]:
criteria = ['clarity', 'direct_address', 'bureaucratic_words', 'structure', 'brevity']

# Правильный способ получить список моделей
models = sorted({col.split('_', 1)[0] for col in df.columns if '_' in col})

rows = []
for m in models:
    row = {'model': m}
    for crit in criteria:
        col = f"{m}_{crit}"
        if col in df.columns:
            row[crit] = df[col].mean()
        else:
            row[crit] = float('nan')
    row['overall'] = sum(v for v in row.values() if isinstance(v, (int, float)))
    rows.append(row)

summary_df = pd.DataFrame(rows).sort_values('overall', ascending=False)
print(summary_df)

# 5. Выводим в консоль красиво
print(tabulate(summary_df, headers='keys', tablefmt='grid', floatfmt=".2f", showindex=False))

# 6. Сохраняем результат
summary_df.to_csv('judge_stats_summary.csv', index=False)


         model  clarity  direct_address  bureaucratic_words  structure  \
0  deepseek-v3      5.0            2.78                 5.0       4.84   
1        llama      NaN             NaN                 NaN        NaN   
2      mistral      NaN             NaN                 NaN        NaN   
3         qwen      NaN             NaN                 NaN        NaN   

   brevity  overall  
0     4.98     22.6  
1      NaN      NaN  
2      NaN      NaN  
3      NaN      NaN  
+-------------+-----------+------------------+----------------------+-------------+-----------+-----------+
| model       |   clarity |   direct_address |   bureaucratic_words |   structure |   brevity |   overall |
| deepseek-v3 |      5.00 |             2.78 |                 5.00 |        4.84 |      4.98 |     22.60 |
+-------------+-----------+------------------+----------------------+-------------+-----------+-----------+
| llama       |    nan    |           nan    |               nan    |      nan    |    

In [12]:
print(df.columns.tolist())

['llama_finetuned_clarity', 'llama_finetuned_direct_address', 'llama_finetuned_bureaucratic_words', 'llama_finetuned_structure', 'llama_finetuned_brevity', 'mistral_instruct_clarity', 'mistral_instruct_direct_address', 'mistral_instruct_bureaucratic_words', 'mistral_instruct_structure', 'mistral_instruct_brevity', 'qwen_finetuned_clarity', 'qwen_finetuned_direct_address', 'qwen_finetuned_bureaucratic_words', 'qwen_finetuned_structure', 'qwen_finetuned_brevity', 'deepseek-v3_clarity', 'deepseek-v3_direct_address', 'deepseek-v3_bureaucratic_words', 'deepseek-v3_structure', 'deepseek-v3_brevity']


In [22]:
print(models)     # ['deepseek-v3', …]
print(criteria)   # ['clarity', 'direct_address', 'bureaucratic_words', …]
print(df.columns.tolist())

['deepseek-v3', 'deepseek-v3_bureaucratic', 'deepseek-v3_direct', 'llama_finetuned', 'llama_finetuned_bureaucratic', 'llama_finetuned_direct', 'mistral_instruct', 'mistral_instruct_bureaucratic', 'mistral_instruct_direct', 'qwen_finetuned', 'qwen_finetuned_bureaucratic', 'qwen_finetuned_direct']
['clarity', 'direct_address', 'bureaucratic_words', 'structure', 'brevity']
['llama_finetuned_clarity', 'llama_finetuned_direct_address', 'llama_finetuned_bureaucratic_words', 'llama_finetuned_structure', 'llama_finetuned_brevity', 'mistral_instruct_clarity', 'mistral_instruct_direct_address', 'mistral_instruct_bureaucratic_words', 'mistral_instruct_structure', 'mistral_instruct_brevity', 'qwen_finetuned_clarity', 'qwen_finetuned_direct_address', 'qwen_finetuned_bureaucratic_words', 'qwen_finetuned_structure', 'qwen_finetuned_brevity', 'deepseek-v3_clarity', 'deepseek-v3_direct_address', 'deepseek-v3_bureaucratic_words', 'deepseek-v3_structure', 'deepseek-v3_brevity']


In [28]:
import json
import pandas as pd

# 1. Загружаем JSON-файл
with open('combined_sample50_judge.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# 2. Преобразуем в DataFrame
df = pd.json_normalize(data)

# 3. Явно задаём список моделей и критериев
models = [
    'llama_finetuned',
    'mistral_instruct',
    'qwen_finetuned',
    'deepseek-v3'
]
criteria = [
    'clarity',
    'direct_address',
    'bureaucratic_words',
    'structure',
    'brevity'
]

# 4. Считаем средний балл по критериям и общий балл
rows = []
for m in models:
    row = {'model': m}
    for crit in criteria:
        col_name = f"{m}_{crit}"
        # Проверяем, что такая колонка есть
        if col_name in df.columns:
            row[crit] = df[col_name].mean()
        else:
            row[crit] = float('nan')
    # Общий балл
    row['overall'] = sum(row[crit] for crit in criteria if pd.notna(row[crit]))
    rows.append(row)

summary_df = pd.DataFrame(rows).sort_values('overall', ascending=False)

# 5. Сохраняем и выводим результат
summary_df.to_csv('judge_stats_summary_explicit.csv', index=False)
summary_df


Unnamed: 0,model,clarity,direct_address,bureaucratic_words,structure,brevity,overall
3,deepseek-v3,5.0,2.78,5.0,4.84,4.98,22.6
2,qwen_finetuned,4.22,3.48,4.2,4.34,3.8,20.04
1,mistral_instruct,3.44,2.64,3.26,3.52,3.58,16.44
0,llama_finetuned,3.14,2.38,3.38,2.86,3.24,15.0


In [32]:
import json

# 1. Читаем оба JSON-файла
with open('combined_sample50_judge_1.json', 'r', encoding='utf-8') as f:
    data1 = json.load(f)

with open('combined_sample50_judge_round2.json', 'r', encoding='utf-8') as f:
    data2 = json.load(f)

# 2. Объединяем списки записей
#    Предполагается, что оба файла содержат JSON-массивы (lists)
combined = data1 + data2

# 3. Сохраняем в новый файл
with open('combined_sample50_judge_merged.json', 'w', encoding='utf-8') as f:
    json.dump(combined, f, ensure_ascii=False, indent=2)

print("Объединённый файл сохранён как combined_sample50_judge_merged.json")


Объединённый файл сохранён как combined_sample50_judge_merged.json


In [34]:
import json
import pandas as pd

# 1. Загружаем JSON-файл
with open('combined_sample50_judge_merged.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# 2. Преобразуем в DataFrame
df = pd.json_normalize(data)

# 3. Явно задаём список моделей и критериев
models = [
    'llama_finetuned',
    'mistral_instruct',
    'qwen_finetuned',
    'deepseek-v3'
]
criteria = [
    'clarity',
    'direct_address',
    'bureaucratic_words',
    'structure',
    'brevity'
]

# 4. Считаем средний балл по критериям и общий балл
rows = []
for m in models:
    row = {'model': m}
    for crit in criteria:
        col_name = f"{m}_{crit}"
        # Проверяем, что такая колонка есть
        if col_name in df.columns:
            row[crit] = df[col_name].mean()
        else:
            row[crit] = float('nan')
    # Общий балл
    row['overall'] = sum(row[crit] for crit in criteria if pd.notna(row[crit]))
    rows.append(row)

summary_df = pd.DataFrame(rows).sort_values('overall', ascending=False)

# 5. Сохраняем и выводим результат
summary_df.to_csv('judge_stats_summary_explicit.csv', index=False)
summary_df

Unnamed: 0,model,clarity,direct_address,bureaucratic_words,structure,brevity,overall
3,deepseek-v3,4.99,2.95,4.98,4.83,4.99,22.74
2,qwen_finetuned,4.15,3.58,4.18,4.36,3.88,20.15
1,mistral_instruct,3.45,2.69,3.28,3.51,3.56,16.49
0,llama_finetuned,3.1,2.53,3.31,2.93,3.23,15.1
