In [2]:
import pandas as pd
import json
import os
from pathlib import Path

# Load every grader JSON export found in the outputs folder into one
# long-format table: one row per (document, grader, metric) with the rating
# and notes recorded for that metric.
outputs_folder = 'outputs'

# os.listdir returns entries in arbitrary order; sort so the row order (and the
# outcome of any later order-sensitive aggregation) is the same on every run.
files = sorted(f for f in os.listdir(outputs_folder) if f.endswith('.json'))

# Parse JSON files and extract metrics
data = []
for filename in files:
    filepath = os.path.join(outputs_folder, filename)

    # Keep the try block limited to reading/parsing so that only genuinely
    # unreadable files are reported as parse errors.
    try:
        # Read as UTF-8 explicitly rather than relying on the platform's
        # default text encoding, which is not UTF-8 everywhere.
        with open(filepath, 'r', encoding='utf-8') as f:
            json_data = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        print(f"Error parsing {filename}")
        continue

    # A valid JSON file may still hold a list/string/number at the top level;
    # skip it instead of letting .get() abort the whole load.
    if not isinstance(json_data, dict):
        print(f"Skipping {filename}: top-level JSON value is not an object")
        continue

    # Extract metadata. `or {}` also covers an explicit null value, which the
    # default argument of .get() would not replace.
    metadata = json_data.get('metadata') or {}
    grader = metadata.get('grader_name', 'Unknown')
    doc = metadata.get('document_name', 'Unknown')

    # Extract results - each metric with its rating and notes
    results = json_data.get('results') or {}
    for metric, metric_data in results.items():
        if not isinstance(metric_data, dict):
            print(f"Skipping metric {metric!r} in {filename}: entry is not an object")
            continue
        data.append({
            'doc': doc,
            'grader': grader,
            'metric': metric,
            'rating': metric_data.get('rating', None),
            # 'evidence': metric_data.get('evidence', ''),
            'notes': metric_data.get('notes', '')
        })

# Create summary table. Naming the columns explicitly keeps the schema stable
# even when no rows were collected (an empty list would otherwise produce a
# frame with no columns at all).
summary_df = pd.DataFrame(data, columns=['doc', 'grader', 'metric', 'rating', 'notes'])
print(summary_df)


      doc grader                       metric  rating notes
0   sdsds     VJ      Definition and Examples       4      
1   sdsds     VJ                 Permited Use       2      
2   sdsds     VJ               Prohibited Use       3      
3   sdsds     VJ           Academic Integrity       3      
4   sdsds     VJ                 Transparency       1      
..    ...    ...                          ...     ...   ...
65   sdsd   sdsd               Accountability       2      
66   sdsd   sdsd                        Risks       1      
67   sdsd   sdsd  Copyright and Data Privacy        4      
68   sdsd   sdsd       Training and Resources       3      
69   sdsd   sdsd                Policy Review       3      

[70 rows x 5 columns]


In [3]:
# Reshape the long table to wide form: one row per (doc, metric) pair and one
# column per grader holding that grader's rating. When the same
# (doc, metric, grader) combination occurs more than once, aggfunc='first'
# keeps the first rating encountered.
# NOTE(review): this cell replaces summary_df with the wide table, so it cannot
# be re-run on its own (the 'grader' column no longer exists afterwards) -
# re-run the loading cell first.
summary_df = (
    pd.pivot_table(
        summary_df,
        values='rating',
        index=['doc', 'metric'],
        columns='grader',
        aggfunc='first',
    )
    .reset_index()
    # Clear the leftover 'grader' label on the columns axis so it does not
    # show up as a header in the printed table.
    .rename_axis(columns=None)
)
print(summary_df)


      doc                       metric   VJ  sdsd
0    sdsd           Academic Integrity  NaN   1.0
1    sdsd               Accountability  NaN   2.0
2    sdsd  Copyright and Data Privacy   NaN   4.0
3    sdsd      Definition and Examples  NaN   0.0
4    sdsd                 Permited Use  NaN   4.0
5    sdsd                Policy Review  NaN   3.0
6    sdsd               Prohibited Use  NaN   3.0
7    sdsd                        Risks  NaN   1.0
8    sdsd       Training and Resources  NaN   3.0
9    sdsd                 Transparency  NaN   2.0
10  sdsds           Academic Integrity  3.0   NaN
11  sdsds               Accountability  4.0   NaN
12  sdsds  Copyright and Data Privacy   4.0   NaN
13  sdsds      Definition and Examples  4.0   NaN
14  sdsds                 Permited Use  2.0   NaN
15  sdsds                Policy Review  2.0   NaN
16  sdsds               Prohibited Use  3.0   NaN
17  sdsds                        Risks  4.0   NaN
18  sdsds       Training and Resources  3.0   NaN
