In [None]:
import json
import math
import threading
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
from dotenv import dotenv_values
from openai import OpenAI

import functions.prompts as prompts

# Read key/value settings from the local .env file so the API token is not
# hard-coded in the notebook.
config = dotenv_values(".env")

# OpenAI client pointed at the DeepInfra endpoint below. DEEPINFRA_TOKEN must
# be present in .env; a missing key raises KeyError on this line.
client = OpenAI(
    api_key=config['DEEPINFRA_TOKEN'],
    base_url="https://api.deepinfra.com/v1/openai",
)

In [None]:
# Load the paper list, derive `rank` as the part of `id` before the first
# underscore, then keep a reproducible sample of 3 papers per rank.
# NOTE: sample(n=3) raises if any rank group has fewer than 3 rows.
df = (
    pd.read_csv("paper.csv")
    .assign(rank=lambda frame: frame['id'].apply(lambda paper_id: paper_id.split("_")[0]))
    .groupby('rank')
    .sample(n=3, random_state=42)
    .reset_index(drop=True)
)

In [None]:
def gen_body(text, model="meta-llama/Llama-3.3-70B-Instruct-Turbo"):
    """Assemble the keyword arguments for one chat-completion request.

    Args:
        text: Content of the user message (the paper prompt).
        model: Model identifier placed in the request body.

    Returns:
        dict: A mapping with ``model`` and ``messages`` keys, ready to be
        unpacked into ``client.chat.completions.create(**body)``.
    """
    # System prompt = scoring instructions + the textual form of the
    # Top5Model JSON schema + strict output-format instructions.
    system_prompt = f"{prompts.top5()}\nPlease respond in valid JSON format that matches this schema: {str(prompts.Top5Model.model_json_schema())}. **IMPORTANT**: ONLY RESPOND WITH A JSON OBJECT CONTAINING SCORES ACCORDING TO THE ABOVE SCHEMA. THE RESPONSE MUST END WITH A CURLY BRACKET. DO NOT ADD ANALYSIS OR EXPLANATION."

    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=text),
        # The conversation ends with an assistant turn holding only "{";
        # parse_r puts that brace back in front of the reply when decoding.
        dict(role="assistant", content="{"),
    ]
    return dict(model=model, messages=conversation)

In [None]:
def llm_paper(client, i, df, add, model="google/gemma-3-27b-it"):
    """Send one paper, plus an extra header line, to the chat model.

    Args:
        client: Object exposing ``chat.completions.create`` (the OpenAI client).
        i: Positional row number of the paper in ``df`` (used with ``.iloc``).
        df: DataFrame with at least the ``id`` and ``name`` columns.
        add: Extra line inserted between title and text, e.g.
            ``"AUTHOR: ..."`` or ``"AFFILIATION: ..."``.
        model: Model identifier forwarded to ``gen_body``. Defaults to the
            model that was previously hard-coded here.

    Returns:
        dict: ``{"scores": [response]}`` where ``response`` is the raw
        completion object returned by the client.

    Raises:
        OSError: If ``output/<id>.txt`` cannot be opened or read.
    """
    row = df.iloc[i]

    # Context manager guarantees the handle is closed even if read() fails.
    with open(f"output/{row['id']}.txt", "r") as f:
        text = f.read()

    paper = f"PAPER TITLE: {row['name']}\n\n{add}\n\nPAPER TEXT: {text}"

    scores = client.chat.completions.create(**gen_body(paper, model=model))

    return {
        "scores": [scores],
    }
    
def parse_r(r, id, typ):
    """Decode one chat-completion response into a validated score dict.

    The request built by ``gen_body`` ends with an assistant message that
    contains only ``{``, so the reply is treated as the remainder of a JSON
    object and the brace is put back before decoding. A reply that starts
    with a Markdown ``json`` code fence is handled separately.

    Only the text up to the first ``}`` is kept, so nested objects are not
    supported, and single quotes are rewritten to double quotes before
    ``json.loads``.

    Args:
        r: Response object exposing ``choices[0].message.content``.
        id: Identifier copied unchanged into the result. (The name shadows
            the builtin ``id``; it is kept for caller compatibility.)
        typ: ``"top5"`` selects ``prompts.Top5Model``; any other value
            selects ``prompts.AnalysisModel``.

    Returns:
        dict: ``{"id": id, "scores": <validated dict>}``. ``scores`` is
        ``None`` when the reply cannot be read, decoded, or validated; the
        error is printed instead of raised.
    """
    validateModel = prompts.Top5Model if typ == "top5" else prompts.AnalysisModel
    # Captured outside the happy path so the error branch can report it
    # without touching the response object a second time.
    raw = None
    try:
        raw = r.choices[0].message.content
        if raw.startswith("```json"):
            text = raw.split("```json")[1].split("}")[0].replace("'", '"') + "}"
        else:
            text = "{" + raw.split("}")[0].replace("'", '"') + "}"
        # The model may repeat the pre-filled brace; drop the duplicate.
        if text.startswith("{{"):
            text = text[1:]
        return {
            "id": id,
            "scores": validateModel.model_validate(json.loads(text)).model_dump()
        }
    except Exception as e:
        # Built separately from the f-string: nesting the same quote type
        # inside an f-string is a SyntaxError before Python 3.12, and the
        # content may be missing or None when the failure happened early.
        shown = "{" + raw if isinstance(raw, str) else repr(raw)
        print(f"Error {e} - {shown}")
        return {
            "id": id,
            "scores": None
        }

def parse_paper(rs):
    """Parse every raw response of one paper evaluation.

    Args:
        rs: Mapping with a ``scores`` list of raw completion responses, as
            returned by ``llm_paper``.

    Returns:
        dict: ``{"scores": [...]}`` holding one ``parse_r`` result per
        response; each result's ``id`` is the response's position in the list.
    """
    parsed = []
    for position, response in enumerate(rs['scores']):
        parsed.append(parse_r(response, position, "top5"))
    return {"scores": parsed}

In [None]:
# Short label for the model under test. It prefixes every result column
# written by update_df and names the directory that receives per-chunk CSVs.
model_name = "gemma"

# update_df is called from many worker threads (see evaluate_paper) that all
# share one DataFrame. This lock serializes the column creation and cell
# writes so concurrent writers cannot interleave inside pandas.
_DF_WRITE_LOCK = threading.Lock()


def update_df(df, i, no, score, typ, m):
    """Write one parsed score object into the shared results DataFrame.

    Target columns are named ``{model_name}-{m}-{metric}-{no + 1}`` and are
    created (filled with ``None``) when they do not exist yet, even if the
    score itself turns out to be invalid.

    Args:
        df: DataFrame to update in place.
        i: Row label passed to ``df.loc``.
        no: Zero-based index of the response; the column suffix is ``no + 1``.
        score: Raw score mapping to validate; ``None`` when parsing failed.
        typ: ``"top5"`` uses ``prompts.Top5Model`` and the single ``score``
            metric; any other value uses ``prompts.AnalysisModel`` and its
            five metrics.
        m: Condition label embedded in the column name (e.g. ``"ins0"``).

    Returns:
        None. Failures are printed and the affected cell is left untouched.
    """
    metrics = ['score'] if typ == "top5" else ['originality', 'rigor', 'scope', 'impact', 'written_by_ai']
    validateModel = prompts.Top5Model if typ == "top5" else prompts.AnalysisModel

    # Validation does not depend on the metric, so do it once up front.
    validated = None
    validation_error = None
    try:
        validated = validateModel.model_validate(score)
    except Exception as exc:
        validation_error = exc

    with _DF_WRITE_LOCK:
        for metric in metrics:
            column_name = f"{model_name}-{m}-{metric}-{int(no)+1}"

            if column_name not in df.columns:
                df[column_name] = None

            if validated is None:
                print(f"ERROR | Can't update the model in {column_name}, skipping... ({validation_error})")
                continue

            try:
                df.loc[i, column_name] = getattr(validated, metric)
            except Exception as exc:
                # Best effort: report and move on to the next metric.
                print(f"ERROR | Can't update the model in {column_name}, skipping... ({exc})")

In [None]:
# Affiliations injected into the prompt as "AFFILIATION: <name>".
institutions = [
    "Massachusetts Institute of Technology",
    "Harvard University",
    "London School of Economics and Political Science",
    "University of Cape Town",
    "Nanyang Technological University",
    "Chulalongkorn University",
]

# First author-name group, injected into the prompt as "AUTHOR: <name>".
top_names = [
    "Andrei Shleifer",
    "Daron Acemoglu",
    "James J. Heckman",
    "Joseph E. Stiglitz",
    "John List",
    "Carmen M. Reinhart",
    "Janet Currie",
    "Esther Duflo",
    "Asli Demirguc-Kunt",
    "Marianne Bertrand",
]

# Second author-name group, used the same way as top_names.
random_names = [
    "Bruce S. Green",
    "Alejandro L. James",
    "Billie J. Abels",
    "Paul A. Jenkins",
    "Gary L. Bodie",
    "Gail J. Doan",
    "Shirley S. Hodgkins",
    "Pattie K. Reinhardt",
    "Tara R. Weber",
    "Tabitha J. Cox",
]

In [None]:
# Column-label prefixes for the two author groups, in the same order as
# [top_names, random_names] is iterated in evaluate_paper.
t_names = ["top", "ran"]

import concurrent.futures

def evaluate_institution(client, i, df, ind, ins):
    """Score paper ``i`` with an affiliation line and store the result.

    Args:
        client: Chat client forwarded to ``llm_paper``.
        i: Row number of the paper in ``df``.
        df: Shared results DataFrame, updated in place.
        ind: Index of the institution; becomes the ``ins{ind}`` column label.
        ins: Institution name placed in the prompt.

    Returns:
        tuple: ``(ind, ins)``, echoed back so the caller can log completion.
    """
    print(f"Evaluating AFFILIATION {i} -> {df.iloc[i]['name']} for {ins}")
    parsed = parse_paper(llm_paper(client, i, df, f"AFFILIATION: {ins}"))

    condition = f"ins{ind}"
    for attempt, entry in enumerate(parsed['scores']):
        update_df(df, i, attempt, entry['scores'], "top5", condition)

    return ind, ins

def evaluate_author(client, i, df, name_type_ind, t_name, ind2, name):
    """Score paper ``i`` with an author line and store the result.

    Args:
        client: Chat client forwarded to ``llm_paper``.
        i: Row number of the paper in ``df``.
        df: Shared results DataFrame, updated in place.
        name_type_ind: Index of the author group; only echoed back.
        t_name: Group prefix used in the column label.
        ind2: Index of the name within its group.
        name: Author name placed in the prompt.

    Returns:
        tuple: ``(name_type_ind, ind2, name)``, echoed back for logging.
    """
    print(f"Evaluating {t_name} {i} -> {df.iloc[i]['name']} for {name}")
    parsed = parse_paper(llm_paper(client, i, df, f"AUTHOR: {name}"))

    condition = f"{t_name}{ind2}"
    for attempt, entry in enumerate(parsed['scores']):
        update_df(df, i, attempt, entry['scores'], "top5", condition)

    return name_type_ind, ind2, name

def evaluate_paper(client, i, df):
    """Run every institution and author condition for paper ``i``.

    All conditions are submitted to a 30-worker thread pool: first one task
    per entry of ``institutions``, then one task per name in ``top_names``
    and ``random_names``. Completions are logged as they arrive, institutions
    first. A task that raises is reported and does not stop the others.

    Args:
        client: Chat client forwarded to the worker functions.
        i: Row number of the paper in ``df``.
        df: Shared results DataFrame, updated in place by the workers.

    Returns:
        None.
    """
    print(f"Evaluating {i} -> {df.iloc[i]['name']}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=30) as executor:
        institution_futures = [
            executor.submit(evaluate_institution, client, i, df, ins_idx, institution)
            for ins_idx, institution in enumerate(institutions)
        ]

        author_futures = [
            executor.submit(evaluate_author, client, i, df, group_idx, t_names[group_idx], name_idx, author)
            for group_idx, group in enumerate([top_names, random_names])
            for name_idx, author in enumerate(group)
        ]

        for done in concurrent.futures.as_completed(institution_futures):
            try:
                ind, ins = done.result()
                print(f"Completed institution {ins} (index {ind})")
            except Exception as exc:
                print(f"Institution evaluation generated an exception: {exc}")

        for done in concurrent.futures.as_completed(author_futures):
            try:
                name_type_ind, ind2, name = done.result()
                t_name = t_names[name_type_ind]
                print(f"Completed author {name} ({t_name}{ind2})")
            except Exception as exc:
                print(f"Author evaluation generated an exception: {exc}")

    print(f"Completed all evaluations for {i} -> {df.iloc[i]['name']}")

In [None]:
import math
import os
from concurrent.futures import ThreadPoolExecutor
import warnings
# update_df adds many columns one at a time, which triggers pandas'
# fragmentation PerformanceWarning; silence it for this run.
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)

# Create both output locations up front so a missing directory cannot make
# the run fail only after the API calls have been made.
os.makedirs(model_name, exist_ok=True)
os.makedirs("results", exist_ok=True)

chunk = 50
n_chunks = math.ceil(len(df) / chunk)
for i in range(0, len(df), chunk):
    chunk_no = (i // chunk) + 1
    rows = range(i, min(i + chunk, len(df)))
    print(f"PROCESSING CHUNK {chunk_no} of {n_chunks}")
    with ThreadPoolExecutor(max_workers=6) as executor:
        # Argument lists are sized from `rows` itself rather than from
        # unrelated lengths; list() forces all papers in the chunk to finish.
        results = list(executor.map(
            evaluate_paper,
            [client] * len(rows),
            rows,
            [df] * len(rows),
        ))

    # Checkpoint after every chunk so partial progress survives a crash.
    df.to_csv(f"{model_name}/{chunk_no}.csv", index=False)

# index=False matches the per-chunk checkpoints and avoids an extra
# "Unnamed: 0" column when this file is read back.
df.to_csv("results/bias_l.csv", index=False)

In [None]:
df = pd.read_csv("results/bias_l.csv")
# A CSV saved with its index comes back with an "Unnamed: N" column; drop it
# so it is not carried into the next save. No-op when the file has none.
df = df.drop(columns=[c for c in df.columns if str(c).startswith("Unnamed:")])

# Rows with at least one missing value are evaluated again.
# NOTE: the check covers every column, so a gap in the original paper
# metadata also marks the row for re-evaluation.
fallback = df[df.isna().any(axis=1)].index

with ThreadPoolExecutor(max_workers=200) as executor:
    results = list(executor.map(
        evaluate_paper,
        [client] * len(fallback),
        fallback,
        [df] * len(fallback),
    ))

In [None]:
# Index labels of rows that still contain at least one missing value.
fallback = df.index[df.isna().any(axis=1)]
fallback

In [None]:
# Save without the index: writing it would add another "Unnamed: N" column
# each time the file is read back and saved again.
df.to_csv("results/bias_l.csv", index=False)