In [65]:
# Name of a single result file. Not referenced by the code visible in this
# cell (the driver loop builds file names from records.csv instead).
FILENAME = 'W2901173781.json'
# Directory prefix for the model outputs being scored: evaluate_result reads
# '<EXPERIMENT_NAME>/results/<filename>'.
EXPERIMENT_NAME = 'gemini-1.5-flash'

import json
import Levenshtein
import csv
import math

# Maximum points awarded for each author name part ('first' / 'last'); the
# Levenshtein distance to the control value is subtracted from it.
POINTS_PER_AUTHOR_FIELD = 10
# Maximum points awarded for each keyword; the Levenshtein distance to the
# control keyword is subtracted from it.
POINTS_PER_KEYWORD_FIELD = 10

def _score_name_part(control_value, experiment_value):
    """Return the points earned for one author name part ('first' or 'last').

    A non-None experiment value earns POINTS_PER_AUTHOR_FIELD minus its
    Levenshtein distance to the control value (a None control value is
    compared as the empty string). A None experiment value costs
    POINTS_PER_AUTHOR_FIELD, unless the control value is None as well, in
    which case both sides agree the part is absent and full points are given.
    """
    if experiment_value is None:
        if control_value is None:
            return POINTS_PER_AUTHOR_FIELD
        return -POINTS_PER_AUTHOR_FIELD
    return POINTS_PER_AUTHOR_FIELD - Levenshtein.distance(control_value or '', experiment_value)


def evaluate_authors(control_authors, experiment_authors):
    """Score the experiment's author list against the control list.

    Authors are compared position by position, so order matters. Each author
    is a mapping with 'first' and 'last' entries, each worth up to
    POINTS_PER_AUTHOR_FIELD points.

    Args:
        control_authors: expected authors (None is treated as an empty list).
        experiment_authors: authors produced by the model (None is treated as
            an empty list).

    Returns:
        total points divided by the possible points. 1.0 is a perfect match;
        the value can drop below 0 when penalties outweigh the points earned.
        When no authors are expected, 1.0 if none were produced, else 0.0.
    """
    control_authors = control_authors or []
    experiment_authors = experiment_authors or []
    expected_count = len(control_authors)
    found_count = len(experiment_authors)

    # Nothing to compare against: avoid dividing by zero possible points.
    if expected_count == 0:
        return 1.0 if found_count == 0 else 0.0

    # Possible points is 10 points per each name part (first/last).
    possible_points = expected_count * 2 * POINTS_PER_AUTHOR_FIELD

    # Any mismatch in the number of authors is a penalty. Surplus authors used
    # to be *added* here, which pushed the score above 1.0.
    total = -abs(found_count - expected_count) * 2 * POINTS_PER_AUTHOR_FIELD

    # Question: Is the order of authors meaningful? Currently we are ensuring it is.
    for i, control_author in enumerate(control_authors):
        if i >= found_count:
            print(f"AUTHOR WAS NOT FOUND IN EXPERIMENT {i}")
            continue
        experiment_author = experiment_authors[i]
        for part in ('first', 'last'):
            total += _score_name_part(control_author.get(part), experiment_author.get(part))

    return total / possible_points

def evaluate_string(control_string, experiment_string):
    """Score an extracted string (e.g. title or abstract) against the control.

    Args:
        control_string: expected text, or None when there is none.
        experiment_string: text produced by the model, or None.

    Returns:
        1.0 when both sides are missing/empty, 0.0 when exactly one side is
        missing/empty, otherwise
        (len(control) - Levenshtein distance) / len(control). That ratio is
        1.0 for an exact match and can be negative when the experiment text
        is much longer than the control.
    """
    control = control_string or ''
    experiment = experiment_string or ''

    # None and '' are both "no value". Handling them up front avoids
    # len(None) and a division by zero for an empty control string.
    if not control or not experiment:
        return 1.0 if not control and not experiment else 0.0

    possible_points = len(control)
    return (possible_points - Levenshtein.distance(control, experiment)) / possible_points
    
def evaluate_keywords(control_keywords, experiment_keywords):
    """Score the experiment's keyword list against the control list.

    Keywords are compared position by position, so order matters. Each
    keyword is worth up to POINTS_PER_KEYWORD_FIELD points, reduced by its
    Levenshtein distance to the control keyword.

    Args:
        control_keywords: expected keywords (None is treated as an empty list).
        experiment_keywords: keywords produced by the model (None is treated
            as an empty list).

    Returns:
        total points divided by the possible points. 1.0 is a perfect match;
        the value can drop below 0 when penalties outweigh the points earned.
        When no keywords are expected, 1.0 if none were produced, else 0.0.
    """
    control_keywords = control_keywords or []
    experiment_keywords = experiment_keywords or []
    expected_count = len(control_keywords)
    found_count = len(experiment_keywords)

    if expected_count == 0:
        return 1.0 if found_count == 0 else 0.0

    # Possible points is 10 points per keyword.
    possible_points = expected_count * POINTS_PER_KEYWORD_FIELD

    # Any mismatch in the number of keywords is a penalty. Surplus keywords
    # used to be *added* here, which pushed the score above 1.0.
    total = -abs(found_count - expected_count) * POINTS_PER_KEYWORD_FIELD

    # Question: Is the order of keywords meaningful? Currently we are ensuring it is.
    for i, control_keyword in enumerate(control_keywords):
        if i >= found_count:
            print(f"KEYWORD WAS NOT FOUND IN EXPERIMENT {i}")
            continue
        experiment_keyword = experiment_keywords[i]
        if experiment_keyword is None:
            total -= POINTS_PER_KEYWORD_FIELD
        else:
            # A None control keyword is compared as the empty string.
            total += POINTS_PER_KEYWORD_FIELD - Levenshtein.distance(control_keyword or '', experiment_keyword)

    return total / possible_points

def evaluate_result(filename, debug=False):
    """Score one model output file against its control file.

    Reads 'control/<filename>' and '<EXPERIMENT_NAME>/results/<filename>' as
    JSON and scores the 'authors', 'title', 'abstract' and 'keywords' fields.

    Args:
        filename: JSON file name shared by the control and experiment folders.
        debug: when True, print each field score and, for any score other
            than 1.0, the expected and model values.

    Returns:
        The unweighted mean of the four field scores.

    Raises:
        OSError: if either file cannot be opened.
        json.JSONDecodeError: if either file is not valid JSON.
        KeyError: if the control file lacks one of the four fields.
    """
    control_path = f'control/{filename}'
    experiment_path = f'{EXPERIMENT_NAME}/results/{filename}'

    # JSON is UTF-8; relying on the platform default encoding can corrupt
    # non-ASCII names and distort the edit distances.
    with open(control_path, 'r', encoding='utf-8') as file:
        control = json.load(file)

    with open(experiment_path, 'r', encoding='utf-8') as file:
        experiment = json.load(file)

    # A field the model left out is scored as missing instead of aborting the
    # whole run with a KeyError. The control file is still indexed strictly.
    model_authors = experiment.get('authors') or []
    model_title = experiment.get('title')
    model_abstract = experiment.get('abstract')
    model_keywords = experiment.get('keywords') or []

    author_score = evaluate_authors(control['authors'], model_authors)
    title_score = evaluate_string(control['title'], model_title)
    abstract_score = evaluate_string(control['abstract'], model_abstract)
    keyword_score = evaluate_keywords(control['keywords'], model_keywords)

    if debug:
        report = (
            ('Author', 'Authors', author_score, control['authors'], model_authors),
            ('Title', 'Title', title_score, control['title'], model_title),
            ('Abstract', 'Abstract', abstract_score, control['abstract'], model_abstract),
            ('Keyword', 'Keywords', keyword_score, control['keywords'], model_keywords),
        )
        for label, heading, score, expected, actual in report:
            print(f'{label} Score:', score)
            if score != 1.0:
                print(f"Expected {heading}:", expected)
                print(f"Model {heading}:", actual)

    return (author_score + title_score + abstract_score + keyword_score) / 4


# Count of records per 10-point score bucket. The usual buckets are listed up
# front so they always appear in the summary; any other bucket is added the
# first time a score lands in it.
histogram = {'100': 0, '90': 0, '80': 0, '70': 0, '60': 0, '50': 0}
count = 0
with open('records.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # The CSV stores full OpenAlex URLs; the result files are named by bare ID.
        work_id = row['OpenAlex ID'].removeprefix('https://openalex.org/')
        print(work_id)
        score = evaluate_result(f'{work_id}.json', debug=False)
        print('Total Score:', score)

        # Round down to the nearest 10%. Scores outside the predeclared
        # buckets used to raise KeyError here.
        bucket = str(math.floor(score * 10) * 10)
        histogram[bucket] = histogram.get(bucket, 0) + 1
        count += 1
        print("\n")

print("Total:", count)
for key in sorted(histogram, key=int, reverse=True):
    value = histogram[key]
    # Guard against an empty CSV, and format the share directly as a
    # percentage so float noise (e.g. 28.000000000000004) is not printed.
    share = value / count if count else 0.0
    print(key, value, f"({share:.0%})")



W2988715931
Total Score: 0.7448812448812449


W3202546816
Total Score: 0.75


W4212832069
Total Score: 1.0


W2901173781
Total Score: 0.9974132684114425


W3013783484
Total Score: 0.75


W4226140866
Total Score: 0.8037461442066705


W4399283731
Total Score: 1.0


W3126527592
Total Score: 1.0


W3093048371
Total Score: 0.5


W4205859241
Total Score: 0.9844097651421508


W4322154575
Total Score: 0.7255590230381171


W4297341531
Total Score: 0.9987632508833922


W4393156923
Total Score: 0.8223949337938975


W3000588783
Total Score: 0.9734127579335319


W3091235616
Total Score: 0.75


W4296103990
Total Score: 0.9998223169864962


W3170856023
Total Score: 1.0


W3164284701
Total Score: 0.71625


W3183339884
Total Score: 0.9994720168954594


W4393028888
Total Score: 0.7401041666666667


W4394813096
Total Score: 0.749766355140187


W4304614191
Total Score: 0.9998302783435167


W4386128506
Total Score: 0.9937366691646972


W4376255784
Total Score: 0.9997217584863661


W3177095392
Total Score: 