In [27]:
import json
import random
from os import listdir
from os.path import isfile, join
from IPython.display import clear_output

In [28]:
class Sample:
    '''
    One prompt with three candidate completions and an optional human ranking.

    The completions are stored under the labels "truth", "random" and
    "sentiment". `rank` is None until the sample has been ranked; afterwards it
    is a list of those three labels in the order of preference the user typed
    (best first).
    '''
    help_text = '\n'.join([
        'Type A, B, and C in order of preference.',
        'For example, if you think B is the best and A is the worst,',
        'type "bac" or "BAC".',
    ])

    def __init__(self, prompt, truth, random, sentiment, rank=None):
        self.prompt = prompt
        self.truth = truth
        self.random = random
        self.sentiment = sentiment
        self.rank = rank

    @staticmethod
    def load_file(path):
        '''
        Create a list of Samples from a json file.
        The json should be either a single dict or a list of dicts.
        Each dict must contain the keys "prompt", "truth", "random", and "sentiment".
        '''
        with open(path) as f:
            j = json.load(f)
        if isinstance(j, dict):
            j = [j]
        return [Sample.load_dict(d) for d in j]

    @staticmethod
    def load_dict(d):
        '''
        Create a Sample from a dict.
        The keys "prompt", "truth", "random" and "sentiment" are required;
        "rank" is optional and defaults to None.
        '''
        return Sample(d['prompt'], d['truth'], d['random'], d['sentiment'], d.get('rank'))

    def evaluate(self):
        '''
        Show the prompt and the three completions, then ask the user to rank them.

        The completions are displayed as A, B and C in shuffled order, so the
        letter does not reveal which completion is which. After a valid answer,
        `self.rank` holds the labels "truth", "random" and "sentiment" in the
        order the user ranked them. If the user quits, `self.rank` is left
        untouched.

        Answers are stripped of surrounding whitespace and are case-insensitive,
        including the help and quit commands. Always returns None.
        '''
        # Same order as `completions` below, so one index addresses both.
        labels = ['truth', 'random', 'sentiment']

        def print_header(text=None, c='='):
            # A full-width rule, or `text` centred between two runs of `c`.
            if not text:
                print(c * 62)
                return
            left = max(30 - len(text) // 2, 10)
            right = max(30 - (len(text) + 1) // 2, 10)
            print(c * left, text, c * right)

        clear_output()
        print_header('Prompt')
        print(self.prompt)
        completions = [self.truth, self.random, self.sentiment]
        indices = list('ABC')
        random.shuffle(indices)
        # Displayed letter -> index into `completions` / `labels`.
        mapping = {letter: ci for ci, letter in enumerate(indices)}
        for letter in 'ABC':
            print_header(f'Completion {letter}', c='-')
            print(completions[mapping[letter]])

        while True:
            print_header(c='-')
            answer = input('Rank samples ([h]elp, [q]uit) >>> ').strip()
            command = answer.lower()
            if command.startswith('h'):
                print(self.help_text)
                continue
            if command.startswith('q'):
                return None
            letters = answer.upper()
            # A valid answer uses each of A, B and C exactly once.
            if len(letters) == 3 and set(letters) == {'A', 'B', 'C'}:
                break
            print('Invalid rank, try again.')

        self.rank = [labels[mapping[letter]] for letter in letters]

    def __str__(self):
        '''
        Serialize the sample as indented json; "rank" is included only when set.
        '''
        j = {
            'prompt': self.prompt,
            'truth': self.truth,
            'random': self.random,
            'sentiment': self.sentiment,
        }
        if self.rank:
            j['rank'] = self.rank
        return json.dumps(j, indent=4)

In [37]:
def evaluate_movie(src, test_count):
    '''
    Interactively rank the completions of one movie's test cases.

    `src` is the path of a json file that holds, for every test number n from
    1 to `test_count`, the keys "test<n>_input" and "test<n>_true". For each
    test the last 200 characters of the input are shown as the prompt and the
    first 200 characters of the true continuation as the "truth" completion.
    The "random" and "sentiment" completions are fixed placeholder strings.

    Returns a comma-separated string with three numbers per test, in test
    order: the 1-based rank the user gave to the truth, random and sentiment
    completions respectively. For example "1,3,2" means truth was ranked
    first, sentiment second and random third. With `test_count` 0 the result
    is an empty string.

    Raises RuntimeError if the user quits instead of ranking a sample.
    '''
    # Column order of the numbers emitted for each test.
    labels = ('truth', 'random', 'sentiment')

    with open(src, "r") as f:
        j = json.load(f)

    numbered_ranks = []
    for i in range(test_count):
        key = 'test' + str(i + 1)
        t_prompt = j[key + '_input'][-200:]
        t_true = j[key + '_true'][0:200]
        # Placeholders until the generated completions are part of the file:
        #t_random = j[key + '_random']
        #t_sentiment = j[key + '_sentiment']
        t_random = "sample_random"
        t_sentiment = "sample_sentiment"

        s = Sample(t_prompt, t_true, t_random, t_sentiment)
        s.evaluate()
        if s.rank is None:
            # evaluate() leaves rank unset when the user chooses to quit.
            raise RuntimeError('Evaluation of ' + str(src) + ' aborted by user.')

        # s.rank lists the labels best-first, so a label's position in it
        # (plus one) is the rank that completion received.
        numbered_ranks.extend(str(s.rank.index(label) + 1) for label in labels)

    return ','.join(numbered_ranks)

def evaluate_entire_dir(srcpath, destpath, test_count=3):
    '''
    Evaluate every json file in `srcpath` and write the results to
    "evals.txt" inside `destpath`.

    Each ".json" file directly inside `srcpath` is passed to evaluate_movie
    together with `test_count`. Files are processed in sorted name order;
    sub-directories and files with another extension are skipped. The output
    has one line per file: the file name without its ".json" suffix, a comma,
    and the string returned by evaluate_movie.

    `destpath` is created if it does not exist. This happens before any
    evaluation starts, so an unusable destination is reported up front and
    not after all samples have been ranked by hand.
    '''
    from os import makedirs

    suffix = '.json'
    makedirs(destpath, exist_ok=True)
    dest = join(destpath, "evals.txt")

    lines = []
    for name in sorted(listdir(srcpath)):
        src = join(srcpath, name)
        if not isfile(src) or not name.lower().endswith(suffix):
            continue
        movie = name[:-len(suffix)]
        lines.append(movie + ',' + evaluate_movie(src, test_count) + '\n')

    with open(dest, "w") as out:
        out.write(''.join(lines))

In [38]:
# Interactively rank the test cases found in ./temp_test_cases/ and write the
# collected ranks to ./evaluations/evals.txt.
evaluate_entire_dir("./temp_test_cases/", "./evaluations/")

# Manual smoke test (disabled): build a Sample by hand and rank it.
#s = Sample('the prompt', 'the truth', 'the random', 'the sentiment')
#s.evaluate()

# Round-trip check (disabled): serialize the sample to json and load it back.
#print(Sample.load_dict(json.loads(str(s))))

ave-the-world project, vain or not, is worth investing in, whatever the return. I could try, Mr. Daggett, but you understand only money and the power you think it buys, so why waste my time, indeed? 

------------------------ Completion A ------------------------
sample_sentiment
------------------------ Completion B ------------------------
INT. DRAWING ROOM, EAST WING, WAYNE MANOR - CONTINUOUS 
Close on the dinner tray. We hear a labored step approach. Bruce Wayne appears, leaning on a cane. Gaunt. Grey temples. He lifts the lid of his 
------------------------ Completion C ------------------------
sample_random
--------------------------------------------------------------


Rank samples ([h]elp, [q]uit) >>>  bac
