This repository has been archived by the owner on Sep 12, 2023. It is now read-only.

✅ Update tests for v0.3
Iacopo Poli committed Jan 19, 2022
1 parent 5552b65 commit f630b0b
Showing 3 changed files with 45 additions and 36 deletions.
31 changes: 17 additions & 14 deletions tests/create.py
@@ -7,25 +7,26 @@
class TestCreateEndpoint(unittest.TestCase):
def test_single_prompt(self):
# check types and single input
- output_keys = {'execution_metadata', 'input_text', 'completions'}
+ output_keys = {'input_text', 'completions'}
creator = lightonmuse.Create("orion-fr")
sentence = "C'est quand même un truc magique, se dit le livreur, que d'avoir toujours"
n_tokens = 16
outputs, cost, rid = creator(sentence, n_tokens=n_tokens, seed=0, return_logprobs=True)
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == 1, f"`len(outputs) = {len(outputs)}` despite single input."
- assert cost == n_tokens, f"`cost={cost}` despite {n_tokens} tokens requested."
+ assert cost['orion-fr@default']['total_tokens_generated'] == n_tokens, \
+     f"`cost={cost['orion-fr@default']['total_tokens_generated']}` despite {n_tokens} tokens requested."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert output_keys == \
outputs[0].keys(), f"Set of keys is different than expected. Expected {output_keys}" \
f"got {outputs[0].keys()} instead."
assert outputs[0]["input_text"] == sentence, f"`text` field in `outputs` does not match the" \
f" input sentence."
f" input sentence."

# check scores logic
completion = outputs[0]["completions"][0]
- score, normalized_score = completion["score"], completion["normalized_score"]
- token_scores = [list(element.values())[0] for element in completion["token_scores"]]
+ score, normalized_score = completion["score"]["logprob"], completion["score"]["normalized_logprob"]
+ token_scores = [list(element.values())[0] for element in completion["score"]["token_logprobs"]]
n_tokens = len(token_scores)
assert len(token_scores) == n_tokens, f"Generated ({len(token_scores)}) but" \
f"requested {n_tokens} tokens."
@@ -46,8 +47,9 @@ def test_multiple_prompts(self):
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == len(sentence_list), f"`len(outputs) = {len(outputs)}` despite " \
f"{len(sentence_list)} prompts."
- assert cost == n_tokens*len(sentence_list), f"`cost={cost}` despite {n_tokens} tokens for " \
-     f"{len(sentence_list)} prompts requested."
+ assert cost['orion-fr@default']['total_tokens_generated'] == n_tokens*len(sentence_list), \
+     f"`cost={cost['orion-fr@default']['total_tokens_generated']}` despite {n_tokens} tokens for " \
+     f"{len(sentence_list)} prompts requested."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert outputs[1]["input_text"] == sentence_list[1], f"`text` field in `outputs` does not " \
f"match the input sentence."
@@ -58,11 +60,11 @@ def test_multiple_outputs(self):
n_tokens, n_completions, n_best = 16, 4, 2
outputs, cost, rid = creator(sentence, n_tokens=n_tokens, seed=0,
n_completions=n_completions, n_best=n_best)
- assert len(outputs[0]["completions"]) == n_best, f"Returned " \
-     f"{len(outputs[0]['completions'])}" \
-     f"completions instead of {n_best}."
- assert cost == n_tokens * n_completions, f"Cost={cost} despite asking for {n_tokens} " \
-     f"tokens for {n_completions} completions."
+ assert len(outputs[0]["completions"]) == n_best, f"Returned {len(outputs[0]['completions'])}" \
+     f"completions instead of {n_best}."
+ assert cost['orion-fr@default']["total_tokens_generated"] == n_tokens * n_completions, \
+     f"Cost={cost['orion-fr@default']['total_tokens_generated']} despite asking for {n_tokens} " \
+     f"tokens for {n_completions} completions."

def test_control(self):
# word bias
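
The `test_multiple_outputs` hunk encodes the v0.3 accounting rule: the API generates `n_completions` candidates, returns only the `n_best` highest-scoring ones, and bills for every generated token. A short sketch under those assumptions:

```python
import lightonmuse

# n_completions candidates are generated and billed; only n_best are returned.
creator = lightonmuse.Create("orion-fr")
outputs, cost, _ = creator("Il était une fois", n_tokens=16, seed=0,
                           n_completions=4, n_best=2)
assert len(outputs[0]["completions"]) == 2  # the n_best survivors
assert cost["orion-fr@default"]["total_tokens_generated"] == 16 * 4
```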
@@ -92,7 +94,8 @@ def test_control(self):
concat_prompt=True)
assert outputs[0]["completions"][0]["output_text"][-2:] == "et", f"Completion does not " \
f"end with stopword"
- assert cost < n_tokens, f"Cost is higher than expected given generation ended at stopword."
+ assert cost["orion-fr@default"]["total_tokens_generated"] < n_tokens, f"Cost is higher than expected given " \
+     f"that generation ended at stopword."

def test_utilities(self):
# check reproducibility with the seed
@@ -107,7 +110,7 @@ def test_utilities(self):

# check tokens scores
outputs, _, _ = creator(sentence, n_tokens=n_tokens, seed=0, return_logprobs=False)
- token_scores = outputs[0]["completions"][0]["token_scores"]
+ token_scores = outputs[0]["completions"][0]['score']["token_logprobs"]
assert token_scores is None, f"Token scores is not None, despite not asking for logprobs."

# check concat_prompt
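
The last hunk also pins down the v0.3 behaviour when log-probabilities are not requested: the nested `token_logprobs` field is still present but set to `None`. A minimal sketch, mirroring the updated assertion:

```python
import lightonmuse

# Without return_logprobs, v0.3 still nests scores but leaves token_logprobs as None.
creator = lightonmuse.Create("orion-fr")
outputs, _, _ = creator("Il était une fois", n_tokens=16, seed=0,
                        return_logprobs=False)
assert outputs[0]["completions"][0]["score"]["token_logprobs"] is None
```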
3 changes: 2 additions & 1 deletion tests/fail.py
@@ -7,7 +7,7 @@

single_input_endpoints = [lightonmuse.Analyse,
lightonmuse.Create,
- lightonmuse.Represent]
+ lightonmuse.Embed]
double_input_endpoints = [lightonmuse.Compare,
lightonmuse.Select]

@@ -59,5 +59,6 @@ def test_prompt_too_long(self):
f"did not raise message about input" \
f"too long."


if __name__ == '__main__':
unittest.main()
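
The only substantive change in `tests/fail.py` tracks the endpoint rename: `Represent` becomes `Embed` in v0.3. Migrating caller code is a one-line swap:

```python
import lightonmuse

# before v0.3: embedder = lightonmuse.Represent("orion-fr")
embedder = lightonmuse.Embed("orion-fr")
outputs, cost, rid = embedder("Je voudrais un café.")
```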
47 changes: 26 additions & 21 deletions tests/understand.py
@@ -7,21 +7,22 @@
class TestUnderstandEndpoints(unittest.TestCase):
def test_analyse(self):
# check types and single input
- output_keys = {'execution_metadata', 'text', 'score', 'normalized_score', 'token_scores'}
+ output_keys = {'execution_metadata', 'text', 'score'}
analyser = lightonmuse.Analyse("orion-fr")
sentence = "Je voudrais un café et deux croissants, s'il vous plait."
outputs, cost, rid = analyser(sentence)
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == 1, f"`len(outputs) = {len(outputs)}` despite single input."
- assert cost == 1, f"`cost={cost}` despite single Analyse call."
+ assert cost["orion-fr@default"]["batch_size"] == 1, f"`batch_size={cost['orion-fr@default']['batch_size']}` " \
+     f"despite single Analyse call."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert output_keys == \
outputs[0].keys(), f"Set of keys is different than expected. Expected {output_keys}" \
f"got {outputs[0].keys()} instead."
assert outputs[0]["text"] == sentence, f"`text` field in `outputs` does not match the" \
f" input sentence."
- score, normalized_score = outputs[0]["score"], outputs[0]["normalized_score"]
- n_tokens = len(outputs[0]["token_scores"])
+ score, normalized_score = outputs[0]["score"]["logprob"], outputs[0]["score"]["normalized_logprob"]
+ n_tokens = len(outputs[0]["score"]["token_logprobs"])
assert score <= 0., f"Detected score > 0. This would give probability={math.exp(score)} " \
f"that is greater than 1."
assert math.isclose(normalized_score, score/n_tokens), f"Normalized score isn't close to " \
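
For `Analyse`, the updated assertions read the cost as a batch size rather than a token count, and pull all scores from the nested `score` object. A sketch of the v0.3 shape as this test consumes it (field names come from the assertions; the exact payload may differ):

```python
import lightonmuse

# v0.3 Analyse: cost reports batch_size per model@deployment,
# and scores are nested under `score`.
analyser = lightonmuse.Analyse("orion-fr")
outputs, cost, rid = analyser("Je voudrais un café.")
assert cost["orion-fr@default"]["batch_size"] == 1
score = outputs[0]["score"]
logprob = score["logprob"]           # total log-probability, <= 0
per_token = score["token_logprobs"]  # list of {token: logprob}
```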
@@ -34,31 +35,32 @@ def test_analyse(self):
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == len(sentence_list), f"`len(outputs) = {len(outputs)}` despite " \
f"len(input)={len(sentence_list)}"
- assert cost == len(sentence_list), f"`cost={cost}` despite len(input)={len(sentence_list)}"
+ assert cost["orion-fr@default"]["batch_size"] == len(sentence_list), \
+     f"`cost={cost['orion-fr@default']['batch_size']}` despite len(input)={len(sentence_list)}"
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."

# check correct functioning
message = "The unlikely sentence is more likely than the normal one."
assert outputs[0]["normalized_score"] > outputs[1]["normalized_score"], f"{message}"
token_scores = [list(element.values())[0] for element in outputs[1]["token_scores"]]
assert outputs[0]["score"]["normalized_logprob"] > outputs[1]["score"]["normalized_logprob"], f"{message}"
token_scores = [list(element.values())[0] for element in outputs[1]["score"]["token_logprobs"]]
message = f"The most unlikely token is different than expected."
assert math.isclose(min(token_scores), token_scores[-1]), f"{message}"

- def test_represent(self):
+ def test_embed(self):
# check types and single input
- # TODO: update this when `txt`->`text` upstream
- output_keys = {'execution_metadata', 'txt', 'embedding'}
- representer = lightonmuse.Represent("orion-fr")
+ output_keys = {'execution_metadata', 'text', 'embedding'}
+ representer = lightonmuse.Embed("orion-fr")
sentence = "Je voudrais un café et deux croissants, s'il vous plait."
outputs, cost, rid = representer(sentence)
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == 1, f"`len(outputs) = {len(outputs)}` despite single input."
- assert cost == 1, f"`cost={cost}` despite single Represent call."
+ assert cost['orion-fr@default']['batch_size'] == 1, \
+     f"`cost={cost['orion-fr@default']['batch_size']}` despite single Represent call."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert output_keys == \
outputs[0].keys(), f"Set of keys is different than expected. Expected {output_keys}" \
f"got {outputs[0].keys()} instead."
assert outputs[0]["txt"] == sentence, f"`txt` field in `outputs` does not match the" \
assert outputs[0]["text"] == sentence, f"`text` field in `outputs` does not match the" \
f" input sentence."
embedding = outputs[0]["embedding"]
assert isinstance(embedding, list)
@@ -71,7 +73,8 @@ def test_represent(self):
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == len(sentence_list), f"`len(outputs) = {len(outputs)}` despite " \
f"len(input)={len(sentence_list)}"
- assert cost == len(sentence_list), f"`cost={cost}` despite len(input)={len(sentence_list)}"
+ assert cost["orion-fr@default"]["batch_size"] == len(sentence_list), \
+     f"`batch size={cost['orion-fr@default']['batch_size']}` despite len(input)={len(sentence_list)}"
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."

first_embedding, second_embedding = outputs[0]["embedding"], outputs[1]["embedding"]
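
The test only checks that the two embeddings differ; a natural follow-up when working with `Embed` outputs is a similarity measure. The helper below is hypothetical (not part of lightonmuse), shown only to illustrate how the returned lists of floats can be compared:

```python
import math

def cosine_similarity(u, v):
    # Plain-Python cosine similarity between two embedding vectors,
    # e.g. the first_embedding/second_embedding read out above.
    dot = sum(a * b for a, b in zip(u, v))
    norm_u = math.sqrt(sum(a * a for a in u))
    norm_v = math.sqrt(sum(b * b for b in v))
    return dot / (norm_u * norm_v)

print(cosine_similarity([1.0, 0.0], [0.7, 0.7]))  # toy vectors; ~0.707
```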
@@ -89,7 +92,8 @@ def test_select(self):
outputs, cost, rid = selecter(reference, candidates)
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == 1, f"`len(outputs) = {len(outputs)}` despite single reference."
- assert cost == len(candidates), f"`cost={cost}` despite {candidates} candidates."
+ assert cost["orion-fr@default"]["batch_size"] == len(candidates), \
+     f"`batch size={cost['orion-fr@default']['batch_size']}` despite {len(candidates)} candidates."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert output_keys == \
outputs[0].keys(), f"Set of keys is different than expected. Expected " \
@@ -103,9 +107,9 @@ def test_select(self):
assert len(rankings) == len(candidates), f"Got {len(rankings)} elements in rankings " \
f"while {len(candidates)} candidates were given."

- scores = [element["score"] for element in rankings]
- normalized_scores = [element["normalized_score"] for element in rankings]
- n_tokens = [len(element["token_scores"]) for element in rankings]
+ scores = [element["score"]["logprob"] for element in rankings]
+ normalized_scores = [element["score"]["normalized_logprob"] for element in rankings]
+ n_tokens = [len(element["score"]["token_logprobs"]) for element in rankings]
message = f"Normalized score isn't close to score divided by number of tokens."
assert all(math.isclose(ns, s/n)
           for s, ns, n in zip(scores, normalized_scores, n_tokens)), message
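
Put together, the updated `Select` assertions imply each ranking entry carries the same nested score object as `Create` and `Analyse`. A self-contained sketch of picking the best candidate under that assumed shape (values are illustrative only):

```python
# Toy rankings in the assumed v0.3 shape.
rankings = [
    {"score": {"logprob": -12.0, "normalized_logprob": -3.0,
               "token_logprobs": [{"Je": -3.0}] * 4}},
    {"score": {"logprob": -6.0, "normalized_logprob": -1.5,
               "token_logprobs": [{"suis": -1.5}] * 4}},
]
best = max(rankings, key=lambda r: r["score"]["normalized_logprob"])
assert best["score"]["normalized_logprob"] == -1.5
```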
@@ -117,21 +121,22 @@ def test_select(self):
conjunction = "est equivalent à"
outputs, cost, rid = selecter(reference, candidates, conjunction=conjunction)
rankings = outputs[0]["rankings"]
normalized_scores = [element["normalized_score"] for element in rankings]
normalized_scores = [element["score"]["normalized_logprob"] for element in rankings]
best_score_with_conj = max(normalized_scores)
assert best_score_with_conj > best_score_no_conj, f"Conjunction `{conjunction}` does not" \
f"improve the score."

def test_compare(self):
output_keys = {"reference", "similarities", "execution_metadata"}
output_keys = {"reference", "similarities", "best", "execution_metadata"}
comparer = lightonmuse.Compare("orion-fr")
reference = "Je suis content"
correct, wrong, out_of_context = "Je suis heureux", "Je suis triste", "Hello world adhsh"
candidates = [wrong, correct, out_of_context]
outputs, cost, rid = comparer(reference, candidates)
assert isinstance(outputs, list), "`outputs` is not list as expected"
assert len(outputs) == 1, f"`len(outputs) = {len(outputs)}` despite single reference."
- assert cost == len(candidates)+1, f"`cost={cost}` different from {candidates} candidates+1."
+ assert cost['orion-fr@default']['batch_size'] == len(candidates)+1, \
+     f"`batch_size={cost['orion-fr@default']['batch_size']}` different from {candidates} candidates+1."
assert isinstance(rid, str), f"Detected type {type(rid)} for `rid`, expected `str` instead."
assert output_keys == \
outputs[0].keys(), f"Set of keys is different than expected. Expected " \
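
The `Compare` hunk shows v0.3 adding a `best` key next to `reference` and `similarities`, and reporting cost as a batch size of len(candidates)+1 (the candidates plus the reference itself). A minimal sketch of reading the new output, with the shape inferred from the updated `output_keys`:

```python
import lightonmuse

comparer = lightonmuse.Compare("orion-fr")
outputs, cost, rid = comparer("Je suis content",
                              ["Je suis heureux", "Je suis triste"])
best = outputs[0]["best"]                  # new in v0.3
similarities = outputs[0]["similarities"]
assert cost["orion-fr@default"]["batch_size"] == 2 + 1  # candidates + reference
```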
