In [77]:
import sys
import time

import ollama
import jericho
import re
import json
import random

from sklearn.metrics.pairwise import cosine_similarity

In [78]:
# Directory holding the Jericho game suite, and the story file to load from it.
GAMES_DIR = "z-machine-games-master/jericho-game-suite"
game = 'zork1.z5'
# The story-file path is the directory and file name joined with "/".
env = jericho.FrotzEnv("/".join((GAMES_DIR, game)))

In [79]:
# Start a fresh game and show the first observation the environment returns.
initial_observation, info = env.reset()
print(initial_observation)

# Cut the observation on single spaces, three tokens past the midpoint.
parts = initial_observation.split(' ')
middle = len(parts) // 2
first_half, second_half = (
    ' '.join(chunk) for chunk in (parts[:middle + 3], parts[middle + 3:])
)

# Show both pieces under their labels, separated by a blank line.
print('first', first_half, '', sep='\n')
print('second', second_half, sep='\n')


Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.


first
Copyright (c) 1981, 1982, 1983 Infocom, Inc. All rights reserved.
ZORK is a registered trademark of Infocom, Inc.
Revision 88 / Serial number 840726

West of House
You are

second
standing in an open field west of a white house, with a boarded front door.
There is a small mailbox here.




In [80]:
def embedding(text):
    """Embed ``text`` with the ``nomic-embed-text:latest`` Ollama model.

    Returns the ``embeddings`` attribute of the ``ollama.embed`` response
    unchanged. In this notebook a single string comes back as a nested list
    (one vector inside an outer list), and that value is what gets passed
    to ``cosine_similarity`` by the test helpers.
    """
    return ollama.embed(model='nomic-embed-text:latest', input=text).embeddings

In [81]:
# Reference embedding of the full, true opening text; the test_* helpers
# compare each model's rebuilt text against this value.
actual = embedding(initial_observation)
# Bare expression: displays the whole embedding as the cell output.
actual

[[-0.0014886277,
  0.010068945,
  -0.19945478,
  -0.018023327,
  0.035953607,
  -0.008978469,
  0.0097029405,
  0.058672596,
  0.034784913,
  -0.03094729,
  -0.020580718,
  0.01579939,
  0.069692254,
  -0.005062978,
  0.01126855,
  -0.016463663,
  0.020039767,
  -0.042341523,
  -0.021642724,
  0.02187665,
  0.030637015,
  -0.03917064,
  -0.02898359,
  -0.0057050115,
  0.0804724,
  -0.0043678638,
  0.06926821,
  -0.019397575,
  0.0022302105,
  -0.017359061,
  0.025040606,
  0.02327057,
  -0.04558667,
  0.007455741,
  -0.0145879425,
  -0.046178255,
  0.042500045,
  0.029049922,
  -0.019262258,
  0.027467836,
  0.07635169,
  0.030272393,
  -0.032215085,
  0.011302644,
  0.028227817,
  -0.0006288466,
  -0.026790373,
  0.039220303,
  0.05582576,
  0.003302612,
  0.014946019,
  -0.047161233,
  0.038680255,
  0.005494777,
  -0.030154463,
  -0.018588303,
  -0.012354904,
  -0.0018213715,
  0.019962713,
  0.031198138,
  0.108115435,
  0.1230652,
  0.008889427,
  0.044911023,
  0.045845993,
  -0.

In [82]:
# Instruction sent to every model: the task description, a blank line, then
# the quoted first half followed by a placeholder for the completion.
prompt = (
    'Below is the first half of the very first text prompt given by the text adventure game Zork. '
    'Please complete it exactly as it is given by Zork. '
    'Do not include anything except your completion.\n\n'
    f'"{first_half}" (completion here...)'
)

In [83]:
def test_llama():
    """Run one completion trial with ``llama3.2:3b`` and score it.

    Sends the notebook-level ``prompt`` to the model, appends the reply to
    ``first_half`` (separated by one space), embeds the rebuilt text and
    compares it with the reference embedding ``actual``.

    Returns:
        dict with ``'text'`` (the rebuilt opening) and ``'sim'`` (the
        ``cosine_similarity`` result for the rebuilt text vs. ``actual``).
    """
    completion = ollama.generate(model='llama3.2:3b', prompt=prompt).response
    candidate = ' '.join((first_half, completion))
    score = cosine_similarity(embedding(candidate), actual)
    return dict(text=candidate, sim=score)

In [84]:
def test_qwen():
    """Run one completion trial with ``qwen3`` and score it.

    Sends the notebook-level ``prompt`` to the model with ``num_ctx`` set to
    2048, appends the reply to ``first_half`` (separated by one space),
    embeds the rebuilt text and compares it with the reference embedding
    ``actual``.

    Returns:
        dict with ``'text'`` (the rebuilt opening) and ``'sim'`` (the
        ``cosine_similarity`` result for the rebuilt text vs. ``actual``).
    """
    completion = ollama.generate(
        model='qwen3',
        prompt=prompt,
        options={'num_ctx': 2048},
    ).response
    candidate = ' '.join((first_half, completion))
    score = cosine_similarity(embedding(candidate), actual)
    return dict(text=candidate, sim=score)

In [85]:
def test_gpt():
    """Run one completion trial with ``gpt-oss`` and score it.

    Sends the notebook-level ``prompt`` to the model with ``num_ctx`` set to
    2048, appends the reply to ``first_half`` (separated by one space),
    embeds the rebuilt text and compares it with the reference embedding
    ``actual``.

    Returns:
        dict with ``'text'`` (the rebuilt opening) and ``'sim'`` (the
        ``cosine_similarity`` result for the rebuilt text vs. ``actual``).
    """
    completion = ollama.generate(
        model='gpt-oss',
        prompt=prompt,
        options={'num_ctx': 2048},
    ).response
    candidate = ' '.join((first_half, completion))
    score = cosine_similarity(embedding(candidate), actual)
    return dict(text=candidate, sim=score)

In [86]:
# Collect 30 independent llama trials; each entry is the dict test_llama() returns.
llama_tests = [test_llama() for _trial in range(30)]

In [None]:
# Average over however many trials were actually run, so the mean stays
# correct if the trial count in the cell that builds `llama_tests` changes.
# (An empty list now raises ZeroDivisionError instead of reporting 0.)
mean_llama = sum(x['sim'] for x in llama_tests) / len(llama_tests)
mean_llama

array([[27.97977343]])

In [None]:
# Collect 30 independent qwen trials; each entry is the dict test_qwen() returns.
qwen_tests = [test_qwen() for _trial in range(30)]

In [None]:
# Average over however many trials were actually run, so the mean stays
# correct if the trial count in the cell that builds `qwen_tests` changes.
# (An empty list now raises ZeroDivisionError instead of reporting 0.)
mean_qwen = sum(x['sim'] for x in qwen_tests) / len(qwen_tests)
mean_qwen

In [None]:
# Collect 30 independent gpt-oss trials; each entry is the dict test_gpt() returns.
gpt_tests = [test_gpt() for _trial in range(30)]

In [None]:
# Average over however many trials were actually run, so the mean stays
# correct if the trial count in the cell that builds `gpt_tests` changes.
# (An empty list now raises ZeroDivisionError instead of reporting 0.)
mean_gpt = sum(x['sim'] for x in gpt_tests) / len(gpt_tests)
mean_gpt