## Data Loading

In [2]:
from IPython.display import display
from PIL import Image
from datasets import Dataset
# Root directory of the training data.
TRAIN_TEXT = "/bohr/train-7ul9/v2"

# Read the hint table from disk, then index it by hint ID so a hint can be
# looked up directly: ID -> {'description': ..., 'icons': ...}.
hint_rows = Dataset.load_from_disk(TRAIN_TEXT + "/dataset/hint_descriptions")
hint_description = {}
for row in hint_rows:
    hint_description[row['ID']] = {
        'description': row['Description'],
        'icons': row['image'],
    }

# Show one example entry (hint ID 7): its icons, then its description.
example_hint = hint_description[7]
display(example_hint['icons'])
print(example_hint['description'])

In [3]:
# Load the take-home validation split from disk. The bare name on the last
# line makes the notebook display the loaded dataset.
validation_data = Dataset.load_from_disk(TRAIN_TEXT + "/dataset/takehome_validation")
validation_data

## Implement keyword guesser

Internet access is allowed in Bohrium, so contestants can download pre-trained models from Hugging Face. However, since the servers are hosted in mainland China, they are subject to internet restrictions that block access to sources like Hugging Face. To circumvent this, we can use hosted mirror servers (which we'll also provide at the on-site round). For Hugging Face, you can set `os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'` to use a mirror that's accessible from Bohrium's servers.

In [4]:
import os
# Point Hugging Face downloads at the hf-mirror.com mirror.
# NOTE(review): huggingface_hub appears to read HF_ENDPOINT when it is first
# imported, and `datasets` is already imported in an earlier cell — confirm this
# assignment still takes effect, or move it above the first Hugging Face import.
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'

In [7]:
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Load the base sentence-embedding model.
# NOTE(review): the leading "/" makes this an absolute filesystem path rather
# than a hub model name — confirm the model really lives at the filesystem root.
model = SentenceTransformer('/all-MiniLM-L6-v2')
# Smoke test: embed two short phrases.
embeddings = model.encode([
    'hello world',
    'fun and games'
])

# Report the shape of the returned embedding array.
print(f"Embedding shape: {embeddings.shape}")

In [45]:
# Save model to the /personal file in Bohrium file
model.save('mymodel1')
!cp mymodel1 /personal

In [115]:


# Short dictionary-style definition for each candidate word, keyed by the
# lower-cased word. `choice_to_doc` appends the definition to the word so the
# embedding model sees more context than the bare word.
#
# Entries whose gloss was empty or described an unrelated sense of the word
# (e.g. "toaster" as a person proposing a toast, "shoes" as "a particular
# situation", "seahorse" described as a walrus) now describe the everyday
# object, and a stray quotation-attribution fragment was removed from "gravity".
# NOTE(review): "queen", "seal", "shell", "drone", "whistle" and "speaker" keep
# their original glosses, which may not be the sense the hints point at —
# confirm against the data.
# NOTE(review): the key "alumunium foil" is kept as spelled because lookups use
# the option text verbatim (lower-cased) — confirm it matches the dataset.
choice_defs = {
    'sunflower': 'any plant of the genus Helianthus having large flower heads with dark disk florets and showy yellow rays',
    'credit card': 'a card (usually plastic) that assures a seller that the person using it has a satisfactory credit rating and that the issuer will see to it that the seller receives payment for the merchandise delivered',
    'dinosaur': 'any of numerous extinct terrestrial reptiles of the Mesozoic era',
    'key': "metal device shaped in such a way that when it is inserted into the appropriate lock the lock's mechanism can be rotated",
    'sundial': 'timepiece that indicates the daylight hours by the shadow that the gnomon casts on a calibrated dial',
    'lawyer': 'a professional person authorized to practice law; conducts lawsuits or gives legal advice',
    'doorbell': 'a push button at an outer door that gives a ringing or buzzing signal when pushed',
    'trash can': 'a bin that holds rubbish until it is collected',
    'crab': 'decapod having eyes on short stalks and a broad flattened carapace with a small abdomen folded under the thorax and pincers',
    'xylophone': 'a percussion instrument with wooden bars tuned to produce a chromatic scale and with resonators; played with small mallets',
    'queen': 'the only fertile female in a colony of social insects such as bees and ants and termites; its function is to lay eggs',
    'ambulance': 'a vehicle that takes people to and from hospitals',
    'space station': 'a manned artificial satellite in a fixed orbit designed for scientific research',
    'wallet': 'a pocket-size case for holding papers and paper money',
    'market': 'the world of commercial activity where goods and services are bought and sold',
    'orchestra': 'a musical organization consisting of a group of instrumentalists including string players',
    'chocolate': 'a food made from roasted ground cacao beans, usually sweetened and eaten as candy',
    'zipper': 'a fastener for locking together two toothed edges by means of a sliding tab',
    'rhinoceros': 'massive powerful herbivorous odd-toed ungulate of southeast Asia and Africa having very thick skin and one or two horns on the snout',
    'fashion': 'the latest and most admired style in clothes and cosmetics and behavior',
    'butterfly': 'diurnal insect typically having a slender body with knobbed antennae and broad colorful wings',
    'truck': 'an automotive vehicle suitable for hauling',
    'palm tree': 'any plant of the family Palmae having an unbranched trunk crowned by large pinnate or palmate leaves',
    'cake': 'baked goods made from or based on a mixture of flour, sugar, eggs, and fat',
    'radio': 'medium for communication',
    'seal': 'fastener consisting of a resinous composition that is plastic when warm; used for sealing documents and parcels and letters',
    'mailbox': 'a private box for delivery of mail',
    'magnifying glass': 'light microscope consisting of a single convex lens that is used to produce an enlarged image',
    'prison': 'a correctional institution where persons are confined while on trial or for punishment',
    'polar bear': 'white bear of Arctic regions',
    'mouse': 'any of numerous small rodents typically resembling diminutive rats having pointed snouts and small ears on elongated bodies with slender usually hairless tails',
    'alumunium foil': 'a thin flexible sheet of aluminum used for wrapping and covering food',
    'harmonica': 'a small rectangular free-reed instrument having a row of free reeds set back in air holes and played by blowing into the desired hole',
    'shell': 'ammunition consisting of a cylindrical metal casing containing an explosive charge and a projectile; fired from a large gun',
    'boxers': 'underpants worn by men',
    'tricycle': 'a vehicle with three wheels that is moved by foot pedals',
    'peacock': 'male peafowl; having a crested head and very large fanlike tail marked with iridescent eyes or spots',
    'kettle': 'a metal pot for stewing or boiling; usually has a lid',
    'mountain': 'a land mass that projects well above its surroundings; higher than a hill',
    'harbor': 'a sheltered port where ships can take on or discharge cargo',
    'coffee': 'a beverage consisting of an infusion of ground coffee beans',
    'fireworks': '(usually plural) a device with an explosive that burns at a low rate and with colored flames; can be used to illuminate areas or send signals etc.',
    'pie': 'dish baked in pastry-lined pan often with a pastry top',
    'gravity': "(physics) the force of attraction between all masses in the universe; especially the attraction of the earth's mass for bodies near its surface",
    'teacher': 'a person whose occupation is teaching',
    'museum': 'a depository for collecting and displaying objects having scientific or historical or artistic value',
    'bedroom': 'a room used primarily for sleeping',
    'robe': 'any loose flowing garment',
    'sunscreen': 'a cream spread on the skin; contains a chemical (as PABA) to filter out ultraviolet light and so protect from sunburn',
    'robot': 'a mechanism that can move automatically',
    'piano': 'a keyboard instrument that is played by depressing keys that cause hammers to strike tuned strings and produce sounds',
    'baker': 'someone who bakes commercially',
    'plankton': 'the aggregate of small plant and animal organisms that float or drift in great numbers in fresh or salt water',
    'scarf': 'a garment worn around the head or neck or shoulders for warmth or decoration',
    'bee': 'any of numerous hairy-bodied insects including social and solitary species',
    'mosquito': 'two-winged insect whose female has a long proboscis to pierce the skin and suck the blood of humans and animals',
    'accountant': 'someone who maintains and audits business accounts',
    'umbrella': 'a lightweight handheld collapsible canopy',
    'janitor': 'someone employed to clean and maintain a building',
    'thief': 'a criminal who takes property belonging to someone else with the intention of keeping it or selling it',
    'parrot': 'usually brightly colored zygodactyl tropical birds with short hooked beaks and the ability to mimic sounds',
    'koala': 'sluggish tailless Australian arboreal marsupial with grey furry ears and coat; feeds on eucalyptus leaves and bark',
    'refrigerator': 'white goods in which food can be stored at low temperatures',
    'drone': 'stingless male bee in a colony of social bees (especially honeybees) whose sole function is to mate with the queen',
    'dining room': 'a room used for dining',
    'soap': 'a cleansing agent made from the salts of vegetable or animal fats',
    'whistle': 'the sound made by something moving rapidly or by steam coming out of a small aperture',
    'bicycle': 'a wheeled vehicle that has two wheels and is moved by foot pedals',
    'train tracks': 'the pair of parallel steel rails on which a train runs',
    'penguin': 'short-legged flightless birds of cold southern especially Antarctic regions having webbed feet and wings modified as flippers',
    'octopus': 'bottom-living cephalopod having a soft oval body with eight long tentacles',
    'hula hoop': 'a plastic hoop that is whirled around the body by movements of the hips',
    'ice skates': 'skate consisting of a boot with a steel blade fitted to the sole',
    'nightmare': 'a terrifying or deeply upsetting dream',
    'diving suit': 'a weighted and hermetically sealed garment supplied with air; worn by underwater divers',
    'horseshoe': "U-shaped plate nailed to the underside of a horse's hoof",
    'dynamite': 'an explosive containing nitrate sensitized with nitroglycerin absorbed on wood pulp',
    'surfboard': 'a narrow buoyant board for riding surf',
    'toaster': 'a kitchen appliance (usually electric) for toasting bread',
    'gloves': 'handwear that covers the hand and wrist, with a separate sheath for each finger',
    'broom': 'a cleaning implement for sweeping; bundle of straws or twigs attached to a long handle',
    'postal worker': 'a person employed by the postal service to sort and deliver mail',
    'lipstick': 'makeup that is used to color the lips',
    'sewing machine': 'a textile machine used as a home appliance for sewing',
    'salad': 'food mixtures either arranged on a plate or tossed and served with a moist dressing; usually consisting of or including greens',
    'dam': 'a barrier constructed to contain the flow of water or to keep out the sea',
    'pool': 'an excavation that is (usually) filled with water',
    'fertilizer': 'any substance such as manure or a mixture of nitrates used to make soil more fertile',
    'shovel': 'a hand tool for lifting loose material; consists of a curved container or scoop and a handle',
    'speaker': 'someone who expresses in language; someone who talks (especially someone who delivers a public speech or someone especially garrulous)',
    'seahorse': 'small fish with horse-like heads bent sharply downward and curled tails; swim in upright position',
    'submarine': 'a submersible warship usually armed with torpedoes',
    'pig': 'domestic swine',
    'mango': 'large oval tropical fruit having smooth skin, juicy aromatic pulp, and a large hairy seed',
    'fire station': 'a station housing fire apparatus and firemen',
    'ping-pong': 'a game (trademark Ping-Pong) resembling tennis but played on a table with paddles and a light hollow ball',
    'hotel': 'a building where travelers can pay for lodging and meals and other services',
    'carpet': 'floor covering consisting of a piece of thick heavy fabric (usually with nap or pile)',
    'shoes': 'footwear shaped to fit the foot (below the ankle) with a flexible upper of leather or plastic and a sole and heel of heavier material',
    'parachute': 'rescue equipment consisting of a device that fills with air and retards your fall',
}

In [102]:

def hints_to_sentence(hints: list[int]) -> str:
  """Render a list of hint IDs as one tagged prompt string.

  The first hint's description is wrapped in <HINT_PRIMARY> tags and every
  following one in <HINT> tags. Descriptions are looked up in the global
  `hint_description` mapping.

  NOTE(review): with exactly one hint the result ends in an opened but never
  closed "<HINT>", and with no hints "<HINT_PRIMARY>" is never closed. The
  output is kept as is because trained models and the submission script
  depend on the exact string — confirm before changing it.
  """
  last_position = len(hints) - 1
  parts = ["The following hints at our target word:\n<HINT_PRIMARY>\n"]
  for position, hint_id in enumerate(hints):
    parts.append(f"{hint_description[hint_id]['description']}")
    if position == 0:
      # Close the primary hint and open the first secondary one.
      separator = "\n</HINT_PRIMARY>\n<HINT>\n"
    elif position < last_position:
      separator = "\n</HINT>\n<HINT>\n"
    else:
      separator = "\n</HINT>"
    parts.append(separator)
  return "".join(parts)


def choice_to_doc(choice: str) -> str:
  """Turn a candidate word into the document text used for embedding.

  The definition is looked up in `choice_defs` by the lower-cased word; a
  word without an entry gets an empty definition.
  """
  gloss = choice_defs.get(choice.lower(), "")
  return f"Our target word: {choice}: {gloss}"

# Preview the prompt built for hint IDs 1, 2 and 3.
print(hints_to_sentence([1, 2, 3]))

In [103]:
# Fine-tune the model

from sentence_transformers import SentenceTransformer, InputExample, losses
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

import os
os.environ["WANDB_DISABLED"] = "true"


# Build one positive pair per example: (hint prompt, document for the gold word).
# NOTE(review): the pairs come from `validation_data`, which later cells also
# score against, so the reported validation score is measured on data the model
# was trained on — hold out a split for an unbiased number.
train_examples = []
for val in validation_data:
  train_examples.append(
      InputExample(
          texts=[hints_to_sentence(val['hints']), choice_to_doc(val['label'])],
          # CosineSimilarityLoss regresses the cosine similarity onto a float
          # target, so use 1.0 rather than the int 1 (matches the later stage).
          label=1.0,
      )
  )


# Create DataLoader
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=2)


# Define loss function
# NOTE(review): every pair in this stage is labelled 1.0, so the loss only pulls
# pairs together and never pushes wrong words away; negatives are only added in
# the later hard-negative stage.
train_loss = losses.CosineSimilarityLoss(model)

# Fine-tune `model` in place and write the result to ./model.
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=1,
    warmup_steps=5,
    output_path='./model',
    optimizer_params={'lr': 1e-6},
    weight_decay=0.01,
    save_best_model=True,
    show_progress_bar=True
)

In [104]:
# "./model" is the output_path the first fine-tuning run wrote to.
ft_model_loaded = SentenceTransformer("./model") # Load fine-tuned model

def find_most_similar(query, sentences, model, top_k=10):
    """Rank `sentences` by cosine similarity to `query`.

    Both are embedded with `model.encode`. Returns a list of at most `top_k`
    dicts, most similar first, each with the keys 'sentence' and 'similarity'.
    """
    query_vector = model.encode([query])
    candidate_vectors = model.encode(sentences)

    # Row 0 holds the similarity of the single query to every candidate.
    scores = cosine_similarity(query_vector, candidate_vectors)[0]

    # argsort is ascending, so reverse it before keeping the first top_k.
    best_first = np.argsort(scores)[::-1][:top_k]

    return [
        {'sentence': sentences[position], 'similarity': scores[position]}
        for position in best_first
    ]

def guess_words(hints: list[int], choices: list[str]) -> list[str]:
  """Return the candidate words ranked from most to least likely.

  The hints are rendered with `hints_to_sentence` and compared against the
  candidates using the global `ft_model_loaded`; at most the ten best
  candidates are returned (the default `top_k` of `find_most_similar`).

  NOTE(review): candidates are embedded as bare words here, while the
  fine-tuning cells train on "Our target word: ..." documents — confirm the
  mismatch is intended.
  """
  prompt = hints_to_sentence(hints)
  ranked = find_most_similar(prompt, choices, ft_model_loaded)
  return [entry['sentence'] for entry in ranked]

In [105]:
import math

def score(guesses: list[str], gold: str):
    """Score a ranked guess list against the gold word, case-insensitively.

    Only the first ten guesses count. Returns a dict with:
      - "hits@10": 1.0 if the gold word is among them, else 0.0
      - "ndcg@10": 1 / log2(rank + 2) for the 0-based rank of the first match,
        else 0.0
      - "total_score": 0.9 * hits@10 + 0.1 * ndcg@10
    """
    top_ten = [guess.lower() for guess in guesses[:10]]
    target = gold.lower()

    hit = 0.0
    gain = 0.0
    if target in top_ten:
        position = top_ten.index(target)  # 0-based rank of the first match
        hit = 1.0
        gain = 1.0 / math.log2(position + 2)

    return {
        "hits@10": hit,
        "ndcg@10": gain,
        "total_score": 0.9 * hit + 0.1 * gain,
    }

# Sanity check: the gold word sits at 0-based rank 8 of the ten guesses.
print(score(['cat', 'dog', 'tree', 'flower', 'rock', 'water', 'fried rice', 'airplane', 'cactus', 'tiger'], gold='cactus'))

In [106]:
from tqdm.notebook import tqdm

# Score the current model on the validation set: rank the options of every
# example, keep the rankings in `guesses`, and accumulate the per-example score.
guesses = []
total_scores = 0.0
for example in tqdm(validation_data):
    ranked_options = guess_words(example['hints'], example['options'])
    guesses.append(ranked_options)
    example_result = score(ranked_options, example['label'])
    total_scores += example_result['total_score']

average_score = total_scores / len(validation_data)
print(f"Average validation score: {average_score}")

In [107]:
from sentence_transformers import InputExample

def mine_hard_negatives(validation_data, model, top_k=5):
    """Collect, for every example, the wrong options the model ranks highest.

    Args:
        validation_data: iterable of examples with 'hints', 'label' and
            'options' entries.
        model: object whose `encode` method embeds a list of strings.
        top_k: maximum number of hard negatives taken from EACH example.

    Returns:
        A list of `InputExample(texts=[query, "Our target word: <option>"],
        label=0.0)`, at most `top_k` per example.
    """
    hard_negatives = []

    for example in tqdm(validation_data):
        gold = example['label']
        choices = example['options']
        if not choices:
            continue  # nothing to rank for this example

        query = hints_to_sentence(example['hints'])
        query_emb = model.encode([query])[0]
        choice_embs = model.encode(choices)

        # Cosine similarities, ordered from most to least similar.
        sims = cosine_similarity([query_emb], choice_embs)[0]
        sorted_indices = np.argsort(sims)[::-1]

        # Compare case-insensitively, as `score` does, so the gold word is never
        # emitted as a negative just because its casing differs.
        gold_key = gold.lower()
        wrong_choices = [
            choices[idx] for idx in sorted_indices
            if choices[idx].lower() != gold_key
        ]

        # Cap per example. The previous check used the length of the shared
        # output list, so only the first example got `top_k` negatives and every
        # later example got exactly one.
        for choice in wrong_choices[:top_k]:
            hard_negatives.append(
                # Float label: the similarity loss regresses onto a float target.
                InputExample(texts=[query, f"Our target word: {choice}"], label=0.0)
            )

    return hard_negatives

In [108]:
def build_training_dataset(validation_data, model):
    """Assemble the second-stage training set.

    One positive pair (hint prompt, "Our target word: <label>") with label 1.0
    is created per example, followed by the hard negatives that
    `mine_hard_negatives` finds with `model`.
    """
    positives = [
        InputExample(
            texts=[hints_to_sentence(item['hints']), f"Our target word: {item['label']}"],
            label=1.0,
        )
        for item in validation_data
    ]

    # Positives first, then the mined negatives.
    negatives = mine_hard_negatives(validation_data, model)
    return positives + negatives

In [109]:
from torch.utils.data import DataLoader
from sentence_transformers import losses

# Second fine-tuning stage: positives plus mined hard negatives, built with the
# model produced by the first stage.
train_examples = build_training_dataset(validation_data, ft_model_loaded)
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=4)

train_loss = losses.CosineSimilarityLoss(model=ft_model_loaded)

# No optimizer_params are passed here (the first stage set lr=1e-6 explicitly).
# The result is written to ./model_with_hard_negatives.
ft_model_loaded.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=2,
    warmup_steps=10,
    output_path='./model_with_hard_negatives',
    show_progress_bar=True
)


In [110]:
# Reload the second-stage model from its output_path; `guess_words` reads this
# global, so the scoring below uses the reloaded model.
ft_model_loaded = SentenceTransformer('./model_with_hard_negatives')

In [111]:
# Re-score the validation set with the model currently bound to
# `ft_model_loaded`, keeping every ranking in `guesses`.
guesses = []
total_scores = 0.0
for example in tqdm(validation_data):
    ranked_options = guess_words(example['hints'], example['options'])
    guesses.append(ranked_options)
    example_result = score(ranked_options, example['label'])
    total_scores += example_result['total_score']

average_score = total_scores / len(validation_data)
print(f"Average validation score: {average_score}")


In [113]:
# Save the final model and copy its directory (recursively) to /personal in Bohrium.
ft_model_loaded.save('mymodel2')
!cp -r mymodel2 /personal

## Model Submission

In [ ]:
# Source of the standalone inference module that gets submitted.
#
# It is kept as a template: the `__CHOICE_DEFS__` placeholder is replaced with
# `repr(choice_defs)`, so the submission always carries the notebook's current
# dictionary instead of a second pasted copy that can drift out of sync.
#
# The generated script previously pointed TRAIN_TEXT at the saved model
# directory and then loaded "<model dir>/dataset/hint_descriptions", which a
# saved SentenceTransformer directory does not contain. The dataset root and the
# model path are now separate constants.
# NOTE(review): confirm both /bohr paths are the ones mounted in the grading
# environment.
SUBMISSION_TEMPLATE = """
from sentence_transformers import SentenceTransformer
from datasets import Dataset
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Dataset root (hint descriptions) and fine-tuned model directory.
TRAIN_TEXT = "/bohr/train-7ul9/v2"
MODEL_PATH = "/bohr/model2-4bs9/v2/mymodel2/"

hint_description = Dataset.load_from_disk(TRAIN_TEXT + "/dataset/hint_descriptions")
hint_description = {
    x['ID']: {'description': x['Description'], 'icons': x['image']}
    for x in hint_description
}

model = SentenceTransformer(MODEL_PATH)


choice_defs = __CHOICE_DEFS__

def hints_to_sentence(hints: list[int]) -> str:
  sentence = "The following hints at our target word:\\n<HINT_PRIMARY>\\n"
  for i, hint in enumerate(hints):
    sentence += f"{hint_description[hint]['description']}"
    if i == 0:
      sentence += "\\n</HINT_PRIMARY>\\n<HINT>\\n"
    elif i < len(hints) - 1:
      sentence += "\\n</HINT>\\n<HINT>\\n"
    else:
      sentence += "\\n</HINT>"
  return sentence

def choice_to_doc(choice:str)->str:
  definition = choice_defs.get(choice.lower(), "")
  return f"Our target word: {choice}: {definition}"

def find_most_similar(query, sentences, model, top_k=10):
    # Encode query and sentences
    query_embedding = model.encode([query])
    sentence_embeddings = model.encode(sentences)

    # Calculate similarities
    similarities = cosine_similarity(query_embedding, sentence_embeddings)[0]

    # Get top-k most similar
    top_indices = np.argsort(similarities)[::-1][:top_k]

    results = []
    for idx in top_indices:
        results.append({
            'sentence': sentences[idx],
            'similarity': similarities[idx]
        })

    return results

def guess_words(hints: list[int], choices: list[str]) -> list[str]:
  query = hints_to_sentence(hints)
  results = find_most_similar(query, choices, model)
  return [result['sentence'] for result in results]
"""

# Render the definitions dictionary into the template as a Python literal.
model_code = SUBMISSION_TEMPLATE.replace("__CHOICE_DEFS__", repr(choice_defs))

# Syntax-check the generated module (without executing it) so a broken template
# fails here rather than at grading time.
compile(model_code, "submission_model.py", "exec")

with open("submission_model.py", "w", encoding="utf-8") as f:
  f.write(model_code)

print("Inference code written to submission_model.py")

In [ ]:
import shutil
import os
import tempfile

# Create a temporary directory with your desired structure
with tempfile.TemporaryDirectory() as temp_dir:
    # Copy files to temp directory
    shutil.copy('submission_model.py', temp_dir)
    # NOTE(review): this bundles './model' (the first fine-tuning stage), while
    # submission_model.py loads its model from a /bohr path and the final model
    # was saved to './model_with_hard_negatives' — confirm which one belongs here.
    shutil.copytree('./model', os.path.join(temp_dir, 'model'))
    
    # Create the zip
    # Writes submission.zip in the working directory from the folder's contents.
    shutil.make_archive('submission', 'zip', temp_dir)