# Current working version

In [20]:
import sys, random, uuid, os, string
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
from tqdm import tqdm
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import nltk
nltk.download('wordnet')
nltk.download('names')
nltk.download('gazetteers')
from nltk.corpus import wordnet, names, gazetteers

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package names to /root/nltk_data...
[nltk_data]   Package names is already up-to-date!
[nltk_data] Downloading package gazetteers to /root/nltk_data...
[nltk_data]   Package gazetteers is already up-to-date!


In [2]:
def load_model(model_id):
    model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return model, tokenizer

def get_single_tokens(tokenizer, arr):
  return [a for a in arr if len( tokenizer(a, add_special_tokens=False)["input_ids"] ) == 1 ]

def get_single_token_nouns(tokenizer):
    """get list of nouns from wordnet that are single tokens in the given model"""
    nouns = [lemma.name() for syn in wordnet.all_synsets("n") for lemma in syn.lemmas()]
    nouns = [n for n in nouns if n.isalpha()]
    return get_single_tokens(tokenizer, nouns)

def get_single_token_names(tokenizer):
    """get list of names from nltk names corpus that are single tokens"""
    all_names = names.words('male.txt') + names.words('female.txt')
    return list(set(get_single_tokens(tokenizer, all_names)))


fictional_sci_fi_cities = ["Arrakeen","Coruscant","Theed","Omelas",
                           "Trantor","Giedi Prime","Ankh-Morpork",
                           "Minas Tirith","Caprica City","Terminus"]
real_countries = ["Japan","Brazil","Germany","Canada","Kenya",
                  "Australia","India","Norway","Mexico","South Africa"]

names = ["Ava","Liam","Noah","Emma","Maya",
         "Ethan","Sofia","Lucas","Isabella","Arjun"]

def generate_prompt():
    target_city, distractor_city = np.random.choice(fictional_sci_fi_cities, size=2, replace=False)
    target_country, distractor_country = np.random.choice(real_countries, size=2, replace=False)

    target_name, distractor_name = np.random.choice(names, size=2, replace=False)
    name_city_pairs = [(target_name, target_city), (distractor_name, distractor_city)]
    city_country_pairs = [(target_city, target_country), (distractor_city, distractor_country)]

    np.random.shuffle(name_city_pairs)
    np.random.shuffle(city_country_pairs)

    sentence1 = f"{name_city_pairs[0][0]} is from {name_city_pairs[0][1]}."
    sentence2 = f"{name_city_pairs[1][0]} is from {name_city_pairs[1][1]}."
    sentence3 = f"{city_country_pairs[0][0]} is in {city_country_pairs[0][1]}."
    sentence4 = f"{city_country_pairs[1][0]} is in {city_country_pairs[1][1]}."

    prompt = f"{sentence1} {sentence2} {sentence3} {sentence4} Therefore, {target_name} is from"

    return prompt, target_country, distractor_country, target_city, distractor_city

In [21]:
def x_before_y(text: str, x: str, y: str) -> bool:
    """does string x appear before y?"""
    x_pos = text.find(x)
    y_pos = text.find(y)
    if x_pos == -1:
        return False
    if y_pos == -1:
        return True
    return x_pos < y_pos

def correct_helper(df):
        """Decide if output is correct: is target token first, or does it come before distractor/direct"""
        target_first = np.array([outp.split()[0].translate(str.maketrans('', '', string.punctuation)) == target_country for outp, target_country in zip(df['outp'], df['target_country'])])
        target_before_distractor = np.array([x_before_y(outp, target_country, distractor_country) for outp, target_country, distractor_country in zip(df['outp'], df['target_country'], df['distractor_country'])])
        target_before_direct = np.array([x_before_y(outp, target_country, target_city) for outp, target_country, target_city in zip(df['outp'], df['target_country'], df['target_city'])])
        df['correct'] = target_first | target_before_distractor
        df['direct'] = target_first | target_before_direct
        return df

In [3]:
model, tokenizer = load_model('Qwen/Qwen3-4B')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/726 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Downloading (incomplete total...): 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/398 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

In [56]:
dct = {
    'outp': [],
    'target_country': [],
    'distractor_country': [],
    'target_city': [],
    'distractor_city': []
}
for _ in tqdm(range(100)):
  prompt, target_country, distractor_country, target_city, distractor_city = generate_prompt()
  inputs = tokenizer.encode(prompt, return_tensor='pt')
  outp = model.generate(torch.tensor(inputs).cuda().unsqueeze(0), max_new_tokens=10)
  outp = tokenizer.batch_decode(outp[0][len(inputs):])
  dct['outp'].append(outp)
  dct['target_country'].append(target_country)
  dct['distractor_country'].append(distractor_country)
  dct['target_city'].append(target_city)
  dct['distractor_city'].append(distractor_city)
df = pd.DataFrame(dct)
df['outp'] = [x[0] for x in df['outp']]

df = correct_helper(df)
df.correct.mean()

np.float64(0.69)

# Chris additions


In [53]:
fictional_sci_fi_cities = ["Arrakeen","Coruscant","Theed","Omelas",
                           "Trantor","Giedi Prime","Ankh-Morpork",
                           "Minas Tirith","Caprica City","Terminus"]
real_countries = ["Japan","Brazil","Germany","Canada","Kenya",
                  "Australia","India","Norway","Mexico","South Africa"]

names = ["Ava","Liam","Noah","Emma","Maya",
         "Ethan","Sofia","Lucas","Isabella","Arjun"]


def generate_prompt(n_sets=2, fan_type='none', fan_degree=2, add_ac_pairs=False):

    n_names = n_sets + (int(fan_type == 'in') * fan_degree * n_sets)
    n_cities = n_sets + (int(fan_type == 'out') * fan_degree * n_sets)

    all_names = np.random.choice(names, size=n_names, replace=False)
    all_cities = np.random.choice(fictional_sci_fi_cities, size=n_cities, replace=False)
    all_countries = np.random.choice(real_countries, size=n_sets, replace=False)

    ab_pairs = []
    bc_pairs = []
    ac_map = {}
    ab_map = {}

    if fan_type == 'in': # N names from each city
        for i in range(n_sets):
            city = all_cities[i]
            country = all_countries[i]
            bc_pairs.append((city, country))
            for j in range(fan_degree):
                name = all_names[i * fan_degree + j]
                ab_pairs.append((name, city))
                ac_map[name] = country
                ab_map[name] = city

    elif fan_type == 'out':
        for i in range(n_sets):
            name = all_names[i]
            country = all_countries[i]
            ac_map[name] = country
            current_cities = []
            for j in range(fan_degree):
                city = all_cities[i * fan_degree + j]
                ab_pairs.append((name, city))
                bc_pairs.append((city, country))
                current_cities.append(city)
            ab_map[name] = current_cities # List for fan-out

    else:
        for i in range(n_sets):
            name = all_names[i]
            city = all_cities[i]
            country = all_countries[i]
            ab_pairs.append((name, city))
            bc_pairs.append((city, country))
            ac_map[name] = country
            ab_map[name] = city

    # choose target
    target_name = np.random.choice(list(ac_map.keys()))
    target_country = ac_map[target_name]
    target_city = '' if fan_type == 'out' else ab_map[target_name] # fan out means name is from multiple cities, just ignore

    # if zero or multiple distractors, just ignore; else find the other one
    distractor_country = [c for c in all_countries if c != target_country][0] if n_sets == 2 else ''
    distractor_city = [pair[0] for pair in bc_pairs if pair[1] == distractor_country][0] if n_sets == 2 else ''

    np.random.shuffle(ab_pairs)
    np.random.shuffle(bc_pairs)

    sentences = []
    for n, c in ab_pairs:
        sentences.append(f"{n} is from {c}.")
    for ci, co in bc_pairs:
        sentences.append(f"{ci} is in {co}.")
    if add_ac_pairs:
        for name, country in ac_map.items():
            if name != target_name:
                sentences.append(f"{name} is from {country}.")

    prompt_body = " ".join(sentences)
    prompt = f"{prompt_body} Therefore, {target_name} is from"

    return prompt, target_country, distractor_country, target_city, distractor_city

In [38]:
def testing_helper(**kwargs):
    dct = {
        'outp': [],
        'target_country': [],
        'distractor_country': [],
        'target_city': [],
        'distractor_city': []
    }
    for _ in tqdm(range(100)):
      prompt, target_country, distractor_country, target_city, distractor_city = generate_prompt(**kwargs)
      inputs = tokenizer.encode(prompt, return_tensor='pt')
      outp = model.generate(torch.tensor(inputs).cuda().unsqueeze(0), max_new_tokens=10)
      outp = tokenizer.batch_decode(outp[0][len(inputs):])
      dct['outp'].append(outp)
      dct['target_country'].append(target_country)
      dct['distractor_country'].append(distractor_country)
      dct['target_city'].append(target_city)
      dct['distractor_city'].append(distractor_city)
    df = pd.DataFrame(dct)
    df['outp'] = [x[0] for x in df['outp']]

    return correct_helper(df)

### Try 3 sets

In [24]:
three_sets = testing_helper(n_sets=3)
three_sets.correct.mean()

np.float64(0.81)

### Add distractor ACs into prompt

In [54]:
add_ac = testing_helper(add_ac_pairs=True)
add_ac.correct.mean()

100%|██████████| 100/100 [01:26<00:00,  1.16it/s]


np.float64(0.72)

### Fan in (2 names to 1 city)

In [55]:
fan_in = testing_helper(fan_type='in')
fan_in.correct.mean()

100%|██████████| 100/100 [01:28<00:00,  1.14it/s]


np.float64(0.72)

### Fan out (1 name from 2 cities)

In [57]:
fan_out = testing_helper(fan_type='out')
fan_out.correct.mean()

100%|██████████| 100/100 [01:28<00:00,  1.14it/s]


np.float64(0.66)