In [1]:
import re

def extract_clue_and_length(text):
    match = re.search(r"\(([\d,\-\s]+)\)$", text)  # Corrected regex: Escape the hyphen
    if match:
        lengths = match.group(1).replace(" ", "").split(",")  # Remove spaces and split by commas
        clue_text = re.sub(r"\([\d,\-\s]+\)$", "", text).strip()  # Remove the length part from clue
        return clue_text, lengths
    return text, None  # Fallback if no match is found

def format_answer(answer, lengths):
    curr_index = 0
    s = ""
    for l in lengths:
        if "-" not in l:
            s += answer[curr_index:curr_index + int(l)] + " "
            curr_index = curr_index + int(l)
        else:
            splits = l.split("-")
            split_w = []
            for split in splits:
                split_w.append(answer[curr_index:curr_index + int(split)])
                curr_index += int(split)
            s += "-".join(split_w) + " "


    return s.strip()  # Join and return


def letter_accuracy(guesses,answers):
    S = 0
    for i in range(len(answers)):
        score = 0
        guess = guesses[i]
        answer = answers[i]

        try:
            for j in range(len(answer)):
                
                if answer[j] == guess[j]:
                    score += 1
            
            if guess[j+1:]:
                score *= 0
        
        except:

            score = 0

        score /= len(answer)
        S += score
    return S/len(answers)

In [4]:
import pandas as pd
import os
import numpy as np

dir_path = "/root/Cryptic-Crosswords/Crossword Clues/test"
test_sets = []
for file in os.listdir(dir_path):
    test_set_partial = pd.read_csv(os.path.join(dir_path,file))
    test_sets.append(test_set_partial)

# Combine all three DataFrames
df = pd.concat([ts[["Date" ,"Clue", "Answer"]] for ts in test_sets], ignore_index=True)
df["Answer"].fillna("NULL", inplace=True)
df[["Clue", "Length"]] = df["Clue"].apply(lambda x: pd.Series(extract_clue_and_length(x)))
df["Answer"] = df.apply(lambda row: format_answer(row["Answer"], row["Length"]), axis=1)

In [None]:
clues = list(df["Clue"])
answers = list(df["Answer"])
lengths = list(df["Length"])

print("Test Set Size:",len(clues))

Test Set Size: 25906


In [6]:
import json
data = json.load(open('OtherTestSet/naive_random.json'))
test_data = data['test']
clues = [test_data[i]['clue'] for i in range(len(test_data))]
answers = [test_data[i]['soln'] for i in range(len(test_data))]
lengths = [test_data[i]['lengths'] for i in range(len(test_data))]

In [7]:
import transformers

pipeline = transformers.pipeline(
    "text-generation",
    model="microsoft/phi-4",
    model_kwargs={"torch_dtype": "auto"},
    device_map="cuda:3",
)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 6/6 [01:00<00:00, 10.12s/it]
Device set to use cuda:3


In [8]:
def phi(clue,length, fs = 0):

    if fs == 0:
        prompt = f"""Solve the following cryptic crossword clue.

Clue: {clue}  
Answer length: {length} (word lengths separated by commas; hyphen indicates a compound word)  

Return **only** the answer. No explanations or additional text."""

    elif fs == 1:
        prompt = f"""Solve the following cryptic crossword clue based on the examples below.  
Return **only** the answer—no explanations or additional text.

### Examples:

1. **Clue:** Trouser part of Greek attire is raised  
   **Answer length:** [4] (word lengths separated by commas; hyphen indicates a compound word)
   **Explanation:** 'Trouser' (meaning to steal) is the definition, while 'part of Greek attire' hides 'ekat', and 'is raised' (reversal indicator) flips it to form **TAKE**.  
   **Answer:** TAKE  

2. **Clue:** Goes drinking and nearly scores  
   **Answer length:** [4,3,3] (word lengths separated by commas; hyphen indicates a compound word)
   **Explanation:** A double definition where 'goes drinking' refers to **HITS THE BAR** (as in drinking), and 'nearly scores' refers to the football term **HITS THE BAR** (when the ball hits the goalpost instead of scoring).  
   **Answer:** HITS THE BAR  

3. **Clue:** Toilet break after surgery  
   **Answer length:** [4-2] (word lengths separated by commas; hyphen indicates a compound word)
   **Explanation:** 'After surgery' is the definition, while 'PO' (short for 'pot', slang for toilet) + 'STOP' (meaning break) together form **POST-OP**.  
   **Answer:** POST-OP  

4. **Clue:** Health centre for hypochondriacs?  
   **Answer length:** [4-5,6] (word lengths separated by commas; hyphen indicates a compound word)
   **Explanation:** 'Health centre' is the definition, while the clue plays on the irony of a **WELL-WOMAN CLINIC**—a real type of health centre—suggesting a contradiction (why would a 'well' woman need a clinic?).  
   **Answer:** WELL-WOMAN CLINIC  

Now solve the following crossword clue:  
**Clue:** {clue}  
**Answer length:** {length} (word lengths separated by commas; hyphen indicates a compound word)

**Answer:** """


    messages = [
        {"role": "system", "content": "You are a cryptic crossword solver."},
        {"role": "user", "content": prompt},
    ]

    outputs = pipeline(messages, max_new_tokens=20)
    return outputs[0]["generated_text"][-1]['content']

In [22]:
def phi_prompt_generator(clue,length, list_of_answer = [], repeat = 0):
#     prompt = f"""Solve the following cryptic crossword clue.

# Clue: {clue}  
# Answer length: {length} (word lengths separated by commas; hyphen indicates a compound word)  

# Return **only** the answer. No explanations or additional text."""

#     if repeat == 1:
#         prompt += f"\nThe previous answers {list_of_answer} are incorrect because they do not adhere to the length constraints provided. You must stick to the given length constraints at all costs. Please try again."

    prompt = f"""You are a cryptic crossword expert. You are given a clue for a cryptic crossword. Output only the answer. 
clue:
{clue} {length}
output:"""
    return prompt

def phi4(prompt, max_new_tokens=20):
    messages = [
        {"role": "system", "content": "You are a cryptic crossword solver."},
        {"role": "user", "content": prompt},
    ]

    outputs = pipeline(messages, max_new_tokens=max_new_tokens)
    return outputs[0]["generated_text"][-1]['content']
    
def phi4(prompt, max_new_tokens=20, top_k=25):
    messages = [
        {"role": "system", "content": "You are a cryptic crossword solver."},
        {"role": "user", "content": prompt},
    ]

    outputs = pipeline(messages, max_new_tokens=max_new_tokens, num_beams=top_k, num_return_sequences=top_k, early_stopping=True)
    return [output["generated_text"][-1]['content'] for output in outputs[:top_k]]

In [10]:
import re

def follows_pattern(S: str, P: str) -> bool:
    # Convert pattern P to a regex pattern
    regex_pattern = P.replace('x', '\\w')  # 'x' corresponds to any word character
    regex_pattern = '^' + regex_pattern + '$'  # Ensure full string match
    
    return bool(re.match(regex_pattern, S))

def generate_pattern(length_list):
    s = ""
    for l in length_list:
        l = str(l)
        if '-' in l:
            for j in l.split('-'):
                s += 'x' * int(j)
                s += '-'
            s = s[:-1] + ' '
        else:
            s += 'x' * int(l)
            s += ' '
    return s.strip()
    # return ' '.join([''.join(['x' * int(part) for part in group.split('-')]) for group in length_list])

# Example usage
S1, P1 = "on-the-go", "xx-xxx-xx"
print(follows_pattern(S1, P1))  # True

length_list = ['5', '2-5']
print(generate_pattern(length_list))  # 'xxxx-xxxx xx'

True
xxxxx xx-xxxxx


In [None]:
from tqdm import tqdm

count = 0
acc = 0
guesses = []
for idx in tqdm(range(len(clues[:2]))):
    answer = answers[idx]
    prompt = phi_prompt_generator(clues[idx],lengths[idx])
    guess = phi4(prompt, max_new_tokens=3)

    counter = 0

    list_of_guesses = []
    # while len(guess) != len(answer) and counter < 10:
    while follows_pattern(guess, generate_pattern(lengths[idx])) == False and counter < 0:
        list_of_guesses.append(guess)
        new_prompt = phi_prompt_generator(clues[idx],lengths[idx], list_of_guesses, repeat = 1)
        guess = phi4(new_prompt)
        counter += 1
        
    diff = len(guess) - len(answer)
    if abs(diff) < 1:
        count += 1
    if answer.lower() == guess.lower():
        # print(answer)
        # print(guess)
        print()
        acc += 1

    if idx % 100 == 0:
        print(acc/(idx+1), count/(idx+1))
    guesses.append(guess)

acc /= len(guesses)
count /= len(guesses)
print(count*100, acc*100)

In [27]:
guess

['Iodine',
 'Rheum',
 'Iodised',
 'Iodized',
 'Tremor',
 'Rheumatic',
 'Stomach',
 'Rheas',
 'Quaker O',
 'Spasmod',
 'Rheuma',
 'Stiff Neck',
 'Spas Mod',
 'Rheumat',
 'Muscle Rub',
 'Rheu',
 'Trem I',
 'Jellyfish',
 'Tremol',
 'Iodide',
 'Rheost',
 'Arthritic',
 'Tremors',
 'Tetanus',
 'Rheal']