In [36]:
import numpy as np
import re
import openai
from sklearn.model_selection import train_test_split
import pandas as pd
import unicodedata
import os
from together import Together
import string

In [37]:
# load CSV
df = pd.read_csv('/Users/ohmpatel/Downloads/nytcrosswords.csv', encoding="cp1252")

In [38]:
sample_df = df.sample(n=500)

In [39]:
def find_answer(sentence):
    for word in sentence.split():
        if len(word) >= 3 and word.upper() == word:
            return word
    return "NULL"

In [40]:
answers = []

client = Together(api_key='cbb9cc63e434536b5583155ac26b44bd457098245baf94ca1fc186981f49ad74')

clues = sample_df.Clue.to_list()
words = sample_df.Word.to_list()
count = 0

for i, clue in enumerate(clues):
    length = len(words[i])
    
    response = client.chat.completions.create(
        model="meta-llama/Llama-3-8b-chat-hf",
        messages=[{"role": "user", "content": f"The crossword clue is {clue}. The length of the answer is {length} characters. Write the answer in all caps and with no spaces."}]
    )
    
    answer = response.choices[0].message.content
    stripped = answer.translate(str.maketrans('', '', string.punctuation))
    answers.append((i, clue, words[i], stripped))
    
    count += 1
    if count % 50 == 0:
        print(count)
        print(i, words[i], stripped)

50
49 POETRY The answer is FICTION
100
99 THOU I think I can help you with that

The answer to the crossword clue What you used to be with a length of 4 characters is likely to be I WAS
150
149 RERAN The answer is REPLAYED
200
199 FEDS The answer is FBIAS
250
249 SHOE The answer is SHOES
300
299 IGOR The answer is IVAN
350
349 CHART The answer is SCENE
400
399 TOTALUP The answer is TOTALUP
450
449 EMMA The answer is PATR
500
499 TENDER The answer is WEAKLY


In [41]:
stripped = []
answers_df = pd.DataFrame(answers, columns=['index','Clue','Word','Guess'])
for output in answers_df['Guess']:
    stripped.append(find_answer(output))

In [42]:
answers_df['Cleaned_Guess'] = stripped

In [43]:
words = answers_df['Word'].to_list()
guesses = answers_df['Cleaned_Guess'].to_list()

In [44]:
# calculate accuracy by word
count = 0
for i in range(len(words)):
    word, guess = words[i], guesses[i]
    if word == guess:
        count += 1
print(f"Correct word prediction accuracy: {count / len(words)}")

Correct word prediction accuracy: 0.216


In [45]:
# calculate by letter accuracy
def letter_accuracy(words, guesses):
    correct_letters, total_letters = 0, 0
    for i in range(len(words)):
        word, guess = words[i], guesses[i]
        # null guesses
        if guess == "NULL":
            total_letters += len(word)
        else:
            # correct guess
            if word == guess:
                correct_letters += len(word)
                total_letters += len(word)

            else: 
                # Case 1: guess too short, adding padding
                if len(word) > len(guess):
                    while len(guess) < len(word):
                        guess += '!'
                # Case 2: guess too long, crop to len(word)
                elif len(word) < len(guess):
                    guess = guess[:len(word)]

                # Word, Guess now guaranteed to be same length
                for i in range(len(word)):
                    if word[i] == guess[i]:
                        total_letters += 1
                        correct_letters += 1
                    else: 
                        total_letters += 1 
    return correct_letters, total_letters

In [46]:
correct, total = letter_accuracy(words, guesses)
print(f"Correct letter prediction accuracy: {correct / total}")

Correct letter prediction accuracy: 0.3010752688172043


In [47]:
set_word_len = set([len(word) for word in words])

In [48]:
for length in set_word_len:
    idxs = [i for i in range(len(words)) if len(words[i]) == length]
    subgroup_words = [words[i] for i in idxs]
    subgroup_guesses = [guesses[i] for i in idxs]
    correct, total = letter_accuracy(subgroup_words, subgroup_guesses)
    print(f"Correct {length}-letter prediction accuracy: {correct / total}")

Correct 3-letter prediction accuracy: 0.2962962962962963
Correct 4-letter prediction accuracy: 0.4119718309859155
Correct 5-letter prediction accuracy: 0.2894308943089431
Correct 6-letter prediction accuracy: 0.3009950248756219
Correct 7-letter prediction accuracy: 0.29064039408866993
Correct 8-letter prediction accuracy: 0.1590909090909091
Correct 9-letter prediction accuracy: 0.06172839506172839
Correct 10-letter prediction accuracy: 0.1
Correct 11-letter prediction accuracy: 0.20454545454545456
Correct 13-letter prediction accuracy: 0.0
Correct 15-letter prediction accuracy: 0.16666666666666666


In [10]:
# Python program to generate word vectors using Word2Vec

# importing all necessary modules
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

In [11]:
def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 400000 embeddings, each length 200
    """
    import gensim.downloader as api
    wv_from_bin = api.load("glove-wiki-gigaword-200")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [12]:
wv_from_bin.distance('page', 'talk')

0.6915422677993774

In [13]:
def cos_similarity_incl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try: 
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            cos_sim.append(0)
    return sum(cos_sim) / len(cos_sim)

def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try: 
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [160]:
print(f"Cosine similarity w/ 0s for null guesses: {cos_similarity_incl_null(words, guesses)}")
print(f"Cosine similarity excluding null guesses: {cos_similarity_excl_null(words, guesses)}")

Cosine similarity w/ 0s for null guesses: 0.45806048088092355
Cosine similarity excluding null guesses: 0.6292039572540159


In [161]:
for length in set_word_len:
    idxs = [i for i in range(len(words)) if len(words[i]) == length]
    subgroup_words = [words[i] for i in idxs]
    subgroup_guesses = [guesses[i] for i in idxs]
    print(f"Cosine similarity {length}-letter w/ 0s for null guesses: {cos_similarity_incl_null(subgroup_words, subgroup_guesses)}")
    print(f"Cosine similarity {length}-letter excluding null guesses: {cos_similarity_excl_null(subgroup_words, subgroup_guesses)}")
    print()

Cosine similarity 3-letter w/ 0s for null guesses: 0.6275630133911141
Cosine similarity 3-letter excluding null guesses: 0.6447746572182965

Cosine similarity 4-letter w/ 0s for null guesses: 0.5327656174121194
Cosine similarity 4-letter excluding null guesses: 0.6279902886290775

Cosine similarity 5-letter w/ 0s for null guesses: 0.41969531750117234
Cosine similarity 5-letter excluding null guesses: 0.6295429762517585

Cosine similarity 6-letter w/ 0s for null guesses: 0.3633225873463612
Cosine similarity 6-letter excluding null guesses: 0.6205262011608643

Cosine similarity 7-letter w/ 0s for null guesses: 0.2829566756995876
Cosine similarity 7-letter excluding null guesses: 0.5995986699348405

Cosine similarity 8-letter w/ 0s for null guesses: 0.19600043441639858
Cosine similarity 8-letter excluding null guesses: 0.5451262082206085

Cosine similarity 9-letter w/ 0s for null guesses: 0.07995470653514604
Cosine similarity 9-letter excluding null guesses: 0.7395810354501009

Cosine sim

In [172]:
def is_correct_length(actual_answers, generated_answers):
    acc_array = np.array(actual_answers)
    gen_array = np.array(generated_answers)
    matches = np.sum([len(acc_array[i]) == len(gen_array[i]) for i in range(len(acc_array))])
    return matches / len(acc_array)

is_correct_length(words, guesses)

0.5392

In [181]:
client = Together(api_key='cbb9cc63e434536b5583155ac26b44bd457098245baf94ca1fc186981f49ad74')

clues = sample_df.Clue.to_list()
words = sample_df.Word.to_list()
count = 0


clue = clues[0]
length = len(words[0])

partial = "NERVE"

response = client.chat.completions.create(
    model="meta-llama/Llama-3-8b-chat-hf",
    messages=[{"role": "user", "content": f"The crossword clue is {clue}. The length of the answer is {length} characters, and some characters are filled in: {partial}. Write the answer in all caps and with no spaces."}]
)

resp = response.choices[0].message.content
stripped = resp.translate(str.maketrans('', '', string.punctuation))
print(stripped)


The answer is NERVES


In [177]:
clues[0]

'"You have some ___!"'

In [167]:
len(words)

2500

In [168]:
len(guesses)

2500

In [14]:
ANSWERS = ['HERB', 'CROW', 'HAVE', 'AMOR', 'HOPE', 'TOLET', 'LIBERATED', 'INLET', 'ELEVATOR', 'LOIRE', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'FLEECE', 'NANA', 'DELAYED', 'UNAGING', 'IRAN', 'GUARDS', 'NAY', 'TEST', 'ACCESS', 'TENREC', 'ETAPE', 'STELE', 'REASONER', 'TOROS', 'TELLSOVER', 'ADEPT', 'ETAL', 'LIVE', 'RODE', 'DENY', 'SLED', 'HALER', 'EMILY', 'ROBED', 'BREVE', 'CHAT', 'ROTO', 'OPERATE', 'WED', 'HONORING', 'ALLINVAIN', 'VEER', 'ETTE', 'TILE', 'RAREFY', 'SECURE', 'TRENDS', 'ALEGAR', 'SEDUCE', 'ANNA', 'NAGY', 'ADIT', 'SERE', 'PLASTERED', 'ANTELOPE', 'ASSESS', 'ACCRETE', 'NEST', 'TOOLS', 'ANVIL', 'PEEVE', 'ERRED', 'STAR', 'TODO', 'ELAN', 'ALLY', 'TED']

In [15]:
GENERATED_ANSWERS = ['HERB', 'CROW', 'HAVE', 'AMOR', 'DANA', 'SCORP', 'ASSERTIVE', 'VIELA', 'ELEVATOR', 'TECOC', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'GYPSEE', 'YANK', 'DEFERRED', 'AGELESS', 'IRAN', 'HELLHOUNDS', 'NAK', 'SWORD', 'CORRID', 'LORIS', 'LIVER', 'STELE', 'ANCHORER', 'TOROS', 'ECHOESIT', 'ADEPT', 'FOOT', 'BARB', 'RIDOF', 'FOGO', 'WATR', 'KRONE', 'EMILY', 'ROBED', 'SEMIB', 'CHAT', 'GYRA', 'OPERATE', 'TIE', 'INHONOR', 'ENGLISH', 'VEER', 'ETTE', 'TILE', 'RAREFY', 'SHOWER', 'EBBSAND', 'TARROR', 'ENTICE', 'ANNA', 'NAGY', 'ADIT', 'SERE', 'PLOUGHBOY', 'GOLFBALL', 'ASSIGN', 'COALESCE', 'NEST', 'TOOLS', 'OREIL', 'VEXAT', 'ERRED', 'CLAN', 'TODO', 'ELAN', 'ALLY', 'TED']

In [16]:
def cos_similarity_incl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try: 
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            cos_sim.append(0)
    return sum(cos_sim) / len(cos_sim)

def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try: 
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [17]:
cos_similarity_excl_null(ANSWERS, GENERATED_ANSWERS)

0.32385506093794225

In [20]:
wv_from_bin.distance(ANSWERS[0].lower(), GENERATED_ANSWERS[0].lower())

0.0

In [22]:
wv_from_bin.distance('herb', 'herb')

0.0

In [28]:
wv_from_bin.distance('good', 'bad')

0.2890373468399048

In [29]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

bi_encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')

def biencoder(clue, answers):
    def encode_texts(bi_encoder, texts):
        return bi_encoder.encode(texts)
    
    def calculate_similarity(clue_embedding, answer_embeddings):
        return util.dot_score(clue_embedding, answer_embeddings)[0].cpu().numpy()
    
    def softmax(x):
        e_x = np.exp(x - np.max(x))
        return e_x / e_x.sum()
    
    clue_embedding = encode_texts(bi_encoder, [clue])[0]
    answer_embeddings = encode_texts(bi_encoder, answers)
    
    similarity_scores = calculate_similarity(clue_embedding, answer_embeddings)
    probabilities = softmax(similarity_scores)
    
    answer_probabilities = {answer: prob for answer, prob in zip(answers, probabilities)}
    
    return answer_probabilities

In [32]:
util.cos_sim(bi_encoder.encode('herb'), bi_encoder.encode('herb'))

tensor([[1.0000]])

In [49]:
def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try: 
            sim = util.cos_sim(bi_encoder.encode(words[i].lower()), bi_encoder.encode(guesses[i].lower()))
            cos_sim.append(sim)
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [50]:
cos_similarity_excl_null(words, guesses)

tensor([[0.4095]])