In [1]:
!pip install openai
import openai
import numpy as np
import pandas as pd
import os
import sys
from sentence_transformers import SentenceTransformer, util
import torch
import time
import json



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ishan device filepath
file_path = "/Users/ishan//Desktop/cs224n/02.json"

if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
    try:

        with open(file_path, 'r') as file:
            data = json.load(file)
        print(data)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")
else:
    print("File does not exist or is empty.")

{'acrossmap': None, 'admin': False, 'answers': {'across': ['HERB', 'CROW', 'HAVE', 'AMOR', 'HOPE', 'TOLET', 'LIBERATED', 'INLET', 'ELEVATOR', 'LOIRE', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'FLEECE', 'NANA', 'DELAYED', 'UNAGING', 'IRAN', 'GUARDS', 'NAY', 'TEST', 'ACCESS', 'TENREC', 'ETAPE', 'STELE', 'REASONER', 'TOROS', 'TELLSOVER', 'ADEPT', 'ETAL', 'LIVE', 'RODE', 'DENY', 'SLED'], 'down': ['HALER', 'EMILY', 'ROBED', 'BREVE', 'CHAT', 'ROTO', 'OPERATE', 'WED', 'HONORING', 'ALLINVAIN', 'VEER', 'ETTE', 'TILE', 'RAREFY', 'SECURE', 'TRENDS', 'ALEGAR', 'SEDUCE', 'ANNA', 'NAGY', 'ADIT', 'SERE', 'PLASTERED', 'ANTELOPE', 'ASSESS', 'ACCRETE', 'NEST', 'TOOLS', 'ANVIL', 'PEEVE', 'ERRED', 'STAR', 'TODO', 'ELAN', 'ALLY', 'TED']}, 'author': 'Martha J. DeWitt', 'autowrap': None, 'bbars': None, 'circles': None, 'clues': {'across': ['1. Fennel or sweet cicely', '5. Eat ___ (suffer humiliation)', '9. "To ___ and to Hold," Johnston novel', '13. Cupid', '14. Lange from Conn.', '15. House sign', '16. W

In [3]:
class Crossword:
    def __init__(self, data):
        self.data = data
        self.across_clues = {}
        self.down_clues = {}
        self.clue_to_positions = {}
        self.solution_dict = {}
        self.clue_grid = None
        self.neighbors = {}

    def initialize_solution_map(self):
        # first do across
        clues = self.data['clues']['across']
        answers = self.data['answers']['across']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}A"] = answers[i]

        # now do down
        clues = self.data['clues']['down']
        answers = self.data['answers']['down']
        for i, clue in enumerate(clues):
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.solution_dict[f"{num}D"] = answers[i]


    def initialize_clues(self):
        """
        Take in dictionary representing crossword and fill in dictionaries that hole clue codes (i.e. 1a/3d/18a/etc) 
        and map to corresppnding clue.
        """
        for clue in self.data['clues']['across']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.across_clues[f"{num}A"] = rest

        for clue in self.data['clues']['down']:
            period_idx = clue.find('.')
            num, rest = clue[:period_idx], clue[period_idx+1:]
            self.down_clues[f"{num}D"] = rest

    def initialize_clue_positions_mapping(self):
        """
        Take clue dictionary from self.across_clues and self.down_clues in the form {'1A': clue, etc ...}, 
        build a dictionary that maps clue ID to coordinates in grid
        """
        # first do across
        for clue in self.across_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // 15, start % 15 
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row, col + i))
            self.clue_to_positions[clue] = coords

        # now do down
        for clue in self.down_clues:
            num = int(clue[:-1])
            answer_len = len(self.solution_dict[clue])
            start = list(self.data['gridnums']).index(num)
            row, col = start // 15, start % 15 
            # this is across, so now that we have a start index, add corresponding coord to map
            coords = []
            for i in range(answer_len):
                coords.append((row + i, col))
            self.clue_to_positions[clue] = coords
    

    def initialize_clue_grid(self):
        """
        Represent a grid in the form of each cell being filled into to show what clue it corresponds to.
        For example:
        grid = [[('1A, 1D'), ('1A, 2D')],
                [('2A, 1D'), ('2A, 2D')]]
        """

        grid = [
            [[None, None] for _ in range(15)] for _ in range(15)
        ]
        
        for clue in self.across_clues.keys():
            coords = self.clue_to_positions[clue]
            for (x, y) in coords:
                grid[x][y][0] = clue

        for clue in self.down_clues.keys():
            coords = self.clue_to_positions[clue]
            for (x, y) in coords:
                grid[x][y][1] = clue

        self.clue_grid = grid


    def initialize(self):
        self.initialize_clues()
        self.initialize_solution_map()
        self.initialize_clue_positions_mapping()
        self.initialize_clue_grid()



In [4]:
trial = Crossword(data)
trial.initialize()

In [5]:
solutions = trial.solution_dict
down_answers = {}
across_answers = {}
for item in solutions:
    if item[-1] == 'D':
        down_answers[item] = solutions[item]
    else:
        across_answers[item] = solutions[item]
down_clues = trial.down_clues
across_clues = trial.across_clues

In [6]:
inputs = []

for item in across_clues:
    clue = across_clues[item]
    ans = across_answers[item]
    length = len(ans)
    input_text = str(clue) + ',' + ' ' + str(length) + ','
    inputs.append(input_text)

for item in down_clues:
    clue = down_clues[item]
    ans = down_answers[item]
    length = len(ans)
    input_text = str(clue) + ',' + ' ' + str(length) + ','
    inputs.append(input_text)



print(inputs)

[' Fennel or sweet cicely, 4,', ' Eat ___ (suffer humiliation), 4,', ' "To ___ and to Hold," Johnston novel, 4,', ' Cupid, 4,', ' Lange from Conn., 4,', ' House sign, 5,', ' What NOW wants women to be, 9,', ' Ocean arm, 5,', ' Follower of grain or freight, 8,', " Orleans's river, 5,", ' ___ Cup (golf prize), 5,', " Boatman's backward, 6,", ' March 26, 1978, 6,', ' Pavlov, 4,', ' Relative of a daboia, 3,', ' Defraud, 6,', ' Pram pusher, 4,', ' Put off, 7,', ' Describing eternal youth, 7,', " Pahlavi's country, 4,", ' Cerberus et al., 6,', ' Aye neutralizer, 3,', ' Put to the ___, 4,', ' Passageway, 6,', ' Madagascar mammal, 6,', ' Storehouse of a sort, 5,', ' Inscribed pillar, 5,', ' Newscaster, 8,', ' Bulls, in Barcelona, 5,', ' Repeats a report, 9,', ' Proficient, 5,', ' Abbr. often used on deeds, 4,', ' Kind of wire, 4,', ' Harassed, 4,', ' Abjure, 4,', ' Pung or monoski, 4,', " Item in a Czech's wallet, 5,", ' Girl in "Our Town", 5,', ' Togate, 5,', ' Longest modern musical note, 5,

In [41]:
answers = list(trial.solution_dict.values())

In [8]:
# input text: inputs
# solutions: answers

generated_answers = []
# Logistics and loading in model
!pip install openai==0.28

openai.api_key = 'sk-proj-8oLvnNGJLnlgW4SQOoHwT3BlbkFJ8c24SWE59CoO4sTxlDC7'

with open('/Users/ishan/Desktop/cs224n/fine_tuned_model_name.txt', 'r') as f:
    fine_tuned_model = f.read().strip()





In [11]:
def generate_top_answer(prompt, model, max_tokens=50):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=1, 
        stop=None,
        temperature=0.9,
        top_p=0.9
    )
    top_answer = response['choices'][0]['message']['content'].strip()
    return top_answer



In [None]:
generated_answers = []
for i in range(len(inputs)):
    cur_answer = generate_top_answer(inputs[i], fine_tuned_model)
    generated_answers.append(cur_answer)
generated_answers

In [58]:
answers

['HERB',
 'CROW',
 'HAVE',
 'AMOR',
 'HOPE',
 'TOLET',
 'LIBERATED',
 'INLET',
 'ELEVATOR',
 'LOIRE',
 'RYDER',
 'ASTERN',
 'EASTER',
 'IVAN',
 'ASP',
 'FLEECE',
 'NANA',
 'DELAYED',
 'UNAGING',
 'IRAN',
 'GUARDS',
 'NAY',
 'TEST',
 'ACCESS',
 'TENREC',
 'ETAPE',
 'STELE',
 'REASONER',
 'TOROS',
 'TELLSOVER',
 'ADEPT',
 'ETAL',
 'LIVE',
 'RODE',
 'DENY',
 'SLED',
 'HALER',
 'EMILY',
 'ROBED',
 'BREVE',
 'CHAT',
 'ROTO',
 'OPERATE',
 'WED',
 'HONORING',
 'ALLINVAIN',
 'VEER',
 'ETTE',
 'TILE',
 'RAREFY',
 'SECURE',
 'TRENDS',
 'ALEGAR',
 'SEDUCE',
 'ANNA',
 'NAGY',
 'ADIT',
 'SERE',
 'PLASTERED',
 'ANTELOPE',
 'ASSESS',
 'ACCRETE',
 'NEST',
 'TOOLS',
 'ANVIL',
 'PEEVE',
 'ERRED',
 'STAR',
 'TODO',
 'ELAN',
 'ALLY',
 'TED']

In [23]:
count = 0
for i in range(len(answers)):
    if generated_answers[i] == answers[i]:
        count += 1
count
34/72
    

0.4722222222222222

In [24]:
!pip install gensim

from gensim.models import Word2Vec
from gensim.models import KeyedVectors

def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 400000 embeddings, each length 200
    """
    import gensim.downloader as api
    wv_from_bin = api.load("glove-wiki-gigaword-200")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [26]:
def cos_similarity_incl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            cos_sim.append(0)
    return sum(cos_sim) / len(cos_sim)

def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [27]:
print(f"Cosine similarity w/ 0s for null guesses: {cos_similarity_incl_null(answers, generated_answers)}")
print(f"Cosine similarity excluding null guesses: {cos_similarity_excl_null(answers, generated_answers)}")

Cosine similarity w/ 0s for null guesses: 0.2763023280745579
Cosine similarity excluding null guesses: 0.34901346704154684


In [44]:
print(answers, generated_answers)

['HERB', 'CROW', 'HAVE', 'AMOR', 'HOPE', 'TOLET', 'LIBERATED', 'INLET', 'ELEVATOR', 'LOIRE', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'FLEECE', 'NANA', 'DELAYED', 'UNAGING', 'IRAN', 'GUARDS', 'NAY', 'TEST', 'ACCESS', 'TENREC', 'ETAPE', 'STELE', 'REASONER', 'TOROS', 'TELLSOVER', 'ADEPT', 'ETAL', 'LIVE', 'RODE', 'DENY', 'SLED', 'HALER', 'EMILY', 'ROBED', 'BREVE', 'CHAT', 'ROTO', 'OPERATE', 'WED', 'HONORING', 'ALLINVAIN', 'VEER', 'ETTE', 'TILE', 'RAREFY', 'SECURE', 'TRENDS', 'ALEGAR', 'SEDUCE', 'ANNA', 'NAGY', 'ADIT', 'SERE', 'PLASTERED', 'ANTELOPE', 'ASSESS', 'ACCRETE', 'NEST', 'TOOLS', 'ANVIL', 'PEEVE', 'ERRED', 'STAR', 'TODO', 'ELAN', 'ALLY', 'TED'] ['HERB', 'CROW', 'HAVE', 'AMOR', 'DANA', 'SCORP', 'ASSERTIVE', 'VIELA', 'ELEVATOR', 'TECOC', 'RYDER', 'ASTERN', 'EASTER', 'IVAN', 'ASP', 'GYPSEE', 'YANK', 'DEFERRED', 'AGELESS', 'IRAN', 'HELLHOUNDS', 'NAK', 'SWORD', 'CORRID', 'LORIS', 'LIVER', 'STELE', 'ANCHORER', 'TOROS', 'ECHOESIT', 'ADEPT', 'FOOT', 'BARB', 'RIDOF', 'FOGO', 'WATR', 'KR

In [38]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def cosine_similarity_between_lists(answers, generated_answers):
    vectorizer = TfidfVectorizer().fit_transform(answers + generated_answers)
    vectors = vectorizer.toarray()

    answer_vectors = vectors[:len(answers)]
    generated_vectors = vectors[len(answers):]

    similarities = []
    for i in range(len(answers)):
        similarity = cosine_similarity([answer_vectors[i]], [generated_vectors[i]])[0][0]
        similarities.append(similarity)

    return similarities

similarities = cosine_similarity_between_lists(answers, generated_answers)
print(np.sum(np.array(similarities)))


32.0


In [55]:
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_cosine_similarity(word1, word2, vectorizer):
    vectors = vectorizer.transform([word1, word2])
    return cosine_similarity(vectors)[0, 1]

def overall_cosine_similarity(answers, generated_answers):
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3)).fit(answers + generated_answers)
    
    similarities = []
    for word1, word2 in zip(answers, generated_answers):
        if word1 == word2:
            similarities.append(1.0)
        else:
            similarity = compute_cosine_similarity(word1, word2, vectorizer)
            similarities.append(similarity)
    
    return np.mean(similarities)

similarity_score = overall_cosine_similarity(answers, generated_answers)
print(f'Overall Cosine Similarity: {similarity_score:.4f}')


Overall Cosine Similarity: 0.5895


In [47]:
def extract_input_text(file_path):
    input_texts = []
    with open(file_path, 'r') as file:
        lines = file.readlines()
        for line in lines[:2500]:
            parts = line.strip().rsplit(', ', 2)  
            if len(parts) == 3:
                clue, length, _ = parts
                input_text = f"{clue}, {length},"
                input_texts.append(input_text)
    return input_texts

In [48]:
filepath = '/Users/ishan/Desktop/cs224n/gpt_3.5_test.txt'  
input_texts = extract_input_text(filepath)
input_texts

['Half of a stock market index name, 5,',
 'Tarkenton of three Super Bowls, 4,',
 'Former N.H.L. great, 3,',
 'Instruments used by the Beatles, 6,',
 '"I said ___!", 3,',
 'Backbone of a boat, 4,',
 'Linoleum cover, 3,',
 'Submarine, 5,',
 'Bartender?Æs supply, 6,',
 'Sched. uncertainty, 3,',
 'A commitment must be made here, 15,',
 'Kind of note, 4,',
 'Verizon competitor, 6,',
 'See 45-Across, 4,',
 'Gets concrete results?, 5,',
 'Veep from Tennessee, 6,',
 'Familia members, 4,',
 '___ deck, 4,',
 'Having attractive gams, 5,',
 'Condition, 5,',
 'Atlantic City attraction, 4,',
 'Super 8 alternative, 7,',
 'Private line?, 6,',
 'Part of a circle, 3,',
 '"Don\'t Tell ___" ("Cabaret" song), 4,',
 'Winter Olympics vehicle, 4,',
 'Abrogate a peace treaty, maybe, 5,',
 'Retirement nest eggs, 4,',
 'Card holder at a casino, 4,',
 '&#9794; and &#9792;, 5,',
 'Treasure of the Sierra Madre, 3,',
 'Sister of Calliope, 6,',
 'Makes fun of, in a way, 5,',
 '20-, 39- or 53-Across, say, 3,',
 'Hair

In [49]:
testing_inputs = []
import random
if len(input_texts) < 625:
    raise ValueError("The list does not contain enough items.")

random_lines = random.sample(input_texts, 625)

for line in random_lines:
    print(line.strip())
    testing_inputs.append(line)
len(testing_inputs)



Base fig., 3,
1973 War hit "The ___ Kid", 5,
Florida island resort, 7,
Actor Connery, 4,
More, in scores, 3,
TV actress Spelling, 4,
Harem show on HBO, 8,
Supply (with), 3,
N.Y.C.'s ___ Bridge, 3,
Folk song ?ôThe ___ Grey Goose (Is Dead)?ö, 3,
Floor cover that doesn't reach the walls, 7,
Bug collection?, 5,
Brass, 8,
Kid's ball material, 4,
Small glass container, 5,
Clickable address, for short, 3,
Third man in a ring, 3,
Certain outer coating, 7,
Yawn-inducing, 4,
Jell-O maker, 5,
Part of a circle, 3,
Former N.H.L. great, 3,
"Hi" follower, 3,
Cuba or Aruba, 4,
See 26-Across, 5,
"That really hurt!", 3,
Put, 5,
Back-baring top, 6,
"Fiddler" matchmaker, 5,
"The Lord of the Rings" race, 5,
Tournament ranking, 4,
Engine using a stream of compressed air, 6,
Get under the skin of, 3,
Juggling or magic, in a talent show, 3,
Someone who speaks like the quote in 25-Across, 4,
Improviser's asset, 3,
Sporty truck, for short, 3,
Most affected by pathos, 8,
Part of some tables, 4,
Fed, 4,
Big bicep

625

In [None]:
generated_answers = []
for i in range(len(testing_inputs)):
    cur_answer = generate_top_answer(testing_inputs[i], fine_tuned_model)
    generated_answers.append(cur_answer)
generated_answers

In [None]:
len(generated_answers)

In [59]:
# actual call for testing!!

import random
import openai

def read_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
    return lines

def extract_clues_answers(lines):
    clues = []
    answers = []
    for line in lines:
        parts = line.rsplit(',', 2)
        if len(parts) == 3:
            clue = parts[0].strip() + ',' + parts[1].strip() + ','
            answer = parts[2].strip()
            clues.append(clue)
            answers.append(answer)
    return clues, answers

def generate_top_answer(prompt, model, max_tokens=50):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=1,  # Request only one completion
        stop=None,
        temperature=0.9,
        top_p=0.9
    )
    top_answer = response['choices'][0]['message']['content'].strip()
    return top_answer

def get_generated_solutions(clues, model):
    generated_solutions = []
    for clue in clues:
        generated_solution = generate_top_answer(clue, model)
        generated_solutions.append(generated_solution)
    return generated_solutions

def calculate_accuracy(actual_solutions, generated_solutions):
    correct = 0
    for actual, generated in zip(actual_solutions, generated_solutions):
        if actual.lower() == generated.lower():
            correct += 1
    accuracy = correct / len(actual_solutions)
    return accuracy

# Main function
def main(file_path, model):
    lines = read_file(file_path)
    
    if len(lines) < 625:
        raise ValueError("The file does not contain enough lines.")
        
    random_lines = random.sample(lines, 625)
    clues, actual_solutions = extract_clues_answers(random_lines)
    generated_solutions = get_generated_solutions(clues, model)
    accuracy = calculate_accuracy(actual_solutions, generated_solutions)
    
    print(f'Accuracy: {accuracy:.2%}')
    return actual_solutions, generated_solutions, accuracy

# on ishan's machine
file_path = '/Users/ishan/Desktop/cs224n/gpt_3.5_test.txt'
model = fine_tuned_model

actual_solutions, generated_solutions, accuracy = main(file_path, model)

for actual, generated in zip(actual_solutions, generated_solutions):
    print(f'Actual: {actual}, Generated: {generated}')


Accuracy: 38.08%
Actual: FISHER, Generated: HEDOEST
Actual: MALAPROP, Generated: DALLOWAY
Actual: LINEAGE, Generated: DIAGRAM
Actual: AVA, Generated: VMA
Actual: GREAT, Generated: SEXTY
Actual: SASE, Generated: SASE
Actual: COHN, Generated: COHN
Actual: TRYST, Generated: SALSA
Actual: APERCU, Generated: CAPSUL
Actual: ACING, Generated: ACING
Actual: STERN, Generated: DICED
Actual: AMILE, Generated: ELAND
Actual: PANPIPE, Generated: ARMONICA
Actual: SLICK, Generated: GLIB
Actual: ABOIL, Generated: AFOOT
Actual: REALTIME, Generated: DEADWOOD
Actual: SAMOA, Generated: FJORDS
Actual: OUTRE, Generated: EERIE
Actual: KARL, Generated: ASIA
Actual: ALARUM, Generated: TREADON
Actual: ECUADOR, Generated: ECUADOR
Actual: FIRS, Generated: PINAS
Actual: BLUEMOON, Generated: ONEOFAKIND
Actual: ORE, Generated: ORE
Actual: FREELY, Generated: ATWILL
Actual: PHONY, Generated: FEIGN
Actual: HOPESO, Generated: HOPING
Actual: CEASES, Generated: USESUP
Actual: NOH, Generated: NOH
Actual: HERALD, Generated: 

In [60]:
actual_solutions

['FISHER',
 'MALAPROP',
 'LINEAGE',
 'AVA',
 'GREAT',
 'SASE',
 'COHN',
 'TRYST',
 'APERCU',
 'ACING',
 'STERN',
 'AMILE',
 'PANPIPE',
 'SLICK',
 'ABOIL',
 'REALTIME',
 'SAMOA',
 'OUTRE',
 'KARL',
 'ALARUM',
 'ECUADOR',
 'FIRS',
 'BLUEMOON',
 'ORE',
 'FREELY',
 'PHONY',
 'HOPESO',
 'CEASES',
 'NOH',
 'HERALD',
 'TINO',
 'ATOI',
 'TAME',
 'ORE',
 'TATA',
 'BYALLMEANS',
 'LANG',
 'SHES',
 'PICOTS',
 'KHAN',
 'CLEANERS',
 'DEARSANTA',
 'MONDE',
 'ORALS',
 'HEAROF',
 'AFRAME',
 'FAR',
 'KIN',
 'ELO',
 'PISTIL',
 'CACHE',
 'SBA',
 'POINTOFNORETURN',
 'DRIP',
 'FLIPFLOP',
 'TOQUE',
 'LILLE',
 'AMIDALA',
 'TESTY',
 'STP',
 'SNUB',
 'MALALA',
 'PLOTS',
 'EST',
 'IRS',
 'OLIVE',
 'MOPED',
 'ENTER',
 'OPER',
 'ONVIDEO',
 'LUNK',
 'AIRHEADS',
 'JEW',
 'PRIESTS',
 'PARSE',
 'TEL',
 'HANK',
 'BYLAW',
 'OSSA',
 'STANDS',
 'ARMADA',
 'MAGMA',
 'WIDEN',
 'SNOW',
 'PAN',
 'ROUTE',
 'REAL',
 'MEH',
 'ELSE',
 'CHAD',
 'TREND',
 'EDGE',
 'NAES',
 'EVE',
 'STAT',
 'TOYOTAS',
 'PAKISTAN',
 'DUG',
 'FELINE',

In [61]:
# use this cosine similarity function. The other one wasn't working for some reason

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_cosine_similarity(word1, word2, vectorizer):
    vectors = vectorizer.transform([word1, word2])
    return cosine_similarity(vectors)[0, 1]

def overall_cosine_similarity(answers, generated_answers):
    vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3)).fit(answers + generated_answers)
    
    similarities = []
    for word1, word2 in zip(answers, generated_answers):
        if word1 == word2:
            similarities.append(1.0)
        else:
            similarity = compute_cosine_similarity(word1, word2, vectorizer)
            similarities.append(similarity)
    
    return np.mean(similarities)

similarity_score = overall_cosine_similarity(actual_solutions, generated_solutions)
print(f'Overall Cosine Similarity: {similarity_score:.4f}')


Overall Cosine Similarity: 0.5138


In [75]:
def accuracy(answers, generated_answers):
    array1 = np.array(answers)
    array2 = np.array(generated_answers)

    matches = np.sum(array1 == array2)

    return matches / len(array1)

In [62]:
accuracy(actual_solutions, generated_solutions)

TypeError: 'float' object is not callable

In [63]:
# calculate by letter accuracy
def letter_accuracy(words, guesses):
    correct_letters, total_letters = 0, 0
    for i in range(len(guesses)):
        word, guess = words[i], guesses[i]
        # null guesses
        if guess == "NULL":
            total_letters += len(word)
        else:
            # correct guess
            if word == guess:
                correct_letters += len(word)
                total_letters += len(word)

            else:
                if len(word) > len(guess):
                    while len(guess) < len(word):
                        guess += '!'
                # Case 2: guess too long, crop to len(word)
                elif len(word) < len(guess):
                    guess = guess[:len(word)]

                # Word, Guess now guaranteed to be same length
                for i in range(len(word)):
                    if word[i] == guess[i]:
                        total_letters += 1
                        correct_letters += 1
                    else:
                        total_letters += 1
    return correct_letters, total_letters

In [64]:
correct, total = letter_accuracy(actual_solutions, generated_solutions)
print(f"Accuracy is {correct/total}")

Accuracy is 0.44318549692457104


In [65]:
set_word_len = set([len(word) for word in actual_solutions])
for length in set_word_len:
    idxs = [i for i in range(len(actual_solutions)) if len(actual_solutions[i]) == length]
    subgroup_words = [actual_solutions[i] for i in idxs]
    subgroup_guesses = [generated_solutions[i] for i in idxs]
    correct, total = letter_accuracy(subgroup_words, subgroup_guesses)
    print(f"Correct {length}-letter prediction accuracy: {correct / total}")

Correct 3-letter prediction accuracy: 0.5765765765765766
Correct 4-letter prediction accuracy: 0.522972972972973
Correct 5-letter prediction accuracy: 0.462111801242236
Correct 6-letter prediction accuracy: 0.4082125603864734
Correct 7-letter prediction accuracy: 0.3860182370820669
Correct 8-letter prediction accuracy: 0.23387096774193547
Correct 9-letter prediction accuracy: 0.2361111111111111
Correct 10-letter prediction accuracy: 0.4125
Correct 11-letter prediction accuracy: 0.09090909090909091
Correct 12-letter prediction accuracy: 1.0
Correct 15-letter prediction accuracy: 0.022222222222222223


In [79]:
!pip install gensim

# importing all necessary modules
from gensim.models import Word2Vec
from gensim.models import KeyedVectors

def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 400000 embeddings, each length 200
    """
    import gensim.downloader as api
    wv_from_bin = api.load("glove-wiki-gigaword-200")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [53]:
def cos_similarity_incl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            cos_sim.append(0)
    return sum(cos_sim) / len(cos_sim)

def cos_similarity_excl_null(words, guesses):
    cos_sim = []
    for i in range(len(words)):
        try:
            cos_sim.append(wv_from_bin.distance(words[i].lower(), guesses[i].lower()))
        except:
            pass
    if len(cos_sim):
        return sum(cos_sim) / len(cos_sim)
    else:
        return 0

In [80]:
print(f"Cosine similarity w/ 0s for null guesses: {cos_similarity_incl_null(actual_solutions, generated_solutions)}")
print(f"Cosine similarity excluding null guesses: {cos_similarity_excl_null(actual_solutions, generated_solutions)}")

Cosine similarity w/ 0s for null guesses: 0.30846804798096417
Cosine similarity excluding null guesses: 0.4293820266995604


In [66]:
for length in set_word_len:
    idxs = [i for i in range(len(answers)) if len(actual_solutions[i]) == length]
    subgroup_words = [actual_solutions[i] for i in idxs]
    subgroup_guesses = [actual_solutions[i] for i in idxs]
    print(f"Cosine similarity {length}-letter w/ 0s for null guesses: {cos_similarity_incl_null(subgroup_words, subgroup_guesses)}")
    print(f"Cosine similarity {length}-letter excluding null guesses: {cos_similarity_excl_null(subgroup_words, subgroup_guesses)}")
    print()

Cosine similarity 3-letter w/ 0s for null guesses: 5.418604070490057e-09
Cosine similarity 3-letter excluding null guesses: 5.418604070490057e-09

Cosine similarity 4-letter w/ 0s for null guesses: 3.973642985026042e-09
Cosine similarity 4-letter excluding null guesses: 4.257474626813616e-09

Cosine similarity 5-letter w/ 0s for null guesses: 5.960464477539063e-09
Cosine similarity 5-letter excluding null guesses: 7.012311150045956e-09

Cosine similarity 6-letter w/ 0s for null guesses: 4.967053731282552e-09
Cosine similarity 6-letter excluding null guesses: 8.514949253627232e-09

Cosine similarity 7-letter w/ 0s for null guesses: 1.1920928955078126e-08
Cosine similarity 7-letter excluding null guesses: 1.9868214925130207e-08

Cosine similarity 8-letter w/ 0s for null guesses: 0.0
Cosine similarity 8-letter excluding null guesses: 0.0

Cosine similarity 9-letter w/ 0s for null guesses: 0.0
Cosine similarity 9-letter excluding null guesses: 0

Cosine similarity 10-letter w/ 0s for null 

ZeroDivisionError: division by zero

In [67]:
def is_correct_length(actual_answers, generated_answers):
    acc_array = np.array(actual_answers)
    gen_array = np.array(generated_answers)
    matches = np.sum([len(acc_array[i]) == len(gen_array[i]) for i in range(len(acc_array))])
    return matches / len(acc_array)

is_correct_length(actual_solutions, generated_solutions)

0.8368