# **Programming Assessment \#5**

Names: ABERIN, Shawn  LIM, Kyle  SINGSON, Raymond

More information on the assessment is found in our Canvas course.

# **Load Pre-trained Embeddings**

*While you don't have to separate your code into blocks, it might be easier if you separated loading / downloading your data from the main part of your solution. Consider placing all loading of data into the code block below.*

In [46]:
!pip install fasttext

Collecting fasttext
  Downloading fasttext-0.9.2.tar.gz (68 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/68.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m61.4/68.8 kB[0m [31m2.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.8/68.8 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pybind11>=2.2 (from fasttext)
  Using cached pybind11-2.11.1-py3-none-any.whl (227 kB)
Building wheels for collected packages: fasttext
  Building wheel for fasttext (setup.py) ... [?25l[?25hdone
  Created wheel for fasttext: filename=fasttext-0.9.2-cp310-cp310-linux_x86_64.whl size=4199773 sha256=5f3fd826c87b47b1a31b9a15998cf456d2922d24f8bffcb268ef1de2cc4342f1
  Stored in directory: /root/.cache/pip/wheels/a5/13/75/f811c84a8ab36eedbaef977a6a58a98990e8e0f1967f98f394
Successfully built fa

In [53]:
import numpy as np
import fasttext.util
import fasttext
import urllib.request
import zipfile
import os

class Loaders:
    def __init__(self, downloaded):
        self.loaders = {
            "glove6B": self.load_word_vectors_glove6B,
            "fasttext": self.load_word_vectors_fasttext
        }
        self.srcs = {
            "glove6B": "https://nlp.stanford.edu/data/glove.6B.zip",
            "fasttext": None
        }
        if not downloaded:
            self.download("glove6B")
            self.download("fasttext")

    def load_word_vectors_glove6B(self, glove_file="glove.6B.50d.txt"):
        data = {}
        try:
            with open(glove_file, "r", encoding="utf8") as f:
                lines = (line.casefold().split() for line in f.readlines())
            dat = ((line[0], np.array(line[1:], dtype=np.float32)) for line in lines if line[0].isalnum())
            data.update(dat)
            wordslist = list(data.keys())
            vectors = data
            return wordslist, vectors
        except Exception as e:
            print(f"Error loading GloVe model: {e}")
            return None, None

    def load_word_vectors_fasttext(self, fasttext_file="cc.en.300.bin"):
        try:
            ft = fasttext.load_model(fasttext_file)
            wordslist = ft.get_words()
            vectors = {word: ft.get_word_vector(word) for word in wordslist}
            return wordslist, vectors
        except Exception as e:
            print(f"Error loading FastText model: {e}")
            return None, None

    def load(self, type="glove6B", fileloc="glove.6B.50d"):
        try:
            return self.loaders[type](fileloc)
        except Exception as e:
            print(f"Error loading {type} model: {e}")
            return None, None

    def download(self, type):
        url = self.srcs.get(type)
        if url:
            zip_file_name = f"{type}.zip"
            urllib.request.urlretrieve(url, zip_file_name)
            with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
                zip_ref.extractall(".")
            os.remove(zip_file_name)
            print(f"{type} downloaded and extracted successfully.")
        else:
            print(f"Download URL for {type} not found.")


# **Your Implementation**

*Again, you don't have to have everything in one block. Use the notebook according to your preferences with the goal of fulfilling the assessment in mind.*

In [56]:
import numpy as np
import random

class Semantle:
    def __init__(self, downloaded, vector_type="glove6B", fileloc="glove.6B.50d.txt"):
        self.SEN = -2
        self.prev = self.SEN
        self.loader = Loaders(downloaded=downloaded)
        self.word_bank, self.vec = self.loader.load(vector_type, fileloc)
        if self.word_bank is None:
            print("Error reading vector file. Exiting.")
            return
        self.maketarget()

    def normalize(self, vec):
        den = np.linalg.norm(vec)
        return vec / den if den != 0 else vec

    def maketarget(self):
        self.w = choose_word(self.word_bank)
        vec = self.vec[self.w]
        self.vec_w = self.normalize(vec)
        print(f"Randomly selected word: {self.w}")

    def init_temp(self, s):
        if 0.33 <= s <= 1:
            return "synonym"
        elif -1 <= s <= -0.33:
            return "antonym"
        return "unrelated"

    def check(self, guess):
        if guess == self.w:
            print(f"{self.w} is correct")
            return True

        the_temp = lambda s: "colder" if s < self.prev else "warmer"
        try:
            vec_guess = self.normalize(self.vec[guess])
            sim = np.dot(vec_guess, self.vec_w)
            temp = the_temp(sim) if self.prev != self.SEN else self.init_temp(sim)
            if sim > self.prev:
                self.prev = sim
            print(f"{guess} is {temp}, cosine similarity is: {sim}")
        except KeyError:
            print("Word not recognized, please try again")

        return False

def choose_word(word_bank):
    return random.choice(word_bank)


Enter vector type (glove6B or fasttext): glove6B
Trace
Error loading glove6b model: 'glove6b'
Error reading vector file. Exiting.
Hello World


In [58]:
import os

def main():
    vector_type = input("Enter vector type (glove6B or fasttext): ").lower().strip()

    if vector_type == "glove6b":
        fileloc = "/content/glove.6B.50d.txt"
        if os.path.exists(fileloc):
            downloaded = True
        else:
            downloaded = False
    else:
        downloaded = True

    game = Semantle(downloaded, vector_type="glove6B")

    if game.word_bank is None:
        return  # Exit if word vectors are not loaded successfully

    correct = False
    step = 0

    while not correct:
        print(f"Step: {step}")
        guess = input("Enter your guess: ").lower().strip()

        if guess == "ya mate kudyastop":
            break

        correct = game.check(guess)
        step += 1

    print("Thank you")

if __name__ == "__main__":
    main()

Enter vector type (glove6B or fasttext): fasttext
Randomly selected word: nanto
Step: 0
Enter your guess: tanto
tanto is unrelated, cosine similarity is: 0.24338853359222412
Step: 1
Enter your guess: naton
Word not recognized, please try again
Step: 2
Enter your guess: tanon
tanon is colder, cosine similarity is: 0.05850019305944443
Step: 3
Enter your guess: nano
nano is colder, cosine similarity is: 0.03551755100488663
Step: 4
Enter your guess: tonan
Word not recognized, please try again
Step: 5
Enter your guess: ya mate kudyastop
Thank you
