# **Programming Assessment \#5**

Names: ABERIN, Shawn  LIM, Kyle  SINGSON, Raymond

More information on the assessment is found in our Canvas course.

# **Load Pre-trained Embeddings**

*While you don't have to separate your code into blocks, it might be easier if you separated loading / downloading your data from the main part of your solution. Consider placing all loading of data into the code block below.*

In [1]:
# Notebook shell escape: install the fastText package that the next cell imports.
! pip install fasttext





In [2]:
import numpy as np
import fasttext.util
import fasttext
import urllib.request
import zipfile
import os

class Loaders:
    """Download and load pre-trained word embeddings.

    Two embedding families are supported, keyed by the strings ``"glove6b"``
    and ``"fasttext"``.  Every loader returns ``(wordslist, vectors)`` where
    ``wordslist`` is a list of words and ``vectors`` maps each word to its
    NumPy vector.
    """

    def __init__(self):
        # Dispatch table: vector type -> bound method that reads it from disk.
        self.loaders = {
            "glove6b": self.load_word_vectors_glove6b,
            "fasttext": self.load_word_vectors_fasttext
        }
        # Download locations; fastText is fetched through fasttext.util instead.
        self.srcs = {
            "glove6b": "https://nlp.stanford.edu/data/glove.6B.zip",
            "fasttext": None
        }

    def load_word_vectors_glove6b(self, glove_file="glove.6B.50d.txt"):
        """Read a GloVe text file (one ``word v1 v2 ...`` entry per line).

        Words are case-folded and only purely alphanumeric tokens are kept.
        Blank lines are skipped.  If case-folding makes two entries collide,
        the later line wins.

        Args:
            glove_file: Path of the GloVe ``.txt`` file.

        Returns:
            ``(wordslist, vectors)``; vectors are ``float32`` arrays.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a kept line contains a non-numeric component.
        """
        vectors = {}
        with open(glove_file, "r", encoding="utf8") as f:
            # Stream the file line by line; the full GloVe files are large.
            for line in f:
                fields = line.casefold().split()
                # An empty split means a blank line: nothing to index.
                if not fields or not fields[0].isalnum():
                    continue
                vectors[fields[0]] = np.array(fields[1:], dtype=np.float32)
        wordslist = list(vectors)
        return wordslist, vectors

    def load_word_vectors_fasttext(self, fasttext_file="cc.en.300.bin"):
        """Load a fastText binary model and materialise one vector per word.

        Args:
            fasttext_file: Path of the fastText ``.bin`` model.

        Returns:
            ``(wordslist, vectors)`` built from the model's vocabulary.
        """
        ft = fasttext.load_model(fasttext_file)
        wordslist = ft.get_words()
        vectors = {word: ft.get_word_vector(word) for word in wordslist}
        return wordslist, vectors

    def load(self, type="glove6b", fileloc="glove.6B.50d"):
        """Load embeddings of the given ``type`` from ``fileloc``.

        Errors from the underlying loader propagate to the caller.

        Args:
            type: ``"glove6b"`` or ``"fasttext"``.
            fileloc: Path handed to the selected loader.

        Returns:
            The selected loader's ``(wordslist, vectors)`` pair.

        Raises:
            KeyError: If ``type`` is not a known vector type.
        """
        # NOTE(review): the default ``fileloc`` has no ".txt" suffix, unlike
        # the GloVe loader's own default file name -- confirm which is
        # intended.  It is left unchanged here to keep the signature stable.
        return self.loaders[type](fileloc)

    def download(self, type):
        """Fetch the embedding files for ``type``; unknown types are reported."""
        if type == "glove6b":
            self.download_glove()
        elif type == "fasttext":
            fasttext.util.download_model('en', if_exists='ignore')  # English
        else:
            print(f"No type {type}")

    def download_glove(self):
        """Download the GloVe 6B archive and extract it into ``./content``.

        Best effort: network, file-system and archive errors are reported on
        stdout instead of being raised.  The temporary zip file is removed
        whether or not extraction succeeded.
        """
        url = self.srcs["glove6b"]
        zip_file_name = "glove6b.zip"
        try:
            print("downloading glove vecs")
            urllib.request.urlretrieve(url, zip_file_name)
            with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
                print("unzipping")
                zip_ref.extractall("./content")
        except (OSError, zipfile.BadZipFile) as err:
            # urllib's URLError/HTTPError are OSError subclasses.  Catching
            # these explicitly lets KeyboardInterrupt still abort a download.
            print(f"Error downloading glove6b: {err}")
        else:
            print("glove6b downloaded and extracted successfully.")
        finally:
            # Do not leave a partial or already-extracted archive behind.
            if os.path.exists(zip_file_name):
                try:
                    os.remove(zip_file_name)
                except OSError as err:
                    print(f"Could not remove {zip_file_name}: {err}")
        


# **Your Implementation**

*Again, you don't have to have everything in one block. Use the notebook according to your preferences with the goal of fulfilling the assessment in mind.*

In [3]:
import numpy as np
import random

class Semantle:
    """Word-guessing game scored by cosine similarity of word embeddings.

    A secret word is drawn from the loaded vocabulary.  The first scored
    guess is labelled "synonym", "antonym" or "unrelated"; every later guess
    is labelled "warmer" or "colder" relative to the best similarity reached
    so far.
    """

    def __init__(self, downloaded, vector_type="glove6b",
                 fileloc="glove.6B.50d.txt", dbg=False):
        """Load the embeddings (downloading first if needed) and pick a target.

        Args:
            downloaded: False to download the vectors before loading them.
            vector_type: Embedding family understood by ``Loaders``.
            fileloc: Path of the embedding file to load.
            dbg: When true, print the secret word after it is chosen.
        """
        # Sentinel below any cosine similarity: "no guess has been scored yet".
        self.SEN = -2
        self.prev = self.SEN
        self.loader = Loaders()
        self.dbg = dbg
        if not downloaded:
            self.loader.download(vector_type)
        self.word_bank, self.vec = self.loader.load(vector_type, fileloc)
        if self.word_bank is None:
            print("Error reading vector file. Exiting.")
            return
        self.maketarget()

    def normalize(self, vec):
        """Return ``vec`` scaled to unit length; a zero vector is returned as is."""
        den = np.linalg.norm(vec)
        return vec / den if den != 0 else vec

    def maketarget(self):
        """Choose a new secret word and cache its unit vector."""
        self.w = choose_word(self.word_bank)
        vec = self.vec[self.w]
        self.vec_w = self.normalize(vec)
        # A new target invalidates the similarity history of the old one.
        self.prev = self.SEN
        if self.dbg:
            print(f"Randomly selected word: {self.w}")

    def init_temp(self, s):
        """Label the first guess from its cosine similarity ``s``.

        Values at or above 0.33 are "synonym", values at or below -0.33 are
        "antonym", everything in between is "unrelated".  The outer bounds
        are open so that float rounding slightly beyond +/-1 is still
        classified correctly.
        """
        if s >= 0.33:
            return "synonym"
        if s <= -0.33:
            return "antonym"
        return "unrelated"

    def check(self, guess):
        """Score ``guess`` against the secret word and print feedback.

        Args:
            guess: The guessed word.

        Returns:
            True when ``guess`` is the secret word, False otherwise (this
            includes words missing from the vocabulary).
        """
        if guess == self.w:
            print(f"{self.w} is correct")
            return True

        # Only the vocabulary lookup can raise KeyError; keep the try minimal.
        try:
            raw_guess = self.vec[guess]
        except KeyError:
            print("Word not recognized, please try again")
            return False

        sim = np.dot(self.normalize(raw_guess), self.vec_w)

        # Decide the label BEFORE recording this guess.  Otherwise the
        # sentinel is already overwritten and the first-guess test can never
        # be told apart from later guesses.
        if self.prev == self.SEN:
            temp = self.init_temp(sim)
        else:
            temp = "colder" if sim < self.prev else "warmer"

        # ``prev`` tracks the best similarity reached so far.
        if sim > self.prev:
            self.prev = sim

        print(f"{guess} is {temp}, cosine similarity is: {sim}")
        return False

def choose_word(word_bank):
    """Return one element of ``word_bank`` picked uniformly at random."""
    selected = random.choice(word_bank)
    return selected


In [4]:


def _run_best_effort(cmd, cwd=None):
    """Run an external command, reporting (not raising) a failure to start it.

    A non-zero exit status is ignored, so a failed step does not abort the
    game set-up.
    """
    import subprocess

    try:
        subprocess.run(cmd, cwd=cwd, check=False)
    except OSError as err:
        # Raised when the program or the working directory does not exist.
        print(f"Could not run {cmd[0]}: {err}")


def main():
    """Play an interactive Semantle session on the console.

    Asks which embedding family to use, makes sure the fastText sources are
    available when that family is chosen, then loops over guesses until the
    secret word is found or the quit phrase is entered.
    """
    import sys

    vector_type = input("Enter vector type (glove6b or fasttext): ").lower().strip()

    if vector_type == "glove6b":
        fileloc = "./content/glove.6B.50d.txt"
    elif vector_type == "fasttext":
        # Plain subprocess calls keep this function valid Python outside a
        # notebook.  ``cwd`` replaces a separate ``cd`` step, which would not
        # carry over to the next command, and the running interpreter is
        # used for the install so it targets the current environment.
        if not os.path.exists("./fastText"):
            _run_best_effort(
                ["git", "clone", "https://github.com/facebookresearch/fastText.git"]
            )
        _run_best_effort([sys.executable, "setup.py", "install"], cwd="fastText")
        fileloc = "cc.en.300.bin"
    else:
        # Without this branch ``downloaded`` would be undefined below.
        print(f"No type {vector_type}")
        return

    # The vectors only need downloading when the file is not on disk yet.
    downloaded = os.path.exists(fileloc)

    game = Semantle(downloaded, vector_type, fileloc)

    if game.word_bank is None:
        return  # Exit if word vectors are not loaded successfully

    correct = False
    step = 0

    while not correct:
        print(f"Step: {step}")
        guess = input("Enter your guess: ").lower().strip()

        # Escape phrase that ends the session without a correct guess.
        if guess == "ya mate kudyastop":
            break

        correct = game.check(guess)
        step += 1

    print("Thank you")


if __name__ == "__main__":
    main()


Enter vector type (glove6b or fasttext):  fastText




KeyboardInterrupt: 