# **Programming Assessment \#5**

Names: ABERIN, Shawn  LIM, Kyle  SINGSON, Raymond

More information on the assessment is found in our Canvas course.

# **Load Pre-trained Embeddings**

*While you don't have to separate your code into blocks, it might be easier if you separated loading / downloading your data from the main part of your solution. Consider placing all loading of data into the code block below.*

In [11]:
import numpy as np
import fasttext.util
import fasttext
import urllib.request
import zipfile
import os

if not os.path.exists("./fastText"):
        ! git clone https://github.com/facebookresearch/fastText.git
! cd fastText
! runas python setup.py install

In [18]:
def_files = {
    "glove6b":"glove.6B.50d.txt",
    "fasttext":"wiki-news-300d-1M.vec"
        } 
class Loaders:
    def __init__(self, type="glove6b", loc=f'./content/glove6b/{def_files["glove6b"]}'):
        self.srcs = {
            "glove6b": "https://nlp.stanford.edu/data/glove.6B.zip",
            "fasttext": "https://dl.fbaipublicfiles.com/fasttext/vectors-english/wiki-news-300d-1M.vec.zip"
        }
        if not os.path.exists(loc):
            self.download(type)
        self.words, self.vecs = self.load(type,loc)
        

    def load(self, type,file,):
        try:
            print("loading stuff")
            data = {}
            with open(file, "r", encoding="utf8") as f:
                lines = (line.casefold().split() for line in f.readlines())
            dat = ((line[0], np.array(line[1:], dtype=np.float32)) for line in lines if line[0].isalnum())
            data.update(dat)
            wordslist = list(data.keys())
            vectors = data
            return wordslist, vectors
        except Exception as e:
            raise(f"load error. {e} happned")
        
    def download(self,type):
        try:
            
            url = self.srcs[type]
            zip_file_name = f"{type}.zip"
            print("downloading model")
            urllib.request.urlretrieve(url, zip_file_name)
            
            with zipfile.ZipFile(zip_file_name, 'r') as zip_ref:
                print("unzipping")
                zip_ref.extractall(f"./content/{type}")
            os.remove(zip_file_name)
            print("downloaded and extracted successfully.")
        except Exception as e:
            raise(f"Error downloading model. {e} happened ")

# **Your Implementation**

*Again, you don't have to have everything in one block. Use the notebook according to your preferences with the goal of fulfilling the assessment in mind.*

In [21]:
import random

class Semantle:
    def __init__(self, loader = None,dbg=False):
        self.SEN = -2
        self.prev = self.SEN
        try:
            self.loader = Loaders() if loader is None else loader
        except:
            raise ("Semantle vec-loader broke")
        self.dbg = dbg
        # if not downloaded:
        #     self.loader.download(vector_type)
        # self.word_bank, self.vec = self.loader.load(vector_type, fileloc)
        self.word_bank, self.vec = self.loader.words, self.loader.vecs
        
        self.maketarget()
        
    def normalize(self, vec):
        den = np.linalg.norm(vec)
        return vec / den if den != 0 else vec

    def maketarget(self):
        self.w = choose_word(self.word_bank)
        vec = self.vec[self.w]
        self.vec_w = self.normalize(vec)
        if self.dbg:
            print(f"Randomly selected word: {self.w}")

    def init_temp(self, s):
        if 0.33 <= s <= 1:
            return "synonym"
        elif -1 <= s <= -0.33:
            return "antonym"
        return "unrelated"
    def outcome (self,s):
        if s > self.prev: 
            self.prev = s
            return "warmer"
        return "colder"
    def check(self, guess):
        if guess == self.w:
            print(f"{self.w} is correct")
            return True

        
        try:
            vec_guess = self.normalize(self.vec[guess])
            sim = np.dot(vec_guess, self.vec_w)
            temp = self.outcome(sim)
            
            print(f"{guess} is {temp}, cosine similarity is: {sim}")
        except KeyError:
            print("Word not recognized, please try again")

        return False

def choose_word(word_bank):
    return random.choice(word_bank)


In [24]:
def main():
        
        try:
            vector_type = input("Enter vector type (glove6b or fasttext): ").casefold().strip()
            fileloc = '/'.join(( '.',"content",vector_type,def_files[vector_type]))
            loader = Loaders(type = vector_type,loc = fileloc)
            game = Semantle(loader,False)
        except KeyError as k:
            print(f"{vector_type} is not a recognized vector. stopping")
            return
        except:
            print("something broke")
       
        if game.word_bank is None:
            return  # Exit if word vectors are not loaded successfully
        
        correct = False
        step = 0
        
        while not correct:
            print(f"Step: {step}")
            guess = input("Enter your guess: ").lower().strip()
        
            if guess == "ya mate kudyastop":
                break
        
            correct = game.check(guess)
            step += 1
        
        print("Thank you")
    
if __name__ == "__main__":main()
    


Enter vector type (glove6b or fasttext):  fasttext


downloading model
unzipping
downloaded and extracted successfully.
loading stuff
Step: 0


Enter your guess:  debug


debug is warmer, cosine similarity is: 0.3070474863052368
Step: 1


Enter your guess:  tis


tis is colder, cosine similarity is: 0.2561137080192566
Step: 2


Enter your guess:  fix


fix is colder, cosine similarity is: 0.2718333303928375
Step: 3


Enter your guess:  bug


bug is colder, cosine similarity is: 0.22261756658554077
Step: 4


Enter your guess:  program


program is colder, cosine similarity is: 0.2954360842704773
Step: 5


Enter your guess:  ya mate kudyastop


Thank you
