In [None]:
import numpy as np 
import pandas as pd
import fasttext
import fasttext.util
import random
from itertools import permutations,combinations
from numpy.linalg import norm
import math

In [None]:
# Load the pretrained fastText model from a binary file in the working directory.
# NOTE(review): the file name suggests the Turkish 300-dimensional vectors — confirm.
ft = fasttext.load_model(r'cc.tr.300.bin')


In [None]:
# Read the vocabulary file and flatten it into a 1-D array of words.
# NOTE(review): pd.read_csv uses the first line as the column header by default,
# so if words.txt has no header row its first word is silently dropped — confirm
# the file layout (or pass header=None).
df = pd.read_csv("words.txt")
liste=np.array(df).flatten()

In [None]:
def get_word_vectors(word_list, ft):
    """
    Look up the embedding of every word and stack the results into one array.

    word_list: iterable of words to embed.
    ft: object exposing ``get_word_vector(word)`` (e.g. a loaded fasttext model).

    Returns ``np.array`` built from the per-word vectors, in input order.
    """
    lookup = ft.get_word_vector
    return np.array([lookup(word) for word in word_list])

In [None]:
# Embed every word of the vocabulary with the loaded fastText model
# (row i of `vectors` corresponds to liste[i]).
vectors = get_word_vectors(liste, ft)


In [None]:
def cos_similarity_w_matrix(vector, matrix):
    """
    Cosine similarity between one vector and each row of a matrix.

    vector: 1-D array.
    matrix: 2-D array whose rows are compared against ``vector``.

    Returns a 1-D array with one similarity per row of ``matrix``.
    """
    projections = np.dot(vector, matrix.T)
    vector_norm = np.linalg.norm(vector)
    row_norms = np.linalg.norm(matrix, axis=1)
    scale = vector_norm * row_norms
    return projections / scale

In [None]:
def cos_sim_matrix(matrix):
    """
    Cosine similarity between every pair of rows of ``matrix``.

    Entry [i][j] of the result is the similarity of row i to row j, obtained by
    running cos_similarity_w_matrix once per row.
    """
    similarity_rows = [cos_similarity_w_matrix(row, matrix) for row in matrix]
    return np.array(similarity_rows)

In [None]:
# Pairwise cosine similarity between all word vectors.
# NOTE: despite the name, this holds similarities, so SMALL values mean the two
# words are far apart.
word_distances = cos_sim_matrix(vectors)


In [None]:
def cosine_sim(vector1, vector2):
    """
    Cosine similarity of two vectors: their dot product divided by the product
    of their Euclidean norms.
    """
    magnitude = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    return np.dot(vector1, vector2) / magnitude

In [None]:
def results(u,show_dict=False,threshold=0.15):
    """
    Count the unordered word pairs in ``u`` whose cosine similarity is below
    ``threshold``.

    u: sequence of words; each word is looked up in the notebook-level ``ft``.
    show_dict: accepted for compatibility; not used by the function.
    threshold: a pair is counted when its similarity is strictly smaller.

    Returns the number of qualifying pairs as an int.
    """
    return sum(
        1
        for first, second in combinations(u, 2)
        if cosine_sim(ft[first], ft[second]) < threshold
    )


In [None]:
def random_dist_creator(word_distances,liste,list_length,threshold=0.14,max_attempts=100000):
    """
    Randomly pick ``list_length`` distinct words that are all mutually distant.

    A candidate is drawn by choosing a random anchor word and then taking one of
    the (up to) 11 words least similar to that anchor. It is accepted only if
    its similarity to EVERY word already picked is below ``threshold``.

    The previous version only compared a candidate with the last accepted word
    and stopped when the number of distant pairs happened to equal
    comb(list_length, 2). That count can jump past the target, which loops
    forever, and it can be reached by a list of the wrong length.

    word_distances: square matrix of pairwise cosine similarities, where
        word_distances[i][j] belongs to liste[i] and liste[j].
    liste: sequence of words, aligned with the rows of ``word_distances``.
    list_length: number of words to return; values <= 0 give an empty list.
    threshold: two words count as distant when their similarity is strictly
        below this value.
    max_attempts: upper bound on random draws, so an unsatisfiable request
        fails instead of hanging.

    Returns a list of ``list_length`` elements of ``liste``.
    Raises ValueError if ``liste`` is empty, RuntimeError if no valid list was
    found within ``max_attempts`` draws.
    """
    if list_length <= 0:
        return []

    n_words = len(liste)
    if n_words == 0:
        raise ValueError("liste must contain at least one word")

    # argpartition needs kth < row length, so clamp the rank for tiny vocabularies.
    max_rank = min(10, n_words - 1)

    chosen = []  # indices into liste, in the order they were accepted
    for _ in range(max_attempts):
        anchor = random.randint(0, n_words - 1)
        rank = random.randint(0, max_rank)
        # Index of the word with the rank-th smallest similarity to the anchor.
        candidate = int(np.argpartition(word_distances[anchor], rank)[rank])

        if candidate in chosen:
            continue
        if all(word_distances[candidate][other] < threshold for other in chosen):
            chosen.append(candidate)
            if len(chosen) == list_length:
                return [liste[i] for i in chosen]

    raise RuntimeError(
        "could not find %d mutually distant words in %d attempts"
        % (list_length, max_attempts)
    )


In [None]:
# Run random_dist_creator on the full vocabulary with list_length=5 and show
# the resulting word list.
u=random_dist_creator(word_distances,liste,5)
u

In [None]:
# Print the cosine similarity of every unordered pair of words in u so the
# selection can be checked by eye.
for a,b in combinations(u,2):
    print(cosine_sim(ft[a],ft[b]))


## Semantic Tool Class (STEM-T: Semantic Tool for Episodic Memory Task)

The semantic tool class will include methods that turn words into vectors (get_word_vectors), compute cosine similarities (cos_similarity_w_matrix, cos_sim_matrix, cosine_sim), find the elements of a list that are furthest apart and pass the threshold (find_furthest, results), and find the nearest element (find_nearest).

In [None]:
class STEM(object):
    """
    Semantic Tool for Episodic Memory Task.

    Groups the notebook's helpers: word-to-vector lookup, cosine similarities,
    counting distant word pairs and drawing random lists of mutually distant
    words. Every helper is a static method, so it can be called on the class
    (``STEM.inter_cosine_sim(v1, v2)``) or on an instance.
    """

    def __init__(self,word_list,ft,vector,matrix,vector1,vector2,u,threshold,word_distances,list_length):
        """
        Store the given values as attributes of the same name.

        The third parameter used to be misspelled ``vecotr`` while the body
        assigned ``vector``, so every instantiation raised NameError.
        """
        self.word_list=word_list
        self.ft= ft
        self.vector=vector
        self.matrix=matrix
        self.vector1=vector1
        self.vector2=vector2
        self.u=u
        self.threshold=threshold
        self.word_distances=word_distances
        self.list_length=list_length

    @staticmethod
    def get_word_vectors(word_list, ft):
        """Return an array with ``ft.get_word_vector(word)`` for every word in ``word_list``."""
        return np.array(list(map(ft.get_word_vector, word_list)))

    @staticmethod
    def cos_similarity_w_matrix(vector, matrix):
        """Cosine similarity of ``vector`` to each row of ``matrix`` (1-D result)."""
        return np.dot(vector, matrix.T) / (np.linalg.norm(vector) * np.linalg.norm(matrix, axis=1))

    @staticmethod
    def cos_sim_matrix(matrix):
        """Pairwise cosine similarity of all rows of ``matrix``."""
        # Use the class's own helper rather than a module-level function of the same name.
        return np.array([STEM.cos_similarity_w_matrix(row, matrix) for row in matrix])

    @staticmethod
    def inter_cosine_sim(vector1, vector2):
        """Cosine similarity of two vectors."""
        dot_product = np.dot(vector1, vector2)
        norm_1 = np.linalg.norm(vector1)
        norm_2 = np.linalg.norm(vector2)
        return dot_product / (norm_1 * norm_2)

    @staticmethod
    def results(u,threshold=0.15,model=None):
        """
        Count the unordered word pairs in ``u`` whose cosine similarity is
        strictly below ``threshold``.

        model: mapping from word to vector (``model[word]``). When omitted, the
            notebook-level ``ft`` model is used, as before.
        """
        count = 0
        for a,b in combinations(u,2):
            # Resolve the fallback lazily so short inputs never touch the global.
            vectors_by_word = ft if model is None else model
            if STEM.inter_cosine_sim(vectors_by_word[a],vectors_by_word[b])<threshold:
                count += 1
        return count

    @staticmethod
    def random_dist_creator(word_distances,liste,list_length,threshold=0.11,max_attempts=100000):
        """
        Randomly pick ``list_length`` distinct words that are all mutually distant.

        A candidate is drawn by choosing a random anchor word and taking one of
        the (up to) 11 words least similar to it. It is accepted only if its
        similarity to every word already picked is below ``threshold``.

        The previous version compared a candidate only with the last accepted
        word and stopped when the number of distant pairs happened to equal
        comb(list_length, 2). That count can jump past the target, which loops
        forever, and it can be reached by a list of the wrong length.

        word_distances: square matrix of pairwise cosine similarities, where
            word_distances[i][j] belongs to liste[i] and liste[j].
        liste: sequence of words, aligned with the rows of ``word_distances``.
        list_length: number of words to return; values <= 0 give an empty list.
        threshold: two words count as distant when their similarity is strictly
            below this value.
        max_attempts: upper bound on random draws, so an unsatisfiable request
            fails instead of hanging.

        Returns a list of ``list_length`` elements of ``liste``.
        Raises ValueError if ``liste`` is empty, RuntimeError if no valid list
        was found within ``max_attempts`` draws.
        """
        if list_length <= 0:
            return []

        n_words = len(liste)
        if n_words == 0:
            raise ValueError("liste must contain at least one word")

        # argpartition needs kth < row length, so clamp the rank for tiny vocabularies.
        max_rank = min(10, n_words - 1)

        chosen = []  # indices into liste, in the order they were accepted
        for _ in range(max_attempts):
            anchor = random.randint(0, n_words - 1)
            rank = random.randint(0, max_rank)
            # Index of the word with the rank-th smallest similarity to the anchor.
            candidate = int(np.argpartition(word_distances[anchor], rank)[rank])

            if candidate in chosen:
                continue
            if all(word_distances[candidate][other] < threshold for other in chosen):
                chosen.append(candidate)
                if len(chosen) == list_length:
                    return [liste[i] for i in chosen]

        raise RuntimeError(
            "could not find %d mutually distant words in %d attempts"
            % (list_length, max_attempts)
        )



In [None]:
# Example: cosine similarity between the fastText vectors of "kedi" and "köpek",
# calling the helper directly on the class (no STEM instance is created).
d= STEM.inter_cosine_sim(ft["kedi"],ft["köpek"])
d