In [2]:
import pandas as pd
import faiss
import spacy
from sentence_transformers import SentenceTransformer
import numpy as np

  from tqdm.autonotebook import tqdm, trange


In [None]:
!pip install -r requirements.txt

In [3]:
df = pd.read_csv("cards.csv")

df.head()


Unnamed: 0,id,name,effect,god,rarity,mana,type,set,collectable,live,art_id,lib_id,tribe.String,tribe.Valid,attack.Int64,attack.Valid,health.Int64,health.Valid
0,1210,Jinxblade Duelist,"At the end of each turn, give +1 health to thi...",deception,common,3,creature,core,True,True,C799,L2-211,guild,True,4,True,2,True
1,1036,Leyhoard Hatchling,"Each turn, set this creature's mana cost to 10...",magic,rare,10,creature,core,True,True,C745,L2-037,dragon,True,3,True,3,True
2,1302,Cursed Obelisks,Backline. Can't attack. At the end of your tur...,death,rare,3,creature,order,True,True,C8-003,L8-003,structure,True,1,True,4,True
3,1129,End Times,Destroy each creature. Then summon a 6/6 Demon...,death,epic,8,spell,core,True,True,C612,L2-130,,False,0,False,0,False
4,1132,The Old Ritual,"Destroy a creature, if you do, summon a 6/6 Ne...",death,common,4,spell,core,True,True,C609,L2-133,,False,0,False,0,False


In [5]:
# preprocess

NLP = spacy.load("en_core_web_sm")
STOP_WORDS = NLP.Defaults.stop_words

def preprocess(text):
    doc = NLP(str(text))
    preprocessed = []
    for token in doc:
        if token.is_punct or token.like_num or token in STOP_WORDS or token.is_space:
            continue
        preprocessed.append(token.lemma_.lower().strip())
    return ' '.join(preprocessed)


In [7]:
df["processed_text"] = df["effect"].apply(preprocess)

In [9]:
model = SentenceTransformer('all-MiniLM-L6-v2')

df['embedding'] = df['processed_text'].apply(model.encode)

vector = model.encode(df['processed_text'])

df.to_csv("embed_data.csv")



In [10]:
# create indices
d = vector.shape[1]
index = faiss.IndexFlatL2(d)

index.add(vector)

faiss.write_index(index, "cards_faiss.index")

In [18]:
search = "Roar: The three strongest enemy creatures gain burn +2 and go to sleep"

# test_pre = preprocess(search)
# embedding = model.encode(test_pre)
embedding = model.encode(search)
print(embedding.shape)

svec = np.array(embedding).reshape(1 ,-1)
print(svec.shape)

dist, pos = index.search(svec, k=10)

pos = pos.flatten()

print(dist)
print(pos)

names = [df["name"][x] for x in pos]
print(names)

(384,)
(1, 384)
[[0.24051377 0.4508497  0.49977115 0.51034075 0.51136345 0.5825199
  0.61133766 0.62695944 0.6269886  0.6373252 ]]
[1567  851  973  617 1175 1562  847  182  389 1702]
['Witherfingers', 'Solemn Lecturer', 'Infernal Footsoldier', 'Conniving Djinn', 'Dream Stalker', 'Dark Precarion', 'Thalia, Charite Temptress', 'Atlant Regulator', 'The Nocturnal', 'Lethargy Mage']
