In [1]:
from time import sleep
from langame import LangameClient
from google.cloud.firestore import DocumentSnapshot

c = LangameClient(path_to_config_file="../config.yaml")

In [2]:
existing_memes = []
for e in c._firestore_client.collection("memes").stream():
    existing_memes.append((e.id, e.to_dict()))
print(f"Fetched {len(existing_memes)} memes.")

Fetched 4274 memes.


In [None]:
from autofaiss import build_index
import numpy as np
import openai
import os
embeddings = []
memes_with_embedding = []
for e in existing_memes:
    if "embedding" in e[1]:
        memes_with_embedding.append(e)
        # turn to np array
        memes_with_embedding[-1][1]["embedding"] = np.array(
            memes_with_embedding[-1][1]["embedding"]
        )
        # add to embeddings
        embeddings.append(memes_with_embedding[-1][1]["embedding"])
print(f"Total {len(memes_with_embedding)} memes with embeddings.")
# create dir "embeddings"
os.makedirs("embeddings", exist_ok=True)
np.save("embeddings/p1.npy", embeddings)
index, index_infos = build_index("embeddings", 
    index_path="indexes/knn.index",
    max_index_memory_usage="6G",
    current_memory_available="7G",
)

In [None]:
response = openai.Engine(id="ada-similarity").embeddings(input="artificial reality")
query = np.array([response["data"][0]["embedding"]], dtype=np.float32)

In [None]:
_, I = index.search(query, 60)
for e in I[0]:
    print(memes_with_embedding[e][1]["content"])

In [53]:
import shutil
from langame.conversation_starters import is_garbage
from sentence_transformers import SentenceTransformer
existing_memes = []
collection = c._firestore_client.collection("memes")
limit = 1000
if limit:
    collection = collection.limit(limit)
if embeddings:
    # https://stackoverflow.com/questions/49579693/how-do-i-get-documents-where-a-specific-field-exists-does-not-exists-in-firebase
    collection.order_by("embedding")
for e in collection.stream():
    existing_memes.append((e.id, e.to_dict()))
print(f"Got {len(existing_memes)} existing memes.")

Got 1000 existing memes.


In [54]:
embeddings = []
memes_with_embedding = []
sentence_embeddings_model_name = "sentence-transformers/LaBSE"
device = "cpu"

sentence_embeddings_model = SentenceTransformer(
    sentence_embeddings_model_name, device=device
)
for e in existing_memes:
    if is_garbage(e[1]):
        continue

    print(f"Encoding {e[1]['content']}")
    e[1]["embedding"] = sentence_embeddings_model.encode(e[1]["content"], show_progress_bar=False)

    if "embedding" in e[1]:
        memes_with_embedding.append(e)
        # turn to np array
        memes_with_embedding[-1][1]["embedding"] = np.array(
            memes_with_embedding[-1][1]["embedding"]
        )
        # add to embeddings
        embeddings.append(memes_with_embedding[-1][1]["embedding"])
# delete "embeddings" and "indexes" folders
for folder in ["embeddings", "indexes"]:
    if os.path.exists(folder):
        shutil.rmtree(folder)
print(len(embeddings))
os.makedirs("embeddings", exist_ok=True)
np.save("embeddings/p1.npy", embeddings)
index, _ = build_index(
    "embeddings",
    index_path="indexes/knn.index",
    max_index_memory_usage="6G",
    current_memory_available="7G",
)

Encoding Have natural disasters gotten worse with the increase in human existence? If so, why?
Encoding What would prompt you to make a dramatic change in your life. Is it worth it?
Encoding Why bother with subjectivity when there are open ended conjectures?
Encoding Do you have any type of disability or chronic illness?
Encoding What would people do when machines take a greater place in our society?
Encoding What is or was your favorite subject in school? What made you hate it?
Encoding Have you ever felt like you were living in a different world than the people around you?
Encoding Have there ever been moments or cases in your life where you stopped caring about one of the categories that form the extroverted ideal?
Encoding How do you choose between conflicting options, using only your intuition and no numbers? Why are some choices correct and others incorrect. Where do these intuitions come from? How can I test them better?
Encoding If you could revive/clone yourself in your 30-35 

100%|██████████| 1/1 [00:00<00:00, 704.33it/s]

		>>> Finished "-> Adding the vectors to the index" in 0.0032 secs
	>>> Finished "Creating the index" in 0.0049 secs
	Computing best hyperparameters 01/28/2022, 10:02:35
	>>> Finished "Computing best hyperparameters" in 0.0000 secs
The best hyperparameters are: 
	Compute fast metrics 01/28/2022, 10:02:35





1492
	>>> Finished "Compute fast metrics" in 10.0317 secs
	Saving the index on local disk 01/28/2022, 10:02:45
	>>> Finished "Saving the index on local disk" in 0.0022 secs
Recap:
{'99p_search_speed_ms': 39.20678197173398,
 'avg_search_speed_ms': 6.718375983500688,
 'compression ratio': 0.9999851588448833,
 'index_key': 'Flat',
 'index_param': '',
 'nb vectors': 987,
 'reconstruction error %': 0.0,
 'size in bytes': 3032109,
 'vectors dimension': 768}
>>> Finished "Launching the whole pipeline" in 10.0469 secs


In [55]:
query = sentence_embeddings_model.encode("intelligence", show_progress_bar=False)
_, I = index.search(np.array([query]), 20)
memes = [memes_with_embedding[i][1] for i in I[0]]

In [56]:
list(set([e["content"] for e in memes]))

['What is the mind?',
 'Is artificial intelligence itself a species from the universe?',
 'Would it be better to know less or more?',
 'Do you think that there is a unique "recipe" for knowledge?',
 'Do you think humans are the only intelligent life in the universe?',
 'What do you know now that you wish you knew when you were younger?',
 'Is it a possibility that the state become governed by an artificial intelligence?',
 'What do you feel about consciousness, what does it mean to you?',
 'Can science achieve its goal to give an explanation for everything?',
 'Do you think technology will ever be able to "understand" (or model?) the human mind?',
 'Does social status affect your intelligence?',
 'Does mysticism play a role in the study of science ?',
 'What if everything we knew was a lie?',
 'What do you think about the "knowledge economy"?',
 'How does knowledge affect our perception of the world?',
 'What is knowledge?',
 'What is your definition of intelligence?',
 "Is human intel