In [35]:
from langchain.schema import Document
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [15]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

In [3]:
# Sentence-embedding model used to vectorise transcript chunks; kept in a
# named constant so it is easy to find and swap.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [32]:
import hashlib
import os
import json

# Collects every transcript chunk (with metadata attached) across all files
# found under the current directory.
list_of_documents = []

# Load speaker map from speaker_map.json
# Keys are looked up below by the derived speaker id; values are display names.
# Encoding is pinned so the result does not depend on the machine's locale.
with open('speaker_map.json', 'r', encoding='utf-8') as sm:
    speaker_map = json.load(sm)

# The splitter only carries configuration, so build it once instead of
# once per transcript file.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

for root, dirs, files in os.walk("."):
    for file in files:
        if not file.endswith("TRANSCRIPT.txt"):
            continue

        full_path = os.path.join(root, file)
        loader = TextLoader(file_path=full_path)
        documents = loader.load()
        docs = text_splitter.split_documents(documents)

        # Everything below is constant for a given file, so compute it once
        # per file rather than once per chunk.

        # run_id is the first path component below the walk root, e.g.
        # "./<run_id>/.../X-TRANSCRIPT.txt". normpath + os.sep keeps this
        # correct on platforms whose separator is not "/" (the previous
        # split("/")[1] raised IndexError there). For a transcript sitting
        # directly in the walk root this yields the file name, as before.
        run_id = os.path.normpath(full_path).split(os.sep)[0]

        # The speaker label is the file-name prefix before the first "-".
        speaker = file.split("-")[0]

        # speaker_map is keyed by a derived id: SHA-1 of "<run_id>_<speaker>",
        # reduced modulo 10**8 and rendered as a decimal string.
        digest = hashlib.sha1(f"{run_id}_{speaker}".encode()).hexdigest()
        speaker_id = str(int(digest, 16) % (10 ** 8))
        speaker_name = speaker_map.get(speaker_id, "Unknown")

        # Add metadata
        for doc in docs:
            # "speaker" holds the resolved display name. The raw label used
            # to be written to this same key first and was then immediately
            # overwritten; that dead store has been dropped. Key order
            # (speaker, then run_id) is unchanged.
            doc.metadata["speaker"] = speaker_name
            doc.metadata["run_id"] = run_id
            list_of_documents.append(doc)


In [36]:

# Build a FAISS index over all transcript chunks, then run a single sample
# query and print each hit for a quick manual check of retrieval quality.
QUERY = "What are drivers of Bitcoin adoption"

db = FAISS.from_documents(list_of_documents, embeddings)
results_with_scores = db.similarity_search_with_score(QUERY)

# Each hit is a (document, score) pair.
# NOTE(review): with LangChain's default FAISS setup the score is presumably
# an L2 distance (lower = closer) — confirm against the library docs.
for hit in results_with_scores:
    doc, score = hit
    print(f"Content: {doc.page_content}, Metadata: {doc.metadata}, Score: {score}")

Content: Hey Brian. Nice to talk to you. Yeah the recent run. I mean let's let's go through it. The story is completely unchanged here for Bitcoin. Right. Every four years Bitcoin the fraud the leverage it gets it gets completely wiped out. Twenty twenty two was an awful year. I mean why is Bitcoin rallying. Well there's no more sellers. Right. It's quite simple. The supply held for one year two years and three years is at all time high. Sixty seven fifty three and 40 percent. Right. It's a it's a completely inelastic supply relative to demand. And there's a growing group of price agnostic accumulators that buy every single day. It's that simple. It's a matter of flows. Certainly our equity markets and bond markets and volatility. Do they play. Do they play into this. Of course they do. And Bitcoin is increasingly a macro asset. But what S.P. S.V.B. showed with the deep hacking of U.S. D.C. showed what the Fed's B.T. F.P. program. Yeah what is that.I can say that, but how big of a deal