In [32]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

In [5]:
file_path = "pages\data\Rebuilding Milo.pdf"
loader = PyPDFLoader(file_path)

docs = loader.load()

In [7]:
len(docs)

573

In [13]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1500, chunk_overlap = 200, add_start_index = True)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

742

In [14]:
all_splits[500].page_content

'away from your head so that you’re assessing shoulder elevation in both\nan internally and an externally rotated position.\nWere you able to move your arm higher overhead when your thumb\nwas facing skyward? This internally rotated position takes slack oﬀ of the\nlats and teres major muscles and therefore allows someone with \x00exibility\nlimitations in these muscles to move the arm farther overhead.\nIf your arm elevation is limited, with no signi\x00cant diﬀerence between\nthe two hand positions, the mobility restriction may be coming from\ndeeper in the joint, such as a stiﬀ/restricted joint capsule. That kind of\nrestriction would require a rehabilitation professional to address\nappropriately.\nPec Minor and Major Flexibility Screening'

In [None]:
embeddings = HuggingFaceEmbeddings(
    model_name="./models/all-MiniLM-L6-v2"
)

No sentence-transformers model found with name ./models/all-MiniLM-L6-v2. Creating a new one with mean pooling.


In [30]:
print(embeddings.embed_query("hi"))

[-0.554943323135376, 0.24803955852985382, 0.14662761986255646, 0.3615624010562897, -0.14035078883171082, -0.2896285951137543, 0.2763034999370575, 0.09682708978652954, -0.29563552141189575, -0.2318134754896164, -0.11701453477144241, 0.13069932162761688, -0.028723830357193947, -0.265634149312973, 0.3674921691417694, 0.36251556873321533, -0.17196549475193024, -0.36322101950645447, -0.7630371451377869, -0.2183552235364914, -0.03729547932744026, 0.19890646636486053, -0.23185433447360992, 0.1515665203332901, -0.2620536983013153, -0.2603941261768341, 0.2817501127719879, 0.6049277782440186, -0.30666711926460266, -0.21612195670604706, 0.4345014989376068, 0.20340923964977264, 0.16308178007602692, 0.001062413095496595, 0.023808380588889122, 0.18687327206134796, -0.4796624481678009, -0.7383578419685364, 0.11065944284200668, 0.1400238275527954, -0.010887195356190205, -0.14383190870285034, 0.018757494166493416, 0.14938779175281525, 0.27082207798957825, -0.24601638317108154, 0.12385135143995285, 0.06

In [None]:
vectorstore = FAISS.from_documents(all_splits, embeddings)

vectorstore.save_local("pages/data/milo_index")