# Self Hosted
Let's load the `SelfHostedEmbeddings`, `SelfHostedHuggingFaceEmbeddings`, and `SelfHostedHuggingFaceInstructEmbeddings` classes.

In [1]:
import runhouse as rh
from langchain_community.embeddings import (
    SelfHostedEmbeddings,
    SelfHostedHuggingFaceEmbeddings,
    SelfHostedHuggingFaceInstructEmbeddings,
)

In [None]:
# Launch an on-demand cluster. NOTE: `g5.4xlarge` is an AWS instance type with a
# single A10G GPU, not an A100 (see the AWS G5 instance documentation).
gpu = rh.cluster(name="langchain-rh-a10x", instance_type="g5.4xlarge")
# Bring the cluster up unless it is already running (as the method name suggests).
gpu.up_if_not()

# Alternative: an on-demand A10G with the AWS provider pinned explicitly
# (no single A100s on AWS)
# gpu = rh.cluster(name='rh-a10x', instance_type='g5.2xlarge', provider='aws')

# For an existing cluster
# gpu = rh.cluster(ips=['<ip of the cluster>'],
#                  ssh_creds={'ssh_user': '...', 'ssh_private_key':'<path_to_key>'},
#                  name='my-cluster')

In [None]:
# Describe the remote Python environment (packages plus the Hugging Face secret)
# and attach it to the `gpu` cluster via `.to(system=gpu)`. The resulting object
# is passed as `env=` to the embedding classes in the cells below.
embedding_env = rh.env(
    name="embeddings_env",
    reqs=[
        "transformers",
        "torch",
        "accelerate",
        "huggingface-hub",
        "sentence_transformers",
    ],
    secrets=["huggingface"],  # needed for downloading models from Hugging Face
).to(system=gpu)

In [None]:
# The embedding classes used in this notebook are imported from
# `langchain_community`, so install that package explicitly on the cluster
# instead of relying on it being pulled in as a dependency of `langchain`.
gpu.run(commands=["pip install langchain langchain-community"])

In [5]:
# Build the self-hosted embedder on the `gpu` cluster using `embedding_env`.
# No model is named here; the log output below shows
# `sentence-transformers/all-mpnet-base-v2` being loaded on the `cuda` device.
embeddings = SelfHostedHuggingFaceEmbeddings(hardware=gpu, env=embedding_env)

INFO | 2024-03-24 14:32:10.464618 | Calling file_20240324_162817.exists_in_system
INFO | 2024-03-24 14:32:11.620303 | Time to call file_20240324_162817.exists_in_system: 1.16 seconds
INFO | 2024-03-24 14:32:11.623120 | Calling file_20240324_162817.resolved_state
INFO | 2024-03-24 14:32:12.780336 | Time to call file_20240324_162817.resolved_state: 1.16 seconds
INFO | 2024-03-24 14:32:12.782881 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-24 14:32:13.936439 | Time to call huggingface._write_to_file: 1.15 seconds
INFO | 2024-03-24 14:32:14.288792 | Calling embeddings_env.install
INFO | 2024-03-24 14:32:15.546474 | Time to call embeddings_env.install: 1.26 seconds
INFO | 2024-03-24 14:32:15.560158 | Sending module ModelPipeline to langchain-rh-a10x
INFO | 2024-03-24 14:32:16.029876 | Calling ModelPipeline._remote_init
INFO | 2024-03-24 14:32:17.192264 | Time to call ModelPipeline._remote_init: 1.16 seconds
INFO | 2024-03-24 14:32:17.200112 | Calling file_20240324_163213.exists_in_system
INFO | 2024-03-24 14:32:18.358517 | Time to call file_20240324_163213.exists_in_system: 1.16 seconds
INFO | 2024-03-24 14:32:18.363241 | Calling file_20240324_163213.resolved_state
INFO | 2024-03-24 14:32:19.523382 | Time to call file_20240324_163213.resolved_state: 1.16 seconds
INFO | 2024-03-24 14:32:19.533506 | Calling ModelPipeline.load_model


[36mLoad pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-24 14:32:32.750978 | Time to call ModelPipeline.load_model: 13.22 seconds
INFO | 2024-03-24 14:32:32.930918 | Calling file_20240324_162817.exists_in_system
INFO | 2024-03-24 14:32:34.087356 | Time to call file_20240324_162817.exists_in_system: 1.16 seconds
INFO | 2024-03-24 14:32:34.092653 | Calling file_20240324_162817.resolved_state
INFO | 2024-03-24 14:32:35.309964 | Time to call file_20240324_162817.resolved_state: 1.22 seconds
INFO | 2024-03-24 14:32:35.321040 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-24 14:32:36.542381 | Time to call huggingface._write_to_file: 1.22 seconds
INFO | 2024-03-24 14:32:36.884326 | Calling embeddings_env.install
INFO | 2024-03-24 14:32:38.044046 | Time to call embeddings_env.install: 1.16 seconds
INFO | 2024-03-24 14:32:38.058226 | Sending module TextModelEmbedding to langchain-rh-a10x
INFO | 2024-03-24 14:32:38.413637 | Calling TextModelEmbedding._remote_init
INFO | 2024-03-24 14:32:39.576731 | Time to call TextModelEmbedding._remote_init: 1.16 seconds
INFO | 2024-03-24 14:32:39.582776 | Calling TextModelEmbedding.load_embedding_model


[36mLoad pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-24 14:32:41.865010 | Time to call TextModelEmbedding.load_embedding_model: 2.28 seconds


In [6]:
# Sample input that is embedded by both embedding classes below.
text = "This is a test document."

In [7]:
# Embed the sample text on the cluster. The log below shows the call being
# served remotely by `TextModelEmbedding.embed_documents`. The result is stored
# in `query_result` but not displayed.
query_result = embeddings.embed_query(text)

INFO | 2024-03-24 14:32:41.888804 | Calling TextModelEmbedding.embed_documents


[36m
Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.24it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  2.24it/s][0m

INFO | 2024-03-24 14:32:43.055540 | Time to call TextModelEmbedding.embed_documents: 1.17 seconds


And similarly for `SelfHostedHuggingFaceInstructEmbeddings`:

In [8]:
# Rebind `embeddings` to the Instruct variant, reusing the same cluster and env.
# The log output below shows `hkunlp/instructor-large` being loaded on `cuda`.
embeddings = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu, env=embedding_env)

INFO | 2024-03-24 14:32:43.231198 | Calling file_20240324_162817.exists_in_system
INFO | 2024-03-24 14:32:44.390289 | Time to call file_20240324_162817.exists_in_system: 1.16 seconds
INFO | 2024-03-24 14:32:44.393713 | Calling file_20240324_162817.resolved_state
INFO | 2024-03-24 14:32:45.652348 | Time to call file_20240324_162817.resolved_state: 1.26 seconds
INFO | 2024-03-24 14:32:45.663049 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-24 14:32:46.825030 | Time to call huggingface._write_to_file: 1.16 seconds
INFO | 2024-03-24 14:32:47.167111 | Calling embeddings_env.install
INFO | 2024-03-24 14:32:48.326555 | Time to call embeddings_env.install: 1.16 seconds
INFO | 2024-03-24 14:32:48.338368 | Sending module ModelPipeline to langchain-rh-a10x
INFO | 2024-03-24 14:32:48.694761 | Calling ModelPipeline._remote_init
INFO | 2024-03-24 14:32:49.953124 | Time to call ModelPipeline._remote_init: 1.26 seconds
INFO | 2024-03-24 14:32:49.959748 | Calling file_20240324_163246.exists_in_system
INFO | 2024-03-24 14:32:51.183328 | Time to call file_20240324_163246.exists_in_system: 1.22 seconds
INFO | 2024-03-24 14:32:51.186637 | Calling file_20240324_163246.resolved_state
INFO | 2024-03-24 14:32:52.413447 | Time to call file_20240324_163246.resolved_state: 1.23 seconds
INFO | 2024-03-24 14:32:52.419721 | Calling ModelPipeline.load_model


[36mLoad pretrained SentenceTransformer: hkunlp/instructor-large
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-24 14:33:03.599422 | Time to call ModelPipeline.load_model: 11.18 seconds
INFO | 2024-03-24 14:33:03.775977 | Calling file_20240324_162817.exists_in_system
INFO | 2024-03-24 14:33:05.005037 | Time to call file_20240324_162817.exists_in_system: 1.23 seconds
INFO | 2024-03-24 14:33:05.011337 | Calling file_20240324_162817.resolved_state
INFO | 2024-03-24 14:33:06.264976 | Time to call file_20240324_162817.resolved_state: 1.25 seconds
INFO | 2024-03-24 14:33:06.278060 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-24 14:33:07.441389 | Time to call huggingface._write_to_file: 1.16 seconds
INFO | 2024-03-24 14:33:07.784795 | Calling embeddings_env.install
INFO | 2024-03-24 14:33:09.000101 | Time to call embeddings_env.install: 1.22 seconds
INFO | 2024-03-24 14:33:09.012592 | Sending module TextModelEmbedding to langchain-rh-a10x
INFO | 2024-03-24 14:33:09.410513 | Calling TextModelEmbedding._remote_init
INFO | 2024-03-24 14:33:10.567109 | Time to call TextModelEmbedding._remote_init: 1.16 seconds
INFO | 2024-03-24 14:33:10.574694 | Calling TextModelEmbedding.load_embedding_model


[36mLoad pretrained SentenceTransformer: hkunlp/instructor-large
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-24 14:33:12.741675 | Time to call TextModelEmbedding.load_embedding_model: 2.17 seconds


In [9]:
# Embed the same sample text with the Instruct embedder; this overwrites the
# `query_result` produced by the earlier `embed_query` call.
query_result = embeddings.embed_query(text)

INFO | 2024-03-24 14:33:12.757713 | Calling TextModelEmbedding.embed_documents


[36m
Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.83it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00,  6.83it/s]
[0m

INFO | 2024-03-24 14:33:14.025257 | Time to call TextModelEmbedding.embed_documents: 1.27 seconds
