# Self Hosted
Let's load the `SelfHostedEmbeddings`, `SelfHostedHuggingFaceEmbeddings`, and `SelfHostedHuggingFaceInstructEmbeddings` classes.

In [1]:
import runhouse as rh
from langchain_community.embeddings import (
    SelfHostedEmbeddings,
    SelfHostedHuggingFaceEmbeddings,
    SelfHostedHuggingFaceInstructEmbeddings,
)

In [None]:
# On-demand A10G on AWS: g5 instances are the A10G family (no single A100s on AWS)
gpu = rh.cluster(
    name="langchain-rh-a10x",
    instance_type="g5.4xlarge",
    provider="aws",
)

# A smaller on-demand A10G instance on AWS
# gpu = rh.cluster(name='rh-a10x', instance_type='g5.2xlarge', provider='aws')

# For an existing cluster
# gpu = rh.cluster(ips=['<ip of the cluster>'],
#                  ssh_creds={'ssh_user': '...', 'ssh_private_key':'<path_to_key>'},
#                  name='my-cluster')

In [None]:
# Environment for the embedding models: the packages they require plus the
# Hugging Face secret, sent to the `gpu` cluster via `.to(system=gpu)`.
embedding_env = rh.env(
    name="embeddings_env",
    reqs=[
        "transformers",
        "torch",
        "accelerate",
        "huggingface-hub",
        "sentence_transformers",
    ],
    secrets=["huggingface"],  # needed for downloading models from huggingface
).to(system=gpu)

In [None]:
# Run `pip install langchain` through the cluster handle — presumably so the
# remotely executed embedding modules can import it (TODO confirm).
gpu.run(commands=["pip install langchain"])

In [5]:
# Create the embeddings object backed by the `gpu` cluster and `embedding_env`.
# In the recorded run below, construction loads
# sentence-transformers/all-mpnet-base-v2 on CUDA.
embeddings = SelfHostedHuggingFaceEmbeddings(hardware=gpu, env=embedding_env)

INFO | 2024-03-21 17:25:39.959610 | Calling file_20240321_192513.exists_in_system
INFO | 2024-03-21 17:25:41.285883 | Time to call file_20240321_192513.exists_in_system: 1.33 seconds
INFO | 2024-03-21 17:25:41.287783 | Calling file_20240321_192513.resolved_state
INFO | 2024-03-21 17:25:42.614145 | Time to call file_20240321_192513.resolved_state: 1.33 seconds
INFO | 2024-03-21 17:25:42.623187 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-21 17:25:43.950897 | Time to call huggingface._write_to_file: 1.33 seconds


Output()

INFO | 2024-03-21 17:25:50.346855 | Calling embeddings_env.install
INFO | 2024-03-21 17:25:51.670978 | Time to call embeddings_env.install: 1.32 seconds


Output()

INFO | 2024-03-21 17:25:56.930220 | Sending module ModelPipeline to sasha-rh-a10x


Output()

Output()

INFO | 2024-03-21 17:26:08.779435 | Calling ModelPipeline._remote_init
INFO | 2024-03-21 17:26:10.107082 | Time to call ModelPipeline._remote_init: 1.33 seconds
INFO | 2024-03-21 17:26:10.111817 | Calling file_20240321_192543.exists_in_system
INFO | 2024-03-21 17:26:11.590918 | Time to call file_20240321_192543.exists_in_system: 1.48 seconds
INFO | 2024-03-21 17:26:11.592707 | Calling file_20240321_192543.resolved_state
INFO | 2024-03-21 17:26:12.920418 | Time to call file_20240321_192543.resolved_state: 1.33 seconds
INFO | 2024-03-21 17:26:12.928229 | Calling ModelPipeline.load_model


[36mLoad pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-21 17:26:16.410587 | Time to call ModelPipeline.load_model: 3.48 seconds
INFO | 2024-03-21 17:26:16.920270 | Calling file_20240321_192513.exists_in_system
INFO | 2024-03-21 17:26:18.248568 | Time to call file_20240321_192513.exists_in_system: 1.33 seconds
INFO | 2024-03-21 17:26:18.250271 | Calling file_20240321_192513.resolved_state
INFO | 2024-03-21 17:26:19.576737 | Time to call file_20240321_192513.resolved_state: 1.33 seconds
INFO | 2024-03-21 17:26:19.584906 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-21 17:26:20.910074 | Time to call huggingface._write_to_file: 1.33 seconds


Output()

INFO | 2024-03-21 17:26:27.161647 | Calling embeddings_env.install
INFO | 2024-03-21 17:26:28.482529 | Time to call embeddings_env.install: 1.32 seconds


Output()

INFO | 2024-03-21 17:26:33.777176 | Sending module TextModelEmbedding to sasha-rh-a10x


Output()

Output()

INFO | 2024-03-21 17:26:45.544214 | Calling TextModelEmbedding._remote_init
INFO | 2024-03-21 17:26:46.865010 | Time to call TextModelEmbedding._remote_init: 1.32 seconds
INFO | 2024-03-21 17:26:46.869249 | Calling TextModelEmbedding.load_embedding_model


[36mLoad pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-21 17:26:49.348465 | Time to call TextModelEmbedding.load_embedding_model: 2.48 seconds


In [6]:
# Sample input reused by both embed_query calls in this notebook
text = "This is a test document."

In [7]:
# Embed the sample text; the recorded log shows the call is dispatched as
# TextModelEmbedding.embed_documents.
query_result = embeddings.embed_query(text)

INFO | 2024-03-21 17:26:56.089044 | Calling TextModelEmbedding.embed_documents


[36m
Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 110.21it/s]
[0m

INFO | 2024-03-21 17:26:57.572220 | Time to call TextModelEmbedding.embed_documents: 1.48 seconds


The same workflow applies to `SelfHostedHuggingFaceInstructEmbeddings`:

In [8]:
# Rebinds `embeddings` to the instruct variant (the earlier object is no longer
# referenced by this name). In the recorded run below, construction loads
# hkunlp/instructor-large on CUDA.
embeddings = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu, env=embedding_env)

INFO | 2024-03-21 17:27:01.496519 | Calling file_20240321_192513.exists_in_system
INFO | 2024-03-21 17:27:02.819464 | Time to call file_20240321_192513.exists_in_system: 1.32 seconds
INFO | 2024-03-21 17:27:02.820573 | Calling file_20240321_192513.resolved_state
INFO | 2024-03-21 17:27:04.298018 | Time to call file_20240321_192513.resolved_state: 1.48 seconds
INFO | 2024-03-21 17:27:04.306892 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-21 17:27:05.635081 | Time to call huggingface._write_to_file: 1.33 seconds


Output()

INFO | 2024-03-21 17:27:11.761563 | Calling embeddings_env.install
INFO | 2024-03-21 17:27:13.095826 | Time to call embeddings_env.install: 1.33 seconds


Output()

INFO | 2024-03-21 17:27:18.157444 | Sending module ModelPipeline to sasha-rh-a10x


Output()

Output()

INFO | 2024-03-21 17:27:30.108116 | Calling ModelPipeline._remote_init
INFO | 2024-03-21 17:27:31.436523 | Time to call ModelPipeline._remote_init: 1.33 seconds
INFO | 2024-03-21 17:27:31.444053 | Calling file_20240321_192705.exists_in_system
INFO | 2024-03-21 17:27:32.777527 | Time to call file_20240321_192705.exists_in_system: 1.33 seconds
INFO | 2024-03-21 17:27:32.781536 | Calling file_20240321_192705.resolved_state
INFO | 2024-03-21 17:27:34.263703 | Time to call file_20240321_192705.resolved_state: 1.48 seconds
INFO | 2024-03-21 17:27:34.269561 | Calling ModelPipeline.load_model


[36mLoad pretrained SentenceTransformer: hkunlp/instructor-large
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-21 17:27:39.609262 | Time to call ModelPipeline.load_model: 5.34 seconds
INFO | 2024-03-21 17:27:40.119213 | Calling file_20240321_192513.exists_in_system
INFO | 2024-03-21 17:27:41.448917 | Time to call file_20240321_192513.exists_in_system: 1.33 seconds
INFO | 2024-03-21 17:27:41.452214 | Calling file_20240321_192513.resolved_state
INFO | 2024-03-21 17:27:42.782491 | Time to call file_20240321_192513.resolved_state: 1.33 seconds
INFO | 2024-03-21 17:27:42.790787 | Calling huggingface._write_to_file


[36mSecrets already exist in .cache/huggingface/token.
[0m

INFO | 2024-03-21 17:27:44.123898 | Time to call huggingface._write_to_file: 1.33 seconds


Output()

INFO | 2024-03-21 17:27:50.304257 | Calling embeddings_env.install
INFO | 2024-03-21 17:27:51.632450 | Time to call embeddings_env.install: 1.33 seconds


Output()

INFO | 2024-03-21 17:27:56.918548 | Sending module TextModelEmbedding to sasha-rh-a10x


Output()

Output()

INFO | 2024-03-21 17:28:08.600670 | Calling TextModelEmbedding._remote_init
INFO | 2024-03-21 17:28:09.927678 | Time to call TextModelEmbedding._remote_init: 1.33 seconds
INFO | 2024-03-21 17:28:09.934053 | Calling TextModelEmbedding.load_embedding_model


[36mLoad pretrained SentenceTransformer: hkunlp/instructor-large
[0m[36mUse pytorch device_name: cuda
[0m

INFO | 2024-03-21 17:28:12.277268 | Time to call TextModelEmbedding.load_embedding_model: 2.34 seconds


In [9]:
# Same call as for the first model, now against the instruct embeddings;
# this overwrites the earlier `query_result`.
query_result = embeddings.embed_query(text)

INFO | 2024-03-21 17:28:16.568186 | Calling TextModelEmbedding.embed_documents


[36m
Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Batches: 100%|██████████| 1/1 [00:00<00:00, 54.18it/s]
[0m

INFO | 2024-03-21 17:28:17.899417 | Time to call TextModelEmbedding.embed_documents: 1.33 seconds
