bench_embeddings.py
import time
from functools import partial
from typing import Callable, List, Optional, Tuple

import pandas as pd
from llama_index.core import SimpleDirectoryReader
from llama_index.core.base.embeddings.base import (
    DEFAULT_EMBED_BATCH_SIZE,
    BaseEmbedding,
)
from llama_index.embeddings import OpenAIEmbedding, resolve_embed_model


def generate_strings(num_strings: int = 100, string_length: int = 10) -> List[str]:
    """
    Generate strings sliced from the paul graham essay of the following form:

    offset 0: [0:string_length], [string_length:2*string_length], ...
    offset 1: [1:1+string_length], [1+string_length:1+2*string_length], ...
    ...
    """  # noqa: D415
    content = (
        SimpleDirectoryReader("../../examples/paul_graham_essay/data")
        .load_data()[0]
        .get_content()
    )
    content_length = len(content)

    strings_per_loop = content_length / string_length
    num_loops_upper_bound = int(num_strings / strings_per_loop) + 1
    strings = []

    for offset in range(num_loops_upper_bound + 1):
        ptr = offset % string_length
        while ptr + string_length < content_length:
            strings.append(content[ptr : ptr + string_length])
            ptr += string_length
            if len(strings) == num_strings:
                # Return here rather than break: a bare break only exits the
                # inner while loop, and the outer loop would keep appending
                # slices past num_strings.
                return strings

    return strings
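
# Toy illustration of the slicing scheme above (hypothetical input, not part
# of the benchmark): with content = "abcdefgh" and string_length = 3,
#   offset 0 -> "abc", "def"
#   offset 1 -> "bcd", "efg"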


def create_open_ai_embedding(batch_size: int) -> Tuple[BaseEmbedding, str, int]:
    # The third tuple element is reported as max_seq_length in the benchmark
    # output; 4096 is a hardcoded stand-in for the OpenAI model.
    return (
        OpenAIEmbedding(embed_batch_size=batch_size),
        "OpenAIEmbedding",
        4096,
    )


def create_local_embedding(
    model_name: str, batch_size: int
) -> Tuple[BaseEmbedding, str, int]:
    model = resolve_embed_model(f"local:{model_name}")
    return (
        model,
        "hf/" + model_name,
        # max_seq_length is read off the underlying sentence-transformers client.
        model._langchain_embedding.client.max_seq_length,  # type: ignore
    )
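
# Usage sketch: bench_simple_vector_store expects factories that take only
# batch_size, so bind model_name with functools.partial, e.g.:
#   make_bge = partial(create_local_embedding, model_name="BAAI/bge-small-en")
#   model, label, max_seq_length = make_bge(batch_size=10)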


def bench_simple_vector_store(
    embed_models: List[Callable[[int], Tuple[BaseEmbedding, str, int]]],
    num_strings: List[int] = [100],
    string_lengths: List[int] = [64, 256],
    embed_batch_sizes: List[int] = [1, DEFAULT_EMBED_BATCH_SIZE],
    torch_num_threads: Optional[int] = None,
) -> None:
    """Benchmark embeddings."""
    print("Benchmarking Embeddings\n---------------------------")

    results = []

    if torch_num_threads is not None:
        import torch  # pants: no-infer-dep

        torch.set_num_threads(torch_num_threads)

    max_num_strings = max(num_strings)
    for string_length in string_lengths:
        generated_strings = generate_strings(
            num_strings=max_num_strings, string_length=string_length
        )
        for string_count in num_strings:
            strings = generated_strings[:string_count]
            for batch_size in embed_batch_sizes:
                models = []
                for create_model in embed_models:
                    models.append(create_model(batch_size=batch_size))  # type: ignore
                for model in models:
                    time1 = time.time()
                    _ = model[0].get_text_embedding_batch(strings, show_progress=True)
                    time2 = time.time()
                    print(
                        f"Embedding with model {model[1]} with "
                        f"batch size {batch_size} and max_seq_length {model[2]} for "
                        f"{string_count} strings of length {string_length} took "
                        f"{time2 - time1} seconds."
                    )
                    results.append((model[1], batch_size, string_length, time2 - time1))
                    # TODO: async version

    # print final results
    print("\n\nFinal Results\n---------------------------")
    results_df = pd.DataFrame(
        results, columns=["model", "batch_size", "string_length", "time"]
    )
    print(results_df)
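

# A minimal sketch toward the "TODO: async version" above, assuming
# BaseEmbedding.aget_text_embedding_batch, the async counterpart of
# get_text_embedding_batch; the helper name here is illustrative, not part
# of the library.
async def time_embedding_batch_async(
    model: BaseEmbedding, strings: List[str]
) -> float:
    """Time one async embedding batch and return the elapsed seconds."""
    time1 = time.time()
    _ = await model.aget_text_embedding_batch(strings)
    return time.time() - time1


# Example: elapsed = asyncio.run(time_embedding_batch_async(model, strings))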


if __name__ == "__main__":
    bench_simple_vector_store(
        embed_models=[
            # create_open_ai_embedding,
            partial(
                create_local_embedding,
                model_name="sentence-transformers/all-MiniLM-L6-v2",
            ),
            partial(
                create_local_embedding,
                model_name="sentence-transformers/all-MiniLM-L12-v2",
            ),
            partial(
                create_local_embedding,
                model_name="BAAI/bge-small-en",
            ),
            partial(
                create_local_embedding,
                model_name="sentence-transformers/all-mpnet-base-v2",
            ),
        ],
        torch_num_threads=None,
    )
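
# Note: generate_strings reads "../../examples/paul_graham_essay/data", a path
# relative to the working directory, so run this script from the directory it
# lives in (or adjust the path).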