/
index.html
163 lines (146 loc) · 5.82 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>wllama.cpp demo</title>
<style>
body {
background-color: rgb(55, 55, 55);
color: rgb(222, 222, 222);
font-family: 'Courier New', Courier, monospace;
padding: 1em;
}
#output_cmpl {
border: 1px solid #aaa;
border-radius: 5px;
padding: 0.7em;
}
</style>
</head>
<body>
<h2>Completions</h2>
Model: <span id="cmpl_model"></span><br/>
<button id="btn_start_cmpl">Load model</button><br/>
Prompt: <input id="input_prompt" value="Once upon a time," /><br/>
Number of tokens: <input id="input_n_predict" value="50" type="number" /><br/>
<button id="btn_run_cmpl">Run completions</button>
<br/>
<br/>
Completion: <br/>
<div id="output_cmpl"></div>
<br/>
--------------------<br/>
<br/>
<h2>Embeddings</h2>
Model: <span id="embd_model"></span><br/>
<button id="btn_start_embd">Load model</button><br/>
Text A: <input id="input_a" value="What is the weather like?" /><br/>
Text B: <input id="input_b" value="Will it rain tomorrow?" /><br/>
<button id="btn_run_embd">Calculate embeddings and cosine similarity</button><br/>
<br/>
Cosine similarity = <span id="output_embd"></span>
<script type="module">
import { Wllama } from '../../esm/index.js';
// Directory (relative to this page) that holds the wllama build artifacts.
const ESM_BASE = '../../esm';

// Artifact name -> URL table that is handed to the Wllama constructor.
const CONFIG_PATHS = {};
for (const artifact of [
  'single-thread/wllama.js',
  'single-thread/wllama.wasm',
  'multi-thread/wllama.js',
  'multi-thread/wllama.wasm',
  'multi-thread/wllama.worker.mjs',
]) {
  CONFIG_PATHS[artifact] = `${ESM_BASE}/${artifact}`;
}

// Common prefix of the demo model download URLs.
const HF_MODELS_BASE = 'https://huggingface.co/ggml-org/models/resolve/main';

// Model used by the "Completions" section, with its size label for display.
const CMPL_MODEL = `${HF_MODELS_BASE}/tinyllamas/stories15M-q4_0.gguf`;
const CMPL_MODEL_SIZE = '19MB';

// Model used by the "Embeddings" section, with its size label for display.
const EMBD_MODEL = `${HF_MODELS_BASE}/bert-bge-small/ggml-model-f16.gguf`;
const EMBD_MODEL_SIZE = '67MB';
/**
 * Page entry point: disables both demo sections, shows which models will be
 * downloaded, and wires the two "Load model" buttons.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Nothing can run until the corresponding model has been loaded.
  setCmplDisable(true);
  setEmbdDisable(true);

  // Display the part of the URL after "/resolve/main"; if the URL does not
  // follow that layout, show the whole URL instead of throwing.
  const getName = (url) => {
    const match = url.match(/\/resolve\/main(.*)/);
    return match ? match[1] : url;
  };
  elemCmplModel.textContent = `${getName(CMPL_MODEL)}, size: ${CMPL_MODEL_SIZE}`;
  elemEmbdModel.textContent = `${getName(EMBD_MODEL)}, size: ${EMBD_MODEL_SIZE}`;

  // Shared click behavior for both load buttons: lock the button while the
  // model loads, hide it on success, and unlock it again on failure so the
  // user can retry.
  const bindLoadButton = (button, loadModel) => {
    button.onclick = async () => {
      button.disabled = true;
      try {
        await loadModel();
        button.style.display = 'none';
      } catch (err) {
        console.error('Failed to load model', err);
        button.disabled = false;
      }
    };
  };

  bindLoadButton(elemBtnStartCmpl, () => startCompletions(CMPL_MODEL));
  bindLoadButton(elemBtnStartEmbd, () => startEmbeddings(EMBD_MODEL));
}
/////////////////////////////////////////////////////////////////////
// completions
/**
 * Loads the completion model and wires the "Run completions" button.
 *
 * @param {string} modelUrl - URL of the model file to download and load.
 * @returns {Promise<void>} Resolves once the model is loaded and the
 *   completion controls have been enabled.
 */
async function startCompletions(modelUrl) {
  const wllama = new Wllama(CONFIG_PATHS);
  await wllama.loadModelFromUrl(modelUrl);
  setCmplDisable(false);

  elemBtnCompletions.onclick = async () => {
    // Lock the controls while a completion is being generated.
    setCmplDisable(true);
    try {
      await wllama.createCompletion(elemInput.value, {
        // Explicit radix: the field holds a decimal token count.
        nPredict: parseInt(elemNPredict.value, 10),
        sampling: {
          temp: 0.5,
          top_k: 40,
          top_p: 0.9,
        },
        // Show the text generated so far after every new token.
        onNewToken: (token, piece, currentText) => {
          elemOutputCmpl.textContent = currentText;
        },
      });
    } finally {
      // Always unlock the controls, even if generation failed, so the user
      // is not left with a permanently disabled form.
      setCmplDisable(false);
    }
  };
}
/////////////////////////////////////////////////////////////////////
// embeddings
/**
 * Loads the embedding model and wires the button that embeds both input
 * texts and displays how similar they are.
 *
 * @param {string} modelUrl - URL of the model file to download and load.
 * @returns {Promise<void>} Resolves once the model is loaded and the
 *   embedding controls have been enabled.
 */
async function startEmbeddings(modelUrl) {
  const wllama1 = new Wllama(CONFIG_PATHS);
  await wllama1.loadModelFromUrl(modelUrl, {
    // IMPORTANT: do not forget to set embeddings to true. If not set, "createEmbedding" will crash the app
    embeddings: true,
    pooling_type: 'LLAMA_POOLING_TYPE_MEAN', // depends on the model, you may need to change this
  });
  setEmbdDisable(false);

  elemBtnEmbeddings.onclick = async () => {
    // Lock the controls while the embeddings are being computed.
    setEmbdDisable(true);
    try {
      const embdA = await wllama1.createEmbedding(elemInputA.value);
      console.log({embdA});
      const embdB = await wllama1.createEmbedding(elemInputB.value);
      console.log({embdB});
      // The vectors are taken to be normalized already, so the norms are not
      // computed: the plain dot product is then the cosine similarity.
      const dotProd = embdA.reduce((acc, a, i) => acc + a * embdB[i], 0);
      elemOutputEmbd.textContent = dotProd;
    } finally {
      // Always unlock the controls, even if embedding failed, so the user
      // is not left with a permanently disabled form.
      setEmbdDisable(false);
    }
  };
}
/////////////////////////////////////////////////////////////////////
// DOM element handles, looked up once by id and reused by the code above.
const byId = (id) => document.getElementById(id);

// Completions section
const elemCmplModel = byId('cmpl_model');
const elemBtnStartCmpl = byId('btn_start_cmpl');
const elemInput = byId('input_prompt');
const elemNPredict = byId('input_n_predict');
const elemBtnCompletions = byId('btn_run_cmpl');
const elemOutputCmpl = byId('output_cmpl');

// Embeddings section
const elemEmbdModel = byId('embd_model');
const elemBtnStartEmbd = byId('btn_start_embd');
const elemInputA = byId('input_a');
const elemInputB = byId('input_b');
const elemBtnEmbeddings = byId('btn_run_embd');
const elemOutputEmbd = byId('output_embd');
// utils

/**
 * Enables or disables every interactive control of the completions section.
 * @param {boolean} disabled - value assigned to each control's `disabled`.
 */
const setCmplDisable = (disabled) => {
  for (const control of [elemInput, elemNPredict, elemBtnCompletions]) {
    control.disabled = disabled;
  }
};

/**
 * Enables or disables every interactive control of the embeddings section.
 * @param {boolean} disabled - value assigned to each control's `disabled`.
 */
const setEmbdDisable = (disabled) => {
  for (const control of [elemInputA, elemInputB, elemBtnEmbeddings]) {
    control.disabled = disabled;
  }
};
// Start the demo. This call must stay below the `const` element handles and
// helpers declared above, because main() reads them as soon as it runs.
main();
</script>
</body>
</html>