This repository has been archived by the owner on Dec 27, 2022. It is now read-only.
/
load.js
93 lines (82 loc) · 2.8 KB
/
load.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
const path = require("path");
const fs = require("fs-extra");
const fetch = require("node-fetch");
const xmldoc = require("xmldoc");
const lzma = require("lzma-native");
// Only this many of the most recently kept frames are inspected for duplicates.
const DEDUPE_WINDOW_FRAMES = 24;
// A kept frame only counts as a duplicate candidate if it is less than this
// far (in units of the parsed "id" value, treated as seconds) behind the current frame.
const DEDUPE_WINDOW_SECONDS = 2;

/** Escapes the characters that are significant inside XML text content. */
const escapeXmlText = value =>
  String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

/** Wraps text in a CDATA section, splitting any embedded "]]>" terminator. */
const toCdata = value =>
  `<![CDATA[${String(value).replace(/]]>/g, "]]]]><![CDATA[>")}]]>`;

/** Returns the text of the <field> whose name attribute equals `name`. */
const readField = (fields, name) => {
  const match = fields.find(field => field.attr.name === name);
  if (!match) {
    throw new Error(`Missing field "${name}" in hash document`);
  }
  return match.val;
};

/** Parses the decompressed hash XML into frames sorted by ascending time. */
const parseHashList = xmlData =>
  new xmldoc.XmlDocument(xmlData).children
    .filter(child => child.name === "doc")
    .map(doc => {
      const fields = doc.children.filter(child => child.name === "field");
      return {
        time: parseFloat(readField(fields, "id")),
        cl_hi: readField(fields, "cl_hi"),
        cl_ha: readField(fields, "cl_ha")
      };
    })
    .sort((a, b) => a.time - b.time);

/** Drops frames whose cl_hi exactly matches a recently kept frame. */
const dedupeFrames = frames => {
  const kept = [];
  for (const frame of frames) {
    const isDuplicate = kept
      .slice(-DEDUPE_WINDOW_FRAMES)
      .some(
        previous =>
          frame.time - previous.time < DEDUPE_WINDOW_SECONDS &&
          previous.cl_hi === frame.cl_hi
      );
    if (!isDuplicate) {
      kept.push(frame);
    }
  }
  return kept;
};

/** Builds the Solr <add> XML payload for the given frames. */
const buildAddXml = (relativePath, frames) =>
  [
    "<add>",
    frames
      .map(frame =>
        [
          "<doc>",
          '<field name="id">',
          toCdata(`${relativePath}/${frame.time.toFixed(2)}`),
          "</field>",
          '<field name="cl_hi">',
          // Parsed values are entity-decoded, so they must be re-escaped here.
          escapeXmlText(frame.cl_hi),
          "</field>",
          '<field name="cl_ha">',
          escapeXmlText(frame.cl_ha),
          "</field>",
          "</doc>"
        ].join("")
      )
      .join("\n"),
    "</add>"
  ].join("\n");

/**
 * Loads one compressed hash file into Solr.
 *
 * Reads `<SOLA_HASH_PATH>/<relativePath>.xml.xz`, decompresses and parses it,
 * removes near-duplicate frames, and posts the remaining frames as an XML
 * `<add>` document to the least populated core whose name starts with
 * `<SOLA_SOLR_CORE>_`.
 *
 * @param {string} SOLA_HASH_PATH - Directory that contains the hash files.
 * @param {string} relativePath - Path of the hash file relative to
 *   SOLA_HASH_PATH, without the ".xml.xz" suffix; also used as the document
 *   id prefix.
 * @param {string} SOLA_SOLR_URL - Solr base URL; request paths are appended
 *   directly, so it is expected to end with "/".
 * @param {string} SOLA_SOLR_CORE - Core name prefix (cores are "<prefix>_...").
 * @returns {Promise<void>} Resolves once the update request has succeeded.
 * @throws {Error} Rejects if the file cannot be read, decompressed or parsed,
 *   if no matching core exists, or if a Solr request fails.
 */
const load = async (
  SOLA_HASH_PATH,
  relativePath,
  SOLA_SOLR_URL,
  SOLA_SOLR_CORE
) => {
  const zipFilePath = `${path.join(SOLA_HASH_PATH, relativePath)}.xml.xz`;
  console.log(`Loading ${zipFilePath} into solr`);

  console.log("Unzipping files");
  // Being an async function, any throw from here on rejects the returned
  // promise instead of leaving the caller waiting forever.
  const zipFile = fs.readFileSync(zipFilePath);
  const data = await lzma.decompress(zipFile);

  console.log("Parsing xml");
  const xml = buildAddXml(relativePath, dedupeFrames(parseHashList(data)));

  const coreInfoResponse = await fetch(`${SOLA_SOLR_URL}admin/cores?wt=json`);
  if (!coreInfoResponse.ok) {
    throw new Error(
      `Failed to list solr cores: ${coreInfoResponse.status} ${coreInfoResponse.statusText}`
    );
  }
  const coreInfo = await coreInfoResponse.json();

  const candidateCores = Object.values(coreInfo.status)
    .filter(core => core.name.startsWith(`${SOLA_SOLR_CORE}_`))
    .sort((a, b) => a.index.numDocs - b.index.numDocs);
  if (candidateCores.length === 0) {
    throw new Error(`No solr core found with prefix ${SOLA_SOLR_CORE}_`);
  }
  // Choose the least populated core.
  const selectedCoreName = candidateCores[0].name;

  console.log(`Uploading xml to solr core ${selectedCoreName}`);
  const updateResponse = await fetch(
    `${SOLA_SOLR_URL}${selectedCoreName}/update?wt=json&commit=true`,
    {
      method: "POST",
      headers: { "Content-Type": "text/xml" },
      body: xml
    }
  );
  if (!updateResponse.ok) {
    throw new Error(
      `Solr update failed: ${updateResponse.status} ${updateResponse.statusText}`
    );
  }
  console.log("Completed");
};
// Public API of this module: exposes the `load` function.
module.exports = { load };