Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate the separate metadata file #845

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
95 changes: 4 additions & 91 deletions main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ void checkdisk(std::vector<struct reader> *r) {
for (size_t i = 0; i < r->size(); i++) {
// Pool and tree are used once.
// Geometry and index will be duplicated during sorting and tiling.
used += (*r)[i].metapos + 2 * (*r)[i].geompos + 2 * (*r)[i].indexpos + (*r)[i].poolfile->len + (*r)[i].treefile->len;
used += 2 * (*r)[i].geompos + 2 * (*r)[i].indexpos + (*r)[i].poolfile->len + (*r)[i].treefile->len;
}

static int warned = 0;
Expand Down Expand Up @@ -1143,23 +1143,16 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
for (size_t i = 0; i < CPUS; i++) {
struct reader *r = &readers[i];

char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1];
char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1];
char treename[strlen(tmpdir) + strlen("/tree.XXXXXXXX") + 1];
char geomname[strlen(tmpdir) + strlen("/geom.XXXXXXXX") + 1];
char indexname[strlen(tmpdir) + strlen("/index.XXXXXXXX") + 1];

sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX");
sprintf(poolname, "%s%s", tmpdir, "/pool.XXXXXXXX");
sprintf(treename, "%s%s", tmpdir, "/tree.XXXXXXXX");
sprintf(geomname, "%s%s", tmpdir, "/geom.XXXXXXXX");
sprintf(indexname, "%s%s", tmpdir, "/index.XXXXXXXX");

r->metafd = mkstemp_cloexec(metaname);
if (r->metafd < 0) {
perror(metaname);
exit(EXIT_FAILURE);
}
r->poolfd = mkstemp_cloexec(poolname);
if (r->poolfd < 0) {
perror(poolname);
Expand All @@ -1181,11 +1174,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
exit(EXIT_FAILURE);
}

r->metafile = fopen_oflag(metaname, "wb", O_WRONLY | O_CLOEXEC);
if (r->metafile == NULL) {
perror(metaname);
exit(EXIT_FAILURE);
}
r->poolfile = memfile_open(r->poolfd);
if (r->poolfile == NULL) {
perror(poolname);
Expand All @@ -1206,11 +1194,9 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
perror(indexname);
exit(EXIT_FAILURE);
}
r->metapos = 0;
r->geompos = 0;
r->indexpos = 0;

unlink(metaname);
unlink(poolname);
unlink(treename);
unlink(geomname);
Expand All @@ -1221,8 +1207,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
struct stringpool p;
memfile_write(r->treefile, &p, sizeof(struct stringpool));
}
// Keep metadata file from being completely empty if no attributes
serialize_int(r->metafile, 0, &r->metapos, "meta");

r->file_bbox[0] = r->file_bbox[1] = UINT_MAX;
r->file_bbox[2] = r->file_bbox[3] = 0;
Expand Down Expand Up @@ -1699,10 +1683,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
}

for (size_t i = 0; i < CPUS; i++) {
if (fclose(readers[i].metafile) != 0) {
perror("fclose meta");
exit(EXIT_FAILURE);
}
if (fclose(readers[i].geomfile) != 0) {
perror("fclose geom");
exit(EXIT_FAILURE);
Expand All @@ -1717,10 +1697,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
perror("stat geom\n");
exit(EXIT_FAILURE);
}
if (fstat(readers[i].metafd, &readers[i].metast) != 0) {
perror("stat meta\n");
exit(EXIT_FAILURE);
}
}

// Create a combined string pool
Expand All @@ -1729,9 +1705,8 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo

// 2 * CPUS: One per input thread, one per tiling thread
long long pool_off[2 * CPUS];
long long meta_off[2 * CPUS];
for (size_t i = 0; i < 2 * CPUS; i++) {
pool_off[i] = meta_off[i] = 0;
pool_off[i] = 0;
}

char poolname[strlen(tmpdir) + strlen("/pool.XXXXXXXX") + 1];
Expand All @@ -1751,51 +1726,9 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo

unlink(poolname);

char metaname[strlen(tmpdir) + strlen("/meta.XXXXXXXX") + 1];
sprintf(metaname, "%s%s", tmpdir, "/meta.XXXXXXXX");

int metafd = mkstemp_cloexec(metaname);
if (metafd < 0) {
perror(metaname);
exit(EXIT_FAILURE);
}

FILE *metafile = fopen_oflag(metaname, "wb", O_WRONLY | O_CLOEXEC);
if (metafile == NULL) {
perror(metaname);
exit(EXIT_FAILURE);
}

unlink(metaname);

std::atomic<long long> metapos(0);
std::atomic<long long> poolpos(0);

for (size_t i = 0; i < CPUS; i++) {
if (readers[i].metapos > 0) {
void *map = mmap(NULL, readers[i].metapos, PROT_READ, MAP_PRIVATE, readers[i].metafd, 0);
if (map == MAP_FAILED) {
perror("mmap unmerged meta");
exit(EXIT_FAILURE);
}
madvise(map, readers[i].metapos, MADV_SEQUENTIAL);
madvise(map, readers[i].metapos, MADV_WILLNEED);
if (fwrite(map, readers[i].metapos, 1, metafile) != 1) {
perror("Reunify meta");
exit(EXIT_FAILURE);
}
madvise(map, readers[i].metapos, MADV_DONTNEED);
if (munmap(map, readers[i].metapos) != 0) {
perror("unmap unmerged meta");
}
}

meta_off[i] = metapos;
metapos += readers[i].metapos;
if (close(readers[i].metafd) != 0) {
perror("close unmerged meta");
}

if (readers[i].poolfile->off > 0) {
if (fwrite(readers[i].poolfile->map, readers[i].poolfile->off, 1, poolfile) != 1) {
perror("Reunify string pool");
Expand All @@ -1812,17 +1745,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
perror("fclose pool");
exit(EXIT_FAILURE);
}
if (fclose(metafile) != 0) {
perror("fclose meta");
exit(EXIT_FAILURE);
}

char *meta = (char *) mmap(NULL, metapos, PROT_READ, MAP_PRIVATE, metafd, 0);
if (meta == MAP_FAILED) {
perror("mmap meta");
exit(EXIT_FAILURE);
}
madvise(meta, metapos, MADV_RANDOM);

char *stringpool = NULL;
if (poolpos > 0) { // Will be 0 if -X was specified
Expand Down Expand Up @@ -1907,9 +1829,8 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
if (!quiet) {
long long s = progress_seq;
long long geompos_print = geompos;
long long metapos_print = metapos;
long long poolpos_print = poolpos;
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of separate metadata, %lld bytes of string pool\n", s, geompos_print, metapos_print, poolpos_print);
fprintf(stderr, "%lld features, %lld bytes of geometry, %lld bytes of string pool\n", s, geompos_print, poolpos_print);
}

if (indexpos == 0) {
Expand Down Expand Up @@ -2267,7 +2188,7 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo

std::atomic<unsigned> midx(0);
std::atomic<unsigned> midy(0);
int written = traverse_zooms(fd, size, meta, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, meta_off, pool_off, initial_x, initial_y, simplification, layermaps, prefilter, postfilter, attribute_accum, filter);
int written = traverse_zooms(fd, size, stringpool, &midx, &midy, maxzoom, minzoom, outdb, outdir, buffer, fname, tmpdir, gamma, full_detail, low_detail, min_detail, pool_off, initial_x, initial_y, simplification, layermaps, prefilter, postfilter, attribute_accum, filter);

if (maxzoom != written) {
if (written > minzoom) {
Expand All @@ -2280,14 +2201,6 @@ int read_input(std::vector<source> &sources, char *fname, int maxzoom, int minzo
}
}

madvise(meta, metapos, MADV_DONTNEED);
if (munmap(meta, metapos) != 0) {
perror("munmap meta");
}
if (close(metafd) < 0) {
perror("close meta");
}

if (poolpos > 0) {
madvise((void *) stringpool, poolpos, MADV_DONTNEED);
if (munmap(stringpool, poolpos) != 0) {
Expand Down
1 change: 0 additions & 1 deletion plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ serial_feature parse_feature(json_pull *jp, int z, unsigned x, unsigned y, std::
sf.bbox[0] = sf.bbox[1] = LLONG_MAX;
sf.bbox[2] = sf.bbox[3] = LLONG_MIN;
sf.extent = 0;
sf.metapos = 0;
sf.has_id = false;

std::string layername = "unknown";
Expand Down
89 changes: 16 additions & 73 deletions serial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,23 +225,19 @@ void serialize_feature(FILE *geomfile, serial_feature *sf, std::atomic<long long
serialize_long_long(geomfile, sf->extent, geompos, fname);
}

serialize_long_long(geomfile, sf->metapos, geompos, fname);
serialize_long_long(geomfile, sf->keys.size(), geompos, fname);

if (sf->metapos < 0) {
serialize_long_long(geomfile, sf->keys.size(), geompos, fname);

for (size_t i = 0; i < sf->keys.size(); i++) {
serialize_long_long(geomfile, sf->keys[i], geompos, fname);
serialize_long_long(geomfile, sf->values[i], geompos, fname);
}
for (size_t i = 0; i < sf->keys.size(); i++) {
serialize_long_long(geomfile, sf->keys[i], geompos, fname);
serialize_long_long(geomfile, sf->values[i], geompos, fname);
}

if (include_minzoom) {
serialize_byte(geomfile, sf->feature_minzoom, geompos, fname);
}
}

serial_feature deserialize_feature(FILE *geoms, std::atomic<long long> *geompos_in, char *metabase, long long *meta_off, unsigned z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y) {
serial_feature deserialize_feature(FILE *geoms, std::atomic<long long> *geompos_in, unsigned z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y) {
serial_feature sf;

deserialize_byte_io(geoms, &sf.t, geompos_in);
Expand Down Expand Up @@ -286,32 +282,15 @@ serial_feature deserialize_feature(FILE *geoms, std::atomic<long long> *geompos_

sf.layer >>= 6;

sf.metapos = 0;
deserialize_long_long_io(geoms, &sf.metapos, geompos_in);

if (sf.metapos >= 0) {
char *meta = metabase + sf.metapos + meta_off[sf.segment];
long long count;
deserialize_long_long(&meta, &count);
long long count;
deserialize_long_long_io(geoms, &count, geompos_in);

for (long long i = 0; i < count; i++) {
long long k, v;
deserialize_long_long(&meta, &k);
deserialize_long_long(&meta, &v);
sf.keys.push_back(k);
sf.values.push_back(v);
}
} else {
long long count;
deserialize_long_long_io(geoms, &count, geompos_in);

for (long long i = 0; i < count; i++) {
long long k, v;
deserialize_long_long_io(geoms, &k, geompos_in);
deserialize_long_long_io(geoms, &v, geompos_in);
sf.keys.push_back(k);
sf.values.push_back(v);
}
for (long long i = 0; i < count; i++) {
long long k, v;
deserialize_long_long_io(geoms, &k, geompos_in);
deserialize_long_long_io(geoms, &v, geompos_in);
sf.keys.push_back(k);
sf.values.push_back(v);
}

deserialize_byte_io(geoms, &sf.feature_minzoom, geompos_in);
Expand Down Expand Up @@ -488,32 +467,6 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
locs.clear();
}

bool inline_meta = true;
// Don't inline metadata for features that will span several tiles at maxzoom
if (sf.geometry.size() > 0 && (sf.bbox[2] < sf.bbox[0] || sf.bbox[3] < sf.bbox[1])) {
fprintf(stderr, "Internal error: impossible feature bounding box %llx,%llx,%llx,%llx\n", sf.bbox[0], sf.bbox[1], sf.bbox[2], sf.bbox[3]);
}
if (sf.bbox[0] == LLONG_MAX) {
// No bounding box (empty geometry)
// Shouldn't happen, but avoid arithmetic overflow below
} else if (sf.bbox[2] - sf.bbox[0] > (2LL << (32 - sst->maxzoom)) || sf.bbox[3] - sf.bbox[1] > (2LL << (32 - sst->maxzoom))) {
inline_meta = false;

if (prevent[P_CLIPPING]) {
static std::atomic<long long> warned(0);
long long extent = ((sf.bbox[2] - sf.bbox[0]) / ((1LL << (32 - sst->maxzoom)) + 1)) * ((sf.bbox[3] - sf.bbox[1]) / ((1LL << (32 - sst->maxzoom)) + 1));
if (extent > warned) {
fprintf(stderr, "Warning: %s:%d: Large unclipped (-pc) feature may be duplicated across %lld tiles\n", sst->fname, sst->line, extent);
warned = extent;

if (extent > 10000) {
fprintf(stderr, "Exiting because this can't be right.\n");
exit(EXIT_FAILURE);
}
}
}
}

double extent = 0;
if (additional[A_DROP_SMALLEST_AS_NEEDED] || additional[A_COALESCE_SMALLEST_AS_NEEDED]) {
if (sf.t == VT_POLYGON) {
Expand Down Expand Up @@ -651,19 +604,9 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf) {
}
}

if (inline_meta) {
sf.metapos = -1;
for (size_t i = 0; i < sf.full_keys.size(); i++) {
sf.keys.push_back(addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string));
sf.values.push_back(addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type));
}
} else {
sf.metapos = r->metapos;
serialize_long_long(r->metafile, sf.full_keys.size(), &r->metapos, sst->fname);
for (size_t i = 0; i < sf.full_keys.size(); i++) {
serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string), &r->metapos, sst->fname);
serialize_long_long(r->metafile, addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type), &r->metapos, sst->fname);
}
for (size_t i = 0; i < sf.full_keys.size(); i++) {
sf.keys.push_back(addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string));
sf.values.push_back(addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type));
}

long long geomstart = r->geompos;
Expand Down
16 changes: 3 additions & 13 deletions serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ struct serial_feature {

std::vector<long long> keys{};
std::vector<long long> values{};
// If >= 0, metadata is external
long long metapos = 0;

// XXX This isn't serialized. Should it be here?
long long bbox[4] = {0, 0, 0, 0};
Expand All @@ -72,22 +70,19 @@ struct serial_feature {
};

void serialize_feature(FILE *geomfile, serial_feature *sf, std::atomic<long long> *geompos, const char *fname, long long wx, long long wy, bool include_minzoom);
serial_feature deserialize_feature(FILE *geoms, std::atomic<long long> *geompos_in, char *metabase, long long *meta_off, unsigned z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y);
serial_feature deserialize_feature(FILE *geoms, std::atomic<long long> *geompos_in, unsigned z, unsigned tx, unsigned ty, unsigned *initial_x, unsigned *initial_y);

struct reader {
int metafd = -1;
int poolfd = -1;
int treefd = -1;
int geomfd = -1;
int indexfd = -1;

FILE *metafile = NULL;
struct memfile *poolfile = NULL;
struct memfile *treefile = NULL;
FILE *geomfile = NULL;
FILE *indexfile = NULL;

std::atomic<long long> metapos;
std::atomic<long long> geompos;
std::atomic<long long> indexpos;

Expand All @@ -99,26 +94,21 @@ struct reader {
char *geom_map = NULL;

reader()
: metapos(0), geompos(0), indexpos(0) {
: geompos(0), indexpos(0) {
}

reader(reader const &r) {
metafd = r.metafd;
poolfd = r.poolfd;
treefd = r.treefd;
geomfd = r.geomfd;
indexfd = r.indexfd;

metafile = r.metafile;
poolfile = r.poolfile;
treefile = r.treefile;
geomfile = r.geomfile;
indexfile = r.indexfile;

long long p = r.metapos;
metapos = p;

p = r.geompos;
long long p = r.geompos;
geompos = p;

p = r.indexpos;
Expand Down