Skip to content

Commit

Permalink
Ensure that content IDs are unique in a Nessie repository
Browse files Browse the repository at this point in the history
Nessie content IDs are random IDs, but we do not guarantee that those are actually really unique.

This change adds a new object type to ensure that a generated ID is unique by leveraging existing functionality of the `Persist` framework that already provides "`INSERT IF NOT EXIST`" guarantees.

New content IDs from this change on are now verified. This change does not include functionality to automatically add already existing content-IDs. IMHO it is probably okay for now given the practically non-existing probability of content-ID conflicts.
  • Loading branch information
snazy committed Dec 1, 2023
1 parent 08de363 commit 2c94b17
Show file tree
Hide file tree
Showing 16 changed files with 257 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ final class CassandraConstants {
static final String COLS_TAG = "t_message, t_headers, t_signature";
static final String COLS_STRING =
"s_content_type, s_compression, s_filename, s_predecessors, s_text";
static final String COLS_UNIQUE = "u_space, u_value";

static final String INSERT_OBJ_PREFIX =
"INSERT INTO %s."
Expand All @@ -52,6 +53,7 @@ final class CassandraConstants {
+ COL_OBJ_TYPE
+ ", ";
static final String STORE_OBJ_SUFFIX = " IF NOT EXISTS";
static final String INSERT_OBJ_UNIQUE = INSERT_OBJ_PREFIX + COLS_UNIQUE + ") VALUES (?,?,?, ?,?)";
static final String INSERT_OBJ_STRING =
INSERT_OBJ_PREFIX + COLS_STRING + ") VALUES (?,?,?, ?,?,?,?,?)";
static final String INSERT_OBJ_TAG = INSERT_OBJ_PREFIX + COLS_TAG + ") VALUES (?,?,?, ?,?,?)";
Expand Down Expand Up @@ -80,6 +82,7 @@ final class CassandraConstants {
+ ",\n i_index {4}"
+ ",\n t_message {6}, t_headers {4}, t_signature {4}"
+ ",\n s_content_type {0}, s_compression {0}, s_filename {0}, s_predecessors {2}, s_text {4}"
+ ",\n u_space {0}, u_value {0}"
+ ",\n PRIMARY KEY (("
+ COL_REPO_ID
+ ", "
Expand Down Expand Up @@ -223,7 +226,9 @@ final class CassandraConstants {
+ ", "
+ COLS_TAG
+ ", "
+ COLS_STRING;
+ COLS_STRING
+ ", "
+ COLS_UNIQUE;
static final int COL_COMMIT_CREATED = 2; // obj_id + obj_type before this column
static final int COL_COMMIT_SEQ = COL_COMMIT_CREATED + 1;
static final int COL_COMMIT_MESSAGE = COL_COMMIT_SEQ + 1;
Expand Down Expand Up @@ -252,6 +257,8 @@ final class CassandraConstants {
static final int COL_STRING_FILENAME = COL_STRING_COMPRESSION + 1;
static final int COL_STRING_PREDECESSORS = COL_STRING_FILENAME + 1;
static final int COL_STRING_TEXT = COL_STRING_PREDECESSORS + 1;
static final int COL_UNIQUE_SPACE = COL_STRING_TEXT + 1;
static final int COL_UNIQUE_VALUE = COL_UNIQUE_SPACE + 1;

static final String FETCH_OBJ_TYPE =
"SELECT "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_TAG_HEADERS;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_TAG_MESSAGE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_TAG_SIGNATURE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_UNIQUE_SPACE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_UNIQUE_VALUE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_VALUE_CONTENT_ID;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_VALUE_DATA;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.COL_VALUE_PAYLOAD;
Expand All @@ -59,6 +61,7 @@
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.INSERT_OBJ_SEGMENTS;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.INSERT_OBJ_STRING;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.INSERT_OBJ_TAG;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.INSERT_OBJ_UNIQUE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.INSERT_OBJ_VALUE;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.MARK_REFERENCE_AS_DELETED;
import static org.projectnessie.versioned.storage.cassandra.CassandraConstants.MAX_CONCURRENT_STORES;
Expand All @@ -74,6 +77,7 @@
import static org.projectnessie.versioned.storage.common.objtypes.RefObj.ref;
import static org.projectnessie.versioned.storage.common.objtypes.StringObj.stringData;
import static org.projectnessie.versioned.storage.common.objtypes.TagObj.tag;
import static org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj.uniqueId;
import static org.projectnessie.versioned.storage.common.persist.ObjId.objIdFromByteBuffer;
import static org.projectnessie.versioned.storage.common.persist.ObjId.objIdFromString;
import static org.projectnessie.versioned.storage.serialize.ProtoSerialization.deserializePreviousPointers;
Expand Down Expand Up @@ -117,6 +121,7 @@
import org.projectnessie.versioned.storage.common.objtypes.RefObj;
import org.projectnessie.versioned.storage.common.objtypes.StringObj;
import org.projectnessie.versioned.storage.common.objtypes.TagObj;
import org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj;
import org.projectnessie.versioned.storage.common.persist.CloseableIterator;
import org.projectnessie.versioned.storage.common.persist.Obj;
import org.projectnessie.versioned.storage.common.persist.ObjId;
Expand Down Expand Up @@ -835,6 +840,24 @@ StringObj deserialize(Row row, ObjId id) {
deserializeBytes(row, COL_STRING_TEXT));
}
});
STORE_OBJ_TYPE.put(
ObjType.UNIQUE,
new StoreObjDesc<UniqueIdObj>(INSERT_OBJ_UNIQUE) {
@Override
void store(
Consumer<Object> values,
UniqueIdObj obj,
int incrementalIndexLimit,
int maxSerializedIndexSize) {
values.accept(obj.space());
values.accept(obj.value());
}

@Override
UniqueIdObj deserialize(Row row, ObjId id) {
return uniqueId(id, row.getString(COL_UNIQUE_SPACE), row.getString(COL_UNIQUE_VALUE));
}
});
}

private static ByteString deserializeBytes(Row row, int idx) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ message StringProto {
bytes text = 54;
}

message UniqueIdProto {
string space = 1;
string value = 2;
}

enum CommitTypeProto {
T_UNKNOWN = 0;
NORMAL = 1;
Expand Down Expand Up @@ -106,6 +111,7 @@ message ObjProto {
IndexProto index = 6;
StringProto string_data = 7;
TagProto tag = 8;
UniqueIdProto uniqueId = 9;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import static org.projectnessie.versioned.storage.common.objtypes.RefObj.ref;
import static org.projectnessie.versioned.storage.common.objtypes.StringObj.stringData;
import static org.projectnessie.versioned.storage.common.objtypes.TagObj.tag;
import static org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj.uniqueId;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
Expand All @@ -48,6 +49,7 @@
import org.projectnessie.versioned.storage.common.objtypes.RefObj;
import org.projectnessie.versioned.storage.common.objtypes.StringObj;
import org.projectnessie.versioned.storage.common.objtypes.TagObj;
import org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj;
import org.projectnessie.versioned.storage.common.persist.ImmutableReference;
import org.projectnessie.versioned.storage.common.persist.Obj;
import org.projectnessie.versioned.storage.common.persist.ObjId;
Expand All @@ -66,6 +68,7 @@
import org.projectnessie.versioned.storage.common.proto.StorageTypes.StringProto;
import org.projectnessie.versioned.storage.common.proto.StorageTypes.Stripe;
import org.projectnessie.versioned.storage.common.proto.StorageTypes.TagProto;
import org.projectnessie.versioned.storage.common.proto.StorageTypes.UniqueIdProto;

public final class ProtoSerialization {
private ProtoSerialization() {}
Expand Down Expand Up @@ -226,6 +229,8 @@ public static byte[] serializeObj(Obj obj, int incrementalIndexSizeLimit, int in
return b.setStringData(serializeStringData((StringObj) obj)).build().toByteArray();
case TAG:
return b.setTag(serializeTag((TagObj) obj)).build().toByteArray();
case UNIQUE:
return b.setUniqueId(serializeUniqueId((UniqueIdObj) obj)).build().toByteArray();
default:
throw new UnsupportedOperationException("Unknown object type " + obj.type());
}
Expand Down Expand Up @@ -277,6 +282,9 @@ public static Obj deserializeObjProto(ObjId id, ObjProto obj) {
if (obj.hasTag()) {
return deserializeTag(id, obj.getTag());
}
if (obj.hasUniqueId()) {
return deserializeUniqueId(id, obj.getUniqueId());
}
throw new UnsupportedOperationException("Cannot deserialize " + obj);
}

Expand Down Expand Up @@ -482,4 +490,12 @@ private static TagProto.Builder serializeTag(TagObj obj) {
}
return tag;
}

private static UniqueIdObj deserializeUniqueId(ObjId id, UniqueIdProto uniqueId) {
return uniqueId(id, uniqueId.getSpace(), uniqueId.getValue());
}

private static UniqueIdProto.Builder serializeUniqueId(UniqueIdObj obj) {
return UniqueIdProto.newBuilder().setSpace(obj.space()).setValue(obj.value());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import static org.projectnessie.versioned.storage.common.objtypes.RefObj.ref;
import static org.projectnessie.versioned.storage.common.objtypes.StringObj.stringData;
import static org.projectnessie.versioned.storage.common.objtypes.TagObj.tag;
import static org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj.uniqueId;
import static org.projectnessie.versioned.storage.common.persist.ObjId.EMPTY_OBJ_ID;
import static org.projectnessie.versioned.storage.common.persist.ObjId.objIdFromString;
import static org.projectnessie.versioned.storage.common.persist.ObjId.randomObjId;
Expand All @@ -59,6 +60,7 @@
import static org.projectnessie.versioned.storage.common.persist.ObjType.REF;
import static org.projectnessie.versioned.storage.common.persist.ObjType.STRING;
import static org.projectnessie.versioned.storage.common.persist.ObjType.TAG;
import static org.projectnessie.versioned.storage.common.persist.ObjType.UNIQUE;
import static org.projectnessie.versioned.storage.common.persist.ObjType.VALUE;
import static org.projectnessie.versioned.storage.common.persist.Reference.reference;

Expand Down Expand Up @@ -99,6 +101,7 @@
import org.projectnessie.versioned.storage.common.objtypes.RefObj;
import org.projectnessie.versioned.storage.common.objtypes.StringObj;
import org.projectnessie.versioned.storage.common.objtypes.TagObj;
import org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj;
import org.projectnessie.versioned.storage.common.persist.CloseableIterator;
import org.projectnessie.versioned.storage.common.persist.ImmutableReference;
import org.projectnessie.versioned.storage.common.persist.Obj;
Expand Down Expand Up @@ -435,7 +438,8 @@ public static Stream<Obj> allObjectTypeSamples() {
objIdFromString("0000000000000000")),
copyFromUtf8("This is not a markdown")),
ref(randomObjId(), "foo", randomObjId(), 123L, null),
ref(randomObjId(), "bar", randomObjId(), 456L, randomObjId()));
ref(randomObjId(), "bar", randomObjId(), 456L, randomObjId()),
uniqueId(randomObjId(), "space", "value"));
}

@SuppressWarnings("rawtypes")
Expand All @@ -455,6 +459,8 @@ static Class classForType(ObjType type) {
return TagObj.class;
case STRING:
return StringObj.class;
case UNIQUE:
return UniqueIdObj.class;
default:
throw new IllegalArgumentException(type.name());
}
Expand All @@ -476,6 +482,8 @@ static ObjType typeDifferentThan(ObjType type) {
return STRING;
case STRING:
return INDEX_SEGMENTS;
case UNIQUE:
return STRING;
default:
throw new IllegalArgumentException(type.name());
}
Expand Down Expand Up @@ -898,6 +906,9 @@ public static Obj updateObjChange(Obj obj) {
asList(randomObjId(), randomObjId(), randomObjId(), randomObjId()),
ByteString.copyFrom(new byte[123]));
break;
case UNIQUE:
newObj = uniqueId(obj.id(), "other_space", "other_value");
break;
default:
throw new UnsupportedOperationException("Unknown object type " + obj.type());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static org.projectnessie.versioned.storage.common.persist.ObjType.REF;
import static org.projectnessie.versioned.storage.common.persist.ObjType.STRING;
import static org.projectnessie.versioned.storage.common.persist.ObjType.TAG;
import static org.projectnessie.versioned.storage.common.persist.ObjType.UNIQUE;
import static org.projectnessie.versioned.storage.common.persist.ObjType.VALUE;

import com.google.common.hash.Hasher;
Expand Down Expand Up @@ -122,4 +123,10 @@ static ObjId stringDataHash(
hasher.putBytes(text.asReadOnlyByteBuffer());
return hashAsObjId(hasher);
}

static ObjId uniqueIdHash(String space, String value) {
Hasher hasher =
newHasher().putString(UNIQUE.name(), UTF_8).putString(space, UTF_8).putString(value, UTF_8);
return hashAsObjId(hasher);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright (C) 2022 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.versioned.storage.common.objtypes;

import static org.projectnessie.versioned.storage.common.objtypes.Hashes.uniqueIdHash;

import javax.annotation.Nullable;
import org.immutables.value.Value;
import org.projectnessie.versioned.storage.common.logic.ReferenceLogic;
import org.projectnessie.versioned.storage.common.persist.Obj;
import org.projectnessie.versioned.storage.common.persist.ObjId;
import org.projectnessie.versioned.storage.common.persist.ObjType;
import org.projectnessie.versioned.storage.common.persist.Persist;

/**
* Describes the <em>internal</em> state of a reference when it has been created, managed by {@link
* ReferenceLogic} implementations, not available/tracked for transactional {@link Persist}
* instances.
*/
@Value.Immutable
public interface UniqueIdObj extends Obj {

@Override
default ObjType type() {
return ObjType.UNIQUE;
}

@Override
@Value.Parameter(order = 1)
@Nullable
@jakarta.annotation.Nullable
ObjId id();

/** The "ID space", for example {@code content-id}. */
@Value.Parameter(order = 2)
String space();

/** The value of the ID within the {@link #space()}. */
@Value.Parameter(order = 3)
String value();

static UniqueIdObj uniqueId(ObjId id, String space, String value) {
return ImmutableUniqueIdObj.of(id, space, value);
}

static UniqueIdObj uniqueId(String space, String value) {
return uniqueId(uniqueIdHash(space, value), space, value);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.projectnessie.versioned.storage.common.objtypes.RefObj;
import org.projectnessie.versioned.storage.common.objtypes.StringObj;
import org.projectnessie.versioned.storage.common.objtypes.TagObj;
import org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj;

public enum ObjType {
/**
Expand Down Expand Up @@ -50,7 +51,10 @@ public enum ObjType {
INDEX_SEGMENTS("I"),

/** {@link Obj} is a {@link IndexObj}. */
INDEX("i");
INDEX("i"),

/** Obj is a {@link UniqueIdObj}. */
UNIQUE("u");

private static final ObjType[] ALL_OBJ_TYPES = ObjType.values();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import static org.projectnessie.versioned.storage.common.objtypes.RefObj.ref;
import static org.projectnessie.versioned.storage.common.objtypes.StringObj.stringData;
import static org.projectnessie.versioned.storage.common.objtypes.TagObj.tag;
import static org.projectnessie.versioned.storage.common.objtypes.UniqueIdObj.uniqueId;
import static org.projectnessie.versioned.storage.common.persist.ObjId.EMPTY_OBJ_ID;
import static org.projectnessie.versioned.storage.common.persist.ObjId.objIdFromString;
import static org.projectnessie.versioned.storage.common.persist.ObjId.randomObjId;
Expand All @@ -47,6 +48,7 @@
import static org.projectnessie.versioned.storage.common.persist.ObjType.REF;
import static org.projectnessie.versioned.storage.common.persist.ObjType.STRING;
import static org.projectnessie.versioned.storage.common.persist.ObjType.TAG;
import static org.projectnessie.versioned.storage.common.persist.ObjType.UNIQUE;
import static org.projectnessie.versioned.storage.common.persist.ObjType.VALUE;

import java.util.List;
Expand Down Expand Up @@ -101,7 +103,8 @@ static Stream<Arguments> objTypes() {
STRING),
arguments(indexSegments(randomObjId(), emptyList()), INDEX_SEGMENTS),
arguments(
index(randomObjId(), emptyImmutableIndex(COMMIT_OP_SERIALIZER).serialize()), INDEX));
index(randomObjId(), emptyImmutableIndex(COMMIT_OP_SERIALIZER).serialize()), INDEX),
arguments(uniqueId(randomObjId(), "space", "value"), UNIQUE));
}

@ParameterizedTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ final class DynamoDBConstants {
static final String COL_INDEX = "i";
static final String COL_TAG = "t";
static final String COL_STRING = "s";
static final String COL_UNIQUE = "u";

static final String CONDITION_STORE_REF = "attribute_not_exists(" + COL_REFERENCES_POINTER + ")";
static final String CONDITION_STORE_OBJ = "attribute_not_exists(" + COL_OBJ_TYPE + ")";
Expand Down Expand Up @@ -87,5 +88,8 @@ final class DynamoDBConstants {
static final String COL_STRING_PREDECESSORS = "p";
static final String COL_STRING_TEXT = "t";

static final String COL_UNIQUE_SPACE = "s";
static final String COL_UNIQUE_VALUE = "v";

private DynamoDBConstants() {}
}

0 comments on commit 2c94b17

Please sign in to comment.