diff --git a/.classpath b/.classpath index 806f541..fc1b013 100644 --- a/.classpath +++ b/.classpath @@ -10,5 +10,7 @@ + + diff --git a/Thrudoc.thrift b/Thrudoc.thrift index 9e7e222..26aa2c9 100644 --- a/Thrudoc.thrift +++ b/Thrudoc.thrift @@ -6,14 +6,16 @@ namespace ruby Thrudoc exception ThrudocException{} -exception InvalidKeyException{} exception InvalidBucketException{} +exception InvalidKeyException{} exception InvalidParametersException{} service Thrudoc { - set getBuckets() throws(1:ThrudocException ex1), + void create_bucket(1: string bucket) throws(1:ThrudocException ex1), + void delete_bucket(1: string bucket) throws(1:ThrudocException ex1), + set get_bucket_list() throws(1:ThrudocException ex1), i32 incr(1:string bucket, 2:string key, 3:i32 amount=1) throws(1:ThrudocException ex1, 2:InvalidBucketException ex2), i32 decr(1:string bucket, 2:string key, 3:i32 amount=1) throws(1:ThrudocException ex1, 2:InvalidBucketException ex2), diff --git a/gen-java/org/thrudb/thrudoc/Thrudoc.java b/gen-java/org/thrudb/thrudoc/Thrudoc.java index 3bee5fc..08278ee 100644 --- a/gen-java/org/thrudb/thrudoc/Thrudoc.java +++ b/gen-java/org/thrudb/thrudoc/Thrudoc.java @@ -21,7 +21,11 @@ public class Thrudoc { public interface Iface { - public Set getBuckets() throws ThrudocException, TException; + public void create_bucket(String bucket) throws ThrudocException, TException; + + public void delete_bucket(String bucket) throws ThrudocException, TException; + + public Set get_bucket_list() throws ThrudocException, TException; public int incr(String bucket, String key, int amount) throws ThrudocException, InvalidBucketException, TException; @@ -86,22 +90,88 @@ public TProtocol getOutputProtocol() return this.oprot_; } - public Set getBuckets() throws ThrudocException, TException + public void create_bucket(String bucket) throws ThrudocException, TException + { + send_create_bucket(bucket); + recv_create_bucket(); + } + + public void send_create_bucket(String bucket) throws TException + { + oprot_.writeMessageBegin(new TMessage("create_bucket", TMessageType.CALL, seqid_)); + create_bucket_args args = new create_bucket_args(); + args.bucket = bucket; + args.write(oprot_); + oprot_.writeMessageEnd(); + oprot_.getTransport().flush(); + } + + public void recv_create_bucket() throws ThrudocException, TException + { + TMessage msg = iprot_.readMessageBegin(); + if (msg.type == TMessageType.EXCEPTION) { + TApplicationException x = TApplicationException.read(iprot_); + iprot_.readMessageEnd(); + throw x; + } + create_bucket_result result = new create_bucket_result(); + result.read(iprot_); + iprot_.readMessageEnd(); + if (result.ex1 != null) { + throw result.ex1; + } + return; + } + + public void delete_bucket(String bucket) throws ThrudocException, TException + { + send_delete_bucket(bucket); + recv_delete_bucket(); + } + + public void send_delete_bucket(String bucket) throws TException + { + oprot_.writeMessageBegin(new TMessage("delete_bucket", TMessageType.CALL, seqid_)); + delete_bucket_args args = new delete_bucket_args(); + args.bucket = bucket; + args.write(oprot_); + oprot_.writeMessageEnd(); + oprot_.getTransport().flush(); + } + + public void recv_delete_bucket() throws ThrudocException, TException + { + TMessage msg = iprot_.readMessageBegin(); + if (msg.type == TMessageType.EXCEPTION) { + TApplicationException x = TApplicationException.read(iprot_); + iprot_.readMessageEnd(); + throw x; + } + delete_bucket_result result = new delete_bucket_result(); + result.read(iprot_); + iprot_.readMessageEnd(); + 
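// a ThrudocException returned in the result struct is rethrown to the caller +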
if (result.ex1 != null) { + throw result.ex1; + } + return; + } + + public Set get_bucket_list() throws ThrudocException, TException { - send_getBuckets(); - return recv_getBuckets(); + send_get_bucket_list(); + return recv_get_bucket_list(); } - public void send_getBuckets() throws TException + public void send_get_bucket_list() throws TException { - oprot_.writeMessageBegin(new TMessage("getBuckets", TMessageType.CALL, seqid_)); - getBuckets_args args = new getBuckets_args(); + oprot_.writeMessageBegin(new TMessage("get_bucket_list", TMessageType.CALL, seqid_)); + get_bucket_list_args args = new get_bucket_list_args(); args.write(oprot_); oprot_.writeMessageEnd(); oprot_.getTransport().flush(); } - public Set recv_getBuckets() throws ThrudocException, TException + public Set recv_get_bucket_list() throws ThrudocException, TException { TMessage msg = iprot_.readMessageBegin(); if (msg.type == TMessageType.EXCEPTION) { @@ -109,7 +179,7 @@ public Set recv_getBuckets() throws ThrudocException, TException iprot_.readMessageEnd(); throw x; } - getBuckets_result result = new getBuckets_result(); + get_bucket_list_result result = new get_bucket_list_result(); result.read(iprot_); iprot_.readMessageEnd(); if (result.isSetSuccess()) { @@ -118,7 +188,7 @@ public Set recv_getBuckets() throws ThrudocException, TException if (result.ex1 != null) { throw result.ex1; } - throw new TApplicationException(TApplicationException.MISSING_RESULT, "getBuckets failed: unknown result"); + throw new TApplicationException(TApplicationException.MISSING_RESULT, "get_bucket_list failed: unknown result"); } public int incr(String bucket, String key, int amount) throws ThrudocException, InvalidBucketException, TException @@ -802,7 +872,9 @@ public static class Processor implements TProcessor { public Processor(Iface iface) { iface_ = iface; - processMap_.put("getBuckets", new getBuckets()); + processMap_.put("create_bucket", new create_bucket()); + processMap_.put("delete_bucket", new delete_bucket()); + processMap_.put("get_bucket_list", new get_bucket_list()); processMap_.put("incr", new incr()); processMap_.put("decr", new decr()); processMap_.put("put", new put()); @@ -847,19 +919,59 @@ public boolean process(TProtocol iprot, TProtocol oprot) throws TException return true; } - private class getBuckets implements ProcessFunction { + private class create_bucket implements ProcessFunction { + public void process(int seqid, TProtocol iprot, TProtocol oprot) throws TException + { + create_bucket_args args = new create_bucket_args(); + args.read(iprot); + iprot.readMessageEnd(); + create_bucket_result result = new create_bucket_result(); + try { + iface_.create_bucket(args.bucket); + } catch (ThrudocException ex1) { + result.ex1 = ex1; + } + oprot.writeMessageBegin(new TMessage("create_bucket", TMessageType.REPLY, seqid)); + result.write(oprot); + oprot.writeMessageEnd(); + oprot.getTransport().flush(); + } + + } + + private class delete_bucket implements ProcessFunction { + public void process(int seqid, TProtocol iprot, TProtocol oprot) throws TException + { + delete_bucket_args args = new delete_bucket_args(); + args.read(iprot); + iprot.readMessageEnd(); + delete_bucket_result result = new delete_bucket_result(); + try { + iface_.delete_bucket(args.bucket); + } catch (ThrudocException ex1) { + result.ex1 = ex1; + } + oprot.writeMessageBegin(new TMessage("delete_bucket", TMessageType.REPLY, seqid)); + result.write(oprot); + oprot.writeMessageEnd(); + oprot.getTransport().flush(); + } + + } + + private class 
get_bucket_list implements ProcessFunction { public void process(int seqid, TProtocol iprot, TProtocol oprot) throws TException { - getBuckets_args args = new getBuckets_args(); + get_bucket_list_args args = new get_bucket_list_args(); args.read(iprot); iprot.readMessageEnd(); - getBuckets_result result = new getBuckets_result(); + get_bucket_list_result result = new get_bucket_list_result(); try { - result.success = iface_.getBuckets(); + result.success = iface_.get_bucket_list(); } catch (ThrudocException ex1) { result.ex1 = ex1; } - oprot.writeMessageBegin(new TMessage("getBuckets", TMessageType.REPLY, seqid)); + oprot.writeMessageBegin(new TMessage("get_bucket_list", TMessageType.REPLY, seqid)); result.write(oprot); oprot.writeMessageEnd(); oprot.getTransport().flush(); @@ -1246,28 +1358,824 @@ public void process(int seqid, TProtocol iprot, TProtocol oprot) throws TExcepti } - public static class getBuckets_args implements TBase, java.io.Serializable, Cloneable { - private static final TStruct STRUCT_DESC = new TStruct("getBuckets_args"); + public static class create_bucket_args implements TBase, java.io.Serializable, Cloneable { + private static final TStruct STRUCT_DESC = new TStruct("create_bucket_args"); + private static final TField BUCKET_FIELD_DESC = new TField("bucket", TType.STRING, (short)1); + + public String bucket; + public static final int BUCKET = 1; + + private final Isset __isset = new Isset(); + private static final class Isset implements java.io.Serializable { + } + + public static final Map metaDataMap = Collections.unmodifiableMap(new HashMap() {{ + put(BUCKET, new FieldMetaData("bucket", TFieldRequirementType.DEFAULT, + new FieldValueMetaData(TType.STRING))); + }}); + + static { + FieldMetaData.addStructMetaDataMap(create_bucket_args.class, metaDataMap); + } + + public create_bucket_args() { + } + + public create_bucket_args( + String bucket) + { + this(); + this.bucket = bucket; + } + + /** + * Performs a deep copy on other. 
+ */ + public create_bucket_args(create_bucket_args other) { + if (other.isSetBucket()) { + this.bucket = other.bucket; + } + } + + @Override + public create_bucket_args clone() { + return new create_bucket_args(this); + } + + public String getBucket() { + return this.bucket; + } + + public void setBucket(String bucket) { + this.bucket = bucket; + } + + public void unsetBucket() { + this.bucket = null; + } + + // Returns true if field bucket is set (has been asigned a value) and false otherwise + public boolean isSetBucket() { + return this.bucket != null; + } + + public void setBucketIsSet(boolean value) { + if (!value) { + this.bucket = null; + } + } + + public void setFieldValue(int fieldID, Object value) { + switch (fieldID) { + case BUCKET: + if (value == null) { + unsetBucket(); + } else { + setBucket((String)value); + } + break; + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + public Object getFieldValue(int fieldID) { + switch (fieldID) { + case BUCKET: + return getBucket(); + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + // Returns true if field corresponding to fieldID is set (has been asigned a value) and false otherwise + public boolean isSet(int fieldID) { + switch (fieldID) { + case BUCKET: + return isSetBucket(); + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof create_bucket_args) + return this.equals((create_bucket_args)that); + return false; + } + + public boolean equals(create_bucket_args that) { + if (that == null) + return false; + + boolean this_present_bucket = true && this.isSetBucket(); + boolean that_present_bucket = true && that.isSetBucket(); + if (this_present_bucket || that_present_bucket) { + if (!(this_present_bucket && that_present_bucket)) + return false; + if (!this.bucket.equals(that.bucket)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public void read(TProtocol iprot) throws TException { + TField field; + iprot.readStructBegin(); + while (true) + { + field = iprot.readFieldBegin(); + if (field.type == TType.STOP) { + break; + } + switch (field.id) + { + case BUCKET: + if (field.type == TType.STRING) { + this.bucket = iprot.readString(); + } else { + TProtocolUtil.skip(iprot, field.type); + } + break; + default: + TProtocolUtil.skip(iprot, field.type); + break; + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(TProtocol oprot) throws TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (this.bucket != null) { + oprot.writeFieldBegin(BUCKET_FIELD_DESC); + oprot.writeString(this.bucket); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("create_bucket_args("); + boolean first = true; + + sb.append("bucket:"); + if (this.bucket == null) { + sb.append("null"); + } else { + sb.append(this.bucket); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws TException { + // check for required fields + // check that fields of type enum have valid values + } + + } + + public static class create_bucket_result implements 
TBase, java.io.Serializable, Cloneable { + private static final TStruct STRUCT_DESC = new TStruct("create_bucket_result"); + private static final TField EX1_FIELD_DESC = new TField("ex1", TType.STRUCT, (short)1); + + public ThrudocException ex1; + public static final int EX1 = 1; + + private final Isset __isset = new Isset(); + private static final class Isset implements java.io.Serializable { + } + + public static final Map metaDataMap = Collections.unmodifiableMap(new HashMap() {{ + put(EX1, new FieldMetaData("ex1", TFieldRequirementType.DEFAULT, + new FieldValueMetaData(TType.STRUCT))); + }}); + + static { + FieldMetaData.addStructMetaDataMap(create_bucket_result.class, metaDataMap); + } + + public create_bucket_result() { + } + + public create_bucket_result( + ThrudocException ex1) + { + this(); + this.ex1 = ex1; + } + + /** + * Performs a deep copy on other. + */ + public create_bucket_result(create_bucket_result other) { + if (other.isSetEx1()) { + this.ex1 = new ThrudocException(other.ex1); + } + } + + @Override + public create_bucket_result clone() { + return new create_bucket_result(this); + } + + public ThrudocException getEx1() { + return this.ex1; + } + + public void setEx1(ThrudocException ex1) { + this.ex1 = ex1; + } + + public void unsetEx1() { + this.ex1 = null; + } + + // Returns true if field ex1 is set (has been asigned a value) and false otherwise + public boolean isSetEx1() { + return this.ex1 != null; + } + + public void setEx1IsSet(boolean value) { + if (!value) { + this.ex1 = null; + } + } + + public void setFieldValue(int fieldID, Object value) { + switch (fieldID) { + case EX1: + if (value == null) { + unsetEx1(); + } else { + setEx1((ThrudocException)value); + } + break; + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + public Object getFieldValue(int fieldID) { + switch (fieldID) { + case EX1: + return getEx1(); + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + // Returns true if field corresponding to fieldID is set (has been asigned a value) and false otherwise + public boolean isSet(int fieldID) { + switch (fieldID) { + case EX1: + return isSetEx1(); + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof create_bucket_result) + return this.equals((create_bucket_result)that); + return false; + } + + public boolean equals(create_bucket_result that) { + if (that == null) + return false; + + boolean this_present_ex1 = true && this.isSetEx1(); + boolean that_present_ex1 = true && that.isSetEx1(); + if (this_present_ex1 || that_present_ex1) { + if (!(this_present_ex1 && that_present_ex1)) + return false; + if (!this.ex1.equals(that.ex1)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public void read(TProtocol iprot) throws TException { + TField field; + iprot.readStructBegin(); + while (true) + { + field = iprot.readFieldBegin(); + if (field.type == TType.STOP) { + break; + } + switch (field.id) + { + case EX1: + if (field.type == TType.STRUCT) { + this.ex1 = new ThrudocException(); + this.ex1.read(iprot); + } else { + TProtocolUtil.skip(iprot, field.type); + } + break; + default: + TProtocolUtil.skip(iprot, field.type); + break; + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + + // check for required fields of primitive type, which can't be 
checked in the validate method + validate(); + } + + public void write(TProtocol oprot) throws TException { + oprot.writeStructBegin(STRUCT_DESC); + + if (this.isSetEx1()) { + oprot.writeFieldBegin(EX1_FIELD_DESC); + this.ex1.write(oprot); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("create_bucket_result("); + boolean first = true; + + sb.append("ex1:"); + if (this.ex1 == null) { + sb.append("null"); + } else { + sb.append(this.ex1); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws TException { + // check for required fields + // check that fields of type enum have valid values + } + + } + + public static class delete_bucket_args implements TBase, java.io.Serializable, Cloneable { + private static final TStruct STRUCT_DESC = new TStruct("delete_bucket_args"); + private static final TField BUCKET_FIELD_DESC = new TField("bucket", TType.STRING, (short)1); + + public String bucket; + public static final int BUCKET = 1; + + private final Isset __isset = new Isset(); + private static final class Isset implements java.io.Serializable { + } + + public static final Map metaDataMap = Collections.unmodifiableMap(new HashMap() {{ + put(BUCKET, new FieldMetaData("bucket", TFieldRequirementType.DEFAULT, + new FieldValueMetaData(TType.STRING))); + }}); + + static { + FieldMetaData.addStructMetaDataMap(delete_bucket_args.class, metaDataMap); + } + + public delete_bucket_args() { + } + + public delete_bucket_args( + String bucket) + { + this(); + this.bucket = bucket; + } + + /** + * Performs a deep copy on other. + */ + public delete_bucket_args(delete_bucket_args other) { + if (other.isSetBucket()) { + this.bucket = other.bucket; + } + } + + @Override + public delete_bucket_args clone() { + return new delete_bucket_args(this); + } + + public String getBucket() { + return this.bucket; + } + + public void setBucket(String bucket) { + this.bucket = bucket; + } + + public void unsetBucket() { + this.bucket = null; + } + + // Returns true if field bucket is set (has been asigned a value) and false otherwise + public boolean isSetBucket() { + return this.bucket != null; + } + + public void setBucketIsSet(boolean value) { + if (!value) { + this.bucket = null; + } + } + + public void setFieldValue(int fieldID, Object value) { + switch (fieldID) { + case BUCKET: + if (value == null) { + unsetBucket(); + } else { + setBucket((String)value); + } + break; + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + public Object getFieldValue(int fieldID) { + switch (fieldID) { + case BUCKET: + return getBucket(); + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + // Returns true if field corresponding to fieldID is set (has been asigned a value) and false otherwise + public boolean isSet(int fieldID) { + switch (fieldID) { + case BUCKET: + return isSetBucket(); + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof delete_bucket_args) + return this.equals((delete_bucket_args)that); + return false; + } + + public boolean equals(delete_bucket_args that) { + if (that == null) + return false; + + boolean this_present_bucket = true && this.isSetBucket(); + boolean that_present_bucket = true && 
that.isSetBucket(); + if (this_present_bucket || that_present_bucket) { + if (!(this_present_bucket && that_present_bucket)) + return false; + if (!this.bucket.equals(that.bucket)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public void read(TProtocol iprot) throws TException { + TField field; + iprot.readStructBegin(); + while (true) + { + field = iprot.readFieldBegin(); + if (field.type == TType.STOP) { + break; + } + switch (field.id) + { + case BUCKET: + if (field.type == TType.STRING) { + this.bucket = iprot.readString(); + } else { + TProtocolUtil.skip(iprot, field.type); + } + break; + default: + TProtocolUtil.skip(iprot, field.type); + break; + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(TProtocol oprot) throws TException { + validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (this.bucket != null) { + oprot.writeFieldBegin(BUCKET_FIELD_DESC); + oprot.writeString(this.bucket); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("delete_bucket_args("); + boolean first = true; + + sb.append("bucket:"); + if (this.bucket == null) { + sb.append("null"); + } else { + sb.append(this.bucket); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws TException { + // check for required fields + // check that fields of type enum have valid values + } + + } + + public static class delete_bucket_result implements TBase, java.io.Serializable, Cloneable { + private static final TStruct STRUCT_DESC = new TStruct("delete_bucket_result"); + private static final TField EX1_FIELD_DESC = new TField("ex1", TType.STRUCT, (short)1); + + public ThrudocException ex1; + public static final int EX1 = 1; + + private final Isset __isset = new Isset(); + private static final class Isset implements java.io.Serializable { + } + + public static final Map metaDataMap = Collections.unmodifiableMap(new HashMap() {{ + put(EX1, new FieldMetaData("ex1", TFieldRequirementType.DEFAULT, + new FieldValueMetaData(TType.STRUCT))); + }}); + + static { + FieldMetaData.addStructMetaDataMap(delete_bucket_result.class, metaDataMap); + } + + public delete_bucket_result() { + } + + public delete_bucket_result( + ThrudocException ex1) + { + this(); + this.ex1 = ex1; + } + + /** + * Performs a deep copy on other. 
+ */ + public delete_bucket_result(delete_bucket_result other) { + if (other.isSetEx1()) { + this.ex1 = new ThrudocException(other.ex1); + } + } + + @Override + public delete_bucket_result clone() { + return new delete_bucket_result(this); + } + + public ThrudocException getEx1() { + return this.ex1; + } + + public void setEx1(ThrudocException ex1) { + this.ex1 = ex1; + } + + public void unsetEx1() { + this.ex1 = null; + } + + // Returns true if field ex1 is set (has been asigned a value) and false otherwise + public boolean isSetEx1() { + return this.ex1 != null; + } + + public void setEx1IsSet(boolean value) { + if (!value) { + this.ex1 = null; + } + } + + public void setFieldValue(int fieldID, Object value) { + switch (fieldID) { + case EX1: + if (value == null) { + unsetEx1(); + } else { + setEx1((ThrudocException)value); + } + break; + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + public Object getFieldValue(int fieldID) { + switch (fieldID) { + case EX1: + return getEx1(); + + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + // Returns true if field corresponding to fieldID is set (has been asigned a value) and false otherwise + public boolean isSet(int fieldID) { + switch (fieldID) { + case EX1: + return isSetEx1(); + default: + throw new IllegalArgumentException("Field " + fieldID + " doesn't exist!"); + } + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof delete_bucket_result) + return this.equals((delete_bucket_result)that); + return false; + } + + public boolean equals(delete_bucket_result that) { + if (that == null) + return false; + + boolean this_present_ex1 = true && this.isSetEx1(); + boolean that_present_ex1 = true && that.isSetEx1(); + if (this_present_ex1 || that_present_ex1) { + if (!(this_present_ex1 && that_present_ex1)) + return false; + if (!this.ex1.equals(that.ex1)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + return 0; + } + + public void read(TProtocol iprot) throws TException { + TField field; + iprot.readStructBegin(); + while (true) + { + field = iprot.readFieldBegin(); + if (field.type == TType.STOP) { + break; + } + switch (field.id) + { + case EX1: + if (field.type == TType.STRUCT) { + this.ex1 = new ThrudocException(); + this.ex1.read(iprot); + } else { + TProtocolUtil.skip(iprot, field.type); + } + break; + default: + TProtocolUtil.skip(iprot, field.type); + break; + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + + + // check for required fields of primitive type, which can't be checked in the validate method + validate(); + } + + public void write(TProtocol oprot) throws TException { + oprot.writeStructBegin(STRUCT_DESC); + + if (this.isSetEx1()) { + oprot.writeFieldBegin(EX1_FIELD_DESC); + this.ex1.write(oprot); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("delete_bucket_result("); + boolean first = true; + + sb.append("ex1:"); + if (this.ex1 == null) { + sb.append("null"); + } else { + sb.append(this.ex1); + } + first = false; + sb.append(")"); + return sb.toString(); + } + + public void validate() throws TException { + // check for required fields + // check that fields of type enum have valid values + } + + } + + public static class get_bucket_list_args implements TBase, java.io.Serializable, Cloneable { + private 
static final TStruct STRUCT_DESC = new TStruct("get_bucket_list_args"); public static final Map metaDataMap = Collections.unmodifiableMap(new HashMap() {{ }}); static { - FieldMetaData.addStructMetaDataMap(getBuckets_args.class, metaDataMap); + FieldMetaData.addStructMetaDataMap(get_bucket_list_args.class, metaDataMap); } - public getBuckets_args() { + public get_bucket_list_args() { } /** * Performs a deep copy on other. */ - public getBuckets_args(getBuckets_args other) { + public get_bucket_list_args(get_bucket_list_args other) { } @Override - public getBuckets_args clone() { - return new getBuckets_args(this); + public get_bucket_list_args clone() { + return new get_bucket_list_args(this); } public void setFieldValue(int fieldID, Object value) { @@ -1296,12 +2204,12 @@ public boolean isSet(int fieldID) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof getBuckets_args) - return this.equals((getBuckets_args)that); + if (that instanceof get_bucket_list_args) + return this.equals((get_bucket_list_args)that); return false; } - public boolean equals(getBuckets_args that) { + public boolean equals(get_bucket_list_args that) { if (that == null) return false; @@ -1347,7 +2255,7 @@ public void write(TProtocol oprot) throws TException { @Override public String toString() { - StringBuilder sb = new StringBuilder("getBuckets_args("); + StringBuilder sb = new StringBuilder("get_bucket_list_args("); boolean first = true; sb.append(")"); @@ -1361,8 +2269,8 @@ public void validate() throws TException { } - public static class getBuckets_result implements TBase, java.io.Serializable, Cloneable { - private static final TStruct STRUCT_DESC = new TStruct("getBuckets_result"); + public static class get_bucket_list_result implements TBase, java.io.Serializable, Cloneable { + private static final TStruct STRUCT_DESC = new TStruct("get_bucket_list_result"); private static final TField SUCCESS_FIELD_DESC = new TField("success", TType.SET, (short)0); private static final TField EX1_FIELD_DESC = new TField("ex1", TType.STRUCT, (short)1); @@ -1384,13 +2292,13 @@ private static final class Isset implements java.io.Serializable { }}); static { - FieldMetaData.addStructMetaDataMap(getBuckets_result.class, metaDataMap); + FieldMetaData.addStructMetaDataMap(get_bucket_list_result.class, metaDataMap); } - public getBuckets_result() { + public get_bucket_list_result() { } - public getBuckets_result( + public get_bucket_list_result( Set success, ThrudocException ex1) { @@ -1402,7 +2310,7 @@ public getBuckets_result( /** * Performs a deep copy on other. 
*/ - public getBuckets_result(getBuckets_result other) { + public get_bucket_list_result(get_bucket_list_result other) { if (other.isSetSuccess()) { Set __this__success = new HashSet(); for (String other_element : other.success) { @@ -1416,8 +2324,8 @@ public getBuckets_result(getBuckets_result other) { } @Override - public getBuckets_result clone() { - return new getBuckets_result(this); + public get_bucket_list_result clone() { + return new get_bucket_list_result(this); } public int getSuccessSize() { @@ -1533,12 +2441,12 @@ public boolean isSet(int fieldID) { public boolean equals(Object that) { if (that == null) return false; - if (that instanceof getBuckets_result) - return this.equals((getBuckets_result)that); + if (that instanceof get_bucket_list_result) + return this.equals((get_bucket_list_result)that); return false; } - public boolean equals(getBuckets_result that) { + public boolean equals(get_bucket_list_result that) { if (that == null) return false; @@ -1641,7 +2549,7 @@ public void write(TProtocol oprot) throws TException { @Override public String toString() { - StringBuilder sb = new StringBuilder("getBuckets_result("); + StringBuilder sb = new StringBuilder("get_bucket_list_result("); boolean first = true; sb.append("success:"); diff --git a/lib/libthrift.jar b/lib/libthrift.jar index 8261c11..396773f 100755 Binary files a/lib/libthrift.jar and b/lib/libthrift.jar differ diff --git a/src/org/thrudb/thrift/TPeekingTransport.java b/src/org/thrudb/thrift/TPeekingTransport.java new file mode 100644 index 0000000..a9750a8 --- /dev/null +++ b/src/org/thrudb/thrift/TPeekingTransport.java @@ -0,0 +1,162 @@ +package org.thrudb.thrift; + +import org.apache.log4j.Logger; +import org.apache.thrift.transport.TFramedTransport; +import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; + +import tokyocabinet.HDB; + +/** + * Allows a way to peek at the content of the message and rewind back to start. 
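+ *
+ * Intended call sequence (mirrored by ThrudocLoggingProcessor.process() later
+ * in this patch): setRecording(true) before the message header is read,
+ * setReplayMode(true) so the processor re-reads the same buffered bytes, and
+ * reset() once the call has been handled.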
+ * + * This is used to log certain actions to disk for redo-logging + * + * @author jake + * + */ +public class TPeekingTransport extends TFramedTransport { + + private Logger logger = Logger.getLogger(getClass()); + private byte[] peekBuffer = new byte[] {}; + private byte[] writeBuffer = new byte[] {}; + private int writeMax = 1024; //only store the initial output message + private int writePos = 0; + + private int replayPos = 0; + private boolean replayMode = false; + private int replayEnd = 0; //stop reply at this point in the buf + private boolean recording = false; + + private boolean logging = false; + private HDB log; + private int nextLogId = -1; + + + public TPeekingTransport(TTransport baseTransport, HDB log) { + super(baseTransport); + this.log = log; + } + + @Override + public void close() { + super.close(); + } + + @Override + public boolean isOpen() { + return super.isOpen(); + } + + public boolean isRecording() { + return recording; + } + + public void setRecording(boolean recording) { + this.recording = recording; + } + + @Override + public void open() throws TTransportException { + super.open(); + } + + @Override + public int read(byte[] buf, int off, int len) throws TTransportException { + if (replayMode && replayPos + len <= replayEnd ) { + + System.arraycopy(peekBuffer, replayPos, buf, 0, len); + + replayPos += len; + + return len; + + }else{ + + int sz = super.read(buf, off, len); + + // Add to peek buffer + if (recording && sz > 0) { + byte[] newPeekBuffer = new byte[peekBuffer.length + sz]; + + System.arraycopy(peekBuffer, 0, newPeekBuffer, 0, peekBuffer.length); + System.arraycopy(buf, 0, newPeekBuffer, peekBuffer.length, sz); + + peekBuffer = newPeekBuffer; + } + + return sz; + } + + } + + @Override + public void write(byte[] buf, int off, int len) throws TTransportException { + + if(writeBuffer.length + len < writeMax){ + byte[] newWriteBuffer = new byte[writeBuffer.length + len]; + + System.arraycopy(writeBuffer, 0, newWriteBuffer, 0, writeBuffer.length); + System.arraycopy(buf, 0, newWriteBuffer, writeBuffer.length, len); + + writeBuffer = newWriteBuffer; + } + + super.write(buf, off, len); + } + + public boolean isReplayMode() { + return replayMode; + } + + public void setReplayMode(boolean replayMode) { + this.replayMode = replayMode; + + if(replayMode == true) + replayEnd = peekBuffer.length; + } + + + /** + * This is a hack for reading the output struct type + * + */ + public void swapInWriteBuffer(){ + peekBuffer = writeBuffer; + replayPos = 0; + replayMode = true; + recording = false; + replayEnd = writeMax; + } + + public void reset(){ + peekBuffer = new byte[] {}; + replayPos = 0; + replayMode = false; + recording = false; + + writeBuffer = new byte[] {}; + writePos = 0; + + } + + public byte[] getBuffer(){ + return peekBuffer; + } + + public boolean isLogging() { + return logging; + } + + public void setLogging(boolean logging) { + this.logging = logging; + + //get new log in sequence + if(logging){ + nextLogId = log.addint("LSN", 1); + } + + } +} + + diff --git a/src/org/thrudb/thrift/TPeekingTransportFactory.java b/src/org/thrudb/thrift/TPeekingTransportFactory.java new file mode 100644 index 0000000..959a5e1 --- /dev/null +++ b/src/org/thrudb/thrift/TPeekingTransportFactory.java @@ -0,0 +1,41 @@ +package org.thrudb.thrift; + +import java.io.File; + +import org.apache.thrift.TException; +import org.apache.thrift.transport.TFramedTransport; +import org.apache.thrift.transport.TTransport; + +import tokyocabinet.HDB; + +public class 
TPeekingTransportFactory extends TFramedTransport.Factory { + private HDB hdb; + + public TPeekingTransportFactory(String logDir,String logName) throws TException{ + int hdbFlags = HDB.OWRITER; + + //verify db file + String logFileName = logDir+File.separatorChar+logName+".tch"; + File logFile = new File(logFileName); + + if(logFile.isFile() && !logFile.canWrite()) + throw new TException(logFileName+" is not writable"); + + if(logFile.isDirectory()) + throw new TException(logFileName+" should not be a directory"); + + if(!logFile.exists()) + hdbFlags |= HDB.OCREAT; + + + hdb = new HDB(); + if(!hdb.open(logFileName,hdbFlags)){ + throw new TException(hdb.errmsg()); + } + } + + @Override + public TTransport getTransport(TTransport trans) { + return new TPeekingTransport(trans,hdb); + } +} \ No newline at end of file diff --git a/src/org/thrudb/thrudoc/ThrudocBackend.java b/src/org/thrudb/thrudoc/ThrudocBackend.java new file mode 100644 index 0000000..4f3d3c8 --- /dev/null +++ b/src/org/thrudb/thrudoc/ThrudocBackend.java @@ -0,0 +1,46 @@ +package org.thrudb.thrudoc; + +import java.util.List; + +import org.apache.thrift.TException; + +import tokyocabinet.BDBCUR; + +public interface ThrudocBackend { + + byte[] get(String key) throws InvalidKeyException; + + void put(String key, byte[] value); + + void remove(String key); + + List scan(String seed, int limit); + + int incr(String key, int amount); + + int decr(String key, int amount); + + void push_back(String key, byte[] value) throws TException; + + byte[] pop_back(String key) throws TException; + + void push_front(String key, byte[] value) throws TException; + + byte[] pop_front(String key) throws TException; + + byte[] remove_at(String key, int position); + + void insert_at(String key, byte[] value, int position); + + void replace_at(String key, byte[] value, int position); + + byte[] retrieve_at(String key, int position); + + List range(String key, int start, int end); + + int length(String key); + + boolean erase(); + + +} diff --git a/src/org/thrudb/thrudoc/ThrudocHandler.java b/src/org/thrudb/thrudoc/ThrudocHandler.java new file mode 100644 index 0000000..8a1c6f0 --- /dev/null +++ b/src/org/thrudb/thrudoc/ThrudocHandler.java @@ -0,0 +1,225 @@ +package org.thrudb.thrudoc; + +import java.io.File; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.apache.thrift.TException; +import org.thrudb.thrudoc.InvalidBucketException; +import org.thrudb.thrudoc.InvalidKeyException; +import org.thrudb.thrudoc.ThrudocException; +import org.thrudb.thrudoc.Thrudoc.Iface; +import org.thrudb.thrudoc.tokyocabinet.TokyoCabinetDB; + + +public class ThrudocHandler implements Iface { + + private Logger logger = Logger.getLogger(getClass()); + private volatile Map bucketMap = new HashMap(); + private String docRoot; + + public ThrudocHandler(String docRoot){ + this.docRoot = docRoot; + } + + public boolean isValidBucket(String bucketName) throws TException { + + if(bucketMap.containsKey(bucketName)) + return true; + + synchronized(bucketMap){ + + //double lock check + if(bucketMap.containsKey(bucketName)) + return true; + + String dbFileName = docRoot+File.separatorChar+bucketName+".tcb"; + File dbFile = new File(dbFileName); + + //open this index if it already exists + if(dbFile.isFile() && dbFile.canWrite()){ + bucketMap.put(bucketName, new TokyoCabinetDB(docRoot,bucketName)); + return true; + }else{ + return false; + } + } + } + + public String admin(String op, String 
data) throws ThrudocException, + TException { + + + + return "ok"; + } + + + public int decr(String bucket, String key, int amount) throws InvalidBucketException, TException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return 0; + } + + + /** + * Get's a key from the bucket + */ + public byte[] get(String bucket, String key) throws InvalidKeyException, InvalidBucketException, TException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).get(key); + } + + public void create_bucket(String bucket) throws ThrudocException, + TException { + + if(bucketMap.containsKey(bucket)) + return; + + bucketMap.put(bucket, new TokyoCabinetDB(docRoot,bucket)); + + } + + public void delete_bucket(String bucket) throws ThrudocException, + TException { + + if(!isValidBucket(bucket)) + this.create_bucket(bucket); + + + TokyoCabinetDB db = bucketMap.get(bucket); + + if(db == null) + return; //this can't happen + + db.erase(); + + bucketMap.remove(bucket); + } + + public Set get_bucket_list() throws ThrudocException, TException { + return bucketMap.keySet(); + } + + public int incr(String bucket, String key, int amount) throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).incr(key, amount); + } + + public int length(String bucket, String key) throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).length(key); + } + + public byte[] pop_back(String bucket, String key) throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).pop_back(key); + } + + public byte[] pop_front(String bucket, String key) throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).pop_front(key); + } + + public void push_back(String bucket, String key, byte[] value) + throws ThrudocException, InvalidBucketException, TException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).push_back(key, value); + + } + + public void push_front(String bucket, String key, byte[] value) + throws ThrudocException, TException, InvalidBucketException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).push_front(key, value); + + } + + public void put(String bucket, String key, byte[] value) throws InvalidBucketException, TException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).put(key,value); + + } + + public List range(String bucket, String key, int start, int end) + throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).range(key, start, end); + } + + public byte[] remove_at(String bucket, String key, int pos) + throws TException, InvalidBucketException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).remove_at(key, pos); + } + + public void remove(String bucket, String key) throws InvalidBucketException, TException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).remove(key); + } + + public void insert_at(String bucket, String key, byte[] value, int pos) + throws ThrudocException, InvalidBucketException, 
TException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).insert_at(key, value, pos); + } + + public void replace_at(String bucket, String key, byte[] value, int pos) + throws ThrudocException, InvalidBucketException, TException { + + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + bucketMap.get(bucket).replace_at(key, value, pos); + } + + public byte[] retrieve_at(String bucket, String key, int pos) + throws ThrudocException, InvalidBucketException, TException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + return bucketMap.get(bucket).retrieve_at(key, pos); + } + + public List scan(String bucket, String seed, int count) throws InvalidBucketException, TException { + if(!isValidBucket(bucket)) + throw new InvalidBucketException(); + + + return bucketMap.get(bucket).scan(seed,count); + } + +} diff --git a/src/org/thrudb/thrudoc/ThrudocLoggingProcessor.java b/src/org/thrudb/thrudoc/ThrudocLoggingProcessor.java new file mode 100644 index 0000000..acf6cd5 --- /dev/null +++ b/src/org/thrudb/thrudoc/ThrudocLoggingProcessor.java @@ -0,0 +1,88 @@ +package org.thrudb.thrudoc; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Set; + +import org.apache.log4j.Logger; +import org.apache.thrift.TException; +import org.apache.thrift.protocol.TMessage; +import org.apache.thrift.protocol.TMessageType; +import org.apache.thrift.protocol.TProtocol; +import org.thrudb.thrift.TPeekingTransport; +import org.thrudb.thrudoc.Thrudoc.Iface; +import org.thrudb.thrudoc.Thrudoc.Processor; + +import tokyocabinet.HDB; + +public class ThrudocLoggingProcessor extends Processor { + + private Set writeOps; + private Logger logger = Logger.getLogger(getClass()); + + + + public ThrudocLoggingProcessor(Iface iface) { + super(iface); + + writeOps = new HashSet(); + writeOps.addAll(Arrays.asList(new String[]{ + "create_bucket","delete_bucket","put", + "push_front","push_back","pop_front","pop_back", + "erase_at","insert_at","replace_at","incr","decr" + })); + } + + @Override + public boolean process(TProtocol iprot, TProtocol oprot) throws TException { + + TPeekingTransport peekTrans = (TPeekingTransport) iprot.getTransport(); + TPeekingTransport writeTrans = (TPeekingTransport) oprot.getTransport(); + + //Just peek at the initial message + peekTrans.setRecording(true); + + TMessage msg = iprot.readMessageBegin(); + + peekTrans.setRecording(false); + + if(writeOps.contains(msg.name)){ + logger.info("logging "+msg.name); + + peekTrans.setLogging(true); + } + + peekTrans.setReplayMode(true); + + boolean result = super.process(iprot, oprot); + + //only log operations that alter the db + if(writeOps.contains(msg.name)){ + writeTrans.swapInWriteBuffer(); + + msg = oprot.readMessageBegin(); + + //dont log operations that caused an exceptions + if(msg.type != TMessageType.EXCEPTION){ + byte[] log = peekTrans.getBuffer(); + + //writeLog(log); + } + + } + + + peekTrans.reset(); + + + + return result; + + } + + private void writeLog(byte[] logMessage){ + + } + + +} diff --git a/src/org/thrudb/thrudoc/ThrudocServer.java b/src/org/thrudb/thrudoc/ThrudocServer.java index 28c99eb..2ea8dd1 100644 --- a/src/org/thrudb/thrudoc/ThrudocServer.java +++ b/src/org/thrudb/thrudoc/ThrudocServer.java @@ -5,21 +5,33 @@ import java.util.Properties; import org.apache.log4j.PropertyConfigurator; -import org.apache.lucene.store.FSDirectory; import org.apache.thrift.TProcessor; +import org.apache.thrift.TProcessorFactory; +import 
org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.server.THsHaServer; import org.apache.thrift.server.TServer; import org.apache.thrift.server.THsHaServer.Options; import org.apache.thrift.transport.TNonblockingServerSocket; import org.apache.thrift.transport.TNonblockingServerTransport; -import org.thrudb.thrudoc.tokyocabinet.ThrudocHandler; +import org.thrudb.thrift.TPeekingTransportFactory; public class ThrudocServer { private String docRoot; + private String propertyName; + + public String getLogRoot() { + return propertyName; + } + + public void setLogRoot(String logRoot) { + this.propertyName = logRoot; + } + + private int port; private int threadCount; - + private TServer server; public String getDocRoot() { return docRoot; @@ -56,22 +68,65 @@ public void start(){ //Processor TProcessor processor = - new Thrudoc.Processor(new ThrudocHandler(docRoot)); + new ThrudocLoggingProcessor(new ThrudocHandler(docRoot)); Options opt = new Options(); opt.maxWorkerThreads = threadCount; + TPeekingTransportFactory peekFactory = new TPeekingTransportFactory(propertyName,"thrudoc_log"); + //Server - TServer server = new THsHaServer(processor,serverSocket); + //TServer server = new THsHaServer(processor,serverSocket); + server = new THsHaServer( new TProcessorFactory(processor), serverSocket, + peekFactory, peekFactory, + new TBinaryProtocol.Factory(), + new TBinaryProtocol.Factory(), + opt); - //Serve + + //Server server.serve(); + }catch(Exception e){ e.printStackTrace(); } } + public void stop(){ + server.stop(); + } + + public static String checkDirProperty(Properties properties, String propertyName){ + String property = properties.getProperty(propertyName); + + if(property == null){ + System.err.println(propertyName+" missing from property file"); + System.exit(0); + } + + File log = new File(property); + if(!log.exists()){ + if(log.mkdirs()){ + System.out.println("Created dir: "+property); + }else{ + System.err.println("Failed to create dir:"+property); + System.exit(0); + } + }else if(!log.isDirectory()) { + System.err.println("exists but not a directory:"+property); + System.exit(0); + }else if(!log.canWrite()) { + System.err.println("dir exists but not writable:"+property); + System.exit(0); + }else{ + System.out.println(propertyName+": "+property); + } + + return propertyName; + } + + /** * @param args */ @@ -116,33 +171,12 @@ public static void main(String[] args) { ThrudocServer thrudocServer = new ThrudocServer(); - if(!properties.containsKey("DOC_ROOT")){ - System.out.println("DOC_ROOT Property Required"); - System.exit(0); - } - - //make sure root exits - String docRoot = properties.getProperty("DOC_ROOT"); - File doc = new File(docRoot); - if(!doc.exists()){ - if(doc.mkdirs()){ - System.out.println("Created doc root: "+docRoot); - }else{ - System.err.println("Failed to create doc root:"+docRoot); - System.exit(0); - } - }else if(!doc.isDirectory()) { - System.err.println("doc root exists but not a directory:"+docRoot); - System.exit(0); - }else if(!doc.canWrite()) { - System.err.println("doc root exists but not writable:"+docRoot); - System.exit(0); - }else{ - System.out.println("doc root: "+docRoot); - } - + String docRoot = checkDirProperty(properties, "DOC_ROOT"); thrudocServer.setDocRoot(docRoot); + String logRoot = checkDirProperty(properties, "LOG_ROOT"); + thrudocServer.setLogRoot(logRoot); + int port = Integer.valueOf(properties.getProperty("SERVER_PORT","9090")); System.out.println("service port: "+port); diff --git 
a/src/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetDB.java b/src/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetDB.java index 37dd4b3..d941f5b 100644 --- a/src/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetDB.java +++ b/src/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetDB.java @@ -8,6 +8,7 @@ import org.apache.log4j.Logger; import org.apache.thrift.TException; import org.thrudb.thrudoc.InvalidKeyException; +import org.thrudb.thrudoc.ThrudocBackend; import tokyocabinet.BDB; import tokyocabinet.BDBCUR; @@ -18,7 +19,7 @@ * @author jake * */ -public class TokyoCabinetDB { +public class TokyoCabinetDB implements ThrudocBackend { private Logger logger = Logger.getLogger(getClass()); private String docRoot; @@ -86,7 +87,8 @@ public byte[] get(String key) throws InvalidKeyException { * @param key the key name * @param value the binary value */ - public void put(String key, byte[] value) { + public void put(String key, byte[] value) { + bdb.put(key.getBytes(), value); } diff --git a/src/org/thrudb/util/bloom/BloomFilter.java b/src/org/thrudb/util/bloom/BloomFilter.java new file mode 100644 index 0000000..5ef2652 --- /dev/null +++ b/src/org/thrudb/util/bloom/BloomFilter.java @@ -0,0 +1,234 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import java.util.BitSet; + +/** + * Implements a Bloom filter, as defined by Bloom in 1970. + *
+ * The Bloom filter is a data structure that was introduced in 1970 and that
+ * has been adopted by the networking research community in the past decade
+ * thanks to the bandwidth efficiencies that it offers for the transmission of
+ * set membership information between networked hosts. A sender encodes the
+ * information into a bit vector, the Bloom filter, that is more compact than a
+ * conventional representation. Computation and space costs for construction
+ * are linear in the number of elements. The receiver uses the filter to test
+ * whether various elements are members of the set. Though the filter will
+ * occasionally return a false positive, it will never return a false negative.
+ * When creating the filter, the sender can choose its desired point in a
+ * trade-off between the false positive rate and the size.
+ *
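+ * Quantitatively (the standard analysis, not specific to this implementation):
+ * with an m-bit vector, k hash functions and n inserted keys, the
+ * false-positive rate is roughly (1 - e^(-kn/m))^k, which is minimised by
+ * choosing k = (m/n) ln 2.
+ *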
+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Space/Time Trade-Offs in Hash Coding with Allowable Errors + */ +public class BloomFilter extends Filter { + private static final byte[] bitvalues = new byte[] { + (byte)0x01, + (byte)0x02, + (byte)0x04, + (byte)0x08, + (byte)0x10, + (byte)0x20, + (byte)0x40, + (byte)0x80 + }; + + /** The bit vector. */ + BitSet bits; + + /** Default constructor - use with readFields */ + public BloomFilter() { + super(); + } + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public BloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + bits = new BitSet(this.vectorSize); + } + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + bits.set(h[i]); + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + + this.bits.and(((BloomFilter) filter).bits); + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("key cannot be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + for(int i = 0; i < nbHash; i++) { + if(!bits.get(h[i])) { + return false; + } + } + return true; + } + + @Override + public void not() { + bits.flip(0, vectorSize - 1); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + bits.or(((BloomFilter) filter).bits); + } + + @Override + public void xor(Filter filter) { + if(filter == null + || !(filter instanceof BloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be xor-ed"); + } + bits.xor(((BloomFilter) filter).bits); + } + + @Override + public String toString() { + return bits.toString(); + } + + /** + * @return size of the the bloomfilter + */ + public int getVectorSize() { + return this.vectorSize; + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + byte[] bytes = new byte[getNBytes()]; + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if (bitIndex == 0) { + bytes[byteIndex] = 0; + } + if (bits.get(i)) { + bytes[byteIndex] |= bitvalues[bitIndex]; + } + } + out.write(bytes); + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + bits = new BitSet(this.vectorSize); + byte[] bytes = new byte[getNBytes()]; + in.readFully(bytes); + for(int i = 0, byteIndex = 0, bitIndex = 0; i < vectorSize; i++, bitIndex++) { + if (bitIndex == 8) { + bitIndex = 0; + byteIndex++; + } + if ((bytes[byteIndex] & bitvalues[bitIndex]) != 0) { + bits.set(i); + } + } + } + + /* @return number of bytes needed to hold bit vector */ + private 
int getNBytes() { + return (vectorSize + 7) / 8; + } +}//end class diff --git a/src/org/thrudb/util/bloom/CountingBloomFilter.java b/src/org/thrudb/util/bloom/CountingBloomFilter.java new file mode 100644 index 0000000..3d68269 --- /dev/null +++ b/src/org/thrudb/util/bloom/CountingBloomFilter.java @@ -0,0 +1,305 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a counting Bloom filter, as defined by Fan et al. in a ToN + * 2000 paper. + *

+ * A counting Bloom filter is an improvement to a standard Bloom filter as it
+ * allows dynamic additions and deletions of set membership information.  This
+ * is achieved through the use of a counting vector instead of a bit vector.
+ *

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * + * @see Summary cache: a scalable wide-area web cache sharing protocol + */ +public final class CountingBloomFilter extends Filter { + /** Storage for the counting buckets */ + private long[] buckets; + + /** We are using 4bit buckets, so each bucket can count to 15 */ + private final static long BUCKET_MAX_VALUE = 15; + + /** Default constructor - use with readFields */ + public CountingBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + buckets = new long[buckets2words(vectorSize)]; + } + + /** returns the number of 64 bit words it would take to hold vectorSize buckets */ + private static int buckets2words(int vectorSize) { + return ((vectorSize - 1) >>> 4) + 1; + } + + + @Override + public void add(Key key) { + if(key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only increment if the count in the bucket is less than BUCKET_MAX_VALUE + if(bucketValue < BUCKET_MAX_VALUE) { + // increment by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift); + } + } + } + + /** + * Removes a specified key from this counting Bloom filter. + *

+ * Invariant: nothing happens if the specified key does not belong to this counter Bloom filter. + * @param key The key to remove. + */ + public void delete(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + if(!membershipTest(key)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE + if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { + // decrement by 1 + buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); + } + } + } + + @Override + public void and(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be and-ed"); + } + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] &= cbf.buckets[i]; + } + } + + @Override + public boolean membershipTest(Key key) { + if(key == null) { + throw new NullPointerException("Key may not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for(int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + + if((buckets[wordNum] & bucketMask) == 0) { + return false; + } + } + + return true; + } + + /** + * This method calculates an approximate count of the key, i.e. how many + * times the key was added to the filter. This allows the filter to be + * used as an approximate key -> count map. + *

NOTE: due to the bucket size of this filter, inserting the same + * key more than 15 times will cause an overflow at all filter positions + * associated with this key, and it will significantly increase the error + * rate for this and other keys. For this reason the filter can only be + * used to store small count values 0 <= N << 15. + * @param key key to be tested + * @return 0 if the key is not present. Otherwise, a positive value v will + * be returned such that v == count with probability equal to the + * error rate of this filter, and v > count otherwise. + * Additionally, if the filter experienced an underflow as a result of + * {@link #delete(Key)} operation, the return value may be lower than the + * count with the probability of the false negative rate of such + * filter. + */ + public int approximateCount(Key key) { + int res = Integer.MAX_VALUE; + int[] h = hash.hash(key); + hash.clear(); + for (int i = 0; i < nbHash; i++) { + // find the bucket + int wordNum = h[i] >> 4; // div 16 + int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + if (bucketValue < res) res = (int)bucketValue; + } + if (res != Integer.MAX_VALUE) { + return res; + } else { + return 0; + } + } + + @Override + public void not() { + throw new UnsupportedOperationException("not() is undefined for " + + this.getClass().getName()); + } + + @Override + public void or(Filter filter) { + if(filter == null + || !(filter instanceof CountingBloomFilter) + || filter.vectorSize != this.vectorSize + || filter.nbHash != this.nbHash) { + throw new IllegalArgumentException("filters cannot be or-ed"); + } + + CountingBloomFilter cbf = (CountingBloomFilter)filter; + + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + this.buckets[i] |= cbf.buckets[i]; + } + } + + @Override + public void xor(Filter filter) { + throw new UnsupportedOperationException("xor() is undefined for " + + this.getClass().getName()); + } + + @Override + public String toString() { + StringBuilder res = new StringBuilder(); + + for(int i = 0; i < vectorSize; i++) { + if(i > 0) { + res.append(" "); + } + + int wordNum = i >> 4; // div 16 + int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4 + + long bucketMask = 15L << bucketShift; + long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; + + res.append(bucketValue); + } + + return res.toString(); + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + int sizeInWords = buckets2words(vectorSize); + for(int i = 0; i < sizeInWords; i++) { + out.writeLong(buckets[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int sizeInWords = buckets2words(vectorSize); + buckets = new long[sizeInWords]; + for(int i = 0; i < sizeInWords; i++) { + buckets[i] = in.readLong(); + } + } +} \ No newline at end of file diff --git a/src/org/thrudb/util/bloom/DynamicBloomFilter.java b/src/org/thrudb/util/bloom/DynamicBloomFilter.java new file mode 100644 index 0000000..30c86b6 --- /dev/null +++ b/src/org/thrudb/util/bloom/DynamicBloomFilter.java @@ -0,0 +1,293 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. 
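A quick illustration of the 4-bit bucket arithmetic used by add(), delete() and approximateCount() above: each long packs sixteen saturating counters, addressed by wordNum = h >> 4 and bucketShift = (h & 0x0f) << 2. A minimal standalone sketch; the class and variable names are illustrative, not part of this patch:

public class FourBitBucketDemo {
    private static final long BUCKET_MAX_VALUE = 15; // 4-bit saturating counter

    public static void main(String[] args) {
        long[] words = new long[1]; // one long holds 16 buckets
        int position = 5;           // a hashed position in [0, 16)

        int wordNum = position >> 4;              // div 16: which long
        int bucketShift = (position & 0x0f) << 2; // (mod 16) * 4: bit offset of the bucket
        long bucketMask = 15L << bucketShift;

        // increment the bucket three times, saturating at BUCKET_MAX_VALUE
        for (int i = 0; i < 3; i++) {
            long value = (words[wordNum] & bucketMask) >>> bucketShift;
            if (value < BUCKET_MAX_VALUE) {
                words[wordNum] = (words[wordNum] & ~bucketMask) | ((value + 1) << bucketShift);
            }
        }

        System.out.println((words[wordNum] & bucketMask) >>> bucketShift); // prints 3
    }
}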
+ * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * Implements a dynamic Bloom filter, as defined in the INFOCOM 2006 paper. + *

+ * A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but + * each of the s rows is a standard Bloom filter. The creation + * process of a DBF is iterative. At the start, the DBF is a 1 * m + * bit matrix, i.e., it is composed of a single standard Bloom filter. + * It assumes that nr elements are recorded in the + * initial bit vector, where nr <= n (n is + * the cardinality of the set A to record in the filter). + *

+ * As the size of A grows during the execution of the application,
+ * several keys must be inserted in the DBF.  When inserting a key into the DBF,
+ * one must first get an active Bloom filter in the matrix.  A Bloom filter is
+ * active when the number of recorded keys, nr, is
+ * strictly less than the current cardinality of A, n.
+ * If an active Bloom filter is found, the key is inserted and
+ * nr is incremented by one.  On the other hand, if there
+ * is no active Bloom filter, a new one is created (i.e., a new row is added to
+ * the matrix) according to the current size of A; the element is
+ * added to this new Bloom filter, and its nr value is set to one.
+ * A given key is said to belong to the
+ * DBF if the k positions are set to one in one of the matrix rows.
+ *
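As a usage sketch of that row growth, assuming this patch's org.thrudb.util.bloom and org.thrudb.util.hash packages are on the classpath (sizes and key counts are illustrative):

import org.thrudb.util.bloom.DynamicBloomFilter;
import org.thrudb.util.bloom.Key;
import org.thrudb.util.hash.Hash;

public class DynamicBloomFilterDemo {
    public static void main(String[] args) {
        // 1024 bits per row, 4 hash functions, at most nr = 100 keys per row
        DynamicBloomFilter dbf = new DynamicBloomFilter(1024, 4, Hash.MURMUR_HASH, 100);

        // Inserting more than nr keys forces new rows to be appended to the matrix.
        for (int i = 0; i < 250; i++) {
            dbf.add(new Key(("key-" + i).getBytes()));
        }

        System.out.println(dbf.membershipTest(new Key("key-42".getBytes()))); // true, never a false negative
        System.out.println(dbf.membershipTest(new Key("absent".getBytes()))); // false with high probability
    }
}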

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * + * @see Theory and Network Applications of Dynamic Bloom Filters + */ +public class DynamicBloomFilter extends Filter { + /** + * Threshold for the maximum number of key to record in a dynamic Bloom filter row. + */ + private int nr; + + /** + * The number of keys recorded in the current standard active Bloom filter. + */ + private int currentNbRecord; + + /** + * The matrix of Bloom filter. + */ + private BloomFilter[] matrix; + + /** + * Zero-args constructor for the serialization. + */ + public DynamicBloomFilter() { } + + /** + * Constructor. + *

+ * Builds an empty Dynamic Bloom filter.
+ * @param vectorSize The number of bits in the vector.
+ * @param nbHash The number of hash functions to consider.
+ * @param hashType type of the hashing function (see
+ * {@link org.apache.hadoop.util.hash.Hash}).
+ * @param nr The threshold for the maximum number of keys to record in a
+ * dynamic Bloom filter row.
+ */
+  public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
+    super(vectorSize, nbHash, hashType);
+
+    this.nr = nr;
+    this.currentNbRecord = 0;
+
+    matrix = new BloomFilter[1];
+    matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
+  }
+
+  @Override
+  public void add(Key key) {
+    if (key == null) {
+      throw new NullPointerException("Key can not be null");
+    }
+
+    BloomFilter bf = getActiveStandardBF();
+
+    if (bf == null) {
+      addRow();
+      bf = matrix[matrix.length - 1];
+      currentNbRecord = 0;
+    }
+
+    bf.add(key);
+
+    currentNbRecord++;
+  }
+
+  @Override
+  public void and(Filter filter) {
+    if (filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be and-ed");
+    }
+
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].and(dbf.matrix[i]);
+    }
+  }
+
+  @Override
+  public boolean membershipTest(Key key) {
+    if (key == null) {
+      return true;
+    }
+
+    for (int i = 0; i < matrix.length; i++) {
+      if (matrix[i].membershipTest(key)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  @Override
+  public void not() {
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].not();
+    }
+  }
+
+  @Override
+  public void or(Filter filter) {
+    if (filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be or-ed");
+    }
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].or(dbf.matrix[i]);
+    }
+  }
+
+  @Override
+  public void xor(Filter filter) {
+    if (filter == null
+        || !(filter instanceof DynamicBloomFilter)
+        || filter.vectorSize != this.vectorSize
+        || filter.nbHash != this.nbHash) {
+      throw new IllegalArgumentException("filters cannot be xor-ed");
+    }
+    DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+    if (dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+      throw new IllegalArgumentException("filters cannot be xor-ed");
+    }
+
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].xor(dbf.matrix[i]);
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuilder res = new StringBuilder();
+
+    for (int i = 0; i < matrix.length; i++) {
+      res.append(matrix[i]);
+      res.append('\n');
+    }
+    return res.toString();
+  }
+
+  // Writable
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    super.write(out);
+    out.writeInt(nr);
+    out.writeInt(currentNbRecord);
+    out.writeInt(matrix.length);
+    for (int i = 0; i < matrix.length; i++) {
+      matrix[i].write(out);
+    }
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    nr = in.readInt();
+    currentNbRecord = in.readInt();
+    int len = in.readInt();
+    matrix = new BloomFilter[len];
+    for (int i = 0; i < len; i++) {
+      matrix[i] = new BloomFilter();
+      matrix[i].readFields(in);
+    }
+  }
+
+  /**
+   * Adds a new row to this dynamic Bloom filter.
+   */
+  private void addRow() {
+    BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
+
+    for (int i = 0; i < matrix.length; i++) {
+      tmp[i] = matrix[i];
+    }
+
+    tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
+
+    matrix = tmp;
+  }
+
+  /**
+   * Returns the active standard Bloom filter in this dynamic Bloom filter.
+   * @return BloomFilter The active standard Bloom filter.
+   *         Null otherwise.
+ */ + private BloomFilter getActiveStandardBF() { + if (currentNbRecord >= nr) { + return null; + } + + return matrix[matrix.length - 1]; + } +} diff --git a/src/org/thrudb/util/bloom/Filter.java b/src/org/thrudb/util/bloom/Filter.java new file mode 100644 index 0000000..45a266e --- /dev/null +++ b/src/org/thrudb/util/bloom/Filter.java @@ -0,0 +1,212 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import org.thrudb.util.hash.Hash; + +/** + * Defines the general behavior of a filter. + *

+ * A filter is a data structure which offers a lossy summary of a set A.  The
+ * key idea is to map entries of A (also called keys) into several positions
+ * in a vector through the use of several hash functions.
+ *

+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension). + *
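For concreteness, a minimal sketch of that contract against the BloomFilter added earlier in this patch; the parameters and keys are illustrative:

import org.thrudb.util.bloom.BloomFilter;
import org.thrudb.util.bloom.Key;
import org.thrudb.util.hash.Hash;

public class BloomFilterDemo {
    public static void main(String[] args) {
        BloomFilter filter = new BloomFilter(1024, 4, Hash.JENKINS_HASH);

        filter.add(new Key("alice".getBytes()));
        filter.add(new Key("bob".getBytes()));

        // An added key always tests positive (no false negatives)...
        System.out.println(filter.membershipTest(new Key("alice".getBytes()))); // true
        // ...while an absent key may still test positive with small probability (false positive).
        System.out.println(filter.membershipTest(new Key("carol".getBytes())));
    }
}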

+ * It must be extended in order to define the real behavior.
+
+ * @see Key The general behavior of a key
+ * @see HashFunction A hash function
+ */
+public abstract class Filter {
+  private static final int VERSION = -1; // negative to accommodate for old format
+  /** The vector size of this filter. */
+  protected int vectorSize;
+
+  /** The hash function used to map a key to several positions in the vector. */
+  protected HashFunction hash;
+
+  /** The number of hash functions to consider. */
+  protected int nbHash;
+
+  /** Type of hashing function to use. */
+  protected int hashType;
+
+  protected Filter() {}
+
+  /**
+   * Constructor.
+   * @param vectorSize The vector size of this filter.
+   * @param nbHash The number of hash functions to consider.
+   * @param hashType type of the hashing function (see {@link Hash}).
+   */
+  protected Filter(int vectorSize, int nbHash, int hashType) {
+    this.vectorSize = vectorSize;
+    this.nbHash = nbHash;
+    this.hashType = hashType;
+    this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
+  }
+
+  /**
+   * Adds a key to this filter.
+   * @param key The key to add.
+   */
+  public abstract void add(Key key);
+
+  /**
+   * Determines whether a specified key belongs to this filter.
+   * @param key The key to test.
+   * @return boolean True if the specified key belongs to this filter.
+   *         False otherwise.
+   */
+  public abstract boolean membershipTest(Key key);
+
+  /**
+   * Performs a logical AND between this filter and a specified filter.
+   *

+ * Invariant: The result is assigned to this filter.
+ * @param filter The filter to AND with.
+ */
+  public abstract void and(Filter filter);
+
+  /**
+   * Performs a logical OR between this filter and a specified filter.
+   *

+ * Invariant: The result is assigned to this filter.
+ * @param filter The filter to OR with.
+ */
+  public abstract void or(Filter filter);
+
+  /**
+   * Performs a logical XOR between this filter and a specified filter.
+   *

+ * Invariant: The result is assigned to this filter. + * @param filter The filter to XOR with. + */ + public abstract void xor(Filter filter); + + /** + * Performs a logical NOT on this filter. + *

+ * The result is assigned to this filter. + */ + public abstract void not(); + + /** + * Adds a list of keys to this filter. + * @param keys The list of keys. + */ + public void add(List keys){ + if(keys == null) { + throw new IllegalArgumentException("ArrayList may not be null"); + } + + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds a collection of keys to this filter. + * @param keys The collection of keys. + */ + public void add(Collection keys){ + if(keys == null) { + throw new IllegalArgumentException("Collection may not be null"); + } + for(Key key: keys) { + add(key); + } + }//end add() + + /** + * Adds an array of keys to this filter. + * @param keys The array of keys. + */ + public void add(Key[] keys){ + if(keys == null) { + throw new IllegalArgumentException("Key[] may not be null"); + } + for(int i = 0; i < keys.length; i++) { + add(keys[i]); + } + }//end add() + + // Writable interface + + public void write(DataOutput out) throws IOException { + out.writeInt(VERSION); + out.writeInt(this.nbHash); + out.writeByte(this.hashType); + out.writeInt(this.vectorSize); + } + + public void readFields(DataInput in) throws IOException { + int ver = in.readInt(); + if (ver > 0) { // old unversioned format + this.nbHash = ver; + this.hashType = Hash.JENKINS_HASH; + } else if (ver == VERSION) { + this.nbHash = in.readInt(); + this.hashType = in.readByte(); + } else { + throw new IOException("Unsupported version: " + ver); + } + this.vectorSize = in.readInt(); + this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType); + } +}//end class diff --git a/src/org/thrudb/util/bloom/HashFunction.java b/src/org/thrudb/util/bloom/HashFunction.java new file mode 100644 index 0000000..7ac2342 --- /dev/null +++ b/src/org/thrudb/util/bloom/HashFunction.java @@ -0,0 +1,119 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
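The and/or/xor operations declared above mutate the receiving filter in place, and both operands must have been built with the same vectorSize and nbHash. A hedged sketch of combining two filters (illustrative parameters):

import org.thrudb.util.bloom.BloomFilter;
import org.thrudb.util.bloom.Key;
import org.thrudb.util.hash.Hash;

public class FilterCombineDemo {
    public static void main(String[] args) {
        // Identical parameters are required, or or() throws IllegalArgumentException.
        BloomFilter a = new BloomFilter(512, 3, Hash.JENKINS_HASH);
        BloomFilter b = new BloomFilter(512, 3, Hash.JENKINS_HASH);

        a.add(new Key("x".getBytes()));
        b.add(new Key("y".getBytes()));

        a.or(b); // 'a' now approximates the union {x, y}; 'b' is unchanged
        System.out.println(a.membershipTest(new Key("y".getBytes()))); // true
    }
}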
+ */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.thrudb.util.bloom; + +import org.thrudb.util.hash.Hash; + +/** + * Implements a hash object that returns a certain number of hashed values. + * + * @see Key The general behavior of a key being stored in a filter + * @see Filter The general behavior of a filter + */ +public final class HashFunction { + /** The number of hashed values. */ + private int nbHash; + + /** The maximum highest returned value. */ + private int maxValue; + + /** Hashing algorithm to use. */ + private Hash hashFunction; + + /** + * Constructor. + *

+ * Builds a hash function that must obey to a given maximum number of returned values and a highest value. + * @param maxValue The maximum highest returned value. + * @param nbHash The number of resulting hashed values. + * @param hashType type of the hashing function (see {@link Hash}). + */ + public HashFunction(int maxValue, int nbHash, int hashType) { + if (maxValue <= 0) { + throw new IllegalArgumentException("maxValue must be > 0"); + } + + if (nbHash <= 0) { + throw new IllegalArgumentException("nbHash must be > 0"); + } + + this.maxValue = maxValue; + this.nbHash = nbHash; + this.hashFunction = Hash.getInstance(hashType); + if (this.hashFunction == null) + throw new IllegalArgumentException("hashType must be known"); + } + + /** Clears this hash function. A NOOP */ + public void clear() { + } + + /** + * Hashes a specified key into several integers. + * @param k The specified key. + * @return The array of hashed values. + */ + public int[] hash(Key k){ + byte[] b = k.getBytes(); + if (b == null) { + throw new NullPointerException("buffer reference is null"); + } + if (b.length == 0) { + throw new IllegalArgumentException("key length must be > 0"); + } + int[] result = new int[nbHash]; + for (int i = 0, initval = 0; i < nbHash; i++) { + initval = hashFunction.hash(b, initval); + result[i] = Math.abs(initval % maxValue); + } + return result; + } +} \ No newline at end of file diff --git a/src/org/thrudb/util/bloom/Key.java b/src/org/thrudb/util/bloom/Key.java new file mode 100644 index 0000000..0c94346 --- /dev/null +++ b/src/org/thrudb/util/bloom/Key.java @@ -0,0 +1,176 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
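hash(Key) above derives its nbHash positions by chaining: each round re-seeds the underlying hash with the previous round's output and reduces it modulo maxValue. The same loop, sketched directly against the Hash API from this patch (values illustrative):

import org.thrudb.util.hash.Hash;

public class ChainedHashDemo {
    public static void main(String[] args) {
        Hash h = Hash.getInstance(Hash.MURMUR_HASH);
        byte[] keyBytes = "example".getBytes();

        int nbHash = 4;      // number of positions to derive
        int maxValue = 1024; // exclusive upper bound, i.e. the vector size

        int[] positions = new int[nbHash];
        int initval = 0;
        for (int i = 0; i < nbHash; i++) {
            initval = h.hash(keyBytes, initval);         // previous output seeds the next round
            positions[i] = Math.abs(initval % maxValue); // map into [0, maxValue)
        }
        for (int p : positions) {
            System.out.println(p);
        }
    }
}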
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * The general behavior of a key that must be stored in a filter. + * + * @see Filter The general behavior of a filter + */ +public class Key implements Comparable { + /** Byte value of key */ + byte[] bytes; + + /** + * The weight associated to this key. + *

+ * Invariant: if it is not specified, each instance of + * Key will have a default weight of 1.0 + */ + double weight; + + /** default constructor - use with readFields */ + public Key() {} + + /** + * Constructor. + *

+ * Builds a key with a default weight. + * @param value The byte value of this key. + */ + public Key(byte[] value) { + this(value, 1.0); + } + + /** + * Constructor. + *

+ * Builds a key with a specified weight. + * @param value The value of this key. + * @param weight The weight associated to this key. + */ + public Key(byte[] value, double weight) { + set(value, weight); + } + + /** + * @param value + * @param weight + */ + public void set(byte[] value, double weight) { + if (value == null) { + throw new IllegalArgumentException("value can not be null"); + } + this.bytes = value; + this.weight = weight; + } + + /** @return byte[] The value of this key. */ + public byte[] getBytes() { + return this.bytes; + } + + /** @return Returns the weight associated to this key. */ + public double getWeight() { + return weight; + } + + /** + * Increments the weight of this key with a specified value. + * @param weight The increment. + */ + public void incrementWeight(double weight) { + this.weight += weight; + } + + /** Increments the weight of this key by one. */ + public void incrementWeight() { + this.weight++; + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof Key)) { + return false; + } + return this.compareTo((Key)o) == 0; + } + + @Override + public int hashCode() { + int result = 0; + for (int i = 0; i < bytes.length; i++) { + result ^= Byte.valueOf(bytes[i]).hashCode(); + } + result ^= Double.valueOf(weight).hashCode(); + return result; + } + + // Writable + + public void write(DataOutput out) throws IOException { + out.writeInt(bytes.length); + out.write(bytes); + out.writeDouble(weight); + } + + public void readFields(DataInput in) throws IOException { + this.bytes = new byte[in.readInt()]; + in.readFully(this.bytes); + weight = in.readDouble(); + } + + // Comparable + + public int compareTo(Key other) { + int result = this.bytes.length - other.getBytes().length; + for (int i = 0; result == 0 && i < bytes.length; i++) { + result = this.bytes[i] - other.bytes[i]; + } + + if (result == 0) { + result = Double.valueOf(this.weight - other.weight).intValue(); + } + return result; + } +} \ No newline at end of file diff --git a/src/org/thrudb/util/bloom/RemoveScheme.java b/src/org/thrudb/util/bloom/RemoveScheme.java new file mode 100644 index 0000000..30bcdd0 --- /dev/null +++ b/src/org/thrudb/util/bloom/RemoveScheme.java @@ -0,0 +1,91 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 + * (http://www.one-lab.org) + * + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
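A Key pairs raw bytes with a weight (consumed by the retouched-filter heuristics later in this patch); equality and ordering compare the bytes first and then the weight. A small sketch, with string keys chosen purely for illustration:

import org.thrudb.util.bloom.Key;

public class KeyDemo {
    public static void main(String[] args) {
        Key k1 = new Key("user:1".getBytes());      // default weight 1.0
        Key k2 = new Key("user:1".getBytes(), 1.0);

        System.out.println(k1.equals(k2));  // true: same bytes, same weight

        k2.incrementWeight();               // weight becomes 2.0
        System.out.println(k1.equals(k2));  // false: weights now differ
        System.out.println(k2.getWeight()); // 2.0
    }
}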
IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.thrudb.util.bloom; + +/** + * Defines the different remove scheme for retouched Bloom filters. + *

+ * Originally created by + * European Commission One-Lab Project 034819. + */ +public interface RemoveScheme { + /** + * Random selection. + *

+ * The idea is to randomly select a bit to reset. + */ + public final static short RANDOM = 0; + + /** + * MinimumFN Selection. + *

+ * The idea is to select the bit to reset that will generate the minimum
+ * number of false negatives.
+ */
+  public final static short MINIMUM_FN = 1;
+
+  /**
+   * MaximumFP Selection.
+   *

+ * The idea is to select the bit to reset that will remove the maximum number
+ * of false positives.
+ */
+  public final static short MAXIMUM_FP = 2;
+
+  /**
+   * Ratio Selection.
+   *

+ * The idea is to select the bit to reset that will, at the same time, remove + * the maximum number of false positve while minimizing the amount of false + * negative generated. + */ + public final static short RATIO = 3; +} diff --git a/src/org/thrudb/util/bloom/RetouchedBloomFilter.java b/src/org/thrudb/util/bloom/RetouchedBloomFilter.java new file mode 100644 index 0000000..c7cb3c9 --- /dev/null +++ b/src/org/thrudb/util/bloom/RetouchedBloomFilter.java @@ -0,0 +1,450 @@ +/** + * + * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org) + * All rights reserved. + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * - Neither the name of the University Catholique de Louvain - UCL + * nor the names of its contributors may be used to endorse or + * promote products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN + * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.thrudb.util.bloom; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +/** + * Implements a retouched Bloom filter, as defined in the CoNEXT 2006 paper. + *

+ * It allows the removal of selected false positives at the cost of introducing + * random false negatives, and with the benefit of eliminating some random false + * positives at the same time. + * + *
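A hedged sketch of the workflow this enables, using the RemoveScheme constants defined above; the parameters are illustrative, and whether a given key actually tests positive depends on the filter's state:

import org.thrudb.util.bloom.Key;
import org.thrudb.util.bloom.RemoveScheme;
import org.thrudb.util.bloom.RetouchedBloomFilter;
import org.thrudb.util.hash.Hash;

public class RetouchedDemo {
    public static void main(String[] args) {
        RetouchedBloomFilter rbf = new RetouchedBloomFilter(1024, 4, Hash.JENKINS_HASH);
        rbf.add(new Key("wanted".getBytes()));

        Key falsePositive = new Key("unwanted".getBytes());
        if (rbf.membershipTest(falsePositive)) {      // suppose this key tests positive
            rbf.addFalsePositive(falsePositive);      // record it as a known false positive
            rbf.selectiveClearing(falsePositive, RemoveScheme.RATIO); // clear one of its bits
            // The key now tests negative, but "wanted" may have become a
            // false negative if it shared the cleared bit.
        }
    }
}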

+ * Originally created by + * European Commission One-Lab Project 034819. + * + * @see Filter The general behavior of a filter + * @see BloomFilter A Bloom filter + * @see RemoveScheme The different selective clearing algorithms + * + * @see Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives + */ +public final class RetouchedBloomFilter extends BloomFilter +implements RemoveScheme { + /** + * KeyList vector (or ElementList Vector, as defined in the paper) of false positives. + */ + List[] fpVector; + + /** + * KeyList vector of keys recorded in the filter. + */ + List[] keyVector; + + /** + * Ratio vector. + */ + double[] ratio; + + private Random rand; + + /** Default constructor - use with readFields */ + public RetouchedBloomFilter() {} + + /** + * Constructor + * @param vectorSize The vector size of this filter. + * @param nbHash The number of hash function to consider. + * @param hashType type of the hashing function (see + * {@link org.apache.hadoop.util.hash.Hash}). + */ + public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) { + super(vectorSize, nbHash, hashType); + + this.rand = null; + createVector(); + } + + @Override + public void add(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + bits.set(h[i]); + keyVector[h[i]].add(key); + } + } + + /** + * Adds a false positive information to this retouched Bloom filter. + *

+ * Invariant: if the false positive is null, nothing happens. + * @param key The false positive key to add. + */ + public void addFalsePositive(Key key) { + if (key == null) { + throw new NullPointerException("key can not be null"); + } + + int[] h = hash.hash(key); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + fpVector[h[i]].add(key); + } + } + + /** + * Adds a collection of false positive information to this retouched Bloom filter. + * @param coll The collection of false positive. + */ + public void addFalsePositive(Collection coll) { + if (coll == null) { + throw new NullPointerException("Collection can not be null"); + } + + for (Key k : coll) { + addFalsePositive(k); + } + } + + /** + * Adds a list of false positive information to this retouched Bloom filter. + * @param keys The list of false positive. + */ + public void addFalsePositive(List keys) { + if (keys == null) { + throw new NullPointerException("ArrayList can not be null"); + } + + for (Key k : keys) { + addFalsePositive(k); + } + } + + /** + * Adds an array of false positive information to this retouched Bloom filter. + * @param keys The array of false positive. + */ + public void addFalsePositive(Key[] keys) { + if (keys == null) { + throw new NullPointerException("Key[] can not be null"); + } + + for (int i = 0; i < keys.length; i++) { + addFalsePositive(keys[i]); + } + } + + /** + * Performs the selective clearing for a given key. + * @param k The false positive key to remove from this retouched Bloom filter. + * @param scheme The selective clearing scheme to apply. + */ + public void selectiveClearing(Key k, short scheme) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + + if (!membershipTest(k)) { + throw new IllegalArgumentException("Key is not a member"); + } + + int index = 0; + int[] h = hash.hash(k); + + switch(scheme) { + + case RANDOM: + index = randomRemove(); + break; + + case MINIMUM_FN: + index = minimumFnRemove(h); + break; + + case MAXIMUM_FP: + index = maximumFpRemove(h); + break; + + case RATIO: + index = ratioRemove(h); + break; + + default: + throw new AssertionError("Undefined selective clearing scheme"); + + } + + clearBit(index); + } + + private int randomRemove() { + if (rand == null) { + rand = new Random(); + } + + return rand.nextInt(nbHash); + } + + /** + * Chooses the bit position that minimizes the number of false negative generated. + * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated. + */ + private int minimumFnRemove(int[] h) { + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + double keyWeight = getWeight(keyVector[h[i]]); + + if (keyWeight < minValue) { + minIndex = h[i]; + minValue = keyWeight; + } + + } + + return minIndex; + } + + /** + * Chooses the bit position that maximizes the number of false positive removed. + * @param h The different bit positions. + * @return The position that maximizes the number of false positive removed. + */ + private int maximumFpRemove(int[] h) { + int maxIndex = Integer.MIN_VALUE; + double maxValue = Double.MIN_VALUE; + + for (int i = 0; i < nbHash; i++) { + double fpWeight = getWeight(fpVector[h[i]]); + + if (fpWeight > maxValue) { + maxValue = fpWeight; + maxIndex = h[i]; + } + } + + return maxIndex; + } + + /** + * Chooses the bit position that minimizes the number of false negative generated while maximizing. + * the number of false positive removed. 
+ * @param h The different bit positions. + * @return The position that minimizes the number of false negative generated while maximizing. + */ + private int ratioRemove(int[] h) { + computeRatio(); + int minIndex = Integer.MAX_VALUE; + double minValue = Double.MAX_VALUE; + + for (int i = 0; i < nbHash; i++) { + if (ratio[h[i]] < minValue) { + minValue = ratio[h[i]]; + minIndex = h[i]; + } + } + + return minIndex; + } + + /** + * Clears a specified bit in the bit vector and keeps up-to-date the KeyList vectors. + * @param index The position of the bit to clear. + */ + private void clearBit(int index) { + if (index < 0 || index >= vectorSize) { + throw new ArrayIndexOutOfBoundsException(index); + } + + List kl = keyVector[index]; + List fpl = fpVector[index]; + + // update key list + int listSize = kl.size(); + for (int i = 0; i < listSize && !kl.isEmpty(); i++) { + removeKey(kl.get(0), keyVector); + } + + kl.clear(); + keyVector[index].clear(); + + //update false positive list + listSize = fpl.size(); + for (int i = 0; i < listSize && !fpl.isEmpty(); i++) { + removeKey(fpl.get(0), fpVector); + } + + fpl.clear(); + fpVector[index].clear(); + + //update ratio + ratio[index] = 0.0; + + //update bit vector + bits.clear(index); + } + + /** + * Removes a given key from this filer. + * @param k The key to remove. + * @param vector The counting vector associated to the key. + */ + private void removeKey(Key k, List[] vector) { + if (k == null) { + throw new NullPointerException("Key can not be null"); + } + if (vector == null) { + throw new NullPointerException("ArrayList[] can not be null"); + } + + int[] h = hash.hash(k); + hash.clear(); + + for (int i = 0; i < nbHash; i++) { + vector[h[i]].remove(k); + } + } + + /** + * Computes the ratio A/FP. + */ + private void computeRatio() { + for (int i = 0; i < vectorSize; i++) { + double keyWeight = getWeight(keyVector[i]); + double fpWeight = getWeight(fpVector[i]); + + if (keyWeight > 0 && fpWeight > 0) { + ratio[i] = keyWeight / fpWeight; + } + } + } + + private double getWeight(List keyList) { + double weight = 0.0; + for (Key k : keyList) { + weight += k.getWeight(); + } + return weight; + } + + /** + * Creates and initialises the various vectors. 
+ */ + @SuppressWarnings("unchecked") + private void createVector() { + fpVector = new List[vectorSize]; + keyVector = new List[vectorSize]; + ratio = new double[vectorSize]; + + for (int i = 0; i < vectorSize; i++) { + fpVector[i] = Collections.synchronizedList(new ArrayList()); + keyVector[i] = Collections.synchronizedList(new ArrayList()); + ratio[i] = 0.0; + } + } + + // Writable + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + out.writeInt(list.size()); + for (Key k : list) { + k.write(out); + } + } + for (int i = 0; i < ratio.length; i++) { + out.writeDouble(ratio[i]); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + createVector(); + for (int i = 0; i < fpVector.length; i++) { + List list = fpVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < keyVector.length; i++) { + List list = keyVector[i]; + int size = in.readInt(); + for (int j = 0; j < size; j++) { + Key k = new Key(); + k.readFields(in); + list.add(k); + } + } + for (int i = 0; i < ratio.length; i++) { + ratio[i] = in.readDouble(); + } + } +} diff --git a/src/org/thrudb/util/hash/Hash.java b/src/org/thrudb/util/hash/Hash.java new file mode 100644 index 0000000..c986fba --- /dev/null +++ b/src/org/thrudb/util/hash/Hash.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.thrudb.util.hash; + +/** + * This class represents a common API for hashing functions. + */ +public abstract class Hash { + /** Constant to denote invalid hash type. */ + public static final int INVALID_HASH = -1; + /** Constant to denote {@link JenkinsHash}. */ + public static final int JENKINS_HASH = 0; + /** Constant to denote {@link MurmurHash}. */ + public static final int MURMUR_HASH = 1; + + /** + * This utility method converts String representation of hash function name + * to a symbolic constant. Currently two function types are supported, + * "jenkins" and "murmur". + * @param name hash function name + * @return one of the predefined constants + */ + public static int parseHashType(String name) { + if ("jenkins".equalsIgnoreCase(name)) { + return JENKINS_HASH; + } else if ("murmur".equalsIgnoreCase(name)) { + return MURMUR_HASH; + } else { + return INVALID_HASH; + } + } + + /** + * This utility method converts the name of the configured + * hash type to a symbolic constant. 
+ * @param conf configuration + * @return one of the predefined constants + */ +/* public static int getHashType(Configuration conf) { + String name = conf.get("hadoop.util.hash.type", "murmur"); + return parseHashType(name); + }*/ + + /** + * Get a singleton instance of hash function of a given type. + * @param type predefined hash type + * @return hash function instance, or null if type is invalid + */ + public static Hash getInstance(int type) { + switch(type) { + case JENKINS_HASH: + return JenkinsHash.getInstance(); + case MURMUR_HASH: + return MurmurHash.getInstance(); + default: + return null; + } + } + + /** + * Get a singleton instance of hash function of a type + * defined in the configuration. + * @param conf current configuration + * @return defined hash type, or null if type is invalid + */ + /*public static Hash getInstance(Configuration conf) { + int type = getHashType(conf); + return getInstance(type); + }*/ + + /** + * Calculate a hash using all bytes from the input argument, and + * a seed of -1. + * @param bytes input bytes + * @return hash value + */ + public int hash(byte[] bytes) { + return hash(bytes, bytes.length, -1); + } + + /** + * Calculate a hash using all bytes from the input argument, + * and a provided seed value. + * @param bytes input bytes + * @param initval seed value + * @return hash value + */ + public int hash(byte[] bytes, int initval) { + return hash(bytes, bytes.length, initval); + } + + /** + * Calculate a hash using bytes from 0 to length, and + * the provided seed value + * @param bytes input bytes + * @param length length of the valid bytes to consider + * @param initval seed value + * @return hash value + */ + public abstract int hash(byte[] bytes, int length, int initval); +} diff --git a/src/org/thrudb/util/hash/JenkinsHash.java b/src/org/thrudb/util/hash/JenkinsHash.java new file mode 100644 index 0000000..db1e129 --- /dev/null +++ b/src/org/thrudb/util/hash/JenkinsHash.java @@ -0,0 +1,258 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.thrudb.util.hash; + +import java.io.FileInputStream; +import java.io.IOException; + +/** + * Produces 32-bit hash for hash table lookup. + * + *

lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * You can use this free for any purpose.  It's in the public domain.
+ * It has no warranty.
+ * 
+ *
+ * @see lookup3.c
+ * @see Hash Functions (and how this
+ * function compares to others such as CRC, MD?, etc)
+ * @see Has update on the
+ * Dr. Dobbs Article
+ */
+public class JenkinsHash extends Hash {
+  private static long INT_MASK = 0x00000000ffffffffL;
+  private static long BYTE_MASK = 0x00000000000000ffL;
+
+  private static JenkinsHash _instance = new JenkinsHash();
+
+  public static Hash getInstance() {
+    return _instance;
+  }
+
+  private static long rot(long val, int pos) {
+    return ((Integer.rotateLeft(
+        (int)(val & INT_MASK), pos)) & INT_MASK);
+  }
+
+  /**
+   * taken from hashlittle() -- hash a variable-length key into a 32-bit value
+   *
+   * @param key the key (the unaligned variable-length array of bytes)
+   * @param nbytes number of bytes to include in hash
+   * @param initval can be any integer value
+   * @return a 32-bit value.  Every bit of the key affects every bit of the
+   * return value.  Two keys differing by one or two bits will have totally
+   * different hash values.
+   *
+   *

The best hash table sizes are powers of 2.  There is no need to do mod
+ * a prime (mod is sooo slow!).  If you need less than 32 bits, use a bitmask.
+ * For example, if you need only 10 bits, do
+ * h = (h & hashmask(10));
+ * In which case, the hash table should have hashsize(10) elements.
+ *
+ *

If you are hashing n strings byte[][] k, do it like this:
+ * for (int i = 0, h = 0; i < n; ++i) h = hash( k[i], h);
+ *
+ *

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
+ * code any way you wish, private, educational, or commercial.  It's free.
+ *
+ *

Use for hash table lookup, or anything where one collision in 2^^32 is + * acceptable. Do NOT use for cryptographic purposes. + */ + @SuppressWarnings("fallthrough") + public int hash(byte[] key, int nbytes, int initval) { + int length = nbytes; + long a, b, c; // We use longs because we don't have unsigned ints + a = b = c = (0x00000000deadbeefL + length + initval) & INT_MASK; + int offset = 0; + for (; length > 12; offset += 12, length -= 12) { + a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK; + a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK; + b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK; + c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK; + c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK; + + /* + * mix -- mix 3 32-bit values reversibly. + * This is reversible, so any information in (a,b,c) before mix() is + * still in (a,b,c) after mix(). + * + * If four pairs of (a,b,c) inputs are run through mix(), or through + * mix() in reverse, there are at least 32 bits of the output that + * are sometimes the same for one pair and different for another pair. + * + * This was tested for: + * - pairs that differed by one bit, by two bits, in any combination + * of top bits of (a,b,c), or in any combination of bottom bits of + * (a,b,c). + * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + * is commonly produced by subtraction) look like a single 1-bit + * difference. + * - the base values were pseudorandom, all zero but one bit set, or + * all zero plus a counter that starts at zero. + * + * Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + * satisfy this are + * 4 6 8 16 19 4 + * 9 15 3 18 27 15 + * 14 9 3 7 17 3 + * Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing for + * "differ" defined as + with a one-bit base and a two-bit delta. I + * used http://burtleburtle.net/bob/hash/avalanche.html to choose + * the operations, constants, and arrangements of the variables. + * + * This does not achieve avalanche. There are input bits of (a,b,c) + * that fail to affect some output bits of (a,b,c), especially of a. + * The most thoroughly mixed value is c, but it doesn't really even + * achieve avalanche in c. + * + * This allows some parallelism. Read-after-writes are good at doubling + * the number of bits affected, so the goal of mixing pulls in the + * opposite direction as the goal of parallelism. I did what I could. + * Rotates seem to cost as much as shifts on every machine I could lay + * my hands on, and rotates are much kinder to the top and bottom bits, + * so I used rotates. 
+       *
+       * #define mix(a,b,c) \
+       * { \
+       *   a -= c; a ^= rot(c, 4); c += b; \
+       *   b -= a; b ^= rot(a, 6); a += c; \
+       *   c -= b; c ^= rot(b, 8); b += a; \
+       *   a -= c; a ^= rot(c,16); c += b; \
+       *   b -= a; b ^= rot(a,19); a += c; \
+       *   c -= b; c ^= rot(b, 4); b += a; \
+       * }
+       *
+       * mix(a,b,c);
+       */
+      a = (a - c) & INT_MASK; a ^= rot(c, 4); c = (c + b) & INT_MASK;
+      b = (b - a) & INT_MASK; b ^= rot(a, 6); a = (a + c) & INT_MASK;
+      c = (c - b) & INT_MASK; c ^= rot(b, 8); b = (b + a) & INT_MASK;
+      a = (a - c) & INT_MASK; a ^= rot(c,16); c = (c + b) & INT_MASK;
+      b = (b - a) & INT_MASK; b ^= rot(a,19); a = (a + c) & INT_MASK;
+      c = (c - b) & INT_MASK; c ^= rot(b, 4); b = (b + a) & INT_MASK;
+    }
+
+    //-------------------------------- last block: affect all 32 bits of (c)
+    switch (length) { // all the case statements fall through
+    case 12:
+      c = (c + (((key[offset + 11] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK;
+    case 11:
+      c = (c + (((key[offset + 10] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK;
+    case 10:
+      c = (c + (((key[offset + 9] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK;
+    case 9:
+      c = (c + (key[offset + 8] & BYTE_MASK)) & INT_MASK;
+    case 8:
+      b = (b + (((key[offset + 7] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK;
+    case 7:
+      b = (b + (((key[offset + 6] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK;
+    case 6:
+      b = (b + (((key[offset + 5] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK;
+    case 5:
+      b = (b + (key[offset + 4] & BYTE_MASK)) & INT_MASK;
+    case 4:
+      a = (a + (((key[offset + 3] & BYTE_MASK) << 24) & INT_MASK)) & INT_MASK;
+    case 3:
+      a = (a + (((key[offset + 2] & BYTE_MASK) << 16) & INT_MASK)) & INT_MASK;
+    case 2:
+      a = (a + (((key[offset + 1] & BYTE_MASK) << 8) & INT_MASK)) & INT_MASK;
+    case 1:
+      a = (a + (key[offset + 0] & BYTE_MASK)) & INT_MASK;
+      break;
+    case 0:
+      return (int)(c & INT_MASK);
+    }
+
+    /*
+     * final -- final mixing of 3 32-bit values (a,b,c) into c
+     *
+     * Pairs of (a,b,c) values differing in only a few bits will usually
+     * produce values of c that look totally different. This was tested for
+     * - pairs that differed by one bit, by two bits, in any combination
+     *   of top bits of (a,b,c), or in any combination of bottom bits of
+     *   (a,b,c).
+     * - "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+     *   the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+     *   is commonly produced by subtraction) looks like a single 1-bit
+     *   difference.
+     * - the base values were pseudorandom, all zero but one bit set, or
+     *   all zero plus a counter that starts at zero.
+     *
+     * These constants passed:
+     *   14 11 25 16 4 14 24
+     *   12 14 25 16 4 14 24
+     * and these came close:
+     *   4 8 15 26 3 22 24
+     *   10 8 15 26 3 22 24
+     *   11 8 15 26 3 22 24
+     *
+     * #define final(a,b,c) \
+     * { \
+     *   c ^= b; c -= rot(b,14); \
+     *   a ^= c; a -= rot(c,11); \
+     *   b ^= a; b -= rot(a,25); \
+     *   c ^= b; c -= rot(b,16); \
+     *   a ^= c; a -= rot(c,4); \
+     *   b ^= a; b -= rot(a,14); \
+     *   c ^= b; c -= rot(b,24); \
+     * }
+     */
+    c ^= b; c = (c - rot(b,14)) & INT_MASK;
+    a ^= c; a = (a - rot(c,11)) & INT_MASK;
+    b ^= a; b = (b - rot(a,25)) & INT_MASK;
+    c ^= b; c = (c - rot(b,16)) & INT_MASK;
+    a ^= c; a = (a - rot(c,4)) & INT_MASK;
+    b ^= a; b = (b - rot(a,14)) & INT_MASK;
+    c ^= b; c = (c - rot(b,24)) & INT_MASK;
+
+    return (int)(c & INT_MASK);
+  }
+
+  /**
+   * Compute the hash of the specified file.
+   * @param args name of file to compute hash of.
+   * @throws IOException
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length != 1) {
+      System.err.println("Usage: JenkinsHash filename");
+      System.exit(-1);
+    }
+    FileInputStream in = new FileInputStream(args[0]);
+    byte[] bytes = new byte[512];
+    int value = 0;
+    JenkinsHash hash = new JenkinsHash();
+    for (int length = in.read(bytes); length > 0; length = in.read(bytes)) {
+      value = hash.hash(bytes, length, value);
+    }
+    in.close(); // release the file handle
+    System.out.println(Math.abs(value));
+  }
+}
diff --git a/src/org/thrudb/util/hash/MurmurHash.java b/src/org/thrudb/util/hash/MurmurHash.java
new file mode 100644
index 0000000..2e5d410
--- /dev/null
+++ b/src/org/thrudb/util/hash/MurmurHash.java
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.thrudb.util.hash;
+
+/**
+ * This is a very fast, non-cryptographic hash suitable for general hash-based
+ * lookup. See http://murmurhash.googlepages.com/ for more details.
+ *

+ * <p>The C version of MurmurHash 2.0 found at that site was ported
+ * to Java by Andrzej Bialecki (ab at getopt org).</p>

+ */
+public class MurmurHash extends Hash {
+  private static MurmurHash _instance = new MurmurHash();
+
+  public static Hash getInstance() {
+    return _instance;
+  }
+
+  public int hash(byte[] data, int length, int seed) {
+    int m = 0x5bd1e995;
+    int r = 24;
+
+    int h = seed ^ length;
+
+    int len_4 = length >> 2;
+
+    for (int i = 0; i < len_4; i++) {
+      int i_4 = i << 2;
+      int k = data[i_4 + 3];
+      k = k << 8;
+      k = k | (data[i_4 + 2] & 0xff);
+      k = k << 8;
+      k = k | (data[i_4 + 1] & 0xff);
+      k = k << 8;
+      k = k | (data[i_4 + 0] & 0xff);
+      k *= m;
+      k ^= k >>> r;
+      k *= m;
+      h *= m;
+      h ^= k;
+    }
+
+    // avoid calculating modulo
+    int len_m = len_4 << 2;
+    int left = length - len_m;
+
+    if (left != 0) {
+      if (left >= 3) {
+        h ^= (int) data[length - 3] << 16;
+      }
+      if (left >= 2) {
+        h ^= (int) data[length - 2] << 8;
+      }
+      if (left >= 1) {
+        h ^= (int) data[length - 1];
+      }
+
+      h *= m;
+    }
+
+    h ^= h >>> 13;
+    h *= m;
+    h ^= h >>> 15;
+
+    return h;
+  }
+}
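[Editor's note — a short usage sketch for the new class, assuming the Hash base class declares the three-argument hash(byte[], int, int) that MurmurHash implements:

    // MurmurHash is deterministic for a fixed seed, so repeated calls agree.
    byte[] data = "thrudb".getBytes();
    int h1 = MurmurHash.getInstance().hash(data, data.length, 0);
    int h2 = MurmurHash.getInstance().hash(data, data.length, 0);
    assert h1 == h2;  // same bytes, same seed -> same 32-bit value

The singleton holds no per-call state (hash() uses only locals), so sharing the instance across threads is safe.]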
diff --git a/test/org/thrudb/thrudoc/ThrudocTests.java b/test/org/thrudb/thrudoc/ThrudocTests.java
new file mode 100644
index 0000000..6e6d8b7
--- /dev/null
+++ b/test/org/thrudb/thrudoc/ThrudocTests.java
@@ -0,0 +1,89 @@
+package org.thrudb.thrudoc;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+import junit.framework.TestCase;
+
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.apache.thrift.transport.TFramedTransport;
+import org.apache.thrift.transport.TSocket;
+
+public class ThrudocTests extends TestCase {
+
+  private ExecutorService serviceThread = Executors.newSingleThreadExecutor();
+  private Thrudoc.Client client;
+  private TFramedTransport transport;
+
+
+  public void setUp() {
+    serviceThread.submit(new ThrudocTestService());
+
+    try{
+      Thread.sleep(1000);
+    }catch(InterruptedException e){
+      fail(e.getMessage());
+    }
+
+    TSocket socket = new TSocket("localhost", 11291);
+    transport = new TFramedTransport(socket);
+    TBinaryProtocol protocol = new TBinaryProtocol(transport);
+
+    client = new Thrudoc.Client(protocol);
+
+    try{
+      transport.open();
+    }catch(TException e){
+      fail(e.getMessage());
+    }
+  }
+
+  public void tearDown(){
+    transport.close();
+    serviceThread.shutdown();
+  }
+
+  class ThrudocTestService implements Runnable {
+    private ThrudocServer thrudocServer = new ThrudocServer();
+
+
+    public void run() {
+      try{
+
+        thrudocServer.setDocRoot(".");
+        thrudocServer.setPort(11291);
+        thrudocServer.setThreadCount(5);
+        thrudocServer.start();
+      }catch(Throwable t){
+        thrudocServer.stop();
+        fail(t.toString());
+      }
+    }
+  }
+
+
+  public void testMapOperations(){
+    String bucket = "test_bucket";
+
+    try{
+      client.delete_bucket(bucket);
+      client.create_bucket(bucket);
+
+      try{
+        client.get(bucket, "key");
+        fail("key should not exist");
+      }catch(InvalidKeyException e){}
+
+      client.put(bucket, "key", "value".getBytes());
+
+      assertTrue("value".equals(new String(client.get(bucket, "key"))));
+
+    }catch(Throwable t){
+      t.printStackTrace();
+      fail(t.getMessage());
+    }
+  }
+
+
+}
diff --git a/test/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetTests.java b/test/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetTests.java
index 3d97ce9..762a01d 100644
--- a/test/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetTests.java
+++ b/test/org/thrudb/thrudoc/tokyocabinet/TokyoCabinetTests.java
@@ -63,12 +63,10 @@ public void testCRUD(){
 
     //re-Retrieve
     for(int i=0; i<100; i++){
-      try{
-        tdb.get("key"+Integer.toString(i));
+
+      if(tdb.get("key"+Integer.toString(i)) != null)
         fail("key still exists after removed");
-      }catch(InvalidKeyException ex){
-        //this is good
-      }
+
    }
 
   }catch(Throwable t){