Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Python classes generated by the Thrift compiler for http://trec-kba.org/…

  • Loading branch information...
commit 1cb59b79155b497c39f524673827d38e2929ac5d 1 parent 00f5524
@jrfgit jrfgit authored
View
1  kba_thrift/__init__.py
@@ -0,0 +1 @@
+__all__ = ['ttypes', 'constants']
View
11 kba_thrift/constants.py
@@ -0,0 +1,11 @@
+#
+# Autogenerated by Thrift Compiler (0.8.0)
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+# options string: py
+#
+
+from thrift.Thrift import TType, TMessageType, TException
+from ttypes import *
+
View
565 kba_thrift/ttypes.py
@@ -0,0 +1,565 @@
+#
+# Autogenerated by Thrift Compiler (0.8.0)
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+# options string: py
+#
+
+from thrift.Thrift import TType, TMessageType, TException
+
+from thrift.transport import TTransport
+from thrift.protocol import TBinaryProtocol, TProtocol
+try:
+ from thrift.protocol import fastbinary
+except:
+ fastbinary = None
+
+
+
class ContentItem:
    """
    ContentItem is the thrift analog of
    http://trec-kba.org/schemas/v1.0/content-item.json

    The JSON version has a 'stages' property that contains descriptions
    **and also names** of additional properties on the ContentItem.
    That was overly flexible.  Each content-item in the KBA corpus can
    have a 'cleansed' and 'ner' property.  'cleansed' is generated from
    'raw', and 'ner' is generated from 'cleansed.'  Generally,
    'cleansed' is a tag-stripped version of 'raw', and 'ner' is the
    output of a named entity recognizer that generates
    one-word-per-line output.

    For the kba-stream-corpus-2012, the specific tag-stripping and NER
    configurations were:
      'raw' --> boilerpipe 1.2.0 ArticleExtractor --> 'cleansed'

      'cleansed' -> Stanford CoreNLP ver 1.2.0 with annotators
        {tokenize, cleanxml, ssplit, pos, lemma, ner}, property
        pos.maxlen=100 --> 'ner'

    Attributes:
     - raw
     - encoding
     - cleansed
     - ner
    """

    thrift_spec = (
        None,  # 0
        (1, TType.STRING, 'raw', None, None, ),  # 1
        (2, TType.STRING, 'encoding', None, None, ),  # 2
        (3, TType.STRING, 'cleansed', None, None, ),  # 3
        (4, TType.STRING, 'ner', None, None, ),  # 4
    )

    # Field descriptors without the index-0 placeholder, in field-id order,
    # plus a lookup by field id; both drive the pure-Python read/write paths.
    _FIELDS = thrift_spec[1:]
    _FIELDS_BY_ID = dict((spec[0], spec) for spec in _FIELDS)

    def __init__(self, raw=None, encoding=None, cleansed=None, ner=None,):
        """Store the four fields; every field defaults to None (unset)."""
        self.raw = raw
        self.encoding = encoding
        self.cleansed = cleansed
        self.ner = ner

    def read(self, iprot):
        """
        Populate this struct from the protocol ``iprot``.

        If ``iprot`` is exactly a TBinaryProtocolAccelerated, its transport
        is a CReadableTransport and the ``fastbinary`` extension was
        imported, decoding is delegated to ``fastbinary.decode_binary``.
        Otherwise fields are read one at a time until a STOP field; a field
        with an unknown id or an unexpected wire type is skipped.
        """
        # Exact class comparison (not isinstance): subclasses of the
        # accelerated protocol do not take the fast path.
        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and isinstance(iprot.trans, TTransport.CReadableTransport)
                and self.thrift_spec is not None
                and fastbinary is not None):
            fastbinary.decode_binary(
                self, iprot.trans, (self.__class__, self.thrift_spec))
            return
        iprot.readStructBegin()
        while True:
            (fname, ftype, fid) = iprot.readFieldBegin()
            if ftype == TType.STOP:
                break
            spec = self._FIELDS_BY_ID.get(fid)
            # Every field of this struct is a STRING, so a matching wire
            # type always means "read a string".
            if spec is not None and ftype == spec[1]:
                setattr(self, spec[2], iprot.readString())
            else:
                iprot.skip(ftype)
            iprot.readFieldEnd()
        iprot.readStructEnd()

    def write(self, oprot):
        """
        Serialize this struct to the protocol ``oprot``.

        Uses ``fastbinary.encode_binary`` when ``oprot`` is exactly a
        TBinaryProtocolAccelerated and the extension was imported;
        otherwise writes each field in field-id order.  Fields whose value
        is None are omitted.
        """
        if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and self.thrift_spec is not None
                and fastbinary is not None):
            oprot.trans.write(
                fastbinary.encode_binary(
                    self, (self.__class__, self.thrift_spec)))
            return
        oprot.writeStructBegin('ContentItem')
        for fid, ftype, name, _type_args, _default in self._FIELDS:
            value = getattr(self, name)
            if value is None:
                continue
            oprot.writeFieldBegin(name, ftype, fid)
            oprot.writeString(value)
            oprot.writeFieldEnd()
        oprot.writeFieldStop()
        oprot.writeStructEnd()

    def validate(self):
        """No-op; this struct performs no validation and returns None."""
        return

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from the instance dict."""
        # items() instead of the Python-2-only iteritems(), so this works
        # on both Python 2 and Python 3.
        L = ['%s=%r' % (key, value)
             for key, value in self.__dict__.items()]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with an equal instance dict."""
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        return not (self == other)
+
class CorpusItem:
    """
    CorpusItem is the thrift equivalent of
    http://trec-kba.org/schemas/v1.0/corpus-item.json

    Attributes:
     - doc_id
     - abs_url
     - schost
     - original_url
     - source
     - title
     - body
     - anchor
     - source_metadata
    """

    thrift_spec = (
        None,  # 0
        (1, TType.STRING, 'doc_id', None, None, ),  # 1
        (2, TType.STRING, 'abs_url', None, None, ),  # 2
        (3, TType.STRING, 'schost', None, None, ),  # 3
        (4, TType.STRING, 'original_url', None, None, ),  # 4
        (5, TType.STRING, 'source', None, None, ),  # 5
        (6, TType.STRUCT, 'title', (ContentItem, ContentItem.thrift_spec), None, ),  # 6
        (7, TType.STRUCT, 'body', (ContentItem, ContentItem.thrift_spec), None, ),  # 7
        (8, TType.STRUCT, 'anchor', (ContentItem, ContentItem.thrift_spec), None, ),  # 8
        (9, TType.STRING, 'source_metadata', None, None, ),  # 9
    )

    # Field descriptors without the index-0 placeholder, in field-id order,
    # plus a lookup by field id; both drive the pure-Python read/write paths.
    _FIELDS = thrift_spec[1:]
    _FIELDS_BY_ID = dict((spec[0], spec) for spec in _FIELDS)

    def __init__(self, doc_id=None, abs_url=None, schost=None,
                 original_url=None, source=None, title=None, body=None,
                 anchor=None, source_metadata=None,):
        """Store the nine fields; every field defaults to None (unset)."""
        self.doc_id = doc_id
        self.abs_url = abs_url
        self.schost = schost
        self.original_url = original_url
        self.source = source
        self.title = title
        self.body = body
        self.anchor = anchor
        self.source_metadata = source_metadata

    def read(self, iprot):
        """
        Populate this struct from the protocol ``iprot``.

        If ``iprot`` is exactly a TBinaryProtocolAccelerated, its transport
        is a CReadableTransport and the ``fastbinary`` extension was
        imported, decoding is delegated to ``fastbinary.decode_binary``.
        Otherwise fields are read one at a time until a STOP field; a field
        with an unknown id or an unexpected wire type is skipped.
        """
        # Exact class comparison (not isinstance): subclasses of the
        # accelerated protocol do not take the fast path.
        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and isinstance(iprot.trans, TTransport.CReadableTransport)
                and self.thrift_spec is not None
                and fastbinary is not None):
            fastbinary.decode_binary(
                self, iprot.trans, (self.__class__, self.thrift_spec))
            return
        iprot.readStructBegin()
        while True:
            (fname, ftype, fid) = iprot.readFieldBegin()
            if ftype == TType.STOP:
                break
            spec = self._FIELDS_BY_ID.get(fid)
            if spec is None or ftype != spec[1]:
                iprot.skip(ftype)
            elif ftype == TType.STRUCT:
                # spec[3][0] is the nested struct class (ContentItem).  The
                # attribute is assigned before the nested read starts.
                child = spec[3][0]()
                setattr(self, spec[2], child)
                child.read(iprot)
            else:
                # Every non-struct field of this struct is a STRING.
                setattr(self, spec[2], iprot.readString())
            iprot.readFieldEnd()
        iprot.readStructEnd()

    def write(self, oprot):
        """
        Serialize this struct to the protocol ``oprot``.

        Uses ``fastbinary.encode_binary`` when ``oprot`` is exactly a
        TBinaryProtocolAccelerated and the extension was imported;
        otherwise writes each field in field-id order.  Fields whose value
        is None are omitted; struct fields serialize themselves through
        their own ``write``.
        """
        if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and self.thrift_spec is not None
                and fastbinary is not None):
            oprot.trans.write(
                fastbinary.encode_binary(
                    self, (self.__class__, self.thrift_spec)))
            return
        oprot.writeStructBegin('CorpusItem')
        for fid, ftype, name, _type_args, _default in self._FIELDS:
            value = getattr(self, name)
            if value is None:
                continue
            oprot.writeFieldBegin(name, ftype, fid)
            if ftype == TType.STRUCT:
                value.write(oprot)
            else:
                oprot.writeString(value)
            oprot.writeFieldEnd()
        oprot.writeFieldStop()
        oprot.writeStructEnd()

    def validate(self):
        """No-op; this struct performs no validation and returns None."""
        return

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from the instance dict."""
        # items() instead of the Python-2-only iteritems(), so this works
        # on both Python 2 and Python 3.
        L = ['%s=%r' % (key, value)
             for key, value in self.__dict__.items()]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with an equal instance dict."""
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        return not (self == other)
+
class StreamTime:
    """
    StreamTime is a timestamp measured in seconds since the 1970 epoch.
    'news', 'linking', and 'social' each have slightly different ways
    of generating these timestamps.  See details:
    http://trec-kba.org/kba-stream-corpus-2012.shtml

    Attributes:
     - epoch_ticks
     - zulu_timestamp
    """

    thrift_spec = (
        None,  # 0
        (1, TType.DOUBLE, 'epoch_ticks', None, None, ),  # 1
        (2, TType.STRING, 'zulu_timestamp', None, None, ),  # 2
    )

    def __init__(self, epoch_ticks=None, zulu_timestamp=None,):
        """Store the two fields; both default to None (unset)."""
        self.epoch_ticks = epoch_ticks
        self.zulu_timestamp = zulu_timestamp

    def read(self, iprot):
        """
        Populate this struct from the protocol ``iprot``.

        If ``iprot`` is exactly a TBinaryProtocolAccelerated, its transport
        is a CReadableTransport and the ``fastbinary`` extension was
        imported, decoding is delegated to ``fastbinary.decode_binary``.
        Otherwise fields are read one at a time until a STOP field; a field
        with an unknown id or an unexpected wire type is skipped.
        """
        # Exact class comparison (not isinstance): subclasses of the
        # accelerated protocol do not take the fast path.
        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and isinstance(iprot.trans, TTransport.CReadableTransport)
                and self.thrift_spec is not None
                and fastbinary is not None):
            fastbinary.decode_binary(
                self, iprot.trans, (self.__class__, self.thrift_spec))
            return
        iprot.readStructBegin()
        while True:
            (fname, ftype, fid) = iprot.readFieldBegin()
            if ftype == TType.STOP:
                break
            if fid == 1 and ftype == TType.DOUBLE:
                self.epoch_ticks = iprot.readDouble()
            elif fid == 2 and ftype == TType.STRING:
                self.zulu_timestamp = iprot.readString()
            else:
                # Unknown field id, or known id with an unexpected type.
                iprot.skip(ftype)
            iprot.readFieldEnd()
        iprot.readStructEnd()

    def write(self, oprot):
        """
        Serialize this struct to the protocol ``oprot``.

        Uses ``fastbinary.encode_binary`` when ``oprot`` is exactly a
        TBinaryProtocolAccelerated and the extension was imported;
        otherwise writes ``epoch_ticks`` then ``zulu_timestamp``, omitting
        whichever is None.
        """
        if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and self.thrift_spec is not None
                and fastbinary is not None):
            oprot.trans.write(
                fastbinary.encode_binary(
                    self, (self.__class__, self.thrift_spec)))
            return
        oprot.writeStructBegin('StreamTime')
        if self.epoch_ticks is not None:
            oprot.writeFieldBegin('epoch_ticks', TType.DOUBLE, 1)
            oprot.writeDouble(self.epoch_ticks)
            oprot.writeFieldEnd()
        if self.zulu_timestamp is not None:
            oprot.writeFieldBegin('zulu_timestamp', TType.STRING, 2)
            oprot.writeString(self.zulu_timestamp)
            oprot.writeFieldEnd()
        oprot.writeFieldStop()
        oprot.writeStructEnd()

    def validate(self):
        """No-op; this struct performs no validation and returns None."""
        return

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from the instance dict."""
        # items() instead of the Python-2-only iteritems(), so this works
        # on both Python 2 and Python 3.
        L = ['%s=%r' % (key, value)
             for key, value in self.__dict__.items()]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with an equal instance dict."""
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        return not (self == other)
+
class StreamItem:
    """
    This is the primary interface to the data.  StreamItem is the
    thrift equivalent of
    http://trec-kba.org/schemas/v1.0/stream-item.json

    which extends corpus-item.json.  For better or worse, thrift does
    not support inheritance on struct, so this copies the first nine
    fields of CorpusItem and then adds two more fields.

    Attributes:
     - doc_id
     - abs_url
     - schost
     - original_url
     - source
     - title
     - body
     - anchor
     - source_metadata
     - stream_id
     - stream_time
    """

    thrift_spec = (
        None,  # 0
        (1, TType.STRING, 'doc_id', None, None, ),  # 1
        (2, TType.STRING, 'abs_url', None, None, ),  # 2
        (3, TType.STRING, 'schost', None, None, ),  # 3
        (4, TType.STRING, 'original_url', None, None, ),  # 4
        (5, TType.STRING, 'source', None, None, ),  # 5
        (6, TType.STRUCT, 'title', (ContentItem, ContentItem.thrift_spec), None, ),  # 6
        (7, TType.STRUCT, 'body', (ContentItem, ContentItem.thrift_spec), None, ),  # 7
        (8, TType.STRUCT, 'anchor', (ContentItem, ContentItem.thrift_spec), None, ),  # 8
        (9, TType.STRING, 'source_metadata', None, None, ),  # 9
        (10, TType.STRING, 'stream_id', None, None, ),  # 10
        (11, TType.STRUCT, 'stream_time', (StreamTime, StreamTime.thrift_spec), None, ),  # 11
    )

    # Field descriptors without the index-0 placeholder, in field-id order,
    # plus a lookup by field id; both drive the pure-Python read/write paths.
    _FIELDS = thrift_spec[1:]
    _FIELDS_BY_ID = dict((spec[0], spec) for spec in _FIELDS)

    def __init__(self, doc_id=None, abs_url=None, schost=None,
                 original_url=None, source=None, title=None, body=None,
                 anchor=None, source_metadata=None, stream_id=None,
                 stream_time=None,):
        """Store the eleven fields; every field defaults to None (unset)."""
        self.doc_id = doc_id
        self.abs_url = abs_url
        self.schost = schost
        self.original_url = original_url
        self.source = source
        self.title = title
        self.body = body
        self.anchor = anchor
        self.source_metadata = source_metadata
        self.stream_id = stream_id
        self.stream_time = stream_time

    def read(self, iprot):
        """
        Populate this struct from the protocol ``iprot``.

        If ``iprot`` is exactly a TBinaryProtocolAccelerated, its transport
        is a CReadableTransport and the ``fastbinary`` extension was
        imported, decoding is delegated to ``fastbinary.decode_binary``.
        Otherwise fields are read one at a time until a STOP field; a field
        with an unknown id or an unexpected wire type is skipped.
        """
        # Exact class comparison (not isinstance): subclasses of the
        # accelerated protocol do not take the fast path.
        if (iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and isinstance(iprot.trans, TTransport.CReadableTransport)
                and self.thrift_spec is not None
                and fastbinary is not None):
            fastbinary.decode_binary(
                self, iprot.trans, (self.__class__, self.thrift_spec))
            return
        iprot.readStructBegin()
        while True:
            (fname, ftype, fid) = iprot.readFieldBegin()
            if ftype == TType.STOP:
                break
            spec = self._FIELDS_BY_ID.get(fid)
            if spec is None or ftype != spec[1]:
                iprot.skip(ftype)
            elif ftype == TType.STRUCT:
                # spec[3][0] is the nested struct class (ContentItem or
                # StreamTime).  The attribute is assigned before the nested
                # read starts.
                child = spec[3][0]()
                setattr(self, spec[2], child)
                child.read(iprot)
            else:
                # Every non-struct field of this struct is a STRING.
                setattr(self, spec[2], iprot.readString())
            iprot.readFieldEnd()
        iprot.readStructEnd()

    def write(self, oprot):
        """
        Serialize this struct to the protocol ``oprot``.

        Uses ``fastbinary.encode_binary`` when ``oprot`` is exactly a
        TBinaryProtocolAccelerated and the extension was imported;
        otherwise writes each field in field-id order.  Fields whose value
        is None are omitted; struct fields serialize themselves through
        their own ``write``.
        """
        if (oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated
                and self.thrift_spec is not None
                and fastbinary is not None):
            oprot.trans.write(
                fastbinary.encode_binary(
                    self, (self.__class__, self.thrift_spec)))
            return
        oprot.writeStructBegin('StreamItem')
        for fid, ftype, name, _type_args, _default in self._FIELDS:
            value = getattr(self, name)
            if value is None:
                continue
            oprot.writeFieldBegin(name, ftype, fid)
            if ftype == TType.STRUCT:
                value.write(oprot)
            else:
                oprot.writeString(value)
            oprot.writeFieldEnd()
        oprot.writeFieldStop()
        oprot.writeStructEnd()

    def validate(self):
        """No-op; this struct performs no validation and returns None."""
        return

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` built from the instance dict."""
        # items() instead of the Python-2-only iteritems(), so this works
        # on both Python 2 and Python 3.
        L = ['%s=%r' % (key, value)
             for key, value in self.__dict__.items()]
        return '%s(%s)' % (self.__class__.__name__, ', '.join(L))

    def __eq__(self, other):
        """Equal when ``other`` is an instance of this class with an equal instance dict."""
        return isinstance(other, self.__class__) and self.__dict__ == other.__dict__

    def __ne__(self, other):
        """Negation of ``__eq__``."""
        return not (self == other)
Please sign in to comment.
Something went wrong with that request. Please try again.