Skip to content

Commit

Permalink
Fix bug created by fastavro API change.
Browse files Browse the repository at this point in the history
Also pin `fastavro` version to prevent this from happening in the
future.
  • Loading branch information
mtth committed Aug 25, 2015
1 parent 4720160 commit 7ea9d30
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 26 deletions.
25 changes: 18 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,26 @@ API and command line interface for HDFS.
In [1]: CLIENT.list('models/')
Out[1]: ['1.json', '2.json']
In [2]: with CLIENT.read('models/2.json', encoding='utf-8') as reader:
In [2]: CLIENT.status('models/2.json')
Out[2]: {
'accessTime': 1439743128690,
'blockSize': 134217728,
'childrenNum': 0,
'fileId': 16389,
'group': 'supergroup',
'length': 48,
'modificationTime': 1439743129392,
'owner': 'drwho',
'pathSuffix': '',
'permission': '755',
'replication': 1,
'storagePolicy': 0,
'type': 'FILE'
}
In [3]: with CLIENT.read('models/2.json', encoding='utf-8') as reader:
...: from json import load
...: model = load(reader)
...: model['normalize'] = False
...:
In [4]: with CLIENT.write('models/2.json', encoding='utf-8', overwrite=True) as writer:
...: from json import dump
...: dump(model, writer)
...:
Expand Down
19 changes: 8 additions & 11 deletions examples/json.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,25 @@
#!/usr/bin/env python
# encoding: utf-8

"""Sample HdfsCLI script.
In this script, we show how to transfer JSON-serialized data to and from HDFS.
"""
"""Sample HdfsCLI script."""

from hdfs import Config
from json import dumps, loads
from json import dumps, load


# Get the default alias' client.
client = Config().get_client()

# Some sample data.
# Our new model.
weights = {
'first_feature': 48,
'second_feature': 12,
'(intercept)': 48.,
'first_feature': 2.,
'second_feature': 12.,
# ...
}

# The path on HDFS where we will store the file.
path = 'static/weights.json'
path = 'models/3.json'

# Serialize to JSON and upload to HDFS.
data = dumps(weights)
Expand All @@ -34,4 +31,4 @@

# Download the file back and check that the deserialized contents match.
with client.read(path, encoding='utf-8') as reader:
assert loads(contents) == weights
assert load(reader) == weights
11 changes: 5 additions & 6 deletions hdfs/ext/avro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,12 +309,11 @@ def _write(self, fo):

def dump_header():
"""Write header."""
fastavro._writer.write_header(
fo,
self._schema,
self._codec,
self._sync_marker
)
metadata = {
'avro.codec': self._codec,
'avro.schema': dumps(self._schema),
}
fastavro._writer.write_header(fo, metadata, self._sync_marker)
_logger.debug('Wrote header. Sync marker: %r', self._sync_marker)
fastavro._writer.acquaint_schema(self._schema)

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def _get_long_description():
'six>=1.9.0',
],
extras_require={
'avro': ['fastavro>=0.8.6'],
'avro': ['fastavro==0.9.2'],
'kerberos': ['requests-kerberos>=0.7.0'],
'dataframe': ['fastavro>=0.8.6', 'pandas>=0.14.1'],
'dataframe': ['fastavro==0.9.2', 'pandas>=0.14.1'],
},
entry_points={'console_scripts': [
'%s = hdfs.__main__:main' % (ENTRY_POINT, ),
Expand Down

0 comments on commit 7ea9d30

Please sign in to comment.