From 7ea9d30430278e9ab6f763c99b16eaa1f5fb8709 Mon Sep 17 00:00:00 2001
From: Matthieu Monsch
Date: Tue, 25 Aug 2015 08:19:43 -0700
Subject: [PATCH] Fix bug created by `fastavro` API change.

Also pin `fastavro` version to prevent this from happening in the
future.
---
 README.rst                | 25 ++++++++++++++++++-------
 examples/json.py          | 19 ++++++++-----------
 hdfs/ext/avro/__init__.py | 11 +++++------
 setup.py                  |  4 ++--
 4 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/README.rst b/README.rst
index 064dae8..d187529 100644
--- a/README.rst
+++ b/README.rst
@@ -22,15 +22,26 @@ API and command line interface for HDFS.
 
   In [1]: CLIENT.list('models/')
   Out[1]: ['1.json', '2.json']
 
-  In [2]: with CLIENT.read('models/2.json', encoding='utf-8') as reader:
+  In [2]: CLIENT.status('models/2.json')
+  Out[2]: {
+    'accessTime': 1439743128690,
+    'blockSize': 134217728,
+    'childrenNum': 0,
+    'fileId': 16389,
+    'group': 'supergroup',
+    'length': 48,
+    'modificationTime': 1439743129392,
+    'owner': 'drwho',
+    'pathSuffix': '',
+    'permission': '755',
+    'replication': 1,
+    'storagePolicy': 0,
+    'type': 'FILE'
+  }
+
+  In [3]: with CLIENT.read('models/2.json', encoding='utf-8') as reader:
      ...:     from json import load
      ...:     model = load(reader)
-     ...:     model['normalize'] = False
-     ...:
-
-  In [3]: with CLIENT.write('models/2.json', encoding='utf-8', overwrite=True) as writer:
-     ...:     from json import dump
-     ...:     dump(model, writer)
      ...:
 
diff --git a/examples/json.py b/examples/json.py
index c22a0c8..72fb25b 100644
--- a/examples/json.py
+++ b/examples/json.py
@@ -1,28 +1,25 @@
 #!/usr/bin/env python
 # encoding: utf-8
 
-"""Sample HdfsCLI script.
-
-In this script, we show how to transfer JSON-serialized data to and from HDFS.
-
-"""
+"""Sample HdfsCLI script."""
 
 from hdfs import Config
-from json import dumps, loads
+from json import dumps, load
 
 
 # Get the default alias' client.
 client = Config().get_client()
 
-# Some sample data.
+# Our new model.
 weights = {
-  'first_feature': 48,
-  'second_feature': 12,
+  '(intercept)': 48.,
+  'first_feature': 2.,
+  'second_feature': 12.,
   # ...
 }
 
 # The path on HDFS where we will store the file.
-path = 'static/weights.json'
+path = 'models/3.json'
 
 # Serialize to JSON and upload to HDFS.
 data = dumps(weights)
@@ -34,4 +31,4 @@
 
 # Download the file back and check that the deserialized contents match.
 with client.read(path, encoding='utf-8') as reader:
-  assert loads(contents) == weights
+  assert load(reader) == weights
diff --git a/hdfs/ext/avro/__init__.py b/hdfs/ext/avro/__init__.py
index 2803c80..5e375e3 100644
--- a/hdfs/ext/avro/__init__.py
+++ b/hdfs/ext/avro/__init__.py
@@ -309,12 +309,11 @@ def _write(self, fo):
 
     def dump_header():
       """Write header."""
-      fastavro._writer.write_header(
-        fo,
-        self._schema,
-        self._codec,
-        self._sync_marker
-      )
+      metadata = {
+        'avro.codec': self._codec,
+        'avro.schema': dumps(self._schema),
+      }
+      fastavro._writer.write_header(fo, metadata, self._sync_marker)
       _logger.debug('Wrote header. Sync marker: %r', self._sync_marker)
       fastavro._writer.acquaint_schema(self._schema)
 
diff --git a/setup.py b/setup.py
index 70b027a..5bec3b7 100644
--- a/setup.py
+++ b/setup.py
@@ -54,9 +54,9 @@ def _get_long_description():
     'six>=1.9.0',
   ],
   extras_require={
-    'avro': ['fastavro>=0.8.6'],
+    'avro': ['fastavro==0.9.2'],
     'kerberos': ['requests-kerberos>=0.7.0'],
-    'dataframe': ['fastavro>=0.8.6', 'pandas>=0.14.1'],
+    'dataframe': ['fastavro==0.9.2', 'pandas>=0.14.1'],
   },
   entry_points={'console_scripts': [
     '%s = hdfs.__main__:main' % (ENTRY_POINT, ),
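
Background on the `hdfs/ext/avro` hunk above: under the fastavro release
pinned here, `write_header` takes the header metadata as a single dict (the
codec under 'avro.codec' and the JSON-serialized schema under 'avro.schema')
followed by the sync marker, instead of the separate schema and codec
arguments the old code passed. A minimal standalone sketch of that call,
assuming fastavro==0.9.2 as pinned by this patch; the schema, codec, and
output path below are illustrative only, not part of the patch:

  import os
  from json import dumps

  import fastavro  # assumes the pinned release, fastavro==0.9.2

  # Illustrative Avro record schema (hypothetical, echoing the json.py example).
  schema = {
    'type': 'record',
    'name': 'Weights',
    'fields': [{'name': 'first_feature', 'type': 'double'}],
  }
  sync_marker = os.urandom(16)  # random 16-byte marker separating data blocks

  with open('weights.avro', 'wb') as fo:
    # New-style header: codec and schema travel together in one metadata dict.
    metadata = {
      'avro.codec': 'null',  # no compression
      'avro.schema': dumps(schema),
    }
    fastavro._writer.write_header(fo, metadata, sync_marker)
    # Make the schema known to the writer, as the patched dump_header does next.
    fastavro._writer.acquaint_schema(schema)

Writing records after the header is unchanged and out of scope here; the
point is only the reshuffled `write_header` signature that broke the
extension.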
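
On the pin itself: the extension reaches into `fastavro._writer`, a
leading-underscore module with no stability guarantee, so any fastavro
release may move these internals again. Pinning `==0.9.2` in both extras
means `pip install hdfs[avro]` and `pip install hdfs[dataframe]` resolve to
the exact release the code was written against, until the pin is bumped
deliberately.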