In [1]:
import json

## Encoding and Decoding Simple Data Types

In [2]:
# A single record mixing a str, a tuple and a float; the tuple is the
# interesting part because JSON has no tuple type.
record = {'a': 'A', 'b': (2, 4), 'c': 3.0}
data = [record]
print('DATA:', repr(data))

# dumps() serializes the Python structure to JSON text.
data_string = json.dumps(data)
print('JSON:', data_string)

DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
JSON: [{"a": "A", "b": [2, 4], "c": 3.0}]


In [3]:
# Show the Python type of the value that json.dumps() returned.
print(type(data_string))

<class 'str'>


Encoding, then re-decoding may not give exactly the same type of object

In [5]:
# Round-trip the sample through JSON text and back into Python objects.
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA   :', data)

data_string = json.dumps(data)
print('ENCODED:', data_string)

decoded = json.loads(data_string)
print('DECODED:', decoded)

# Report the container type stored under 'b' before and after the trip.
for label, payload in (('ORIGINAL:', data), ('DECODED :', decoded)):
    print(label, type(payload[0]['b']))

DATA   : [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
ENCODED: [{"a": "A", "b": [2, 4], "c": 3.0}]
DECODED: [{'a': 'A', 'b': [2, 4], 'c': 3.0}]
ORIGINAL: <class 'tuple'>
DECODED : <class 'list'>


As you can see, the **tuple becomes a list** after the round trip.

## Human-consumable vs. Compact Output

### you can sort output

In [10]:
import json

# Keys are deliberately out of alphabetical order so sorting is visible.
data = [{'a': 'A', 'c': 3.0, 'b': (2, 4)}]

# Produce every encoding up front, then report on them.
unsorted = json.dumps(data)
first = json.dumps(data, sort_keys=False)
second = json.dumps(data, sort_keys=True)

print('DATA:', repr(data))
print('JSON:', unsorted)
print('SORT:', second)

# The default behaves like sort_keys=False; sorting changes the text.
print('UNSORTED MATCH:', unsorted == first)
print('SORTED MATCH  :', first == second)

DATA: [{'a': 'A', 'c': 3.0, 'b': (2, 4)}]
JSON: [{"a": "A", "c": 3.0, "b": [2, 4]}]
SORT: [{"a": "A", "b": [2, 4], "c": 3.0}]
UNSORTED MATCH: True
SORTED MATCH  : False


### You can specify a value for `indent`, so the output is formatted

In [11]:
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA:', repr(data))

# Same data encoded twice: once on a single line, once pretty-printed
# with two spaces per nesting level.
one_line = json.dumps(data, sort_keys=True)
indented = json.dumps(data, sort_keys=True, indent=2)

print('NORMAL:', one_line)
print('INDENT:', indented)

DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
NORMAL: [{"a": "A", "b": [2, 4], "c": 3.0}]
INDENT: [
  {
    "a": "A",
    "b": [
      2,
      4
    ],
    "c": 3.0
  }
]


### Make it more compact

In [12]:
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA:', repr(data))

# Encode the same data three ways so the text lengths can be compared.
plain_dump = json.dumps(data)
small_indent = json.dumps(data, indent=2)
with_separators = json.dumps(data, separators=(',', ':'))

# (label, text) pairs; the length of each text is what gets reported.
size_report = (
    ('repr(data)             :', repr(data)),
    ('dumps(data)            :', plain_dump),
    ('dumps(data, indent=2)  :', small_indent),
    ('dumps(data, separators):', with_separators),
)
for label, text in size_report:
    print(label, len(text))

DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
repr(data)             : 35
dumps(data)            : 35
dumps(data, indent=2)  : 73
dumps(data, separators): 29


The separators argument to dumps() should be a tuple containing the strings to separate items in a list and keys from values in a dictionary. The default is (', ', ': '). By removing the whitespace, a more compact output is produced

## Encoding Dictionaries

The JSON format expects the keys of a dictionary to be strings. Trying to encode a dictionary with non-string types as keys produces a TypeError. One way to work around that limitation is to tell the encoder to skip over non-string keys using the `skipkeys` argument.


In [17]:
import json

# The last key is a tuple, which cannot be used as a JSON object key.
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0, ('d',): 'D tuple'}]

print('First attempt')
try:
    strict_result = json.dumps(data)
except TypeError as err:
    print('ERROR:', err)
else:
    print(strict_result)

print()
print('Second attempt')
# skipkeys=True drops the offending entry instead of raising.
lenient_result = json.dumps(data, skipkeys=True)
print(lenient_result)

First attempt
ERROR: keys must be a string

Second attempt
[{"a": "A", "b": [2, 4], "c": 3.0}]


## Working with custom Types

### Convert to known type

In [18]:
class MyObj:
    """Tiny custom type used to demonstrate encoding non-builtin objects.

    The single constructor argument is stored unchanged on the ``s``
    attribute and shown inside the ``repr()`` of the instance.
    """

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        template = '<MyObj({})>'
        return template.format(self.s)

In [19]:

obj = MyObj('instance value goes here')

# Without a default hook the encoder has no way to serialize MyObj.
print('First attempt')
try:
    encoded_obj = json.dumps(obj)
except TypeError as err:
    print('ERROR:', err)
else:
    print(encoded_obj)


def convert_to_builtin_type(obj):
    """Convert an arbitrary object into a JSON-encodable dictionary.

    Meant to be passed as the ``default`` argument of ``json.dumps()``.
    The result records the class name and module of ``obj`` under the
    ``'__class__'`` and ``'__module__'`` keys, followed by a copy of the
    instance attributes taken from ``obj.__dict__``.

    Args:
        obj: The object the encoder could not serialize on its own.

    Returns:
        dict: ``{'__class__': ..., '__module__': ..., **instance attributes}``.

    Raises:
        TypeError: If ``obj`` has no ``__dict__`` or ``__module__`` (for
            example a ``set``), so it cannot be represented this way.
    """
    print('default(', repr(obj), ')')
    try:
        attributes = vars(obj)
        module_name = obj.__module__
    except (TypeError, AttributeError):
        # A default hook is expected to raise TypeError for values it
        # cannot handle; do not leak an AttributeError to the caller.
        raise TypeError(
            'Object of type {} is not JSON serializable'.format(
                type(obj).__name__
            )
        ) from None
    # Convert objects to a dictionary of their representation
    d = {
        '__class__': obj.__class__.__name__,
        '__module__': module_name,
    }
    d.update(attributes)
    return d


print()
print('With default')
# The hook is called by dumps() for every object it cannot encode itself.
encoded_with_default = json.dumps(obj, default=convert_to_builtin_type)
print(encoded_with_default)

First attempt
ERROR: Object of type 'MyObj' is not JSON serializable

With default
default( <MyObj(instance value goes here)> )
{"__class__": "MyObj", "__module__": "__main__", "s": "instance value goes here"}


`convert_to_builtin_type` converts the MyObj class instance into a dictionary which JSON can encode.

To decode the results and create a MyObj() instance, use the object_hook argument to loads() to tie in to the decoder so the class can be imported from the module and used to create the instance.

In [33]:
def dict_to_object(d):
    """Rebuild an object from a decoded JSON dictionary.

    Meant to be passed as the ``object_hook`` argument of ``json.loads()``.
    A dictionary containing a ``'__class__'`` key is treated as a
    serialized instance: the class is looked up in the module named by
    ``'__module__'`` and called with the remaining items as keyword
    arguments. Any other dictionary is returned unchanged.

    Args:
        d: A dictionary produced by the JSON decoder. When it describes an
            instance, the ``'__class__'`` and ``'__module__'`` keys are
            removed from it in place.

    Returns:
        The reconstructed instance, or ``d`` itself when it has no
        ``'__class__'`` key.

    Raises:
        KeyError: If ``'__class__'`` is present but ``'__module__'`` is not.
        ImportError: If the named module cannot be imported.
        AttributeError: If the module has no attribute named by the class.
    """
    if '__class__' not in d:
        return d

    import importlib

    class_name = d.pop('__class__')
    module_name = d.pop('__module__')
    # import_module() returns the named module itself, whereas
    # __import__('pkg.mod') returns the top-level package 'pkg', which
    # breaks the class lookup for anything defined in a submodule.
    # NOTE: this imports a module and calls a class chosen by the input
    # data, so only use it on JSON from a trusted source.
    module = importlib.import_module(module_name)
    print('MODULE:', module.__name__)
    class_ = getattr(module, class_name)
    print('CLASS:', class_)
    # Everything left in the dictionary becomes a constructor argument.
    args = dict(d)
    print('INSTANCE ARGS:', args)
    return class_(**args)


# JSON text describing one serialized MyObj inside a list.
encoded_object = '''
    [{"s": "instance value goes here",
      "__module__": "__main__", "__class__": "MyObj"}]
    '''

# The hook is applied to every JSON object (dict) the decoder produces.
myobj_instance = json.loads(encoded_object, object_hook=dict_to_object)
print(myobj_instance)

MODULE: __main__
CLASS: <class '__main__.MyObj'>
INSTANCE ARGS: {'s': 'instance value goes here'}
[<MyObj(instance value goes here)>]


## Encoder and Decoder Classes

Besides the convenience functions already covered, the json module provides classes for encoding and decoding. Using the classes directly gives access to extra APIs for customizing their behavior.

The JSONEncoder uses an iterable interface for producing “chunks” of encoded data, making it easier to write to files or network sockets without having to represent an entire data structure in memory.

In [34]:
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
encoder = json.JSONEncoder()

# iterencode() yields the encoded text piece by piece instead of
# returning one complete string.
chunks = encoder.iterencode(data)
for part in chunks:
    print('PART:', part)

PART: [
PART: {
PART: "a"
PART: : 
PART: "A"
PART: , 
PART: "b"
PART: : 
PART: [2
PART: , 4
PART: ]
PART: , 
PART: "c"
PART: : 
PART: 3.0
PART: }
PART: ]


To encode arbitrary objects, override the default() method with an implementation similar to the one used in convert_to_builtin_type().

In [37]:
class MyEncoder(json.JSONEncoder):
    """JSON encoder that serializes arbitrary instances as dictionaries.

    Objects the base encoder cannot handle are turned into a dictionary
    holding their class name, module name and instance attributes.
    """

    def default(self, obj):
        """Return a JSON-encodable dictionary describing ``obj``.

        Args:
            obj: The object the base encoder could not serialize.

        Returns:
            dict: ``'__class__'`` and ``'__module__'`` entries followed by
            the contents of ``obj.__dict__``.

        Raises:
            TypeError: If ``obj`` has no ``__dict__`` or ``__module__``;
                raised by the base class implementation.
        """
        print('default(', repr(obj), ')')
        try:
            attributes = vars(obj)
            module_name = obj.__module__
        except (TypeError, AttributeError):
            # Let the base class raise the standard TypeError instead of
            # leaking an AttributeError for objects such as sets.
            return super().default(obj)
        # Convert objects to a dictionary of their representation
        d = {
            '__class__': obj.__class__.__name__,
            '__module__': module_name,
        }
        d.update(attributes)
        return d
    
obj = MyObj('internal data')
print(obj)
# encode() calls MyEncoder.default() for the MyObj instance.
encoded_text = MyEncoder().encode(obj)
print(encoded_text)

<MyObj(internal data)>
default( <MyObj(internal data)> )
{"__class__": "MyObj", "__module__": "__main__", "s": "internal data"}


Decoding:

In [39]:
class MyDecoder(json.JSONDecoder):
    """JSON decoder that rebuilds instances from tagged dictionaries.

    Dictionaries containing a ``'__class__'`` key are replaced by an
    instance of that class, looked up in the module named by
    ``'__module__'`` and constructed from the remaining items.
    """

    def __init__(self, **kwargs):
        """Create the decoder with ``dict_to_object`` as its object hook.

        Args:
            **kwargs: Optional ``json.JSONDecoder`` keyword arguments
                (``parse_float``, ``strict``, ...). Accepting them lets the
                class also be used through ``json.loads(..., cls=MyDecoder)``
                together with other options. An explicit ``object_hook``
                takes precedence over the built-in one.
        """
        kwargs.setdefault('object_hook', self.dict_to_object)
        super().__init__(**kwargs)

    def dict_to_object(self, d):
        """Rebuild an object from a decoded JSON dictionary.

        Args:
            d: A dictionary produced by the decoder. When it describes an
                instance, the ``'__class__'`` and ``'__module__'`` keys are
                removed from it in place.

        Returns:
            The reconstructed instance, or ``d`` itself when it has no
            ``'__class__'`` key.

        Raises:
            KeyError: If ``'__class__'`` is present but ``'__module__'``
                is not.
            ImportError: If the named module cannot be imported.
            AttributeError: If the module has no such class.
        """
        if '__class__' not in d:
            return d

        import importlib

        class_name = d.pop('__class__')
        module_name = d.pop('__module__')
        # import_module() returns the named module itself, whereas
        # __import__('pkg.mod') returns the top-level package 'pkg', which
        # breaks the class lookup for anything defined in a submodule.
        # NOTE: this imports a module and calls a class chosen by the
        # input data, so only decode JSON from a trusted source.
        module = importlib.import_module(module_name)
        print('MODULE:', module.__name__)
        class_ = getattr(module, class_name)
        print('CLASS:', class_)
        # Everything left in the dictionary becomes a constructor argument.
        args = dict(d)
        print('INSTANCE ARGS:', args)
        return class_(**args)


# JSON text describing one serialized MyObj inside a list.
encoded_object = '''
[{"s": "instance value goes here",
  "__module__": "__main__", "__class__": "MyObj"}]
'''

custom_decoder = MyDecoder()
myobj_instance = custom_decoder.decode(encoded_object)
print(myobj_instance)

MODULE: __main__
CLASS: <class '__main__.MyObj'>
INSTANCE ARGS: {'s': 'instance value goes here'}
[<MyObj(instance value goes here)>]


## Working with Streams and Files

All of the examples so far have assumed that the encoded version of the entire data structure could be held in memory at one time. With large data structures, it may be preferable to write the encoding directly to a file-like object. The convenience functions load() and dump() accept references to a file-like object to use for reading or writing

In [40]:
import io

In [41]:
data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]

# Use the file-like object as a context manager so it is closed once the
# encoded text has been read back; the same pattern applies unchanged
# to real files opened with open().
with io.StringIO() as f:
    # dump() writes the encoded text to the stream instead of returning it.
    json.dump(data, f)

    print(f.getvalue())

[{"a": "A", "b": [2, 4], "c": 3.0}]


## Mixed Data Streams

JSONDecoder includes raw_decode(), a method for decoding a data structure followed by more data, such as JSON data with trailing text. The return value is the object created by decoding the input data, and an index into that data indicating where decoding left off.

In [42]:
# One shared decoder instance is enough for every call below.
decoder = json.JSONDecoder()


def get_decoded_and_remainder(input_data):
    """Decode the JSON document at the start of ``input_data``.

    Args:
        input_data: Text that begins with a JSON document, optionally
            followed by other content.

    Returns:
        tuple: ``(obj, end, remaining)`` where ``obj`` is the decoded
        value, ``end`` is the index at which decoding stopped and
        ``remaining`` is the text from that index onwards.
    """
    parsed = decoder.raw_decode(input_data)
    stop = parsed[1]
    return (parsed[0], stop, input_data[stop:])


encoded_object = '[{"a": "A", "c": 3.0, "b": [2, 4]}]'
extra_text = 'This text is not JSON.'

# Case 1: the JSON document comes first and plain text follows it.
print('JSON first:')
data = encoded_object + ' ' + extra_text
obj, end, remaining = get_decoded_and_remainder(data)

for label, value in (
    ('Object              :', obj),
    ('End of parsed input :', end),
    ('Remaining text      :', repr(remaining)),
):
    print(label, value)

# Case 2: the JSON document is surrounded by plain text on both sides.
print()
print('JSON embedded:')
data = ' '.join((extra_text, encoded_object, extra_text))
try:
    obj, end, remaining = get_decoded_and_remainder(data)
except ValueError as err:
    print('ERROR:', err)

JSON first:
Object              : [{'a': 'A', 'c': 3.0, 'b': [2, 4]}]
End of parsed input : 35
Remaining text      : ' This text is not JSON.'

JSON embedded:
ERROR: Expecting value: line 1 column 1 (char 0)


Unfortunately, this only works if the object appears at the beginning of the input.

In [43]:
# List the working directory; the cells below read example.json from it.
!ls

example.json
json.ipynb


In [47]:
# Pretty-print example.json with the json.tool command-line interface
# (the mapping keys are not sorted here).
! python -m json.tool example.json

[
    {
        "a": "A",
        "c": 3.0,
        "b": [
            2,
            4
        ]
    }
]


In [49]:
# Use --sort-keys to sort the mapping keys before printing the output.
! python -m json.tool --sort-keys example.json

[
    {
        "a": "A",
        "b": [
            2,
            4
        ],
        "c": 3.0
    }
]
