# JSON

## Basic constructs (recursive)

In [1]:
import json

## Example

In [2]:
# Sample structure: a list with one dict whose values are a str, a tuple and a float.
data = [
    {
        'a': 'A',
        'b': (2, 4),
        'c': 3.0,
    },
]
print('DATA:', data, type(data))

DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}] <class 'list'>


In [3]:
# Serialize the structure to JSON text; the tuple under 'b' is written as a JSON array.
data = [dict(a='A', b=(2, 4), c=3.0)]
data_string = json.dumps(data)
print('ENCODED:', data_string, type(data_string))

ENCODED: [{"a": "A", "b": [2, 4], "c": 3.0}] <class 'str'>


In [4]:
# Parse the JSON text back into Python objects and show the resulting value and type.
decoded = json.loads(data_string)
decoded_type = type(decoded)
print('DECODED:', decoded, decoded_type)

DECODED: [{'a': 'A', 'b': [2, 4], 'c': 3.0}] <class 'list'>


## Exercise 1

In [5]:
# Round-trip a list of tuples through JSON and print the original next to the result.
tuples = [('a', 1), ('b', 2), ('c', 3)]
string = json.dumps(tuples)
decoded = json.loads(string)
print(tuples, decoded, sep='\n')

[('a', 1), ('b', 2), ('c', 3)]
[['a', 1], ['b', 2], ['c', 3]]


### json.dump() json.load()

In [7]:
# Write the sample structure to disk as JSON.
initial_data = [ { 'a':'A', 'b':(2, 4), 'c':3.0 } ]
# The context manager flushes and closes the file when the block exits,
# so the next cell is guaranteed to read the complete document.
with open('data/temp.json', 'w') as temp_file:
    json.dump(initial_data, temp_file)

In [8]:
# Read the JSON document back from disk; the file is closed as soon as parsing finishes.
with open('data/temp.json', 'r') as temp_file:
    loaded_data = json.load(temp_file)
print(loaded_data)

[{'a': 'A', 'b': [2, 4], 'c': 3.0}]


In [10]:
# Load the books file; the context manager closes the handle even if parsing fails.
with open('data/1_books.json', 'r') as books_file:
    books_data = json.load(books_file)
print(books_data)

FileNotFoundError: [Errno 2] No such file or directory: 'data/1_books.json'

## Exercise 2

In [11]:
# Exercise 2: collect the last name of every author found in the books file.
# The context manager closes the file as soon as the JSON has been parsed.
with open('data/1_books.json', 'r') as input_file:
    #Your code is here
    books_data = json.load(input_file)

# As indexed below: each top-level key maps to a list of book records, and each
# book record has an 'Authors' list of dicts that carry a 'Last_Name' entry.
author_list = [
    author['Last_Name']
    for book_type in books_data
    for book in books_data[book_type]
    for author in book['Authors']
]

print(author_list)

['Berenholtz', 'Harris', 'Brockmann', 'Barron', 'Quindlen']


## Yelp Example

We use `Yelp` data from [https://www.yelp.com/dataset_challenge/dataset](https://www.yelp.com/dataset_challenge/dataset)

Load data from the `yelp_business.json` file and:

- identify the set of categories of businesses
- identify the most popular category

In [None]:
# Read the Yelp business file, which is parsed below as one JSON document per line.
with open('data/yelp_business.json', 'r') as yelp_file:
    # Split on newlines and drop blank entries: a trailing newline at the end of
    # the file would otherwise leave an empty string that json.loads() rejects.
    bus_info = [line for line in yelp_file.read().split('\n') if line.strip()]
print(bus_info[0])

In [None]:
# Parse the first line of the file (JSON text) into a Python object,
# then compare the type before and after parsing.
first_line = bus_info[0]
bus_info_data = json.loads(first_line)
print(type(first_line))
print(type(bus_info_data))

In [None]:
# The category labels of a business are stored under its 'categories' key.
business_categories = bus_info_data['categories']
print(business_categories)

In [None]:
# Turn this business's categories into a set so duplicates collapse.
unique_categories = set(bus_info_data['categories'])
print(unique_categories)

In [None]:
# Let's put it all together: the union of the categories of every business
categories = set()
for business in bus_info:
    # split('\n') leaves an empty string after a trailing newline;
    # json.loads('') would raise, so skip blank lines.
    if not business.strip():
        continue
    business_data = json.loads(business)
    # set.update accepts any iterable, so no intermediate set is needed
    categories.update(business_data['categories'])
print(categories)

In [None]:
# Let's find the most popular categories: count how often each category
# appears across all business records.
category_freq = dict()
for business in bus_info:
    # split('\n') leaves an empty string after a trailing newline;
    # json.loads('') would raise, so skip blank lines.
    if not business.strip():
        continue
    business_data = json.loads(business)
    for category in business_data['categories']:
        # start from 0 if the category has not been seen yet, then add one
        category_freq[category] = category_freq.get(category, 0) + 1

print(category_freq)

In [None]:
# Materialize the (category, count) pairs as a list so they can be sorted.
category_list = list(category_freq.items())

In [None]:
# Order the pairs in place from highest to lowest count (the count is element 1 of each pair).
category_list.sort(
    key=lambda pair: pair[1],
    reverse=True,
)

In [None]:
# Display the (category, count) pairs; after the preceding sort the most frequent come first.
category_list