# JSON

### Learning Objectives:
- Interpret JSON formatted data and recognize differences between JSON and Python
- Deserialize data from JSON for use in Python programs (read)
- Serialize data into JSON for long term storage (write) 

In [1]:
import csv

# TODO: import json module
import json

In [2]:
# Deserialize
def read_json(path):
    """
    Deserialize the JSON document stored in the file at path and return
    the resulting Python object (whatever json.load produces for it,
    e.g. a dict or a list).
    """
    # json_file is a variable bound to the open file object; the with-block
    # closes it automatically once parsing is done
    with open(path, encoding="utf-8") as json_file:
        parsed = json.load(json_file)
    return parsed
    
# Serialize
def write_json(path, data):
    """
    Serialize data as JSON (indented by 2 spaces) into the file at path,
    replacing any previous content of that file.

    The JSON text is produced in memory BEFORE the file is opened for
    writing. If data cannot be serialized (json raises TypeError or
    ValueError), the exception propagates and an already existing file at
    path keeps its old content instead of being truncated.
    """
    serialized = json.dumps(data, indent=2)
    # opening in 'w' mode truncates the file, so do it only once we know
    # there is valid JSON text to put into it
    with open(path, 'w', encoding="utf-8") as f:
        f.write(serialized)

### Example 1: Sum of numbers (simple JSON)

In [None]:
# TODO 1: Create a new "numsA.json".
#         Add the list [1, 2, 3, 4] to "numsA.json" file.
#         Use jupyter notebook to create and edit the new file

# TODO 2: Use the input built-in function to get the JSON file name from the user
#         Read the JSON file using read_json; capture return value into a variable
file_name = input("Enter the JSON file name: ")
data = read_json(file_name)

# TODO 3: Print type of data returned by function that reads JSON file.
data_type = type(data)
print(data_type)

# TODO 4: Using Python built-in function sum(...), calculate total of numbers in numsA.json, print the total.
total = sum(data)
print(total)

# TODO 5: Create a new JSON file "numsB.json" and try out the following data:
#         [-1, 10, 4,]
#         Does that work?
#         Change the data to [-1, 10, 4] and try to run the program by providing input as numsB.json

# TODO 6: Create a new JSON file "simple.json" and try out the following data.
#         What kind of error do you get with this?
#         Fix the error by commenting the line of code that causes the error!
#         3.14
#         True
#         true
#         'hello'
#         "hello"


### Example 2: Score Tracker

In [None]:
player_details = input("Enter player name and score: ")
# TODO 1: extract player name and score into variables
# The score is the last whitespace-separated token and everything before it
# is the name. Splitting once from the right accepts multi-word names
# ("Ada Lovelace 10") and stray extra spaces, which split(" ") rejected
# with a ValueError during unpacking.
player_name, player_score = player_details.strip().rsplit(maxsplit=1)
player_score = int(player_score)  # ValueError if the score is not an integer

# TODO 2: Define an empty "scores" dictionary to keep track of players'
#         scores.
#         KEY: player name VALUE: player scores list

input_file = "score_history.json"
scores = read_json(input_file) # updated code after TODO 6

# TODO 3: Check if player name is a key in the scores dictionary.
#         If not, create a new key for player name and value as empty list
#         to keep track of that player's scores.

if player_name not in scores:
    scores[player_name] = []

# TODO 4: Add player's score to the player's list in scores dictionary
scores[player_name].append(player_score)
print(scores)

# TODO 5: Create a "score_history.json" file and populate that file with
#         empty dictionary {}

# TODO 6: Read "score_history.json" to populate initial "scores" dict, 
#         instead of the empty dict created in TODO 2.

# TODO 7: Calculate average score for that player
# The list holds at least the score appended above, so len(...) is never 0.
player_scores = scores[player_name]
average_score = sum(player_scores) / len(player_scores)
print("Average score for {} is {}.".format(player_name, average_score))

# TODO 8: At the end of the program, write the updated scores from dict
#         into the "score_history.json" file

write_json(input_file, scores)

# That's it, now you have a program that helps you keep track 
# of player scores permanently.

### Example 3: Kiva.com Micro-lending site
Many websites have APIs that allow you to get their data.

In [None]:
# TODO: read "kiva.json"
kiva_file = 'kiva.json'
kiva_data = read_json(kiva_file)

# TODO: explore the type of the data structure returned by read_json
kiva_type = type(kiva_data)
print(kiva_type)

# kiva_data # uncomment to see the whole JSON

How to explore an unknown JSON?
- If you run into a `dict`, try the `.keys()` method to look at the keys of the dictionary, then look up the value for a key to explore further
- If you run into a `list`, iterate over the list and print each item

In [None]:
# show which keys the top-level dictionary has
kiva_keys = list(kiva_data.keys())
print(kiva_keys)

# TODO: lookup the value corresponding to the key
kiva_inner = kiva_data["data"]

# TODO: you know what to do next ... explore type again
print(type(kiva_inner))

In [None]:
# walk down the nested dictionaries one level at a time, printing the keys
# available at each level before descending further
data_level = kiva_data["data"]
print(list(data_level.keys()))
lend_level = data_level["lend"]
print(list(lend_level.keys()))
loans_level = lend_level["loans"]
print(list(loans_level.keys()))
loans_list = loans_level["values"] # actual information: list of loan dictionaries

In [None]:
# TODO: iterate over loans_list and print every borrower's name, loan amount and country details

# visual divider printed after each loan
separator = "------------------------------------------------------------------------------------------------"

for loan_dict in loans_list:
    borrower_name = loan_dict["name"]
    print("Borrower name:", borrower_name)

    loan_amount = loan_dict["loanAmount"]
    print("Loan amount: $", loan_amount, sep = "")

    # the country sits one level deeper, inside the "geocode" dictionary
    geocode = loan_dict["geocode"]
    country_details = geocode["country"]
    print("Country details:", country_details)

    print(separator)

### Let's write student information dataset into various JSON files

In [None]:
# inspired by https://automatetheboringstuff.com/2e/chapter16/
def process_csv(filename):
    """
    Read the CSV file at filename (UTF-8) and return all of its rows as a
    list of lists of strings, in file order. An empty file yields [].
    """
    # newline="" hands line endings to the csv module untouched, as the csv
    # documentation requires; otherwise newlines embedded in quoted fields
    # may be altered. The with-block closes the file even if parsing raises.
    with open(filename, encoding="utf-8", newline="") as csv_file:
        return list(csv.reader(csv_file))

# Load the survey CSV once and split it into the two module-level tables
# that cell() and bucketize() read.
survey_data = process_csv('cs220_survey_data.csv')
cs220_header = survey_data[0]  # first row: the column names
cs220_data = survey_data[1:]  # every remaining row: the actual records

In [None]:
def cell(row_idx, col_name):
    """
    Look up a single value of the survey table: the entry of row row_idx
    (an index into cs220_data) in the column named col_name (a name
    listed in cs220_header).

    An empty string is reported as None, a value of the "Age" column is
    converted to int, and any other value is returned unchanged.
    """
    column = cs220_header.index(col_name)
    raw = cs220_data[row_idx][column]

    if raw == '':
        # missing value
        return None

    # "Age" is the only column that gets a type conversion
    return int(raw) if col_name == "Age" else raw

In [None]:
def bucketize(bucket_column):
    """
    Group the rows of cs220_data by their value in bucket_column.

    Returns a dict whose keys are the distinct values that cell() reports
    for that column and whose values are lists of the full rows having
    that value, in their original order.
    """
    grouped = {}
    for row_idx, row in enumerate(cs220_data):
        bucket_key = cell(row_idx, bucket_column)
        # setdefault starts a new bucket the first time a key is seen
        grouped.setdefault(bucket_key, []).append(row)
    return grouped

# TODO: create lecture based buckets and store result into lec_buckets
lec_buckets = bucketize("Lecture")

# TODO: What is the type of lec_buckets? A __dict____ of ___list of lists______
#       (key: a "Lecture" value; value: the list of rows having that value)

# TODO: write lec_buckets into a JSON file called "lecture_cs220_data.json"
write_json("lecture_cs220_data.json", lec_buckets)

# TODO: create major based buckets and store result into major_buckets
major_buckets = bucketize("Major")

# TODO: write major_buckets into a JSON file called "major_cs220_data.json"
write_json("major_cs220_data.json", major_buckets)

In [None]:
def transform(header, data):
    """
    Transform data (a list of rows) into a list of dictionaries, one per
    row, mapping each column name in header to that row's value at the
    same position.

    A row shorter than header simply yields fewer keys; a row longer than
    header raises IndexError.
    """
    # outer comprehension: one dictionary per row
    # inner comprehension: Key: header; Value: data
    return [
        {header[idx]: value for idx, value in enumerate(row)}
        for row in data
    ]
    
# Convert the survey rows into one dictionary per row, keyed by column name.
transformed_data = transform(cs220_header, cs220_data)

# TODO: What is the type of transformed_data? A __list____ of ___dictionaries______

# TODO: write transformed_data into a JSON file called "cs220_survey_data.json"
write_json("cs220_survey_data.json", transformed_data)

### Self-practice: explore real-world JSON

### Weather for UW-Madison campus

In [None]:
# TODO: read "weather.json"
weather_file = 'weather.json'
weather_data = read_json(weather_file)

# TODO: explore the type of the data structure returned by read_json
weather_type = type(weather_data)
print(weather_type)

# display the data from "weather.json"
# weather_data # uncomment to see the whole JSON

In [None]:
# TODO: display the keys of the weather.json dict
weather_keys = list(weather_data.keys())
print(weather_keys)

# TODO: lookup the value corresponding to the 'properties'
weather_properties = weather_data["properties"]

# TODO: you know what to do next ... explore type again
print(type(weather_properties))

In [None]:
# TODO: display the keys of the properties dict
properties_dict = weather_data["properties"]
print(list(properties_dict.keys()))

# TODO: lookup the value corresponding to the 'periods'
# weather_data["properties"]["periods"] # uncomment to see the output

# TODO: you know what to do next ... explore type again
periods_value = properties_dict["periods"]
print(type(periods_value))

In [None]:
# TODO: extract periods list into a variable
periods_list = weather_data["properties"]["periods"]

# TODO: iterate over periods_list and print every period's startTime, endTime, temperature, and temperatureUnit

for period_dict in periods_list:
    start_time = period_dict["startTime"]
    print("Start time:", start_time)
    # the end of the period is stored under its own "endTime" key
    # (reading "startTime" again would print the start time twice)
    end_time = period_dict["endTime"]
    print("End time:", end_time)
    temperature = period_dict["temperature"]
    temperature_unit = period_dict["temperatureUnit"]
    print("Temperature: {} degree {}".format(temperature, temperature_unit))
    print("------------------------------------------------------------------------------------------------")