In [2]:
##################################
### Load JSON data     ###########
##################################

# Load pandas as pd
import pandas as pd

# Load the daily report to a dataframe
pop_in_shelters = pd.read_json("dhs_daily_report.json")

# View summary stats about pop_in_shelters
print(pop_in_shelters.describe())

#JSON isn't a tabular format, so pandas makes assumptions about its orientation when loading data. 
#Most JSON data will be in orientations that pandas can automatically transform into a dataframe.


       adult_families_in_shelter  adults_in_families_with_children_in_shelter  \
count                1000.000000                                  1000.000000   
mean                 2074.955000                                 16487.932000   
std                   148.020238                                   848.363772   
min                  1796.000000                                 14607.000000   
25%                  1906.000000                                 15831.500000   
50%                  2129.000000                                 16836.000000   
75%                  2172.250000                                 17118.250000   
max                  2356.000000                                 17733.000000   

       children_in_families_with_children_in_shelter  \
count                                    1000.000000   
mean                                    23273.873000   
std                                       926.243984   
min                                     21291.

In [17]:
###################################
### Work with JSON orientations  ##
###################################
#Sometimes, data is oriented differently. To reduce the file size, it has been split formatted. 
#when you try to load it normally, versus with the orient keyword argument. 
#The try/except block will alert you if there are errors loading the data.

try:
    # Load the JSON with orient specified
    df = pd.read_json("dhs_daily_report.json", orient="columns")
    print("Load ok")
    
except ValueError:
    print("pandas could not parse the JSON.")
    

Load ok


In [35]:
###################################
### Get data from an API         ##
### Set API parameters           ##
### Set request headers          ##
###################################
import requests

api_url = "https://api.yelp.com/v3/businesses/search"

# Set up parameter dictionary according to documentation
params = {"term": "cafe", "location": "NYC"}

# Set up header dictionary w/ API key according to documentation
headers = {"Authorization": "Bearer FjaD11byM6PvlGxv_def0yaP1f82uDuXaZsdXR0b3qT4Vlv5afDMiLvi6GW1XaR5eu6BnJ-8p45fMIMNTL2P9gJFr1M5WiqZxuWgJUkpwDECA3jiyAkami9e8tvnZHYx"}

# Get data about NYC cafes from the Yelp API
response = requests.get(api_url, 
                headers=headers, 
                params=params)

# Extract JSON data from the response
data = response.json()

# Load data to a dataframe
cafes = pd.DataFrame(data["businesses"])

# View the data's dtypes
print(cafes.head())

#Many APIs require users provide an API key, 
#obtained by registering for the service. 
#Keys typically are passed in the request header, rather than as parameters.
#The Yelp API documentation says "To authenticate API calls with the API Key, 
#set the Authorization HTTP header value as Bearer API_KEY."


                       id                alias        name  \
0  ED7A7vDdg8yLNKJTSVHHmg     arabica-brooklyn   % Arabica   
1  d2y35lqplnZvK0cbMWz7xQ    kijitora-brooklyn    Kijitora   
2  bJDU8KNLQMrZG0Ngs4AY0w   le-phin-new-york-2     Le Phin   
3  mofGEx6VNSSpZfaOjqUzww  café-lyria-new-york  Café Lyria   
4  -2UtjTxrt1Xzd-HPsLJ7mA    butler-brooklyn-2      Butler   

                                           image_url  is_closed  \
0  https://s3-media4.fl.yelpcdn.com/bphoto/_rgLT3...      False   
1  https://s3-media3.fl.yelpcdn.com/bphoto/Wj2NnW...      False   
2  https://s3-media3.fl.yelpcdn.com/bphoto/9ASzwJ...      False   
3  https://s3-media4.fl.yelpcdn.com/bphoto/8_eiyC...      False   
4  https://s3-media3.fl.yelpcdn.com/bphoto/bdMNkv...      False   

                                                 url  review_count  \
0  https://www.yelp.com/biz/arabica-brooklyn?adju...           229   
1  https://www.yelp.com/biz/kijitora-brooklyn?adj...             9   
2  https://www

In [31]:
###################################
### Flatten nested JSONs         ##
###################################
#A feature of JSON data is that it can be nested: 
#an attribute's value can consist of attribute-value pairs. 
#This nested data is more useful unpacked, or flattened, into its own dataframe columns. 
    
# Load json_normalize()
from pandas.io.json import json_normalize

# Isolate the JSON data from the API response
data = response.json()

# Flatten business data into a dataframe, replace separator
cafes = json_normalize(data["businesses"],
             sep="_")

# View data
print(cafes.head())

#by accessing data['businesses'] we're already working one level down the nested structure. 
#data itself could be flattened with json_normalize().


                       id                      alias            name  \
0  9Ri8et0HTPwzQDg8nUZTBw     book-club-bar-new-york   Book Club Bar   
1  d2y35lqplnZvK0cbMWz7xQ          kijitora-brooklyn        Kijitora   
2  kpxXi23lUQkeJQH-2BtzDw      qahwah-house-brooklyn    Qahwah House   
3  Wh05sPEf1rmgJfl7OHNlCw  felix-roasting-new-york-6  Felix Roasting   
4  bJDU8KNLQMrZG0Ngs4AY0w         le-phin-new-york-2         Le Phin   

                                           image_url  is_closed  \
0  https://s3-media1.fl.yelpcdn.com/bphoto/nl9i0K...      False   
1  https://s3-media3.fl.yelpcdn.com/bphoto/Wj2NnW...      False   
2  https://s3-media3.fl.yelpcdn.com/bphoto/kCuu8H...      False   
3  https://s3-media1.fl.yelpcdn.com/bphoto/1jbSdu...      False   
4  https://s3-media3.fl.yelpcdn.com/bphoto/9ASzwJ...      False   

                                                 url  review_count  \
0  https://www.yelp.com/biz/book-club-bar-new-yor...           118   
1  https://www.yelp.com/

  cafes = json_normalize(data["businesses"],


In [32]:
###################################
### Handle deeply nested data    ##
###################################
#we flattened data nested down one level. Here, we'll unpack more deeply nested data.
#The categories attribute in the Yelp API response contains lists of objects. 
#To flatten this data, we'll employ json_normalize() arguments to specify 
#the path to categories and pick other attributes to include in the dataframe. 
#we should also change the separator to facilitate column selection 
#and prefix the other attributes to prevent column name collisions.

# Load other business attributes and set meta prefix
flat_cafes = json_normalize(data["businesses"],
                            sep="_",
                    		record_path="categories",
                    		meta=["name", 
                                  "alias",  
                                  "rating",
                          		  ["coordinates", "latitude"], 
                          		  ["coordinates", "longitude"]],
                    		meta_prefix="biz_")





# View the data
print(flat_cafes.head())


        alias           title       biz_name               biz_alias  \
0  bookstores      Bookstores  Book Club Bar  book-club-bar-new-york   
1   wine_bars       Wine Bars  Book Club Bar  book-club-bar-new-york   
2       cafes           Cafes  Book Club Bar  book-club-bar-new-york   
3      coffee    Coffee & Tea       Kijitora       kijitora-brooklyn   
4  mideastern  Middle Eastern   Qahwah House   qahwah-house-brooklyn   

  biz_rating biz_coordinates_latitude biz_coordinates_longitude  
0        4.5                40.723015                -73.983207  
1        4.5                40.723015                -73.983207  
2        4.5                40.723015                -73.983207  
3        5.0                40.716485                -73.957276  
4        4.5                40.718563                 -73.95713  


  flat_cafes = json_normalize(data["businesses"],


In [36]:
###################################
### Append dataframes            ##
###################################
#By modifying the offset, we can get results 1-50 in one call and 51-100 in another. 
#Then, we can append the dataframes.

params = {"term": "cafe", 
          "location": "NYC",
          "sort_by": "rating", 
          "limit": 50}

result = requests.get(api_url, headers=headers, params=params)
top_50_cafes = json_normalize(result.json()["businesses"])

# Add an offset parameter to get cafes 51-100
params = {"term": "cafe", 
          "location": "NYC",
          "sort_by": "rating", 
          "limit": 50,
          "offset":50}

result = requests.get(api_url, headers=headers, params=params)
next_50_cafes = json_normalize(result.json()["businesses"])

# Append the results, setting ignore_index to renumber rows
cafes = top_50_cafes.append(next_50_cafes,ignore_index=True)

# Print shape of cafes
print(cafes.shape)

#if multiple dataframes, start with an empty dataframe and use a for or while loop to append additional ones.


  top_50_cafes = json_normalize(result.json()["businesses"])


(100, 24)


  next_50_cafes = json_normalize(result.json()["businesses"])
  cafes = top_50_cafes.append(next_50_cafes,ignore_index=True)


In [37]:
###################################
### Merge dataframes             ##
###################################
#we built a dataset of the top 100 cafes in New York City according to Yelp. 
#Now, we'll combine that with demographic data 
#to investigate which neighborhood has the most good cafes per capita.

# Merge crosswalk into cafes on their zip code fields
#cafes_with_pumas = cafes.merge(crosswalk, left_on="location_zip_code",right_on="zipcode")

# Merge pop_data into cafes_with_pumas on puma field
#cafes_with_pop = cafes_with_pumas.merge(pop_data, on="puma")



NameError: name 'crosswalk' is not defined