In [1]:
import pandas as pd
import numpy as np
import requests
import acquire
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Root URL of the API; later cells prepend it to relative paths such as the
# payload's `next_page` value.
base_url = 'https://python.zach.lol'
# Show the raw body returned by the root URL.
print(requests.get(base_url).text)

{"api":"/api/v1","help":"/documentation"}



In [187]:
response = requests.get(base_url + '/documentation')

In [188]:
print(response.json()['payload'])


The API accepts GET requests for all endpoints, where endpoints are prefixed
with

    /api/{version}

Where version is "v1"

Valid endpoints:

- /stores[/{store_id}]
- /items[/{item_id}]
- /sales[/{sale_id}]

All endpoints accept a `page` parameter that can be used to navigate through
the results.



In [189]:
response = requests.get('https://python.zach.lol/api/v1/items')

In [190]:
response.json().keys()

dict_keys(['payload', 'status'])

In [191]:
response.json()['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [192]:
response.json()['payload']['page']

1

In [193]:
response.json()['payload']['max_page']

3

In [194]:
response.json()['payload']['next_page']

'/api/v1/items?page=2'

In [195]:
data = response.json()

In [196]:
data.keys()

dict_keys(['payload', 'status'])

In [198]:
df = pd.DataFrame(data['payload']['items'])

In [199]:
df.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [200]:
# data from next page
data['payload']['next_page']

'/api/v1/items?page=2'

In [201]:
# `next_page` is a relative path, so it has to be joined onto `base_url`.
# Both `response` and `data` are rebound here to the newly fetched page, so
# `data['payload']['next_page']` refers to the following page after this cell.
response = requests.get(base_url + data['payload']['next_page'])
data = response.json()

In [202]:
response.json()['payload']

{'items': [{'item_brand': 'Doctors Best',
   'item_id': 21,
   'item_name': 'Doctors Best Best Curcumin C3 Complex 1000mg Tablets - 120 Ct',
   'item_price': 8.09,
   'item_upc12': '753950001954',
   'item_upc14': '753950001954'},
  {'item_brand': 'Betty Crocker',
   'item_id': 22,
   'item_name': 'Betty Crocker Twin Pack Real Potatoes Scalloped 2 Pouches For 2 Meals - 2 Pk',
   'item_price': 7.31,
   'item_upc12': '16000288829',
   'item_upc14': '16000288829'},
  {'item_brand': 'Reese',
   'item_id': 23,
   'item_name': 'Reese Mandarin Oranges Segments In Light Syrup',
   'item_price': 1.78,
   'item_upc12': '70670009658',
   'item_upc14': '70670009658'},
  {'item_brand': 'Smart Living',
   'item_id': 24,
   'item_name': 'Smart Living Charcoal Lighter Fluid',
   'item_price': 5.34,
   'item_upc12': '688267084225',
   'item_upc14': '688267084225'},
  {'item_brand': 'Hood',
   'item_id': 25,
   'item_name': 'Hood Latte Iced Coffee Drink Vanilla Latte',
   'item_price': 2.43,
   'item_up

In [203]:
# Append the newly fetched page to the frame built from page 1.
# `ignore_index=True` renumbers the rows 0..n-1; a bare `.reset_index()` would
# instead keep the old, duplicated labels as an extra "index" column.
# NOTE: this cell rebinds `df`, so re-running it appends the same page again.
df = pd.concat([df, pd.DataFrame(response.json()['payload']['items'])], ignore_index=True)

In [204]:
data['payload']['next_page']

'/api/v1/items?page=3'

In [205]:
print('max_page: %s' % data['payload']['max_page'])
print('next_page: %s' % data['payload']['next_page'])

max_page: 3
next_page: /api/v1/items?page=3


In [283]:
def get_items(base_url='https://python.zach.lol', session=None, timeout=30):
    """
    Download every page of the `/api/v1/items` endpoint into one DataFrame.

    Parameters
    ----------
    base_url : str
        Scheme and host of the API, without a trailing slash.
    session : object, optional
        Anything exposing `get(url, timeout=...)` like `requests.Session`.
        When omitted, the module-level `requests.get` is used.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per item across all pages, with a fresh 0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page answers with an HTTP error status (via `raise_for_status`).
    """
    http = requests if session is None else session

    response = http.get(base_url + '/api/v1/items', timeout=timeout)
    response.raise_for_status()
    payload = response.json()['payload']

    # Gather the raw records and build the frame once at the end; repeated
    # pd.concat copies the data every iteration and leaves duplicate labels.
    records = list(payload['items'])

    # `max_page` includes the page already fetched, hence max_page - 1 follow-ups.
    max_page = payload['max_page']
    for _ in range(1, max_page):
        next_page = payload['next_page']
        if not next_page:
            # The API stopped advertising a next page earlier than max_page.
            break
        response = http.get(base_url + next_page, timeout=timeout)
        response.raise_for_status()
        payload = response.json()['payload']
        records.extend(payload['items'])

    return pd.DataFrame(records)


In [284]:
items = get_items()

In [285]:
items.head(4)

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007


In [286]:
items.to_csv('items.csv')

#### Do the same thing, but for stores.

In [234]:
def get_stores(base_url='https://python.zach.lol', session=None, timeout=30):
    """
    Download the `/api/v1/stores` endpoint into a DataFrame.

    Parameters
    ----------
    base_url : str
        Scheme and host of the API, without a trailing slash.
    session : object, optional
        Anything exposing `get(url, timeout=...)` like `requests.Session`.
        When omitted, the module-level `requests.get` is used.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per store, with a fresh 0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If a page answers with an HTTP error status (via `raise_for_status`).
    """
    http = requests if session is None else session

    response = http.get(base_url + '/api/v1/stores', timeout=timeout)
    response.raise_for_status()
    payload = response.json()['payload']
    records = list(payload['stores'])

    # NOTE(review): the stores payload keys were never inspected in this
    # notebook; pagination fields are assumed to mirror items/sales. If they
    # are absent, only the first page is read, exactly as before.
    max_page = payload.get('max_page') or 1
    for _ in range(1, max_page):
        next_page = payload.get('next_page')
        if not next_page:
            break
        response = http.get(base_url + next_page, timeout=timeout)
        response.raise_for_status()
        payload = response.json()['payload']
        records.extend(payload['stores'])

    return pd.DataFrame(records)

In [235]:
stores = get_stores()
stores.head(4)

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204


In [281]:
stores.to_csv('stores.csv')

#### Extract the data for sales. There are a lot of pages of data here, so your code will need to be a little more complex. Your code should continue fetching data from the next page until all of the data is extracted.

In [239]:
response = requests.get('https://python.zach.lol/api/v1/sales')
response.json()['payload'].keys()

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'sales'])

In [277]:
def get_sales(base_url='https://python.zach.lol', session=None, timeout=30):
    """
    Download every page of the `/api/v1/sales` endpoint into one DataFrame.

    The host is taken from the `base_url` argument rather than from a
    notebook-level variable, so the function works on a fresh kernel.

    Parameters
    ----------
    base_url : str
        Scheme and host of the API, without a trailing slash.
    session : object, optional
        Anything exposing `get(url, timeout=...)` like `requests.Session`.
        When omitted, the module-level `requests.get` is used.
    timeout : float
        Seconds to wait for each HTTP request before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per sale across all pages, with a fresh 0..n-1 index.

    Raises
    ------
    requests.HTTPError
        If any page answers with an HTTP error status (via `raise_for_status`).
    """
    http = requests if session is None else session

    response = http.get(base_url + '/api/v1/sales', timeout=timeout)
    response.raise_for_status()
    payload = response.json()['payload']

    # Gather raw records and build the frame once: concatenating inside the
    # loop re-copies all earlier rows on every one of the many pages.
    records = list(payload['sales'])

    # `max_page` includes the page already fetched, hence max_page - 1 follow-ups.
    max_page = payload['max_page']
    for _ in range(1, max_page):
        next_page = payload['next_page']
        if not next_page:
            # The API stopped advertising a next page earlier than max_page.
            break
        response = http.get(base_url + next_page, timeout=timeout)
        response.raise_for_status()
        payload = response.json()['payload']
        records.extend(payload['sales'])

    return pd.DataFrame(records)

In [275]:
response = requests.get('https://python.zach.lol/api/v1/sales')
data = response.json()
max_page = data['payload']['max_page']
max_page

183

In [278]:
sales = get_sales()
sales.head()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [280]:
sales.to_csv('sales.csv')

#### Combine all of the data into a single DataFrame

In [287]:
sales = sales.rename(columns = {'item': 'item_id', 'store': 'store_id'})

In [288]:
sales.head()

Unnamed: 0,item_id,sale_amount,sale_date,sale_id,store_id
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [290]:
# Attach the store columns to every sales row. `validate` makes pandas raise a
# MergeError if `stores` ever holds a duplicated store_id, instead of silently
# multiplying the sales rows.
# NOTE: this rebinds `sales`; re-running the cell merges the store columns again.
sales = sales.merge(stores, on='store_id', validate='many_to_one')


In [292]:
# Attach the item columns to every sales row. `validate` makes pandas raise a
# MergeError if `items` ever holds a duplicated item_id, instead of silently
# multiplying the sales rows.
# NOTE: this rebinds `sales`; re-running the cell merges the item columns again.
sales = sales.merge(items, on='item_id', validate='many_to_one')

In [294]:
sales.shape

(913000, 14)

In [3]:
sales = acquire.get_all_data()

##### Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv

In [5]:
data = pd.read_csv('https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv')

In [6]:
data.head()

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.184,,,
1,2006-01-02,1380.521,,,
2,2006-01-03,1442.533,,,
3,2006-01-04,1457.217,,,
4,2006-01-05,1477.131,,,


In [2]:
data = acquire.get_power_data()

In [3]:
data

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.18400,,,
1,2006-01-02,1380.52100,,,
2,2006-01-03,1442.53300,,,
3,2006-01-04,1457.21700,,,
4,2006-01-05,1477.13100,,,
...,...,...,...,...,...
4378,2017-12-27,1263.94091,394.507,16.530,411.037
4379,2017-12-28,1299.86398,506.424,14.162,520.586
4380,2017-12-29,1295.08753,584.277,29.854,614.131
4381,2017-12-30,1215.44897,721.247,7.467,728.714
