In [1]:
# importing notebook dependencies 
%matplotlib inline
import matplotlib as mlp
mlp.rcParams['figure.dpi'] = 150

# main libraries
import pandas as pd
import numpy as np


# visualization libraries/modules
import matplotlib.pyplot as plt
plt.style.use('ipynb')
import seaborn as sns

import requests

from tqdm.notebook import tqdm, trange
import time # to be used in loop iterations

----
### **``Time Series: Acquire Exercises``**
    start: Tuesday, August 9th 2022

**``Exercise Number 1: Using the code from the lesson as a guide and the REST API from https://python.zgulde.net/api/v1/items as we did in the lesson, create a dataframe named items that has all of the data for items.``**

In [2]:
response = requests.get('https://python.zgulde.net/api/v1/items')
response # status code 200, checks out!

<Response [200]>

In [3]:
# let's check the 'response' object type

type(response)

requests.models.Response

In [4]:
# seeing what's contained in the response object

print(response.text)

{"payload":{"items":[{"item_brand":"Riceland","item_id":1,"item_name":"Riceland American Jazmine Rice","item_price":0.84,"item_upc12":"35200264013","item_upc14":"35200264013"},{"item_brand":"Caress","item_id":2,"item_name":"Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct","item_price":6.44,"item_upc12":"11111065925","item_upc14":"11111065925"},{"item_brand":"Earths Best","item_id":3,"item_name":"Earths Best Organic Fruit Yogurt Smoothie Mixed Berry","item_price":2.43,"item_upc12":"23923330139","item_upc14":"23923330139"},{"item_brand":"Boars Head","item_id":4,"item_name":"Boars Head Sliced White American Cheese - 120 Ct","item_price":3.14,"item_upc12":"208528800007","item_upc14":"208528800007"},{"item_brand":"Back To Nature","item_id":5,"item_name":"Back To Nature Gluten Free White Cheddar Rice Thin Crackers","item_price":2.61,"item_upc12":"759283100036","item_upc14":"759283100036"},{"item_brand":"Sally Hansen","item_id":6,"item_name":"Sally Hansen Nail Color Magnetic 903 Silver 

In [5]:
# can the 'response' body be parsed into a regular python object?

data = response.json()
type(data) # .json() parses the body of the 'requests.models.Response' object and returns a dictionary

dict

In [6]:
# let's now see what's in the data object

data # a cleaner and more manageable data object

{'payload': {'items': [{'item_brand': 'Riceland',
    'item_id': 1,
    'item_name': 'Riceland American Jazmine Rice',
    'item_price': 0.84,
    'item_upc12': '35200264013',
    'item_upc14': '35200264013'},
   {'item_brand': 'Caress',
    'item_id': 2,
    'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
    'item_price': 6.44,
    'item_upc12': '11111065925',
    'item_upc14': '11111065925'},
   {'item_brand': 'Earths Best',
    'item_id': 3,
    'item_name': 'Earths Best Organic Fruit Yogurt Smoothie Mixed Berry',
    'item_price': 2.43,
    'item_upc12': '23923330139',
    'item_upc14': '23923330139'},
   {'item_brand': 'Boars Head',
    'item_id': 4,
    'item_name': 'Boars Head Sliced White American Cheese - 120 Ct',
    'item_price': 3.14,
    'item_upc12': '208528800007',
    'item_upc14': '208528800007'},
   {'item_brand': 'Back To Nature',
    'item_id': 5,
    'item_name': 'Back To Nature Gluten Free White Cheddar Rice Thin Crackers',
    'item_price':

In [7]:
# returning the first-level keys in the data dictionary

data.keys() # returns back 'payload', and 'status' -- exploring both

dict_keys(['payload', 'status'])

In [8]:
# first, payload

data['payload'] # returns back several unique dictionary keys and values

{'items': [{'item_brand': 'Riceland',
   'item_id': 1,
   'item_name': 'Riceland American Jazmine Rice',
   'item_price': 0.84,
   'item_upc12': '35200264013',
   'item_upc14': '35200264013'},
  {'item_brand': 'Caress',
   'item_id': 2,
   'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
   'item_price': 6.44,
   'item_upc12': '11111065925',
   'item_upc14': '11111065925'},
  {'item_brand': 'Earths Best',
   'item_id': 3,
   'item_name': 'Earths Best Organic Fruit Yogurt Smoothie Mixed Berry',
   'item_price': 2.43,
   'item_upc12': '23923330139',
   'item_upc14': '23923330139'},
  {'item_brand': 'Boars Head',
   'item_id': 4,
   'item_name': 'Boars Head Sliced White American Cheese - 120 Ct',
   'item_price': 3.14,
   'item_upc12': '208528800007',
   'item_upc14': '208528800007'},
  {'item_brand': 'Back To Nature',
   'item_id': 5,
   'item_name': 'Back To Nature Gluten Free White Cheddar Rice Thin Crackers',
   'item_price': 2.61,
   'item_upc12': '759283100036',

In [9]:
# let's look into keys within 'payload'

data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [10]:
# looking into 'items' within 'payload'

data['payload']['items'] # looks like more information

[{'item_brand': 'Riceland',
  'item_id': 1,
  'item_name': 'Riceland American Jazmine Rice',
  'item_price': 0.84,
  'item_upc12': '35200264013',
  'item_upc14': '35200264013'},
 {'item_brand': 'Caress',
  'item_id': 2,
  'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
  'item_price': 6.44,
  'item_upc12': '11111065925',
  'item_upc14': '11111065925'},
 {'item_brand': 'Earths Best',
  'item_id': 3,
  'item_name': 'Earths Best Organic Fruit Yogurt Smoothie Mixed Berry',
  'item_price': 2.43,
  'item_upc12': '23923330139',
  'item_upc14': '23923330139'},
 {'item_brand': 'Boars Head',
  'item_id': 4,
  'item_name': 'Boars Head Sliced White American Cheese - 120 Ct',
  'item_price': 3.14,
  'item_upc12': '208528800007',
  'item_upc14': '208528800007'},
 {'item_brand': 'Back To Nature',
  'item_id': 5,
  'item_name': 'Back To Nature Gluten Free White Cheddar Rice Thin Crackers',
  'item_price': 2.61,
  'item_upc12': '759283100036',
  'item_upc14': '759283100036'},
 {'i

In [11]:
# are there keys within items?

data['payload']['items'][1]

{'item_brand': 'Caress',
 'item_id': 2,
 'item_name': 'Caress Velvet Bliss Ultra Silkening Beauty Bar - 6 Ct',
 'item_price': 6.44,
 'item_upc12': '11111065925',
 'item_upc14': '11111065925'}

In [12]:
# second, status

data['status'] # not much, just 'ok'

'ok'

In [13]:
# which payload keys describe its size? listing the keys again

data['payload'].keys()

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])

In [14]:
# what is the length/size of payload cont.?

data['payload']['max_page'] # 3 total pages

3

In [15]:
# collecting the payload's pagination fields (max_page, next_page, page, previous_page) in a tuple
(
data['payload']['max_page'],
data['payload']['next_page'],
data['payload']['page'],
data['payload']['previous_page'],
)

(3, '/api/v1/items?page=2', 1, None)

In [16]:
# let's create an items dataframe

items = pd.DataFrame(data['payload']['items'])
items.shape # where there are 20 records/rows and 6 features/columns in the 'items' page

(20, 6)

In [17]:
# previewing the dataframe

items.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


----
**Creating a loop to conduct the following:**

1. request the API url to get a response object
2. parse the response body with `.json()` into a dictionary
3. from the dictionary, get the 'payload' key and then its 'items' key
4. extend the running `items` list with the records from that page
5. then move on to the url given by 'next_page'
6. repeat until 'next_page' is `None`, then convert the list to a DataFrame

In [18]:
# printing total number of pages in api

domain = 'https://python.zgulde.net'
endpoint = '/api/v1/items'
url = domain + endpoint

response = requests.get(url)
data = response.json()

pages = data['payload']['max_page']
print(f'total number of pages in payload/items api: {pages} pages')

total number of pages in payload/items api: 3 pages


In [19]:
# breaking down initial steps for process learning purposes
#
# The loop follows the API's own 'next_page' links until the API reports that
# there is no next page, so the result does not depend on a page count that was
# computed in an earlier cell.

base_url = 'https://python.zgulde.net'
url = base_url + '/api/v1/items'

# plain list that accumulates the records from every page
item_records = []

while True:

    # create the response object using the current url and stop on HTTP errors
    response = requests.get(url)
    response.raise_for_status()

    # parse the response body into a dictionary
    data = response.json()

    # add this page's records to the running list
    item_records.extend(data['payload']['items'])

    # 'next_page' is None on the last page, otherwise a path such as '/api/v1/items?page=2'
    next_page = data['payload']['next_page']
    if next_page is None:
        break

    # the path already starts with '/', so it is appended to the bare domain
    url = base_url + next_page


# creating and returning the items dataframe
items = pd.DataFrame(item_records)
print()
print(f'shape of items dataframe: {items.shape}')
items.head()


shape of items dataframe: (50, 6)


Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


----
**``Exercise Number 2: Taking the same steps for `stores` (https://python.zgulde.net/api/v1/stores)``**


**Creating a loop to conduct the following:**

1. request the API url to get a response object
2. parse the response body with `.json()` into a dictionary
3. from the dictionary, get the 'payload' key and then its 'stores' key
4. extend the running `stores` list with the records from that page
5. then move on to the url given by 'next_page'
6. repeat until 'next_page' is `None`, then convert the list to a DataFrame

In [20]:
# let's first connect and inspect the api

url = 'https://python.zgulde.net/api/v1/stores'
response = requests.get(url)
response # successful connection!

<Response [200]>

In [21]:
# let's convert the response to a json type object

data = response.json()
data # checks out!

{'payload': {'max_page': 1,
  'next_page': None,
  'page': 1,
  'previous_page': None,
  'stores': [{'store_address': '12125 Alamo Ranch Pkwy',
    'store_city': 'San Antonio',
    'store_id': 1,
    'store_state': 'TX',
    'store_zipcode': '78253'},
   {'store_address': '9255 FM 471 West',
    'store_city': 'San Antonio',
    'store_id': 2,
    'store_state': 'TX',
    'store_zipcode': '78251'},
   {'store_address': '2118 Fredericksburg Rdj',
    'store_city': 'San Antonio',
    'store_id': 3,
    'store_state': 'TX',
    'store_zipcode': '78201'},
   {'store_address': '516 S Flores St',
    'store_city': 'San Antonio',
    'store_id': 4,
    'store_state': 'TX',
    'store_zipcode': '78204'},
   {'store_address': '1520 Austin Hwy',
    'store_city': 'San Antonio',
    'store_id': 5,
    'store_state': 'TX',
    'store_zipcode': '78218'},
   {'store_address': '1015 S WW White Rd',
    'store_city': 'San Antonio',
    'store_id': 6,
    'store_state': 'TX',
    'store_zipcode': '78220

In [22]:
# initial keys in data/api

data.keys() # dict_keys(['payload', 'status'])

dict_keys(['payload', 'status'])

In [23]:
# what about status?

data['status'] # nothing, but ok!

'ok'

In [24]:
# and payload?

data['payload'] # ok, seems to have some information -- let's view what's inside.

{'max_page': 1,
 'next_page': None,
 'page': 1,
 'previous_page': None,
 'stores': [{'store_address': '12125 Alamo Ranch Pkwy',
   'store_city': 'San Antonio',
   'store_id': 1,
   'store_state': 'TX',
   'store_zipcode': '78253'},
  {'store_address': '9255 FM 471 West',
   'store_city': 'San Antonio',
   'store_id': 2,
   'store_state': 'TX',
   'store_zipcode': '78251'},
  {'store_address': '2118 Fredericksburg Rdj',
   'store_city': 'San Antonio',
   'store_id': 3,
   'store_state': 'TX',
   'store_zipcode': '78201'},
  {'store_address': '516 S Flores St',
   'store_city': 'San Antonio',
   'store_id': 4,
   'store_state': 'TX',
   'store_zipcode': '78204'},
  {'store_address': '1520 Austin Hwy',
   'store_city': 'San Antonio',
   'store_id': 5,
   'store_state': 'TX',
   'store_zipcode': '78218'},
  {'store_address': '1015 S WW White Rd',
   'store_city': 'San Antonio',
   'store_id': 6,
   'store_state': 'TX',
   'store_zipcode': '78220'},
  {'store_address': '12018 Perrin Beitel 

In [25]:
# payload contains the following: dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'stores'])
data['payload'].keys()

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'stores'])

In [26]:
# checking stores first
data['payload']['stores']

[{'store_address': '12125 Alamo Ranch Pkwy',
  'store_city': 'San Antonio',
  'store_id': 1,
  'store_state': 'TX',
  'store_zipcode': '78253'},
 {'store_address': '9255 FM 471 West',
  'store_city': 'San Antonio',
  'store_id': 2,
  'store_state': 'TX',
  'store_zipcode': '78251'},
 {'store_address': '2118 Fredericksburg Rdj',
  'store_city': 'San Antonio',
  'store_id': 3,
  'store_state': 'TX',
  'store_zipcode': '78201'},
 {'store_address': '516 S Flores St',
  'store_city': 'San Antonio',
  'store_id': 4,
  'store_state': 'TX',
  'store_zipcode': '78204'},
 {'store_address': '1520 Austin Hwy',
  'store_city': 'San Antonio',
  'store_id': 5,
  'store_state': 'TX',
  'store_zipcode': '78218'},
 {'store_address': '1015 S WW White Rd',
  'store_city': 'San Antonio',
  'store_id': 6,
  'store_state': 'TX',
  'store_zipcode': '78220'},
 {'store_address': '12018 Perrin Beitel Rd',
  'store_city': 'San Antonio',
  'store_id': 7,
  'store_state': 'TX',
  'store_zipcode': '78217'},
 {'store

In [27]:
# printing payload key contents

print(
data['payload']['max_page'],
data['payload']['next_page'],
data['payload']['page'],
data['payload']['previous_page']
)

1 None 1 None


In [28]:
# let's create the loop
#
# The loop follows the API's 'next_page' links until there are none left, and the
# dataframe is built once after the loop so 'stores' is always a DataFrame here.

# assign total number of pages in api/data variable (kept for reference; the loop does not rely on it)
pages = data['payload']['max_page']

base_url = 'https://python.zgulde.net'
url = base_url + '/api/v1/stores'

# plain list that accumulates the records from every page
store_records = []

while True:

    # use the current url to create the response object and stop on HTTP errors
    response = requests.get(url)
    response.raise_for_status()

    # convert response to json object
    data = response.json()

    # extend/append results from 'stores' key
    store_records.extend(data['payload']['stores'])

    # 'next_page' is None on the last page
    next_page = data['payload']['next_page']
    if next_page is None:
        break

    # 'next_page' already starts with '/', so no extra slash is added after the domain
    url = base_url + next_page

# create the stores dataframe from everything that was collected
stores = pd.DataFrame(store_records)

print()
print(f'shape of stores dataframe: {stores.shape}')
stores.head() # checks out!


shape of items dataframe: (10, 5)


Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218


----
**``Exercise Number 3: Extract the data for `sales` (https://python.zgulde.net/api/v1/sales). There are a lot of pages of data here, so your code will need to be a little more complex. Your code should continue fetching data from the next page until all of the data is extracted.``**

In [29]:
# let's connect and inspect

url = 'https://python.zgulde.net/api/v1/sales'
response = requests.get(url)

# return response
response # checks out!

<Response [200]>

In [30]:
# creating the json object and inspecting the contents in sales

data = response.json()
data.keys()

dict_keys(['payload', 'status'])

In [31]:
# let's inspect payload keys

data['payload'].keys() # let's peek into the total num/max pages and create the sales loop

dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'sales'])

In [32]:
# number of total pages in the sales API

pages = data['payload']['max_page'] # 183 pages
# the label names the sales endpoint, which is what 'data' holds at this point
print(f'total number of pages in payload/sales api: {pages} pages')


total number of pages in payload/items api: 183 pages


In [33]:
# let's get loopy
#
# The loop follows the API's 'next_page' links until there are none left.
# 'pages' (from the cell above) is only used to size the progress bar.

# initial api url for connecting
base_url = 'https://python.zgulde.net'
url = base_url + '/api/v1/sales'

# empty container list to hold data while looping through sales pages
sale_records = []

with tqdm(total = pages) as progress:
    while True:

        # use the current url to create the response object and stop on HTTP errors
        response = requests.get(url)
        response.raise_for_status()

        # convert response to json object
        data = response.json()

        # extend/append results from 'sales' key
        sale_records.extend(data['payload']['sales'])
        progress.update(1)

        # 'next_page' is None on the last page
        next_page = data['payload']['next_page']
        if next_page is None:
            break

        # 'next_page' already starts with '/', so no extra slash is added after the domain
        url = base_url + next_page

# create the sales dataframe once, from everything that was collected
sales = pd.DataFrame(sale_records)

print()
print(f'shape of sales dataframe: {sales.shape}')
sales.head() # checks out!

  0%|          | 0/183 [00:00<?, ?it/s]


shape of items dataframe: (913000, 5)


Unnamed: 0,item,sale_amount,sale_date,sale_id,store
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


----
**``Exercise Number 4: Save the data in your files to local csv files so that it will be faster to access in the future.``**

In [34]:
# creating the csv files for items, stores, and sales
# paths are relative to the notebook's working directory so the cell runs on any machine;
# index = False keeps the row index out of the files

items.to_csv("items.csv", index = False)
stores.to_csv("stores.csv", index = False)
sales.to_csv("sales.csv", index = False)

----
**``Exercise Number 5: Combine the data from your three separate dataframes into one large dataframe.``**

In [35]:
# let's inspect the dataframes for merger

print(items.columns)
print(stores.columns)
print(sales.columns)

Index(['item_brand', 'item_id', 'item_name', 'item_price', 'item_upc12',
       'item_upc14'],
      dtype='object')
Index(['store_address', 'store_city', 'store_id', 'store_state',
       'store_zipcode'],
      dtype='object')
Index(['item', 'sale_amount', 'sale_date', 'sale_id', 'store'], dtype='object')


In [36]:
items.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036


In [37]:
sales.head()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [38]:
stores.head()

Unnamed: 0,store_address,store_city,store_id,store_state,store_zipcode
0,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,9255 FM 471 West,San Antonio,2,TX,78251
2,2118 Fredericksburg Rdj,San Antonio,3,TX,78201
3,516 S Flores St,San Antonio,4,TX,78204
4,1520 Austin Hwy,San Antonio,5,TX,78218


In [39]:
# notes (come back to this)
# left-join the item details onto each sale (sales.item -> items.item_id),
# then left-join the store details onto that result (sales.store -> stores.store_id)

merged_sales = pd.merge(
    pd.merge(sales, items, how = "left", left_on = "item", right_on = "item_id"),
    stores,
    how = "left",
    left_on = "store",
    right_on = "store_id",
)

In [40]:
# checking the dataframe

merged_sales.head()

Unnamed: 0,item,sale_amount,sale_date,sale_id,store,item_brand,item_id,item_name,item_price,item_upc12,item_upc14,store_address,store_city,store_id,store_state,store_zipcode
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253


In [41]:
# removing redundant columns
# 'item_id' and 'store_id' repeat the 'item' and 'store' join keys; 'sale_id' is dropped as well

merged_sales = merged_sales.drop(["sale_id", "item_id", "store_id"], axis = "columns")

merged_sales.head()

Unnamed: 0,item,sale_amount,sale_date,store,item_brand,item_name,item_price,item_upc12,item_upc14,store_address,store_city,store_state,store_zipcode
0,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,Riceland,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,TX,78253
1,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",1,Riceland,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,TX,78253
2,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",1,Riceland,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,TX,78253
3,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",1,Riceland,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,TX,78253
4,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",1,Riceland,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12125 Alamo Ranch Pkwy,San Antonio,TX,78253


In [42]:
# saving the df
# relative path (next to the notebook) so the cell does not depend on one user's home directory

merged_sales.to_csv("merged_sales.csv", index = False) # completed!

----
**``Exercise Number 6: Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv``**

In [49]:
# getting data from url and saving to a csv

german_energy = pd.read_csv('https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv')
german_energy # checks out!

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.18400,,,
1,2006-01-02,1380.52100,,,
2,2006-01-03,1442.53300,,,
3,2006-01-04,1457.21700,,,
4,2006-01-05,1477.13100,,,
...,...,...,...,...,...
4378,2017-12-27,1263.94091,394.507,16.530,411.037
4379,2017-12-28,1299.86398,506.424,14.162,520.586
4380,2017-12-29,1295.08753,584.277,29.854,614.131
4381,2017-12-30,1215.44897,721.247,7.467,728.714


In [50]:
# saving the df to a csv
# index = False keeps the default row index out of the file, so reading it back
# does not produce an extra unnamed column

german_energy.to_csv("german_energy.csv", index = False) # complete!

----
**``Exercise Number 7: Make sure all the work that you have done above is reproducible. That is, you should put the code above into separate functions in the acquire.py file and be able to re-run the functions and get the same data.``**

In [45]:
# creating a function that connects to api and returns the data/dataframe

def get_items():
    """Fetch every page of the items API and return all records as one DataFrame.

    Starts at 'https://python.zgulde.net/api/v1/items' and keeps following the
    'next_page' path found in each response's payload until it is None. The
    function does not rely on any notebook-level variable (such as a page count
    from an earlier cell), so it returns the complete data on a fresh kernel.

    Returns
    -------
    pandas.DataFrame
        One row per record found under payload -> items, across all pages.

    Raises
    ------
    requests.HTTPError
        If any page responds with an HTTP error status.
    """

    base_url = 'https://python.zgulde.net'
    url = base_url + '/api/v1/items'

    # plain list that accumulates the records from every page
    records = []

    while True:

        # create the response object using the current url and stop on HTTP errors
        response = requests.get(url)
        response.raise_for_status()

        # only the 'payload' part of the parsed body is needed
        payload = response.json()['payload']

        # add this page's records to the running list
        records.extend(payload['items'])

        # 'next_page' is None on the last page, otherwise a path starting with '/'
        next_page = payload['next_page']
        if next_page is None:
            break

        url = base_url + next_page

    # build the dataframe once, after all pages were collected
    return pd.DataFrame(records)

In [46]:
# checking to make sure function works

test = get_items()

test.head() #checks out! / can also make this function with a 'while loop'

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013
1,Caress,2,Caress Velvet Bliss Ultra Silkening Beauty Bar...,6.44,11111065925,11111065925
2,Earths Best,3,Earths Best Organic Fruit Yogurt Smoothie Mixe...,2.43,23923330139,23923330139
3,Boars Head,4,Boars Head Sliced White American Cheese - 120 Ct,3.14,208528800007,208528800007
4,Back To Nature,5,Back To Nature Gluten Free White Cheddar Rice ...,2.61,759283100036,759283100036
