In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import itertools

# JSON API
import requests
import json

# data visualization
import matplotlib
import seaborn as sns
import statsmodels.api as sm

%matplotlib inline

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [2]:
def get_response(url, timeout=None):
    """Issue an HTTP GET request and return the raw response object.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. The default ``None`` keeps the
        original behaviour of waiting indefinitely; pass a number of
        seconds to fail fast when the server does not answer.

    Returns
    -------
    requests.Response
        Whatever ``requests.get`` returns; the status code is not checked.
    """
    return requests.get(url, timeout=timeout)

In [3]:
# Root of the v1 API; resource names are appended to this string below.
url = 'https://python.zach.lol/api/v1/'

In [4]:
# Request the API root; the cell displays the returned Response object.
get_response(url)

<Response [200]>

In [5]:
# Resource names appended to `url` to build each endpoint address.
resources = ['items', 'stores', 'sales']

In [6]:
# Empty placeholder frame; `df` is rebound to the merged data further down.
df = pd.DataFrame()

In [7]:
# Request the base endpoint of each resource listed in `resources`.
response0 = requests.get(url + resources[0])
response1 = requests.get(url + resources[1])
response2 = requests.get(url + resources[2])

# Decode each response body from JSON.
data0 = response0.json()
data1 = response1.json()
data2 = response2.json()

# Print the keys found under 'payload' in each decoded response.
for i in (data0, data1, data2):
    print(i['payload'].keys())

dict_keys(['items', 'max_page', 'next_page', 'page', 'previous_page'])
dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'stores'])
dict_keys(['max_page', 'next_page', 'page', 'previous_page', 'sales'])


In [8]:
def get_any(resource):
    """Download every page of an API resource, save it as CSV, and return it.

    Pages are requested starting at ``/api/v1/<resource>`` and followed
    through the ``next_page`` path in each payload until the reported
    ``page`` reaches ``max_page`` (or no ``next_page`` is given).

    Parameters
    ----------
    resource : str
        Resource name, e.g. ``'items'``. It is used as the endpoint name,
        as the payload key holding the records, and as the stem of the
        CSV file written to the current working directory.

    Returns
    -------
    pandas.DataFrame
        All records from all pages, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    base_url = 'https://python.zach.lol'
    next_page = '/api/v1/' + resource
    pages = []

    while True:
        r = requests.get(base_url + next_page)
        # Fail loudly on an HTTP error instead of on a confusing JSON/KeyError.
        r.raise_for_status()
        payload = r.json()['payload']
        pages.append(pd.DataFrame(payload[resource]))

        next_page = payload['next_page']
        # A missing next_page is checked first so the page comparison is
        # never evaluated against an absent value.
        if not next_page or payload['page'] >= payload['max_page']:
            break

    # Concatenate once (not once per page) and renumber the rows so the
    # CSV index column matches the returned frame; the per-page indices
    # would otherwise repeat in the file.
    df = pd.concat(pages, ignore_index=True)
    df.to_csv(resource + '.csv')
    return df

In [9]:
def get_sales(csv):
    """Download every page of the ``sales`` resource and save it as CSV.

    Pages are requested starting at ``/api/v1/sales`` and followed through
    the ``next_page`` path in each payload until the reported ``page``
    reaches ``max_page`` (or no ``next_page`` is given), so a single-page
    response is handled as well as a multi-page one.

    Parameters
    ----------
    csv : str or path-like
        Destination handed to ``DataFrame.to_csv``.

    Returns
    -------
    pandas.DataFrame
        All sales records from all pages, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    base_url = 'https://python.zach.lol'
    next_page = '/api/v1/sales'
    pages = []

    while True:
        r = requests.get(base_url + next_page)
        # Fail loudly on an HTTP error instead of on a confusing JSON/KeyError.
        r.raise_for_status()
        payload = r.json()['payload']
        pages.append(pd.DataFrame(payload['sales']))

        next_page = payload['next_page']
        # Stop before building a URL from a missing next_page.
        if not next_page or payload['page'] >= payload['max_page']:
            break

    # Concatenate once and renumber the rows so the CSV index column
    # matches the returned frame instead of repeating per-page indices.
    df = pd.concat(pages, ignore_index=True)
    df.to_csv(csv)
    return df

In [10]:
def get_stores(csv):
    """Download every page of the ``stores`` resource and save it as CSV.

    Pages are requested starting at ``/api/v1/stores`` and followed
    through the ``next_page`` path in each payload until the reported
    ``page`` reaches ``max_page`` (or no ``next_page`` is given), so no
    records are dropped if the resource spans more than one page.

    Parameters
    ----------
    csv : str or path-like
        Destination handed to ``DataFrame.to_csv``.

    Returns
    -------
    pandas.DataFrame
        All store records from all pages, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    base_url = 'https://python.zach.lol'
    next_page = '/api/v1/stores'
    pages = []

    while True:
        r = requests.get(base_url + next_page)
        # Fail loudly on an HTTP error instead of on a confusing JSON/KeyError.
        r.raise_for_status()
        payload = r.json()['payload']
        pages.append(pd.DataFrame(payload['stores']))

        next_page = payload['next_page']
        # Stop before building a URL from a missing next_page.
        if not next_page or payload['page'] >= payload['max_page']:
            break

    # Renumber the rows so the CSV index column matches the returned frame.
    df = pd.concat(pages, ignore_index=True)
    df.to_csv(csv)
    return df

In [11]:
def get_items(csv):
    """Download every page of the ``items`` resource and save it as CSV.

    Pages are requested starting at ``/api/v1/items`` and followed through
    the ``next_page`` path in each payload until the reported ``page``
    reaches ``max_page`` (or no ``next_page`` is given), so a single-page
    response is handled as well as a multi-page one.

    Parameters
    ----------
    csv : str or path-like
        Destination handed to ``DataFrame.to_csv``.

    Returns
    -------
    pandas.DataFrame
        All item records from all pages, indexed 0..n-1.

    Raises
    ------
    requests.HTTPError
        If any page request comes back with an error status.
    """
    base_url = 'https://python.zach.lol'
    next_page = '/api/v1/items'
    pages = []

    while True:
        r = requests.get(base_url + next_page)
        # Fail loudly on an HTTP error instead of on a confusing JSON/KeyError.
        r.raise_for_status()
        payload = r.json()['payload']
        pages.append(pd.DataFrame(payload['items']))

        next_page = payload['next_page']
        # Stop before building a URL from a missing next_page.
        if not next_page or payload['page'] >= payload['max_page']:
            break

    # Concatenate once and renumber the rows so the CSV index column
    # matches the returned frame instead of repeating per-page indices.
    df = pd.concat(pages, ignore_index=True)
    df.to_csv(csv)
    return df

In [12]:
def merge_dfs(df1, df2, how, left, right):
    """Join two frames on differently named key columns.

    ``df1`` is the left operand and ``df2`` the right one; ``how`` is the
    join type, ``left`` names the key column(s) in ``df1`` and ``right``
    the key column(s) in ``df2``. Returns the result of ``pd.merge``.
    """
    join_options = {'how': how, 'left_on': left, 'right_on': right}
    merged = pd.merge(df1, df2, **join_options)
    return merged

In [13]:
# Load the three tables from CSV files under the user's home directory;
# index_col=0 takes the first CSV column as the row index.
# NOTE(review): these paths are hard-coded to one machine's layout.
items = pd.read_csv('~/python/Methodologies/time_series/items.csv', index_col=0)
stores = pd.read_csv('~/python/Methodologies/time_series/stores.csv', index_col=0)
sales = pd.read_csv('~/python/Methodologies/time_series/sales.csv', index_col=0)

In [14]:
# (rows, columns) of the sales table, for comparison with the merged frames.
sales.shape

(913000, 5)

In [15]:
# Left-join sales onto items, matching items.item_id with sales.item.
df = merge_dfs(items, sales, 'left', 'item_id', 'item')
# Display the merged shape to compare against sales.shape.
df.shape

(913000, 11)

In [16]:
# Preview the first rows of the items/sales merge.
df.head()

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14,item,sale_amount,sale_date,sale_id,store
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,1
1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,1
2,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,1
3,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,1
4,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,1,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,1


In [17]:
# Left-join store details, matching df.store with stores.store_id.
# NOTE(review): this rebinds `df` from its own previous value, so the
# cell is not idempotent when re-run.
df = merge_dfs(df, stores, 'left', 'store', 'store_id')
# Display the merged shape.
df.shape

(913000, 16)

In [18]:
def clean_up_columns(frame=None):
    """Return a copy of a frame without the 'store' and 'item' columns.

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Frame to clean. When omitted, the notebook-level ``df`` is used,
        which is what the original zero-argument call did.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.

    Raises
    ------
    KeyError
        If either column is absent (for example when the result of a
        previous call is passed back in).
    """
    if frame is None:
        # Backward-compatible default: fall back to the global `df`.
        frame = df
    return frame.drop(columns=['store', 'item'])

In [19]:
# Rebind `df` to the frame returned by clean_up_columns(), which drops the
# 'store' and 'item' columns from the notebook-level `df`.
df = clean_up_columns()

In [22]:
# List the remaining column labels after the clean-up.
df.columns

Index(['item_brand', 'item_id', 'item_name', 'item_price', 'item_upc12',
       'item_upc14', 'sale_amount', 'sale_date', 'sale_id', 'store_address',
       'store_city', 'store_id', 'store_state', 'store_zipcode'],
      dtype='object')

In [21]:
# Display the final merged frame.
df

Unnamed: 0,item_brand,item_id,item_name,item_price,item_upc12,item_upc14,sale_amount,sale_date,sale_id,store_address,store_city,store_id,store_state,store_zipcode
0,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,13.0,"Tue, 01 Jan 2013 00:00:00 GMT",1,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
1,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,11.0,"Wed, 02 Jan 2013 00:00:00 GMT",2,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
2,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,14.0,"Thu, 03 Jan 2013 00:00:00 GMT",3,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
3,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,13.0,"Fri, 04 Jan 2013 00:00:00 GMT",4,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
4,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,10.0,"Sat, 05 Jan 2013 00:00:00 GMT",5,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
5,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12.0,"Sun, 06 Jan 2013 00:00:00 GMT",6,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
6,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,10.0,"Mon, 07 Jan 2013 00:00:00 GMT",7,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
7,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,9.0,"Tue, 08 Jan 2013 00:00:00 GMT",8,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
8,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,12.0,"Wed, 09 Jan 2013 00:00:00 GMT",9,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
9,Riceland,1,Riceland American Jazmine Rice,0.84,35200264013,35200264013,9.0,"Thu, 10 Jan 2013 00:00:00 GMT",10,12125 Alamo Ranch Pkwy,San Antonio,1,TX,78253
