# Testing the API

Satellite data starts being recored on {'date': '2015-06-13'}

In [1]:
import requests
import os
from IPython.display import Image
import pandas as pd
import numpy as np
import datetime

In [2]:
# All Data
url = 'https://epic.gsfc.nasa.gov/api/natural/all'

# Most Recent Pictures
url = 'https://epic.gsfc.nasa.gov/api/natural/'

# Get data from a specific date
date = '20180428'
url = f'https://epic.gsfc.nasa.gov/api/enhanced/date/{date}'
#res = requests.get(url)

In [3]:
# Example data retrieved
#print(f'There are {len(res.json())} images available')
#res.json()[0]

# getSat()

In [4]:
## VERSION 05
def getSat(collection, img_type, YYYY,MM,DD, queryParams=dict()):
    
    date = f"{YYYY}-{MM}-{DD}"
    
    # Define types of image collections and size 
    collections = {'nat': 'natural', 'enh':'enhanced'}

    size = {'png': ['png', 'png'],
            'jpg': ['jpg', 'jpg'],
            'thumb': ['thumbs', 'jpg']}
    
    
    # Request API metadata using query parameters
    host = f'https://epic.gsfc.nasa.gov'
    metadata = f"{host}/api/{collections[collection]}/date/{date}"
    res = requests.get(metadata, params=queryParams)
    
    
    # Store reelevant data from requests' response body
    data = []
    for e in res.json():
        data.append((e['image'], e['centroid_coordinates'], e['date']))
    
    
    # Report status of query response
    print(res.status_code, res.url)
    print(f"There are {len(res.json())} satellite images available for this date: {date}")

    
    # Construct the image resource url
    satellite_images = []
    if len(data) > 0:
        for e in data:
            img = f"{host}/archive/{collections[collection]}/{YYYY}/{MM}/{DD}/{size[img_type][0]}/{e[0]}.{size[img_type][1]}"
            satellite_images.append([img, e[1], len(res.json())])
            
    #Return relevant data as list
    return satellite_images

images = getSat('enh', 'thumb', '2020', '03', '01')

200 https://epic.gsfc.nasa.gov/api/enhanced/date/2020-03-01
There are 12 satellite images available for this date: 2020-03-01


# Working with cleaned dataset dates

In [5]:
df = pd.read_csv('OUTPUT/volcanic-eruptions.csv')
df = df.drop(columns='Unnamed: 0')

# Reach API to get the links for each volcanic eruption in the dataset

In [6]:
# Access the START dates on each eruptive phase

# Add arguments for which types of images i want to retrieve (natural/enhanced) (png, jpg, thumb)
def enrich_from_api():
# Version '01'

    # What are we going to retrieve from each request?
    img_urls = []
    sat_lats = []
    sat_lons = []
    pics_that_day = []
    
    # Start looping the dataframe
    ## ♠ OPTIMIZATION IDEA: 
    ##   Check if the row in question has the data already
    ##   If it has it, copy it and add it to the new list
    ##   If it does not have the data, request and add it.
    for phase_start in df['start']:
        print()
        print(phase_start)
        
        # Make the request to the API and store the data for a moment
        ## Normally, the output from this request should include several image urls
        ## at the next step of this function I pick only the last element
        phase_data = getSat('nat', 'thumb', *phase_start.split('-'))
        
        # If there is data in the response, include the response data to the lists 
        if len(phase_data) > 0:
            
            # Indexes here are [0] because I only want to get the first image of the day
            # Possibly change the index to [-1] to get the last picture from that day 
            print('This is the first available image for that date')
            print(phase_data[0])
            img_urls.append(phase_data[0][0])
            sat_lats.append(phase_data[0][1]['lat'])
            sat_lons.append(phase_data[0][1]['lon'])
            pics_that_day.append(phase_data[0][2])
            
        # If there are no images to retrieve, set the null values
        else:
            img_urls.append('no-img')
            sat_lats.append('0')
            sat_lons.append('0')
            pics_that_day.append(0)
            
            
    # POSSIBLY INCLUDE A ZIP HERE ~
    
    return [img_urls, sat_lats, sat_lons, pics_that_day]

In [7]:
# Watch out, this cell takes about 4 minutes to run
# ♠ OPTIMIZATION IDEA: 
## Define this as 'vesuvius.updateData()' to be called from main.py when `--update` flag is True

# Call the function
new_data = enrich_from_api()

# Store the new data in the following columns
df['start_img'] = new_data[0]
df['sat_lats'] = new_data[1]
df['sat_lons'] = new_data[2]
df['start_img_available_in_api'] = new_data[3]


2020-02-10
200 https://epic.gsfc.nasa.gov/api/natural/date/2020-02-10
There are 0 satellite images available for this date: 2020-02-10

2020-01-31
200 https://epic.gsfc.nasa.gov/api/natural/date/2020-01-31
There are 0 satellite images available for this date: 2020-01-31

2020-01-12
200 https://epic.gsfc.nasa.gov/api/natural/date/2020-01-12
There are 0 satellite images available for this date: 2020-01-12

2020-01-12
200 https://epic.gsfc.nasa.gov/api/natural/date/2020-01-12
There are 0 satellite images available for this date: 2020-01-12

2020-01-11
200 https://epic.gsfc.nasa.gov/api/natural/date/2020-01-11
There are 0 satellite images available for this date: 2020-01-11

2019-12-09
200 https://epic.gsfc.nasa.gov/api/natural/date/2019-12-09
There are 0 satellite images available for this date: 2019-12-09

2019-12-07
200 https://epic.gsfc.nasa.gov/api/natural/date/2019-12-07
There are 0 satellite images available for this date: 2019-12-07

2019-12-05
200 https://epic.gsfc.nasa.gov/api/n

200 https://epic.gsfc.nasa.gov/api/natural/date/2018-11-07
There are 12 satellite images available for this date: 2018-11-07
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/11/07/thumbs/epic_1b_20181107021516.jpg', {'lat': -12.128906, 'lon': 151.347656}, 12]

2018-10-26
200 https://epic.gsfc.nasa.gov/api/natural/date/2018-10-26
There are 13 satellite images available for this date: 2018-10-26
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/10/26/thumbs/epic_1b_20181026010436.jpg', {'lat': -6.943359, 'lon': 165.651855}, 13]

2018-10-21
200 https://epic.gsfc.nasa.gov/api/natural/date/2018-10-21
There are 2 satellite images available for this date: 2018-10-21
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/10/21/thumbs/epic_1b_20181021041239.jpg', {'lat': -4.943848, 'lon': 117.246094}, 2]

2018-10-04
200 https://epic.gsfc.nasa.gov/api/natura

200 https://epic.gsfc.nasa.gov/api/natural/date/2018-04-18
There are 13 satellite images available for this date: 2018-04-18
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/04/18/thumbs/epic_1b_20180418002712.jpg', {'lat': 16.369629, 'lon': 169.49707}, 13]

2018-04-03
200 https://epic.gsfc.nasa.gov/api/natural/date/2018-04-03
There are 10 satellite images available for this date: 2018-04-03
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/04/03/thumbs/epic_1b_20180403000830.jpg', {'lat': 6.628418, 'lon': 170.939941}, 10]

2018-04-01
200 https://epic.gsfc.nasa.gov/api/natural/date/2018-04-01
There are 13 satellite images available for this date: 2018-04-01
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2018/04/01/thumbs/epic_1b_20180401003633.jpg', {'lat': 5.273438, 'lon': 163.806152}, 13]

2018-03-01
200 https://epic.gsfc.nasa.gov/api/natural/

200 https://epic.gsfc.nasa.gov/api/natural/date/2017-06-04
There are 21 satellite images available for this date: 2017-06-04
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2017/06/04/thumbs/epic_1b_20170604003634.jpg', {'lat': 25.393066, 'lon': -179.94873}, 21]

2017-06-04
200 https://epic.gsfc.nasa.gov/api/natural/date/2017-06-04
There are 21 satellite images available for this date: 2017-06-04
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2017/06/04/thumbs/epic_1b_20170604003634.jpg', {'lat': 25.393066, 'lon': -179.94873}, 21]

2017-05-23
200 https://epic.gsfc.nasa.gov/api/natural/date/2017-05-23
There are 22 satellite images available for this date: 2017-05-23
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2017/05/23/thumbs/epic_1b_20170523000830.jpg', {'lat': 26.242676, 'lon': -177.502441}, 22]

2017-05-10
200 https://epic.gsfc.nasa.gov/api/natu

200 https://epic.gsfc.nasa.gov/api/natural/date/2016-10-05
There are 13 satellite images available for this date: 2016-10-05
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2016/10/05/thumbs/epic_1b_20161005002712.jpg', {'lat': 1.889648, 'lon': 162.399902}, 13]

2016-09-21
200 https://epic.gsfc.nasa.gov/api/natural/date/2016-09-21
There are 13 satellite images available for this date: 2016-09-21
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2016/09/21/thumbs/epic_1b_20160921002712.jpg', {'lat': 4.094238, 'lon': 163.044434}, 13]

2016-09-13
200 https://epic.gsfc.nasa.gov/api/natural/date/2016-09-13
There are 13 satellite images available for this date: 2016-09-13
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2016/09/13/thumbs/epic_1b_20160913000830.jpg', {'lat': 4.87793, 'lon': 169.40918}, 13]

2016-09-13
200 https://epic.gsfc.nasa.gov/api/natural/da

200 https://epic.gsfc.nasa.gov/api/natural/date/2015-12-01
There are 10 satellite images available for this date: 2015-12-01
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2015/12/01/thumbs/epic_1b_20151201002712.jpg', {'lat': -16.501465, 'lon': 171.97998}, 10]

2015-11-24
200 https://epic.gsfc.nasa.gov/api/natural/date/2015-11-24
There are 9 satellite images available for this date: 2015-11-24
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2015/11/24/thumbs/epic_1b_20151124002712.jpg', {'lat': -13.864746, 'lon': 169.189453}, 9]

2015-11-14
200 https://epic.gsfc.nasa.gov/api/natural/date/2015-11-14
There are 12 satellite images available for this date: 2015-11-14
This is the first available image for that date
['https://epic.gsfc.nasa.gov/archive/natural/2015/11/14/thumbs/epic_1b_20151114003634.jpg', {'lat': -10.012207, 'lon': 163.161621}, 12]

2015-11-12
200 https://epic.gsfc.nasa.gov/api/natur

In [8]:
# Display a sample of the new found data
df[20:30]

Unnamed: 0,v_num,v_name,erup_num,erup_cat,vei,start_y,start_m,start_d,end_y,end_m,end_d,lat,lon,start,end,delta,start_img,sat_lats,sat_lons,start_img_available_in_api
20,290250,Raikoke,22321,Confirmed Eruption,,2019,6,22,2019,7,1,48.292,153.25,2019-06-22,2019-07-01,9 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,20.3577,173.43,22
21,233020,"Fournaise, Piton de la",22318,Confirmed Eruption,,2019,6,11,2019,10,27,-21.244,55.708,2019-06-11,2019-10-27,138 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,22.6318,178.857,22
22,311120,Great Sitkin,22322,Confirmed Eruption,,2019,6,1,2019,6,7,52.076,-176.13,2019-06-01,2019-06-07,6 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,24.2944,177.664,22
23,290240,Sarychev Peak,22320,Confirmed Eruption,2.0,2019,5,16,2019,10,7,48.092,153.2,2019-05-16,2019-10-07,144 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,25.5542,-177.891,21
24,341040,Colima,22323,Confirmed Eruption,,2019,5,11,2019,7,12,19.514,-103.62,2019-05-11,2019-07-12,62 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,25.2686,171.98,22
25,282110,Asosan,22317,Confirmed Eruption,,2019,4,16,2020,2,17,32.884,131.104,2019-04-16,2020-02-17,307 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,17.3584,178.857,13
26,300260,Klyuchevskoy,22310,Confirmed Eruption,,2019,4,9,2019,7,2,56.056,160.642,2019-04-09,2019-07-02,84 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,13.1396,176.968,13
27,352090,Sangay,22312,Confirmed Eruption,,2019,3,26,2020,2,19,-2.005,-78.341,2019-03-26,2020-02-19,330 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,3.20801,173.738,11
28,344020,San Cristobal,22311,Confirmed Eruption,,2019,3,4,2019,3,4,12.702,-87.004,2019-03-04,2019-03-04,0 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,-12.085,170.178,13
29,233020,"Fournaise, Piton de la",22307,Confirmed Eruption,,2019,2,18,2019,3,10,-21.244,55.708,2019-02-18,2019-03-10,20 days 00:00:00.000000000,https://epic.gsfc.nasa.gov/archive/natural/201...,-19.5337,171.255,13


In [9]:
0/0

ZeroDivisionError: division by zero

In [10]:
# For this to work, it has to be an it, not a str
df.start_img_available_in_api.describe()

count    183.000000
mean      10.540984
std        7.684522
min        0.000000
25%        0.000000
50%       12.000000
75%       13.000000
max       22.000000
Name: start_img_available_in_api, dtype: float64

# Exceeding the Rate limit

In [9]:
import requests
from bs4 import BeautifulSoup

In [11]:
res = requests.get('https://www.google.com')
soup = BeautifulSoup(res.content)
soup

<!DOCTYPE html>
<html itemscope="" itemtype="http://schema.org/WebPage" lang="es"><head><meta content="Google.es permite acceder a la información mundial en castellano, catalán, gallego, euskara e inglés." name="description"/><meta content="noodp" name="robots"/><meta content="text/html; charset=utf-8" http-equiv="Content-Type"/><meta content="/images/branding/googleg/1x/googleg_standard_color_128dp.png" itemprop="image"/><title>Google</title><script nonce="wTTMm59ifSPD/1N64mJCWw==">(function(){window.google={kEI:'mU6UXqnYIMqelwTt0aCwAQ',kEXPI:'31',kBL:'DguZ'};google.sn='webhp';google.kHL='es';})();(function(){google.lc=[];google.li=0;google.getEI=function(a){for(var c;a&&(!a.getAttribute||!(c=a.getAttribute("eid")));)a=a.parentNode;return c||google.kEI};google.getLEI=function(a){for(var c=null;a&&(!a.getAttribute||!(c=a.getAttribute("leid")));)a=a.parentNode;return c};google.ml=function(){return null};google.time=function(){return Date.now()};google.log=function(a,c,b,d,g){if(b=google

In [12]:
res = requests.get('https://epic.gsfc.nasa.gov/archive/natural/2018/04/22/png/epic_1b_20180422003633.png')
soup = BeautifulSoup(res.content)
soup

ConnectionError: HTTPSConnectionPool(host='epic.gsfc.nasa.gov', port=443): Max retries exceeded with url: /archive/natural/2018/04/22/png/epic_1b_20180422003633.png (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x7fe3c284f8d0>: Failed to establish a new connection: [Errno 101] Network is unreachable',))

In [None]:
df.to_csv('OUTPUT/enriched-data.csv')

In [None]:
display(Image(url=images[0][0]))

In [None]:
df[20:50]

In [None]:
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0,100, size=(len(date_rng)))
df.head()

In [None]:
date_rng = pd.date_range(start='1/1/2015', end='10/04/2020', freq='D')
date_rng

In [None]:
df['datetime'] = pd.to_datetime(df.date)
df = df.set_index('datetime')

#list only the second day of each month
#df[df.index.day == 2]

# calculate mean of each year
df.resample('M').mean()