# 1. Using an API to collate images

## Example usage: [the NASA images API](https://images.nasa.gov/docs/images.nasa.gov_api_docs.pdf)

### 1. Get the data

In [10]:
import requests 
import pandas as pd


search_items = ['europa', 'mars', 'voyager']

# The endpoint is the same for every query, so define it once outside the loop.
url = "https://images-api.nasa.gov/search"

# One parsed JSON payload per search term, in the order of `search_items`.
results = []

for search_term in search_items:
    params = {
        "q": search_term,
        "year_start": "1999",
        "year_end": "2020",
        "media_type": "image",
    }
    # A timeout stops the cell from hanging indefinitely if the API is unreachable.
    response = requests.get(url, params=params, timeout=30)
    # Fail loudly on an HTTP error instead of storing an error payload that
    # may lack the keys the later cells index into.
    response.raise_for_status()
    results.append(response.json())

    

### 2. Convert the data into a pandas dataframe

In [28]:
# One frame per search response, built from that response's list of items.
dfs = [pd.DataFrame(result['collection']['items']) for result in results]

# Stack the per-search frames and renumber the rows from zero.
df = pd.concat(dfs, ignore_index=True)

### 3. Extract the metadata of interest

In [31]:
# Each cell of 'data' and 'links' is indexed at [0] to reach its first entry,
# from which the title and the href are read.
title = [records[0]['title'] for records in df['data']]
URLs = [links[0]['href'] for links in df['links']]

# Store both lists as new columns next to the raw API fields.
df['title'], df['URLs'] = title, URLs

In [None]:
# Preview the first rows only rather than dumping the whole frame;
# the added 'title' and 'URLs' columns appear as the last columns.
df.head()

### 4. Download the images of interest

In [None]:
# Titles come straight from the API and are used as file names, so replace
# path separators and other characters that are invalid in file names on
# common platforms; otherwise a title containing "/" would break the path.
unsafe_chars = str.maketrans({char: "_" for char in '\\/:*?"<>|'})

for image_title, image_url in zip(df['title'], df['URLs']):
    # A timeout stops one stalled download from blocking the whole loop.
    response = requests.get(image_url, timeout=30)
    if not response.ok:
        # Skip failed downloads rather than saving an error body as a ".jpeg".
        print(f"Skipping {image_title!r}: HTTP {response.status_code}")
        continue
    safe_title = image_title.translate(unsafe_chars)
    # NOTE(review): rows that share a title still overwrite each other's file.
    with open("/Users/james/Documents/images/images/" + safe_title + ".jpeg", 'wb') as file:
        file.write(response.content)

In [19]:
# Write the content of the currently bound `response` to a fixed example file.
with open("/Users/james/Documents/images/images/example.jpeg", 'wb') as file:
    file.write(response.content)

In [None]:
# Export the full frame (raw API fields plus the added columns) to a spreadsheet.
df.to_excel(excel_writer='/Users/james/Documents/images/images/data.xlsx')

## Exercise: Find an image API and try to access it using the `requests` module

# 2. Interrogating an existing dataset

In [9]:
import glob
import os
import matplotlib.pyplot as plt
from IPython.display import display, HTML, Image

# glob returns matches in arbitrary order; sort them so that anything derived
# from `files` (filtering, display order) is the same on every run.
files = sorted(glob.glob("/Users/james/Documents/images/images/*.jpeg"))

In [10]:
# Keep only the paths that contain "voyager", ignoring case.
vger = [jpeg_path for jpeg_path in files if "voyager" in jpeg_path.lower()]

In [None]:
# Render each selected image inline at a fixed width of 200.
for image_path in vger:
    thumbnail = Image(filename=image_path, width=200)
    display(thumbnail)


## Exercise: Find an existing dataset and sub-sample it for a topic of interest