In [None]:
import glob
import json
import os
import string

import requests

from secrets import ASTROBIN_KEY, ASTROBIN_SECRET

In [None]:
# Base URL of the AstroBin site.  HTTPS is used because search() sends the API
# key and secret as URL parameters, which plain HTTP would expose in cleartext.
astrobin_url = 'https://www.astrobin.com'
# Path of the image endpoint, appended to astrobin_url for the first request of
# every query.
api_url = '/api/v1/image/'
# Credentials that search() merges into the parameters of every query.
base_params = {'api_key': ASTROBIN_KEY, 'api_secret': ASTROBIN_SECRET}
# Output locations.  Each value ends with '/' because the rest of the notebook
# builds file paths by plain string concatenation.
data_dir = 'data/'
jupiter_dir = data_dir+'jupiter/'
moon_dir = data_dir+'moon/'

In [None]:
def search(queries):
    """Run each query against the AstroBin image API and collect every result.

    Args:
        queries: iterable of dicts of filter parameters (for example
            ``{'title__icontains': 'jupiter'}``).  The dicts are not modified.

    Returns:
        A list holding the ``objects`` entries of every page fetched, for all
        queries in order.  A result matched by more than one query appears
        more than once.

    A query whose first request fails is reported and contributes nothing.  If
    a follow-up page fails, paging for that query stops and the results fetched
    so far are kept.
    """
    results = []
    for query in queries:
        query_results = []
        print('Querying... ' + str(query))
        # Merge the credentials into a copy: the caller's dict stays untouched
        # and the progress messages below never print the API key/secret.
        params = dict(query)
        params.update(base_params)
        search_request = requests.get(astrobin_url+api_url, params=params)
        if search_request.status_code == 200:
            page_results = json.loads(search_request.text)
            print('Attempting to fetch ' + str(page_results['meta']['total_count']) + ' results...')
            query_results.extend(page_results['objects'])
            while page_results['meta']['next']:
                search_request = requests.get(astrobin_url+page_results['meta']['next'])
                if search_request.status_code != 200:
                    # page_results still describes the last page that worked, so
                    # looping again would request the same URL forever.  Give up
                    # on this query and keep what was fetched.
                    print('NEXT PAGE FAILED: ' + str(page_results['meta']['offset']))
                    break
                page_results = json.loads(search_request.text)
                query_results.extend(page_results['objects'])
                print(page_results['meta']['offset'])
            else:
                # Only reached when the loop ended without a break, i.e. the
                # last page reported no 'next' link.
                print('No more pages!')

        else:
            print('SEARCH FAILED')
            print(query)

        print('Fetched ' + str(len(query_results)) + ' results for query ' + str(query))
        results.extend(query_results)

    print('Fetched ' + str(len(results)) + ' results total.')
    return results

In [None]:
def build_metadata(results):
    """Index search results by their ``'id'``, dropping duplicates.

    Args:
        results: list of result dicts, each containing an ``'id'`` key.

    Returns:
        A dict mapping each distinct id to the first result that carried it.

    A one-line summary of the unique/total counts is printed.
    """
    metadata = {}
    for result in results:
        # setdefault leaves an existing entry alone, so the first occurrence
        # of an id wins.
        metadata.setdefault(result['id'], result)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Found ' + str(len(metadata)) + ' unique metadata entries for ' + str(len(results)) + ' results.')
    return metadata

In [None]:
def download_image(details, image_dir=''):
    """Fetch one image and save it as ``<image_dir><id>.jpg``.

    Args:
        details: dict with the image's ``'id'`` and its download URL under
            ``'url_real'``.
        image_dir: prefix concatenated directly in front of the file name, so
            it should end with a path separator (or be empty for the current
            directory).

    Raises:
        Exception: if the image request does not return HTTP 200.
    """
    # The id may be an int (as decoded from JSON) or a string; normalise it so
    # the concatenations below cannot raise TypeError.
    image_id = str(details['id'])
    image_request = requests.get(details['url_real'])
    if image_request.status_code != 200:
        raise Exception('IMAGE REQUEST FAILED: ' + image_id)
    with open(image_dir+image_id+'.jpg', 'wb') as image:
        image.write(image_request.content)

In [None]:
def download_all_images(metadata, directory):
    """Download every image in ``metadata`` that is not already in ``directory``.

    Images already present are detected by listing ``directory`` for ``*.jpg``
    files and taking each file name without its extension as an image id.  Ids
    are compared as strings, so the ``metadata`` keys may be ints or strings.

    Args:
        metadata: dict mapping image id -> details dict; the details are handed
            to ``download_image``.
        directory: prefix concatenated directly in front of file names, so it
            should end with a path separator.

    A failed download is printed and skipped.  A summary line with the number
    of images found, downloaded and present in total is printed at the end.
    """
    print('Attempting to download '+str(len(metadata))+' images...')
    found_images = glob.glob(directory+'*.jpg')
    # Use only the file name stem as the id; characters in the directory part
    # of the path must not leak into it.
    all_images = set(
        os.path.splitext(os.path.basename(filename))[0]
        for filename in found_images
    )
    downloaded_images = 0
    for image_id, image_details in metadata.items():
        image_key = str(image_id)
        if image_key in all_images:
            continue
        try:
            download_image(image_details, directory)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(e)
        else:
            downloaded_images += 1
            # In case duplicate results weren't already removed.
            all_images.add(image_key)
    print('Found '+str(len(found_images))+' and downloaded '+str(downloaded_images)+' images for a total of '+str(len(all_images))+' images.')

In [None]:
# Look for "jupiter" in image titles and, as a separate query, in descriptions.
jupiter_results = search([
    {'title__icontains': 'jupiter'},
    {'description__icontains': 'jupiter'},
])

In [None]:
# Write the raw (not yet de-duplicated) Jupiter search results to disk as JSON.
with open(jupiter_dir + 'jupiter_results.json', 'w') as outfile:
    outfile.write(json.dumps(jupiter_results))

In [None]:
# Collapse the Jupiter results into one entry per image id.
jupiter_metadata = build_metadata(results=jupiter_results)

In [None]:
# Write the id-indexed Jupiter metadata to disk as JSON.
with open(jupiter_dir + 'jupiter_metadata.json', 'w') as outfile:
    outfile.write(json.dumps(jupiter_metadata))

In [None]:
# Download the Jupiter images that are not already present in jupiter_dir.
download_all_images(metadata=jupiter_metadata, directory=jupiter_dir)

In [None]:
# Look for "moon" in image titles and, as a separate query, in descriptions.
moon_results = search([
    {'title__icontains': 'moon'},
    {'description__icontains': 'moon'},
])

In [None]:
# Write the raw (not yet de-duplicated) Moon search results to disk as JSON.
with open(moon_dir + 'moon_results.json', 'w') as outfile:
    outfile.write(json.dumps(moon_results))

In [None]:
# Collapse the Moon results into one entry per image id.
moon_metadata = build_metadata(results=moon_results)

In [None]:
# Write the id-indexed Moon metadata to disk as JSON.
with open(moon_dir + 'moon_metadata.json', 'w') as outfile:
    outfile.write(json.dumps(moon_metadata))