In [1]:
import json

In [2]:
import requests
from urllib.parse import urlencode
import uuid
import hashlib
import base64
from email.utils import formatdate

class EDAN:
    """Small client for the EDAN collections-search endpoint (``edan_base``).

    Every request is signed with the application key and sent through a
    single ``requests.Session`` that is created once per client instance.
    """

    edan_base = 'http://edan.si.edu/metadata/v2.0/collections/search.htm'

    def __init__(self, app_id, app_key):
        """Store the credentials and open the HTTP session used by all calls.

        Args:
            app_id: Application identifier, sent as the ``X-AppId`` header.
            app_key: Application key; it is only mixed into the request
                signature and is never sent as a header itself.
        """
        self.app_id = app_id
        self.app_key = app_key
        self.session = requests.Session()

    def query(self, query_string, start=0, rows=10):
        """Run one signed search request and return the decoded JSON body.

        Args:
            query_string: Value sent as the ``q`` query parameter.
            start: Value sent as the ``start`` query parameter (result offset).
            rows: Value sent as the ``rows`` query parameter (page size).

        Returns:
            Whatever ``Response.json()`` yields for the reply.

        Raises:
            Anything raised by ``requests`` while sending the request or
            decoding a reply that is not valid JSON.
        """
        request_date = formatdate()
        nonce = str(uuid.uuid4())
        query_params = {'q': query_string,
                        'rows': rows,
                        'start': start}

        # The signature covers the URL-encoded parameters, so the dict order
        # above must stay in sync with what is sent on the wire.
        query_param_string = urlencode(query_params)
        string_to_sign = '\n'.join([nonce, query_param_string, request_date,
                                    self.app_key])

        # X-AuthContent is the base64 encoding of the *hex* SHA-1 digest.
        hashed_string = hashlib.sha1(string_to_sign.encode()).hexdigest()
        encoded_string = base64.b64encode(hashed_string.encode())
        request_headers = {'X-AppId': self.app_id,
                           'X-Nonce': nonce,
                           'X-RequestDate': request_date,
                           'X-AuthContent': encoded_string}

        response = self.session.get(self.edan_base, params=query_params,
                                    headers=request_headers)
        return response.json()

    def fetch_all(self, query_string, step=100):
        """Collect every row matching ``query_string`` by paging through results.

        A one-row probe request is issued first to read ``rowCount``; the
        result set is then fetched in pages of ``step`` rows.

        Args:
            query_string: Value sent as the ``q`` query parameter.
            step: Number of rows requested per page.

        Returns:
            A list with the concatenated ``rows`` entries of every page. The
            list is empty when the probe response has no ``rowCount`` or
            reports zero rows.

        Raises:
            KeyError: If a page response has no ``rows`` entry.
        """
        results = []
        probe = self.query(query_string, rows=1)

        # A response without 'rowCount' means there is nothing to page through.
        # Defaulting to 0 avoids the UnboundLocalError the count would
        # otherwise trigger in the comparison below.
        result_count = probe['rowCount'] if 'rowCount' in probe else 0

        if result_count > 0:
            for start in range(0, result_count, step):
                page = self.query(query_string, start=start, rows=step)
                results += page['rows']
        return results
        

In [3]:
# Read the EDAN credentials from a local JSON file so they stay out of the notebook.
with open('edan_api_key.json') as api_json:
    edan_key = json.loads(api_json.read())

# Show only the application id as a sanity check; the key itself is not printed.
print(edan_key['APP_ID'])

OCIO_DATA_SCIENCE


In [4]:
# Build the API client from the credentials loaded from the key file.
edan = EDAN(app_id=edan_key['APP_ID'], app_key=edan_key['APP_KEY'])

In [5]:
# Query strings handed to EDAN.query / EDAN.fetch_all in the cells below.
# NOTE(review): these look like accession-number prefixes with a trailing
# wildcard, one for the negatives and one for the prints — confirm.
NEGATIVE_QUERY = '2007.1.69*'
PRINT_QUERY = '2012.137*'

In [6]:
# Smoke test: request a single row at offset 10 and display the raw response.
test_call = edan.query(query_string=NEGATIVE_QUERY, rows=1, start=10)
test_call

{'rows': [{'id': 'edanmdm-nmaahc_2007.1.69.1.11.A',
   'title': 'Studio Portrait of a Couple Sitting, Ruth Ann Phinesee',
   'unitCode': 'NMAAHC',
   'linkedId': '0',
   'type': 'edanmdm',
   'url': 'edanmdm:nmaahc_2007.1.69.1.11.A',
   'content': {'descriptiveNonRepeating': {'record_ID': 'nmaahc_2007.1.69.1.11.A',
     'online_media': {'mediaCount': '1',
      'media': [{'thumbnail': 'http://ids.si.edu/ids/deliveryService?id=NMAAHC-HCA_1_11_A',
        'idsId': 'NMAAHC-HCA_1_11_A',
        'type': 'Images',
        'content': 'http://ids.si.edu/ids/deliveryService?id=NMAAHC-HCA_1_11_A'}]},
     'unit_code': 'NMAAHC',
     'title_sort': 'STUDIO PORTRAIT OF A COUPLE SITTING, RUTH ANN PHINESEE',
     'title': {'label': 'Object Name',
      'content': 'Studio Portrait of a Couple Sitting, Ruth Ann Phinesee'},
     'data_source': 'National Museum of African American History and Culture'},
    'indexedStructured': {'object_type': ['Portraits', 'Photographs'],
     'geoLocation': [{'L1': {'t

In [7]:
# Page through every row matching the negatives query and show how many came back.
neg_results = edan.fetch_all(query_string=NEGATIVE_QUERY)
len(neg_results)

4272

In [8]:
# Page through every row matching the prints query and show how many came back.
print_results = edan.fetch_all(query_string=PRINT_QUERY)
len(print_results)

579

In [9]:
# Merge both result sets into one new list ordered by each record's 'id'.
combined_metadata = sorted(
    neg_results + print_results,
    key=lambda record: record['id'],
)
len(combined_metadata)

4851

In [10]:
# Save the merged, id-sorted records as indented JSON.
with open('data/metadata/edan_anderson_photos.json', mode='w') as json_out:
    json.dump(combined_metadata, fp=json_out, indent=2)