Should study [open-context-jupyter/Open Context Measurements.ipynb at master · ekansa/open-context-jupyter](https://github.com/ekansa/open-context-jupyter/blob/master/notebooks/Open%20Context%20Measurements.ipynb)

May make use of [digitalbazaar/pyld: JSON-LD processor written in Python](https://github.com/digitalbazaar/pyld)

In [1]:
from itertools import islice
import json
from urllib.parse import urljoin
from urllib.parse import urlencode, quote_plus

import requests
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter

from pyld import jsonld

import pandas as pd
import numpy as np
from pandas import DataFrame, Series, Index

In [2]:
def filter_none(d):
    """Return a new dict holding only the entries of ``d`` whose value is not ``None``.

    Falsy values other than ``None`` (``0``, ``''``, ``[]``, ...) are kept.
    """
    return {key: value for key, value in d.items() if value is not None}


class OpenContextAPI(object):
    """Small client for the Open Context JSON API.

    Holds a ``requests.Session`` configured with automatic retries and exposes
    generators that walk paginated search results.
    """

    def __init__(self, api_url="https://opencontext.org/"):
        """Create a client.

        Parameters
        ----------
        api_url : str
            Base URL that endpoint paths are joined onto.
        """
        self.api_url = api_url
        self.session = requests.Session()
        # Retry transient server errors (up to 5 attempts, backoff factor 0.3).
        retries = Retry(total=5,
                        backoff_factor=0.3,
                        status_forcelist=[500, 502, 503, 504])
        # The adapter is mounted on the ``api_url`` prefix, so this retry policy
        # only applies to URLs that start with it.
        self.session.mount(self.api_url, HTTPAdapter(max_retries=retries))

    def _pager(self, url):
        """Yield every item of ``features`` from ``url`` and all following pages.

        Each page is expected to be a JSON object; the next page is taken from
        its ``next-json`` key and iteration stops when that key is absent.

        Raises
        ------
        requests.exceptions.HTTPError
            If a page answers with a 4xx/5xx status. Previously such a response
            was ignored and then parsed as JSON anyway, which either failed with
            an unrelated decode error or silently truncated the results.
        """
        while url:
            r = self.session.get(url)
            if r.status_code != 200:
                # Surface client/server errors instead of hiding them.
                r.raise_for_status()
                # Non-error, non-200 status: there is no page to read.
                return
            # Parse the body once and reuse it for both the data and the link.
            page = r.json()
            yield from page.get('features', [])
            url = page.get('next-json', None)

    def projects_search(self, start=None, stop=None, rows=None):
        """Yield the features returned by the ``projects-search.json`` endpoint.

        ``start``, ``stop`` and ``rows`` are sent as query parameters; any that
        is ``None`` is left out of the request.
        """
        kw = filter_none({
            'start': start,
            'stop': stop,
            'rows': rows
        })

        url = urljoin(self.api_url, "projects-search.json")
        query = urlencode(kw, quote_via=quote_plus)
        if query:
            # Avoid a dangling "?" when no parameter was supplied.
            url = url + "?" + query
        yield from self._pager(url)

    def project(self, project_id):
        """Return the parsed JSON of ``projects/<project_id>.json``.

        Returns ``None`` when the response status is not 200.
        """
        url = urljoin(self.api_url, "projects/{}.json".format(project_id))
        r = self.session.get(url)
        if r.status_code == 200:
            return r.json()
        return None

    def subjects_search(self, project_id):
        """Yield the features of ``subjects-search.json`` filtered by project.

        start with https://opencontext.org/subjects-search.json/?proj=12-presidio-of-san-francisco

        more complex possibilities:
        https://opencontext.org/subjects-search.json/?proj=12-presidio-of-san-francisco#10/37.8001/-122.4559/20/any/Google-Satellite.json
        """
        # URL-encode the project id, consistent with projects_search.
        query = urlencode({'proj': project_id}, quote_via=quote_plus)
        url = urljoin(self.api_url, "subjects-search.json/") + "?" + query
        print (url)
        yield from self._pager(url)


In [3]:
# Client for the default Open Context endpoint (https://opencontext.org/).
oc = OpenContextAPI()

In [4]:
# Walk every page of the project search (requested with rows=1000) and
# collect all returned features into a single DataFrame.
features = oc.projects_search(rows=1000)

projects_df = pd.DataFrame([feature for feature in features])
projects_df

Unnamed: 0,category,geometry,id,label,properties,rdfs:isDefinedBy,type
0,oc-api:geo-record,"{'id': '#geo-rec-geom-1-of-109', 'type': 'Poin...",#record-1-of-109,Kenan Tepe,"{'id': '#rec-1-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/3DE4CD9C-259E-...,Feature
1,oc-api:geo-record,"{'id': '#geo-rec-geom-2-of-109', 'type': 'Poin...",#record-2-of-109,"Architecture and Urbanism at Seyitömer Höyük, ...","{'id': '#rec-2-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/347286db-b6c6-...,Feature
2,oc-api:geo-record,"{'id': '#geo-rec-geom-3-of-109', 'type': 'Poin...",#record-3-of-109,Iraq Heritage Program,"{'id': '#rec-3-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/GHF1PRJ0000000025,Feature
3,oc-api:geo-record,"{'id': '#geo-rec-geom-4-of-109', 'type': 'Poin...",#record-4-of-109,ARCE Sphinx Project 1979-1983 Archive,"{'id': '#rec-4-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/141e814a-ba2d-...,Feature
4,oc-api:geo-record,"{'id': '#geo-rec-geom-5-of-109', 'type': 'Poin...",#record-5-of-109,Bade Museum,"{'id': '#rec-5-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/B4345F6A-F926-...,Feature
5,oc-api:geo-record,"{'id': '#geo-rec-geom-6-of-109', 'type': 'Poin...",#record-6-of-109,Dhiban Excavation and Development Project,"{'id': '#rec-6-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/01D080DF-2F6B-...,Feature
6,oc-api:geo-record,"{'id': '#geo-rec-geom-7-of-109', 'type': 'Poin...",#record-7-of-109,Zooarchaeology of Öküzini Cave,"{'id': '#rec-7-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/8894EEC0-DC96-...,Feature
7,oc-api:geo-record,"{'id': '#geo-rec-geom-8-of-109', 'type': 'Poin...",#record-8-of-109,Murlo,"{'id': '#rec-8-of-109', 'feature-type': 'item ...",http://opencontext.org/projects/DF043419-F23B-...,Feature
8,oc-api:geo-record,"{'id': '#geo-rec-geom-10-of-109', 'type': 'Poi...",#record-10-of-109,Çukuriçi Höyük Zooarchaeology,"{'id': '#rec-10-of-109', 'feature-type': 'item...",http://opencontext.org/projects/BC90D462-6639-...,Feature
9,oc-api:geo-record,"{'id': '#geo-rec-geom-11-of-109', 'type': 'Poi...",#record-11-of-109,Köşk Höyük Faunal Data,"{'id': '#rec-11-of-109', 'feature-type': 'item...",http://opencontext.org/projects/05F5B702-2967-...,Feature


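I don't understand why we get only 78 results and not 109. (One clue: the record ids in the output above are not contiguous — `#record-9-of-109` is missing — so some records appear to be left out of `features`.)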

# Studying a specific project

https://opencontext.org/projects/C5B4F73B-5EF8-4099-590E-B0275EDBA2A7  --> Presidio project

aka https://opencontext.org/projects/12-presidio-of-san-francisco

In [5]:
# URI stored in the `properties` dict of the first project record.
projects_df['properties'].iloc[0].get('uri')

'http://opencontext.org/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E'

In [6]:
# Map each row index to the project URI held in its `properties` dict.
# (`json` is already imported in the notebook's first cell and is not needed
# here; a column-wise map replaces the slower row-wise apply(axis=1).)
k = projects_df['properties'].map(lambda props: props.get('uri'))
k.to_dict()

{0: 'http://opencontext.org/projects/3DE4CD9C-259E-4C14-9B03-8B10454BA66E',
 1: 'http://opencontext.org/projects/347286db-b6c6-4fd2-b3bd-b50316b0cb9f',
 2: 'http://opencontext.org/projects/GHF1PRJ0000000025',
 3: 'http://opencontext.org/projects/141e814a-ba2d-4560-879f-80f1afb019e9',
 4: 'http://opencontext.org/projects/B4345F6A-F926-4062-144E-3FBC175CC7B6',
 5: 'http://opencontext.org/projects/01D080DF-2F6B-4F59-BCF0-87543AC89574',
 6: 'http://opencontext.org/projects/8894EEC0-DC96-4304-1EFC-4572FD91717A',
 7: 'http://opencontext.org/projects/DF043419-F23B-41DA-7E4D-EE52AF22F92F',
 8: 'http://opencontext.org/projects/BC90D462-6639-4087-8527-6BB9E528E07D',
 9: 'http://opencontext.org/projects/05F5B702-2967-49B1-FEAA-9B2AA0184513',
 10: 'http://opencontext.org/projects/731B0670-CE2A-414A-8EF6-9C050A1C60F5',
 11: 'http://opencontext.org/projects/1B426F7C-99EC-4322-4069-E8DBD927CCF1',
 12: 'http://opencontext.org/projects/CF179695-1E6A-440F-1DDB-4FEA7B02A5B5',
 13: 'http://opencontext.org

In [7]:
# Is the Presidio project in that list?
#
# Match on the project UUID rather than on the full URI: the URIs returned by
# the search use the http:// scheme, so an equality test against the https://
# form of the URI can never match, whether or not the project is present.
# An empty result below means the project really is absent from the list.
PRESIDIO_UUID = 'C5B4F73B-5EF8-4099-590E-B0275EDBA2A7'

# Records without a 'uri' are treated as an empty string so `.str` works.
project_uris = projects_df['properties'].map(lambda props: props.get('uri') or '')

# Upper-case before comparing because the UUIDs in the results use mixed case.
projects_df[project_uris.str.upper().str.endswith('/' + PRESIDIO_UUID)]

Unnamed: 0,category,geometry,id,label,properties,rdfs:isDefinedBy,type


In [8]:
# Fetch the Presidio project's JSON record directly by its UUID.
oc.project('C5B4F73B-5EF8-4099-590E-B0275EDBA2A7')

{'@context': ['https://opencontext.org/contexts/item.json',
  'http://geojson.org/geojson-ld/geojson-context.jsonld',
  {}],
 'bibo:status': [{'id': 'http://opencontext.org/vocabularies/oc-general/edit-level-3',
   'label': 'Managing editor reviewed',
   'slug': 'oc-gen-edit-level-3'},
  {'id': 'http://purl.org/ontology/bibo/status/peerReviewed',
   'label': 'Peer reviewed',
   'slug': 'bibo-status-peerreviewed'}],
 'dc-terms:abstract': "<p>The Presidio of San Francisco is a national park site measuring 1491 acres, administered jointly by the Presidio Trust and the National Park Service. The Presidio was in continuous use as a military post from 1776 to 1994, serving under the flags of Spain, Mexico, and the United States. During the transition from an active Army Post to a National Park, an update to the Presidio\x92s National Historic Landmark designation was undertaken. This effort documented nearly 4000 sites, buildings, structures, and objects as contributing features to the Landm

* Data Records
* Media

In [9]:
# Iterating over all 1000+ subject records of the project takes a while,
# so only the first 50 are consumed here. The counter is rewritten in place
# ("\r") to show progress.
presidio_subjects = islice(oc.subjects_search('12-presidio-of-san-francisco'), 50)

for i, record in enumerate(presidio_subjects):
    print("\r", i, end="")

https://opencontext.org/subjects-search.json/?proj=12-presidio-of-san-francisco
 49