In [9]:
import requests
import pandas as pd
import json
import os

# Enter BPIDs

In [10]:
# Top-level BPIDs to process; the extraction loop at the end of the notebook
# handles them one at a time and writes each one's results to its own folder.
BPIDS = ["2000323035", "3000201735", "3000123021"] # first entry: Utrecht University

# Prerequisites 

### 1. Functions for Extracting all parents and children

Parents look like this

```
// http://institution-service.live.cf.private.springer.com/v1/hierarchy/2000323035
{'institution': {'id': '2000323035',
  'name': 'Utrecht University',
  'type': 'Institution'},
 'parents': [{'institution': {'id': '3000174899',
    'name': 'UKB Consortium p/a SURFmarket B.V.',
    'type': 'Consortium'},
   'parents': []},
  {'institution': {'id': '3001974408',
    'name': 'UKB Consortium p/a SURFmarket B.V. Springer Compact',
    'type': 'Consortium'},
   'parents': []}]}
```

Children look like this

```
// http://institution-service.live.cf.private.springer.com/v1/children/3000201735

[
  {
    "id": "3000175179",
    "name": "Bayer Business Services GMBH",
    "type": "INSTITUTION",
    "validFrom": "1990-01-01T00:00:00",
    "validTo": "9999-12-31T00:00:00",
    "legacyId": "4559"
  },
  {
    "id": "3000171692",
    "name": "Bayer Pharma AG",
    "type": "INSTITUTION",
    "validFrom": "1990-01-01T00:00:00",
    "validTo": "9999-12-31T00:00:00",
    "legacyId": null
  },
etc...
]
```

In [13]:
# URL templates for the institution service; `{}` is replaced with a BPID.
# The hierarchy response is read below as a dict with a "parents" list.
api_parents = "http://institution-service.live.cf.private.springer.com/v1/hierarchy/{}"
# The children response is read below as a list of records with an "id" field.
api_children = "http://institution-service.live.cf.private.springer.com/v1/children/{}"

def gen_folder(name):
    """Create the directory `name` (including missing parent directories).

    Calling this for a directory that already exists is a no-op.
    """
    # exist_ok=True replaces the racy "check os.path.exists, then makedirs" pair.
    os.makedirs(name, exist_ok=True)
       
        
def get_related_bpids(BPID):
    """Collect the BPIDs related to `BPID`.

    The result contains `BPID` itself, its parents, the parents of those
    parents (one level up only), and its children.

    Side effects: creates a folder named after `BPID` and stores the raw
    service responses there as `parents.json` and `children.json`.

    Returns:
        list: the related BPIDs, starting with `BPID`, in discovery order and
        without duplicates (the first occurrence is kept).

    Raises:
        requests.HTTPError: if either service call returns an error status.
    """
    print("======\n===\nChecking Top Level BPID: ", BPID, "\n===\n======")
    gen_folder(BPID)

    # Fail loudly on HTTP errors instead of tripping over an error payload later.
    parents_response = requests.get(api_parents.format(BPID))
    parents_response.raise_for_status()
    parents = parents_response.json()

    children_response = requests.get(api_children.format(BPID))
    children_response.raise_for_status()
    children = children_response.json()

    # Keep the raw responses next to the KBARTs for later inspection.
    with open(os.path.join(BPID, 'parents.json'), 'w') as outfile:
        json.dump(parents, outfile)
    with open(os.path.join(BPID, 'children.json'), 'w') as outfile:
        json.dump(children, outfile)

    candidates = [BPID]  # seed with main bpid
    for parent in parents["parents"]:
        candidates.append(parent['institution']['id'])
        # just go one level up; `or []` tolerates a missing/empty parents value
        for grandparent in parent["parents"] or []:
            candidates.append(grandparent['institution']['id'])
    candidates.extend(child['id'] for child in children)

    # The same BPID can be reached via several paths (e.g. a shared
    # grandparent); drop repeats so each KBART is only downloaded once.
    out = list(dict.fromkeys(candidates))
    print("...\nRelated BPIDs found: ", len(out), str(out))
    return out

### 2. Functions for Extracting KBARTs

In [18]:
# URL templates for the three KBART reports downloaded by get_kbarts;
# `{}` is replaced with a BPID.
url_books = "http://ml-read-online.live.sl.i.springer.com:7655/admin-portal/201802052054-134/kbart/book-report.xqy?bp={}"
url_journals = "http://ml-read-online.live.sl.i.springer.com:7655/admin-portal/201802052054-134/kbart/journal-report.xqy?bp={}"
url_nature = "http://entitlements-api.live.private.springernature.io/entitlements/v1/journals/kbart?bpid={}"


def get_kbarts(BPID_LIST, toplevel, auth=('casper-db-user', 'casper')):
    """Download the books, journals and Nature KBART reports for each BPID.

    Args:
        BPID_LIST: BPIDs to fetch reports for.
        toplevel: existing folder the files are written to, as
            `<toplevel>/<BPID>_books.kbart`, `<toplevel>/<BPID>_journals.kbart`
            and `<toplevel>/<BPID>_journals_nature.kbart`.
        auth: (user, password) pair handed to `requests.get` for every
            request. The default preserves the previously hard-coded value.

    A report whose request returns an HTTP error status is reported on stdout
    and skipped, so an error page is never saved as a `.kbart` file and one
    failing report does not abort the remaining downloads.
    """
    # NOTE(review): the default credentials are committed with the notebook;
    # prefer passing `auth` from an environment variable or secret store.
    reports = (
        ("..books...", url_books, "{}/{}_books.kbart"),
        ("..journals...", url_journals, "{}/{}_journals.kbart"),
        ("..nature...", url_nature, "{}/{}_journals_nature.kbart"),
    )

    print("===\nExtracting KBARTs...")
    for BPID in BPID_LIST:
        print("Item: ", BPID)
        for label, url_template, path_template in reports:
            print(label)
            response = requests.get(url_template.format(BPID), auth=auth)
            if not response.ok:
                print("failed (HTTP {}), file not written".format(response.status_code))
                continue
            with open(path_template.format(toplevel, BPID), 'wb') as outfile:
                outfile.write(response.text.encode("utf-8"))
            print("done")


### 3. Functions for Extracting Usage

> TODO 

# Running the Extraction

Note: the results of the extraction are saved in the same folder where this notebook is located. 
Each top-level BPID generates a separate folder containing the parents/children info as well as the KBARTs for all related BPIDs (including the top-level BPID itself).

In [None]:
# For every top-level BPID: look up its related BPIDs (this also creates the
# output folder named after the BPID), then download the KBARTs of all of
# them into that folder.
for b in BPIDS:
    related = get_related_bpids(b)
    get_kbarts(related, b)
    print("**** completed ****")

===
Checking Top Level BPID:  2000323035 
===
...
Related BPIDs found:  3 ['2000323035', '3000174899', '3001974408']
===
Extracting KBARTs...
Item:  2000323035
..books...
done
..journals...
done
..nature...
done
Item:  3000174899
..books...
done
..journals...
done
..nature...
done
Item:  3001974408
..books...
done
..journals...
done
..nature...
done
**** completed ****
===
Checking Top Level BPID:  3000201735 
===
...
Related BPIDs found:  5 ['3000201735', '3000175179', '3000171692', '1600001227', '8200982076']
===
Extracting KBARTs...
Item:  3000201735
..books...
done
..journals...
done
..nature...
done
Item:  3000175179
..books...
done
..journals...
done
..nature...
done
Item:  3000171692
..books...
done
..journals...
done
..nature...
done
Item:  1600001227
..books...
done
..journals...
done
..nature...
done
Item:  8200982076
..books...
done
..journals...
done
..nature...
done
**** completed ****
===
Checking Top Level BPID:  3000123021 
===
...
Related BPIDs found:  9 ['3000123021',