# Question: How are number of works counted when looking at institutions within a country?

In [13]:
import requests
country_code = 'ES'
url = f"https://api.openalex.org/works"
params = {
    'filter': f'institutions.country_code:{country_code}',
    'group_by': 'institutions.id',
}
r = requests.get(url, params=params)

In [14]:
counts_by_institutions_from_works_endpoint = r.json()['group_by']

In [16]:
counts_by_institutions_from_works_endpoint[0]

{'key': 'https://openalex.org/I71999127',
 'key_display_name': 'University of Barcelona',
 'count': 106711}

In [18]:
country_code = 'ES'
# url with a placeholder for page number
url = f"https://api.openalex.org/institutions"
params = {
    'filter': f'country_code:{country_code}',
    'page': 1,  # initaliaze `page` param to 1
}

has_more_pages = True
fewer_than_10k_results = True

institutions_data_from_institutions_endpoint = []

# loop through pages
loop_index = 0
while has_more_pages and fewer_than_10k_results:
    
    page_with_results = requests.get(url, params=params).json()
    
    # loop through partial list of results
    results = page_with_results['results']
    for api_result in results:
        # # Collect the fields we are interested in, for this source
        # source = {field: api_result[field] for field in fields}
        # Append this source to our `japanese_sourcers` list
        institutions_data_from_institutions_endpoint.append(api_result)

    # next page
    params['page'] += 1
    
    # end loop when either there are no more results on the requested page 
    # or the next request would exceed 10,000 results
    per_page = page_with_results['meta']['per_page']
    has_more_pages = len(results) == per_page
    fewer_than_10k_results = per_page * params['page'] <= 10000
    loop_index += 1
print(f"collected {len(institutions_data_from_institutions_endpoint)} sources (using {loop_index+1} api calls)")

collected 1804 sources (using 74 api calls)


In [24]:
keyed_counts = {item['key']: item for item in counts_by_institutions_from_works_endpoint}
data = []
for inst in institutions_data_from_institutions_endpoint:
    id = inst['id']
    c = keyed_counts.get(id)
    if c:
        data.append({
            'id': id,
            'count1': inst['works_count'],
            'count2': c['count'],
        })

In [25]:
data

[{'id': 'https://openalex.org/I71999127', 'count1': 106775, 'count2': 106711},
 {'id': 'https://openalex.org/I121748325', 'count1': 101654, 'count2': 101658},
 {'id': 'https://openalex.org/I123044942', 'count1': 88726, 'count2': 88466},
 {'id': 'https://openalex.org/I16097986', 'count1': 75688, 'count2': 74845},
 {'id': 'https://openalex.org/I173304897', 'count1': 70325, 'count2': 70273},
 {'id': 'https://openalex.org/I63634437', 'count1': 63111, 'count2': 61569},
 {'id': 'https://openalex.org/I79238269', 'count1': 58094, 'count2': 58138},
 {'id': 'https://openalex.org/I169108374', 'count1': 55330, 'count2': 55244},
 {'id': 'https://openalex.org/I9617848', 'count1': 51480, 'count2': 51501},
 {'id': 'https://openalex.org/I200284239', 'count1': 49358, 'count2': 49181},
 {'id': 'https://openalex.org/I255234318', 'count1': 47887, 'count2': 47898},
 {'id': 'https://openalex.org/I88060688', 'count1': 42417, 'count2': 42434},
 {'id': 'https://openalex.org/I60053951', 'count1': 42409, 'count2'