# create a list of sources of Japanese author/article data for OpenAlex
- sources that are in japan:
  - journals (and publishers?)
  - repositories (and their host_orgs?)
  - Report numbers for the sources and for works


In [1]:
import requests

Sources with `country_code=JP`

In [2]:
country_code = 'JP'
url = f"https://api.openalex.org/sources"
params = {
    'filter': f'country_code:{country_code}',
}
r = requests.get(url, params=params)
num_sources = r.json()['meta']['count']
print(f"There are {num_sources} sources with country_code 'JP' (Japan)")

There are 2162 sources with country_code 'JP' (Japan)


In [3]:
params = {
    'filter': f'country_code:{country_code}',
    'group_by': 'type',
}
r = requests.get(url, params=params)
print(r.json()['group_by'])

[{'key': 'journal', 'key_display_name': 'journal', 'count': 2162}]


All of the Japanese sources are of type: `journal`.

In [4]:
# page through to get all sources
# use paging technique from `paging.ipynb`
# url with a placeholder for page number
country_code = 'JP'
url = f"https://api.openalex.org/sources"
params = {
    'filter': f'country_code:{country_code}',
    'page': 1,  # initaliaze `page` param to 1
}

has_more_pages = True
fewer_than_10k_results = True

japanese_sources = []

# loop through pages
loop_index = 0
while has_more_pages and fewer_than_10k_results:
    
    page_with_results = requests.get(url, params=params).json()
    
    # loop through partial list of results
    results = page_with_results['results']
    for source in results:
        japanese_sources.append(source)

    # next page
    params['page'] += 1
    
    # end loop when either there are no more results on the requested page 
    # or the next request would exceed 10,000 results
    per_page = page_with_results['meta']['per_page']
    has_more_pages = len(results) == per_page
    fewer_than_10k_results = per_page * params['page'] <= 10000
    loop_index += 1
print(f"collected {len(japanese_sources)} sources (using {loop_index+1} api calls)")

collected 2162 sources (using 88 api calls)


In [5]:
from collections import Counter
c = Counter([source['host_organization'] for source in japanese_sources if source['host_organization']])
print(f"The Japanese sources are associated with {len(c)} different host organizations (publishers).")

The Japanese sources are associated with 181 different host organizations (publishers).


In [6]:
num_works = sum([source['works_count'] for source in japanese_sources])
print(f"There are {num_works} works (articles) with Japanese sources.")

There are 3700657 works (articles) with Japanese sources.
