# OpenAlex API - Pull SFU authors

In [3]:
# import necessary libraries 
import requests
import json
import pandas as pd
import os
from dotenv import load_dotenv

# load the .env file
# may need to create your own based on .env.example!
load_dotenv()

# pull proxies for the API call out of the .env file
proxy_url_http = os.environ.get("PROXY_URL_HTTP")
proxy_url_https = os.environ.get("PROXY_URL_HTTPS")

# prepare the proxies for the API call
proxies = {
    "http": proxy_url_http, 
    "https": proxy_url_https
}


##### NOTE:
This process will take a couple of mins to run from start to finish

In [4]:
# initialize the API URL. 
# the cursor allows us to pull multiple pages of results to get all SFU authors in one go. 
# NOTE: SFU's Institution ID in OpenAlex is I18014758
url_with_cursor = 'https://api.openalex.org/authors?page=1&filter=last_known_institutions.id:i18014758&cursor={}'
cursor = '*'

# empty container to store results later
full_results = []

# loop through pages
while cursor:
    
    # set cursor value and request page from OpenAlex
    url = url_with_cursor.format(cursor)
    
    print("\n" + url)
    page_with_results = requests.get(url)
    # page_with_results = requests.get(url, proxies=proxies) # chenge to this if not running locally
    page_with_results = page_with_results.json()
    
    # loop through partial list of results
    results = page_with_results['results']
    # printing the results here is not strictly necessary but it is nice to see that the code is running in real time
    for i,author in enumerate(results):
        openalex_id = author['id'].replace("https://openalex.org/", "")
        print(openalex_id, end='\t' if (i+1)%10!=0 else '\n')
    
    full_results += results

    # update cursor to meta.next_cursor
    cursor = page_with_results['meta']['next_cursor']
    print("next cursor is: ", cursor)


https://api.openalex.org/authors?page=1&filter=last_known_institutions.id:i18014758&cursor=*
A5114378471	A5019316470	A5077377484	A5039614567	A5100397026	A5100728059	A5029004287	A5107886267	A5048009605	A5100405681
A5023555343	A5030251424	A5088720034	A5100354297	A5034865907	A5028440357	A5100413849	A5077475073	A5039311485	A5062247330
A5007584535	A5018086501	A5080164344	A5035673972	A5090873433	next cursor is:  Ils2NDMsICdodHRwczovL29wZW5hbGV4Lm9yZy9BNTA5MDg3MzQzMyddIg==

https://api.openalex.org/authors?page=1&filter=last_known_institutions.id:i18014758&cursor=Ils2NDMsICdodHRwczovL29wZW5hbGV4Lm9yZy9BNTA5MDg3MzQzMyddIg==
A5100371992	A5021515928	A5077202069	A5029972713	A5018486913	A5005684483	A5006194799	A5068175779	A5031049285	A5050825978
A5105504768	A5072684302	A5005993212	A5090621082	A5089046130	A5039744323	A5012561411	A5112228279	A5076154696	A5029261392
A5113500609	A5076714348	A5081644676	A5102732555	A5061285579	next cursor is:  IlszOTksICdodHRwczovL29wZW5hbGV4Lm9yZy9BNTA2MTI4NTU3OSddIg

### save data as csv

In [5]:
pd.DataFrame(full_results).to_csv('../data_pulls/sfu_all_authors.csv', index = False)