# OpenAlex API - Pull SFU works


In [10]:
# import necessary libraries
import requests
import json
import pandas as pd
import os
from dotenv import load_dotenv

# load the .env file
# may need to create your own .env based on .env.example!
load_dotenv()

# pull proxies for the API call out of the .env file
proxy_url_http = os.environ.get("PROXY_URL_HTTP")
proxy_url_https = os.environ.get("PROXY_URL_HTTPS")

# prepare the proxies for the API call
proxies = {
    "http": proxy_url_http, 
    "https": proxy_url_https
}


##### NOTE:
This process will take approx 30 mins - 1 hr to run from start to finish, possibly faster, since OpenAlex switched to the new codebase. 

In [None]:
# initialize the API URL
# the cursor allows us to pull multiple pages of results to get all SFU works in one call. 
# NOTE: SFU's institutional id in OpenAlex is I18014f758
url_with_cursor = 'https://api.openalex.org/works?filter=institutions.id:I18014758&cursor={}'
cursor = '*'

# empty containter to store the results
full_results = []

# loop through pages
while cursor:
    
    # set cursor value and request page from OpenAlex
    url = url_with_cursor.format(cursor)
    
    print("\n" + url)
    page_with_results = requests.get(url)
    # page_with_results = requests.get(url, proxies=proxies) # change to this if not running locally
    page_with_results = page_with_results.json()
    
    # loop through partial list of results
    results = page_with_results['results']
    # printing results isn't entirely necessary but a good check to see if the code is doing something. 
    for i,work in enumerate(results):
        openalex_id = work['id'].replace("https://openalex.org/", "")
        print(openalex_id, end='\t' if (i+1)%10!=0 else '\n')
    
    full_results += results

    # update cursor to meta.next_cursor
    cursor = page_with_results['meta']['next_cursor']
    print("next cursor is: ", cursor)


https://api.openalex.org/works?filter=institutions.id:I18014758&cursor=*
W2140190241	W4288079944	W2107140090	W2064853889	W2141718064	W2030561037	W2165893637	W2116647894	W3123679541	W2765726697
W2010227280	W1981934656	W2127779696	W2115981184	W2159038577	W2163668399	W2081234327	W4252403066	W2084046180	W2982531601
W2095907159	W3121777248	W2020595149	W2147583867	W2236822143	next cursor is:  IlsxMDAuMCwgMjY4NywgJ2h0dHBzOi8vb3BlbmFsZXgub3JnL1cyMjM2ODIyMTQzJ10i

https://api.openalex.org/works?filter=institutions.id:I18014758&cursor=IlsxMDAuMCwgMjY4NywgJ2h0dHBzOi8vb3BlbmFsZXgub3JnL1cyMjM2ODIyMTQzJ10i
W2010155324	W2544493586	W2115482638	W2088864483	W1558773931	W2117368100	W2162792752	W2740924709	W2155347783	W2153028052
W4211141710	W2111887934	W3121464887	W2087107032	W2052279837	W2963444790	W2140679462	W2105316344	W2134813353	W2083722668
W1554573531	W2094520873	W2051353887	W2547623031	W2154841861	next cursor is:  IlsxMDAuMCwgMTg0NiwgJ2h0dHBzOi8vb3BlbmFsZXgub3JnL1cyMTU0ODQxODYxJ10i

https://api.

KeyboardInterrupt: 

### save data as csv

In [None]:
pd.DataFrame(full_results).to_csv('../data_pulls/sfu_all_works.csv', index = False)