# OpenAlex API - SFU Institution Overview

In [6]:
# import necessary libraries
import requests
import json
import pandas as pd
import numpy as np
import ast
import os
from dotenv import load_dotenv

# load the .env file
# may need to create your own .env based on .env.example
load_dotenv()

# pull proxies for the API call out of the .env file
proxy_url_http = os.environ.get("PROXY_URL_HTTP")
proxy_url_https = os.environ.get("PROXY_URL_HTTPS")

# prepare the proxies for the API call
# (not passed to requests.get below; kept ready for when the notebook runs behind the proxy)
proxies = {
    "http": proxy_url_http,
    "https": proxy_url_https
}

# initialize the API url
url = 'https://api.openalex.org/institutions?search=simon fraser university'

# no proxy required when running locally; add proxies=proxies when it is needed.
# The timeout makes the cell fail fast instead of hanging on a stalled connection.
response = requests.get(url, timeout=30)

# Stop here on any non-200 response. Printing and carrying on would leave `data`
# undefined on a fresh kernel (NameError below) or silently reuse a stale `data`
# from an earlier run.
if response.status_code != 200:
    raise RuntimeError(f"Request failed with status code {response.status_code}")

data = response.json()  # Parse the JSON response

# An empty result list would otherwise surface as a bare IndexError on [0]
if not data.get('results'):
    raise ValueError("OpenAlex returned no institutions for this search")

# save sfu overview statistics as a dictionary (first search hit)
sfu_results = data['results'][0]

sfu_results.keys()


dict_keys(['id', 'ror', 'display_name', 'relevance_score', 'country_code', 'type', 'type_id', 'lineage', 'homepage_url', 'image_url', 'image_thumbnail_url', 'display_name_acronyms', 'display_name_alternatives', 'repositories', 'works_count', 'cited_by_count', 'summary_stats', 'ids', 'geo', 'international', 'associated_institutions', 'counts_by_year', 'roles', 'topics', 'topic_share', 'is_super_system', 'works_api_url', 'updated_date', 'created_date'])

### Call the API to pull some at-a-glance stats about SFU

In [7]:
# Re-issue the institution search using the `url` defined in the setup cell.
# no proxy required when running locally; add proxies=proxies when it is needed.
# The timeout makes the cell fail fast instead of hanging on a stalled connection.
response = requests.get(url, timeout=30)

# Stop here on any non-200 response. Printing and carrying on would silently
# reuse the `data` left over from the previous cell.
if response.status_code != 200:
    raise RuntimeError(f"Request failed with status code {response.status_code}")

data = response.json()  # Parse the JSON response

# An empty result list would otherwise surface as a bare IndexError on [0]
if not data.get('results'):
    raise ValueError("OpenAlex returned no institutions for this search")

# keep the first search hit as the SFU institution record
sfu_results = data['results'][0]

sfu_results.keys()

dict_keys(['id', 'ror', 'display_name', 'relevance_score', 'country_code', 'type', 'type_id', 'lineage', 'homepage_url', 'image_url', 'image_thumbnail_url', 'display_name_acronyms', 'display_name_alternatives', 'repositories', 'works_count', 'cited_by_count', 'summary_stats', 'ids', 'geo', 'international', 'associated_institutions', 'counts_by_year', 'roles', 'topics', 'topic_share', 'is_super_system', 'works_api_url', 'updated_date', 'created_date'])

##### 1. OpenAlex Institution ID

In [8]:
# OpenAlex identifier (a full URL) of the institution record held in sfu_results
sfu_id = sfu_results['id']
sfu_id

'https://openalex.org/I18014758'

##### 2. Associated Institutions

In [9]:
# Tabulate the institutions OpenAlex lists under 'associated_institutions' for this record;
# this frame is written to sfu_associated_institutions.csv at the end of the notebook
sfu_associated_inst = pd.DataFrame(sfu_results['associated_institutions'])
sfu_associated_inst

Unnamed: 0,id,ror,display_name,country_code,type,relationship
0,https://openalex.org/I2802902549,https://ror.org/039cthy03,Bamfield Marine Sciences Centre,CA,education,child
1,https://openalex.org/I4210094865,https://ror.org/00hv6g197,Pacific Institute for Climate Solutions,CA,facility,child
2,https://openalex.org/I4210136721,https://ror.org/047c5ks69,Sunny Hill Health Centre for Children,CA,healthcare,related
3,https://openalex.org/I4210153200,https://ror.org/04n901w50,British Columbia Children's Hospital,CA,healthcare,related
4,https://openalex.org/I4210164805,https://ror.org/05c4nx247,B.C. Women's Hospital & Health Centre,CA,healthcare,related
5,https://openalex.org/I4387153203,https://ror.org/05ek4tb53,Public Knowledge Project,,other,child
6,https://openalex.org/I4388891788,https://ror.org/046n2jy73,Canadian Statistical Sciences Institute,,facility,child


##### 3. Summary Stats

Only `h_index` from this data is used below (as the all-time placeholder in the summary table); I recalculate the h-index manually later, and the other two metrics are not used. 

In [10]:
# Institution-level summary metrics as returned by the API;
# only the 'h_index' entry is read further down, when the summary table is built
sfu_summary_stats = sfu_results['summary_stats']
sfu_summary_stats

{'2yr_mean_citedness': 4.614403778040142, 'h_index': 618, 'i10_index': 78963}

##### 4. Works and Citations

In [None]:
# Total number of works reported on the institution record
sfu_total_works = sfu_results['works_count']
sfu_total_works # this figure may not be up to date - when they swapped to Walden data it did not change this

80902

In [None]:
# Total citation count reported on the institution record
sfu_total_citations = sfu_results['cited_by_count']
sfu_total_citations # this figure may not be up to date - when they swapped to Walden data it did not change this

5635242

In [None]:
# Average citations per work. Falls back to NaN (the same placeholder the summary table
# uses for missing values) when the works count is zero or missing, instead of raising.
sfu_citations_per_work = sfu_total_citations / sfu_total_works if sfu_total_works else np.nan
sfu_citations_per_work # this figure may not be up to date - using old works and citations -> wrong citations/work

69.65516303676053

##### 4. Works API link

In [14]:
# API URL that lists the works filtered to this institution, as provided on the record
sfu_works_api = sfu_results['works_api_url']
sfu_works_api

'https://api.openalex.org/works?filter=institutions.id:I18014758'

##### 5. Topics

In [15]:
# Keep only the identifying columns and the per-topic count from the record's topic list
topic_columns = ['id', 'display_name', 'count']
sfu_topics = pd.DataFrame(sfu_results['topics'])[topic_columns]
sfu_topics.head()

Unnamed: 0,id,display_name,count
0,https://openalex.org/T10048,Particle physics theoretical and experimental ...,1732
1,https://openalex.org/T10527,High-Energy Particle Collisions Research,1321
2,https://openalex.org/T10302,Fish Ecology and Management Studies,1098
3,https://openalex.org/T10487,Plant and animal studies,1020
4,https://openalex.org/T10224,Quantum Chromodynamics and Particle Interactions,980


In [16]:
# Tabulate the topic-share entries, then flatten each row's nested `field` dict
# into a plain column holding that field's display name
sfu_topic_share = pd.DataFrame(sfu_results['topic_share'])
sfu_topic_share['field_name'] = [
    field['display_name'] for field in sfu_topic_share['field']
]

sfu_topic_share.head()

Unnamed: 0,id,display_name,value,subfield,field,domain,field_name
0,https://openalex.org/T10565,"Psychopathy, Forensic Psychiatry, Sexual Offen...",0.011886,"{'id': 'https://openalex.org/subfields/3203', ...","{'id': 'https://openalex.org/fields/32', 'disp...","{'id': 'https://openalex.org/domains/2', 'disp...",Psychology
1,https://openalex.org/T12210,Policy Transfer and Learning,0.011718,"{'id': 'https://openalex.org/subfields/3320', ...","{'id': 'https://openalex.org/fields/33', 'disp...","{'id': 'https://openalex.org/domains/2', 'disp...",Social Sciences
2,https://openalex.org/T10803,Innovative Human-Technology Interaction,0.010185,"{'id': 'https://openalex.org/subfields/1709', ...","{'id': 'https://openalex.org/fields/17', 'disp...","{'id': 'https://openalex.org/domains/3', 'disp...",Computer Science
3,https://openalex.org/T10574,Crime Patterns and Interventions,0.0101,"{'id': 'https://openalex.org/subfields/3312', ...","{'id': 'https://openalex.org/fields/33', 'disp...","{'id': 'https://openalex.org/domains/2', 'disp...",Social Sciences
4,https://openalex.org/T13655,Canadian Policy and Governance,0.009093,"{'id': 'https://openalex.org/subfields/3320', ...","{'id': 'https://openalex.org/fields/33', 'disp...","{'id': 'https://openalex.org/domains/2', 'disp...",Social Sciences


### Save summary stats as a df
This will be populated and updated later on

In [17]:
# The dashboard reports three time frames: all-time, 10 years, and 5 years.
# Only the all-time row has values at this point; the stand-in strings and the NaNs
# are replaced later, once works and authors are processed in detail.
all_time_values = {
    "Publications": sfu_total_works,
    "Citations": sfu_total_citations,
    "Citations per Publication": sfu_citations_per_work,
    "h-Index": sfu_summary_stats['h_index'],
    "Field-Weighted Citation Impact": 'not available in OpenAlex on the institutional level',
    "Authors": 'dont know yet',
}

# Each metric column is [all-time value, NaN for 10 years, NaN for 5 years]
sfu_summary_dict = {"Time Frame": ['All-time', '10 years', '5 years']}
sfu_summary_dict.update(
    (column, [value, np.nan, np.nan]) for column, value in all_time_values.items()
)

sfu_summary_df = pd.DataFrame(sfu_summary_dict)
sfu_summary_df

Unnamed: 0,Time Frame,Publications,Citations,Citations per Publication,h-Index,Field-Weighted Citation Impact,Authors
0,All-time,80902.0,5635242.0,69.655163,618.0,not available in OpenAlex on the institutional...,dont know yet
1,10 years,,,,,,
2,5 years,,,,,,


### Save works API url for later

In [18]:
# Wrap the works API URL in a one-row, one-column frame so it can be saved as a CSV
sfu_works_url = {"URL": [sfu_results['works_api_url']]}
sfu_works_url_df = pd.DataFrame(sfu_works_url)
sfu_works_url_df

Unnamed: 0,URL
0,https://api.openalex.org/works?filter=institut...


### Export the necessary data to CSV files:

In [19]:
# Make sure the output folder exists; to_csv does not create missing directories
# and would otherwise fail with an OSError on a fresh checkout.
os.makedirs('../data_pulls', exist_ok=True)

# Write each frame without its index column
sfu_topics.to_csv('../data_pulls/sfu_topics.csv', index = False)
sfu_associated_inst.to_csv('../data_pulls/sfu_associated_institutions.csv', index = False)
sfu_summary_df.to_csv('../data_pulls/sfu_summary_stats.csv', index = False)
sfu_works_url_df.to_csv('../data_pulls/sfu_works_url.csv', index = False)