In [21]:
import requests
import pandas as pd
import os
import numpy as np

from dotenv import load_dotenv
import os

# Pull any settings defined in a local .env file into the process environment
load_dotenv()

# College Scorecard endpoint (an API key must be registered to use it)
base_url = "https://api.data.gov/ed/collegescorecard/v1/schools"

# The key is looked up in the environment; it is None when the variable is absent
API_KEY = os.environ.get('API_KEY')

In [None]:
# Query the College Scorecard API for one school and keep the parsed JSON in `data`.

# Fail early with a clear message instead of sending a request without a key.
if not API_KEY:
    raise RuntimeError(
        "API_KEY is not set; add it to your .env file or environment before querying the API."
    )

# Fields requested from the API, listed one per line for readability;
# they are joined into the comma-separated string the `fields` parameter takes.
scorecard_fields = [
    "school.name",
    "latest.admissions.admission_rate.overall",
    "latest.student.demographics.race_ethnicity.asian",
    "latest.student.demographics.race_ethnicity.white",
    "latest.student.demographics.race_ethnicity.hispanic",
    "latest.student.demographics.race_ethnicity.black",
]

params = {
    "api_key": API_KEY,
    "school.name": "Michigan State University",
    "fields": ",".join(scorecard_fields),
    "per_page": 10
}

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(base_url, params=params, timeout=30)
# Surface HTTP errors here rather than as a confusing KeyError in a later cell.
response.raise_for_status()
data = response.json()
data

In [None]:
# Flatten the list of result records into a tabular pandas DataFrame
# (nested keys become dotted column names).
df = pd.json_normalize(
    data["results"]
)
df

In [None]:
# Search NIH RePORTER for projects of one organization in one fiscal year.
url = "https://api.reporter.nih.gov/v2/projects/search"
headers = {"Content-Type": "application/json"}
payload = {
    "criteria": {
        "fy": [2024],
        "organization_name": ["University of California San Diego"]
    },
    "include_fields": ["organization", "fiscal_year", "total_cost"],
    "limit": 100
}

# Send the declared headers (previously built but never passed) and bound the wait time.
resp = requests.post(url, json=payload, headers=headers, timeout=30)
# Raise on HTTP errors so a failed call is not mistaken for an empty result.
resp.raise_for_status()

# NOTE: this rebinds `data`, replacing the College Scorecard payload from the earlier cell;
# re-run that cell before rebuilding `df` from `data`.
data = resp.json()
data['results']

Reference: https://support.qs.com/hc/en-gb/articles/4410488025106-QS-World-University-Rankings-by-Subject
### The rankings columns are:
* Academic Reputation (30% weight)
-- The Academic Reputation (AR) indicator measures the reputation of institutions and their programmes by asking academic experts to nominate universities based on their subject area of expertise. Pioneered by QS in 2004, it asks the question: which universities are demonstrating academic excellence? To answer this we collect and distil the collective intelligence of academics from around the world via our Academic Survey, evaluating nominations for approximately 7000 institutions each year. The indicator not only illuminates the quality of an institution's research, but also their approach to academic partnerships, their strategic impact, their educational innovativeness and the impact they have made on education and society at large.
The indicator is the centrepiece of almost all of the rankings across the QS portfolio. 

* Employer Reputation (15% weight)
-- The Employer Reputation (ER) indicator measures the reputation of institutions and their programmes among employers. We remain the only major ranking to focus on this vital aspect of a student's educational journey.

* Citations per Paper
-- The Citations per Paper (CPP) indicator measures the impact and quality of the scientific work done by institutions, on average per publication.

* H-Index
-- The h-index is an index that attempts to measure both the productivity and impact of the published work of a scientist or scholar. The index is based on the set of the scientist’s most cited papers and the number of citations that they have received in other publications. It can also be applied to the productivity and impact of a group of scientists, such as a department, or an institution (as in the case of our indicator), or a country, as well as a scholarly journal. The index is defined as the maximum value of h such that the given entity (author, journal, department, institution, etc.) has published at least h papers that have each been cited at least h times (https://doi.org/10.1073/pnas.0507655102). We use institution-level H Index.

* International Research Network
-- International Research Network (IRN) is a measure of an institution's success in creating and sustaining research partnerships with institutions in other locations. The indicator measures how diverse and rich an institution's research network is by looking at the number of different countries represented, and whether these relationships are renewed and repeated. We only consider sustained partnerships, defined as those which result in three or more joint papers published in a five-year period.


In [59]:
# Load the QS 2025 "Life Sciences & Medicine" sheet, keep the first 100 listed programs,
# clean the Rank column, and derive the US-only subset.
file_path = '2025_QS_rankings.xlsx'

# Skip the first 10 rows of the sheet so that the next row is used as the header
df_qs = pd.read_excel(file_path, sheet_name="Life Sciences & Medicine", skiprows=10, header=0)

# Drop rows with missing Institution (bottom padding, if any)
df_qs = df_qs.dropna(subset=["Institution"])

# Take the first 100 rows, keep the relevant columns, and rename "2025" to "Rank" for clarity.
# .copy() makes the frame independent of df_qs before columns are assigned below.
df_top100 = (
    df_qs.head(100)[[
        "2025", "Institution", "Country / Territory", "Score", "Academic", "Employer", "Citations", "H", "IRN"
    ]]
    .rename(columns={"2025": "Rank"})
    .copy()
)

# Clean up the rank column: tied ranks carry an "=" prefix. Cast to str first so that
# cells Excel stored as numbers are not turned into NaN by the .str accessor, and
# replace literally (regex=False) so the result does not depend on the pandas version.
df_top100['Rank'] = (
    df_top100['Rank']
    .astype(str)
    .str.strip()
    .str.replace('=', '', regex=False)
    .astype(int)
)

# Consider only US institutions and drop the now-constant country column.
# NOTE(review): rows keep their original QS order; no sort by H-index is applied here.
# If ranking by H-index is intended, add .sort_values('H', ascending=False) — confirm.
df_top100_us = (
    df_top100[df_top100['Country / Territory'] == 'United States of America']
    .drop(columns=['Country / Territory'])
)
df_top100_us






Unnamed: 0,Rank,Institution,Score,Academic,Employer,Citations,H,IRN
0,1,Harvard University,98.7,100.0,100.0,93.7,100.0,100.0
2,3,Johns Hopkins University,93.8,99.6,83.2,91.2,90.7,92.5
3,4,Stanford University,92.6,93.9,97.1,93.7,91.5,82.7
4,5,Massachusetts Institute of Technology (MIT),91.2,91.8,99.8,100.0,86.9,71.4
6,7,"University of California, San Francisco (UCSF)",90.2,92.7,73.1,93.5,89.7,91.4
11,12,Yale University,87.7,89.4,89.2,90.6,86.5,76.3
13,14,"University of California, Los Angeles (UCLA)",86.5,89.4,79.5,90.2,85.8,75.8
15,16,Cornell University,85.4,87.3,83.7,90.4,85.9,68.8
16,17,"University of California, San Diego (UCSD)",85.1,86.8,76.2,91.7,86.5,70.8
17,18,University of Pennsylvania,84.9,84.7,79.5,90.2,88.7,72.5
