In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import time
import csv
import json
from datetime import datetime

In [2]:
# Define the API endpoint and query.
# NOTE(review): this endpoint appears to match the query against author *names*
# (the results are author records named like "U. Michigan"), not affiliations —
# confirm this is the intended lookup.
query = "University of Michigan"
url = "https://api.semanticscholar.org/graph/v1/author/search"
# Pass the query via ``params`` so requests percent-encodes reserved characters
# (spaces, '&', '#', ...) instead of embedding them raw in the URL.
params = {"query": query, "limit": 10}

# Send a GET request to the API; the timeout keeps a stalled connection from
# hanging the kernel indefinitely.
response = requests.get(url, params=params, timeout=30)

# Check the status code of the response
if response.status_code == 200:
    # If the response is successful, parse the JSON data
    data = response.json()

    # Print the list of authors retrieved (tolerate a payload without 'data')
    for author in data.get('data', []):
        print(f"Author Name: {author['name']}")
else:
    print(f"Failed to retrieve data: Status code {response.status_code}")


Author Name: U. Michigan
Author Name: M. N. U. O. Michigan
Author Name: D. M. U. O. Michigan
Author Name: F. S. U. O. Michigan
Author Name: T. U. O. Michigan
Author Name: S. U. O. Michigan
Author Name: A. B. U. O. Michigan
Author Name: Univ. of Michigan in Ann Arbor
Author Name: Y. Michigan
Author Name: J. W. U. O. Michigan


In [3]:
# Function to clean the faculty name
def clean_name(faculty_name):
    """Return the text before the first comma in ``faculty_name``, trimmed.

    Anything from the first comma onward is discarded, and leading/trailing
    whitespace is removed from what remains. Internal whitespace is left
    untouched.
    """
    before_comma, _sep, _rest = faculty_name.partition(",")
    return before_comma.strip()

# Function to search for a faculty member by name, with retry logic
def search_author_by_name(faculty_name, retries=3):
    """Look up a faculty member on Semantic Scholar and return the top hit's ID.

    Args:
        faculty_name: Raw name string; it is passed through ``clean_name``
            before being used as the search query.
        retries: Maximum number of request attempts. Only rate-limit (429)
            responses and network-level errors are retried.

    Returns:
        The ``authorId`` of the first search result, or ``None`` when nothing
        matched, the API returned a non-retryable status, or every attempt
        failed.
    """
    cleaned_name = clean_name(faculty_name)
    url = "https://api.semanticscholar.org/graph/v1/author/search"
    # Let requests percent-encode the name; interpolating it into the URL
    # would corrupt the query for names containing '&', '#', '+', etc.
    params = {'query': cleaned_name, 'limit': 1}

    for attempt in range(retries):
        try:
            # Timeout prevents one stalled connection from freezing the run.
            response = requests.get(url, params=params, timeout=30)
        except requests.RequestException as exc:
            # Connection errors/timeouts are transient: retry like a 429.
            problem = f"Request error for {cleaned_name} ({exc})"
        else:
            if response.status_code == 200:
                matches = response.json().get('data') or []
                if matches:
                    author_id = matches[0]['authorId']
                    print(f"Author ID for {cleaned_name}: {author_id}")
                    return author_id
                print(f"No author found for {cleaned_name}")
                return None
            if response.status_code != 429:
                print(f"Failed to retrieve data: Status code {response.status_code}")
                return None
            problem = "Rate limit hit"

        if attempt < retries - 1:
            print(f"{problem}. Waiting before retrying...")
            # Linear backoff: 5s, 10s, 15s, ... (skipped after the final
            # attempt, where waiting would serve no purpose).
            time.sleep(5 * (attempt + 1))
        else:
            print(f"{problem}.")

    print(f"Failed to retrieve data after {retries} attempts")
    return None

def get_recent_publications(author_id, retries=3):
    """Fetch an author's papers and keep those from the last five years.

    The papers endpoint is paginated (it returns only 100 records by default),
    so this requests the largest page size and follows the ``next`` offset
    until the listing is exhausted.

    Args:
        author_id: Semantic Scholar author ID.
        retries: Maximum attempts per page. Only rate-limit (429) responses
            and network-level errors are retried.

    Returns:
        A list of ``{'title', 'year', 'venue'}`` dicts for papers whose year
        is known and is ``>= current_year - 5``. ``venue`` is ``'N/A'`` when
        the API gives no venue. If a page cannot be retrieved, the papers
        collected from earlier pages are returned (an empty list when the
        first page fails).
    """
    cutoff_year = datetime.now().year - 5
    url = f"https://api.semanticscholar.org/graph/v1/author/{author_id}/papers"
    page_size = 1000  # largest page the API allows; the default is only 100
    offset = 0
    recent_publications = []

    while offset is not None:
        params = {'fields': 'title,year,venue', 'offset': offset, 'limit': page_size}
        page = None

        for attempt in range(retries):
            try:
                # Timeout prevents one stalled connection from freezing the run.
                response = requests.get(url, params=params, timeout=30)
            except requests.RequestException as exc:
                # Connection errors/timeouts are transient: retry like a 429.
                problem = f"Request error while retrieving publications ({exc})"
            else:
                if response.status_code == 200:
                    page = response.json()
                    break
                if response.status_code != 429:
                    print(f"Failed to retrieve publications for author ID {author_id}: Status code {response.status_code}")
                    return recent_publications
                problem = "Rate limit hit while retrieving publications"

            if attempt < retries - 1:
                print(f"{problem}. Waiting before retrying...")
                # Linear backoff: 5s, 10s, 15s, ... (no wait after the last try).
                time.sleep(5 * (attempt + 1))
            else:
                print(f"{problem}.")

        if page is None:
            print(f"Failed to retrieve publications after {retries} attempts for author ID {author_id}")
            return recent_publications

        for paper in page.get('data') or []:
            year = paper.get('year')
            # Papers with an unknown year cannot be dated, so they are skipped.
            if year is not None and year >= cutoff_year:
                recent_publications.append({
                    'title': paper.get('title'),
                    'year': year,
                    # The key is normally present but may be empty or null, so
                    # a plain .get() default would never apply.
                    'venue': paper.get('venue') or 'N/A'
                })

        # 'next' is the offset of the following page and is absent on the last
        # one; also stop if it fails to advance, to rule out an endless loop.
        next_offset = page.get('next')
        if next_offset is not None and next_offset > offset:
            offset = next_offset
        else:
            offset = None

    return recent_publications

# Main function to process the CSV file and get recent publications
def get_publications_from_csv(input_csv, output_csv):
    """Look up each faculty name in ``input_csv`` and write their recent papers.

    The first row of ``input_csv`` is treated as a header and skipped; the
    first column of every following row is used as the faculty name. Each name
    is resolved with ``search_author_by_name`` and, when an author ID is found,
    its papers are fetched with ``get_recent_publications``.

    Args:
        input_csv: Path of the CSV whose first column holds faculty names.
        output_csv: Path of the CSV to (over)write with the columns
            Name, Author ID, Title, Year, Venue — one row per publication.
    """
    all_publications = []

    # newline='' is what the csv module expects for file objects; utf-8-sig
    # also strips a BOM (e.g. from Excel exports) that would otherwise end up
    # glued to the header/first field.
    with open(input_csv, mode='r', newline='', encoding='utf-8-sig') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip header row; the default avoids StopIteration on an empty file

        for row in reader:
            # Blank lines come back as [] (or a lone empty field); skip them
            # instead of failing on row[0] or issuing an empty query.
            if not row or not row[0].strip():
                continue

            faculty_name = row[0]
            author_id = search_author_by_name(faculty_name)
            if author_id:
                publications = get_recent_publications(author_id)
                for pub in publications:
                    all_publications.append({
                        'Name': faculty_name,
                        'Author ID': author_id,
                        'Title': pub['title'],
                        'Year': pub['year'],
                        'Venue': pub['venue']
                    })
            # Delay added to avoid rapid-fire requests
            time.sleep(1)

    # Write all publications to a new CSV file. Explicit UTF-8 so titles with
    # non-ASCII characters do not fail on platforms with a narrower default.
    with open(output_csv, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=['Name', 'Author ID', 'Title', 'Year', 'Venue'])
        writer.writeheader()
        writer.writerows(all_publications)

    print(f"Publications have been written to {output_csv}")

# Get recent publications for the faculty names listed in the CSV file.
# NOTE: despite the file name, the input CSV is read for names (first column);
# author IDs are looked up from those names via the API.
input_csv = 'faculty_author_ids.csv'  # Input CSV with a list of names
output_csv = 'recent_publications.csv'  # Output CSV to save names, IDs, and publications
get_publications_from_csv(input_csv, output_csv)


Author ID for Phillip E. Rodgers: 27439187
Author ID for Emad R.  Abou-Arab: 2296421051
Author ID for Mercy A.  Adetoye: 80060016
Author ID for James E. Aikens: 6953599
Author ID for Barbara S. Apgar: 2894374
Author ID for Ricardo R. Bartelme: 87714911
No author found for Benjamin D. Beduhn
Author ID for Aleksandr Belakovskiy: 40898100
Author ID for Catherine M. Bettcher: 12759753
No author found for Anup A.  Bhandiwad
Author ID for Neha V. Bhave: 77003078
No author found for Julie A.  Blaszczak
Author ID for Lorraine Buis: 2252596
Author ID for Juana Nicoll Capizzano: 2106928266
Author ID for Kristine L. Cece: 1581956094
Author ID for P. Paul Chandanabhumma: 51199344
Author ID for Tammy Chang: 2251275444
Author ID for Jane E. Chargot: 83197460
No author found for William E. Chavey II
Author ID for Christina W. Chiang: 31806364
Author ID for Christine T. Cigolle: 5884039
Author ID for James M. Cooke: 144154830
No author found for Colleen O. Cooper
No author found for Laura M. Crespo Al

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3ed45b78-b7a2-422e-8516-f8c1d435ecb0' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>