In [8]:
import requests
import pandas as pd
import csv
import time
import sys

# Function to fetch metadata from CrossRef by title (plus its private helpers)
def _first_or_default(values, default='N/A'):
    """Return the first element of *values*, or *default* if it is missing or empty."""
    return values[0] if values else default


def _extract_year(paper):
    """Return the publication year of a CrossRef work item, or None if unknown.

    The print date is preferred; online-only works have no 'published-print'
    entry, so the online and issued dates are used as fallbacks.
    """
    for key in ('published-print', 'published-online', 'issued'):
        date_parts = (paper.get(key) or {}).get('date-parts') or [[None]]
        first_date = date_parts[0]
        if first_date and first_date[0] is not None:
            return first_date[0]
    return None


def get_metadata_from_title_crossref(title):
    """Look up *title* on CrossRef and return metadata for the best match.

    Args:
        title: The paper title to search for. Blank values (None, NaN, or
            whitespace-only strings) are skipped without a network request.

    Returns:
        A dict with the keys 'title', 'journal', 'authors', 'year', 'url'
        and 'doi' describing the first search hit, or None when the title
        is blank, the request fails, or CrossRef returns no items.

    Errors are reported on stdout rather than raised, so one failing title
    does not abort a batch run.
    """
    # Blank spreadsheet cells arrive from pandas as None/NaN; sending them
    # would query CrossRef for the literal text "nan" and return a bogus hit.
    is_nan = isinstance(title, float) and title != title
    query = '' if title is None or is_nan else str(title).strip()
    if not query:
        print("Warning: Skipping empty title", flush=True)
        return None

    try:
        print(f"Fetching metadata for title: {title}", flush=True)
        # Pass the title via `params` so characters such as '&', '#' or '+'
        # are URL-encoded instead of corrupting the query string.
        response = requests.get(
            "https://api.crossref.org/works",
            params={'query.title': query, 'rows': 1},  # only the top hit is used
            timeout=10,  # Avoid hanging the whole batch on one slow request
        )
        if response.status_code != 200:
            print(f"Warning: CrossRef returned HTTP {response.status_code} for title '{title}'", flush=True)
            return None

        # Parse the body once and tolerate missing keys
        items = response.json().get('message', {}).get('items') or []
        if not items:
            print(f"Warning: No items found for title '{title}'", flush=True)
            return None

        paper = items[0]
        # Organisational authors carry 'name' instead of 'family'
        authors = ', '.join(
            author.get('family') or author.get('name') or 'Unknown'
            for author in paper.get('author') or []
        )
        return {
            'title': _first_or_default(paper.get('title')),
            # 'container-title' may be present but empty, hence no bare [0]
            'journal': _first_or_default(paper.get('container-title')),
            'authors': authors,
            'year': _extract_year(paper),
            'url': paper.get('URL', 'N/A'),
            'doi': paper.get('DOI', 'N/A'),
        }
    except requests.exceptions.Timeout:
        print(f"Request for title '{title}' timed out.", flush=True)
    except Exception as e:
        # Deliberately broad: this is a best-effort batch lookup
        print(f"Error occurred for title '{title}': {e}", flush=True)
    return None

# Function to save metadata to a CSV file
def save_metadata_to_csv(metadata_list, filename="crossref_metadata_output.csv"):
    """Write the metadata records to *filename* as a UTF-8 CSV with a header row.

    Args:
        metadata_list: Iterable of dicts keyed by the column names listed
            below (the keys produced by the CrossRef lookup).
        filename: Path of the CSV file to create or overwrite.
    """
    # Column order of the output file; must match the metadata dict keys
    columns = ['title', 'journal', 'authors', 'year', 'url', 'doi']

    # newline='' lets the csv module control line endings itself
    with open(filename, mode='w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(metadata_list)

# Function to read titles from a CSV or spreadsheet file
def read_titles_from_csv(filename, column='Title'):
    """Read the paper titles from one column of a CSV or spreadsheet file.

    The reader is chosen from the file extension: '.csv' uses
    ``pd.read_csv``, Excel extensions let pandas pick its engine, and
    anything else (including '.ods') is read with the 'odf' engine.

    Args:
        filename: Path to the input file.
        column: Name of the column holding the titles.

    Returns:
        A list of non-empty title strings with surrounding whitespace
        removed. Blank cells are dropped.

    Raises:
        KeyError: If *column* is not present in the file.
    """
    lowered = str(filename).lower()
    if lowered.endswith('.csv'):
        # read_excel cannot parse CSV with any engine
        df = pd.read_csv(filename)
    elif lowered.endswith(('.xlsx', '.xlsm', '.xlsb', '.xls')):
        df = pd.read_excel(filename)
    else:
        df = pd.read_excel(filename, engine='odf')

    if column not in df.columns:
        raise KeyError(
            f"Column '{column}' not found in {filename}; "
            f"available columns: {list(df.columns)}"
        )

    # Blank cells are read as NaN; drop them so they are never looked up
    cleaned = [str(value).strip() for value in df[column].dropna().tolist()]
    return [title for title in cleaned if title]

# Path to the CSV/ODS file containing titles
csv_file_path = 'Book1.ods'  # Replace with your actual file path

# Destination for the collected metadata; defined before the loop so that
# partial results can still be written if the run is cut short
output_file_path = 'crossref_metadata_output.csv'

# Read titles from the provided file
titles = read_titles_from_csv(csv_file_path)

# Fetch metadata for all titles
metadata_list = []
total_titles = len(titles)

try:
    for index, title in enumerate(titles, 1):
        print(f"Processing {index}/{total_titles}: {title}", flush=True)
        metadata = get_metadata_from_title_crossref(title)
        if metadata:
            metadata_list.append(metadata)
        time.sleep(0.5)  # Optional delay between requests, adjust if necessary
finally:
    # Runs on normal completion and on Ctrl-C or an unexpected error alike,
    # so a long batch never loses the records already fetched.
    save_metadata_to_csv(metadata_list, filename=output_file_path)
    print(f"Matched {len(metadata_list)} of {total_titles} titles", flush=True)
    print(f"Metadata has been saved to {output_file_path}", flush=True)


Processing 1/1072: 3D printing for cultural heritage: Preservation, accessibility, research and education
Fetching metadata for title: 3D printing for cultural heritage: Preservation, accessibility, research and education
Processing 2/1072: Design for assistive technology oriented to design methodology: a systematic review on user-centered design and 3D printing approaches
Fetching metadata for title: Design for assistive technology oriented to design methodology: a systematic review on user-centered design and 3D printing approaches
Processing 3/1072: Uncovering challenges and opportunities for 3D printing assistive technology with physical therapists
Fetching metadata for title: Uncovering challenges and opportunities for 3D printing assistive technology with physical therapists
Processing 4/1072: Coming to grips: 3D printing for accessibility
Fetching metadata for title: Coming to grips: 3D printing for accessibility
Processing 5/1072: Interdisciplinary Contributions in the Design o