In [1]:
import json
import os
from urllib.parse import quote, unquote, urlparse

import requests

def is_doi(url):
    """Return True if *url* is served by the doi.org resolver.

    The host must be exactly ``doi.org`` or a subdomain of it (for example
    ``dx.doi.org``). The comparison uses the parsed hostname, so it is
    case-insensitive and ignores any port or userinfo in the URL.

    Args:
        url: The URL string to inspect.

    Returns:
        True when the URL's host is doi.org or one of its subdomains;
        False otherwise, including for non-string or unparseable input.
    """
    if not isinstance(url, str):
        return False
    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 literals).
        return False
    if not host:
        return False
    # A plain substring test would also accept look-alike hosts such as
    # "notdoi.org" or "doi.org.example.com", so compare the host itself.
    return host == 'doi.org' or host.endswith('.doi.org')

def get_doi_from_url(url):
    """Extract the DOI from a doi.org URL.

    The DOI is the URL path with leading slashes removed and
    percent-escapes decoded.

    Args:
        url: The URL string to inspect.

    Returns:
        The decoded DOI string, or None when the URL is not a string, cannot
        be parsed, is not hosted on doi.org (or a subdomain), or has an
        empty path.
    """
    if not isinstance(url, str):
        return None
    try:
        parsed_url = urlparse(url)
        host = parsed_url.hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. broken IPv6 literals).
        return None
    # Match the host itself rather than a substring of the netloc, so that
    # look-alike domains such as "notdoi.org" are not treated as DOI URLs.
    if not host or not (host == 'doi.org' or host.endswith('.doi.org')):
        return None
    doi = unquote(parsed_url.path.lstrip('/'))
    # An empty path carries no DOI; report that as None rather than ''.
    return doi or None

def fetch_doi_metadata(doi, timeout=10):
    """Fetch CSL-JSON metadata for a DOI via doi.org content negotiation.

    The request goes to ``https://doi.org/<doi>`` with an ``Accept`` header
    asking for ``application/vnd.citationstyles.csl+json``.

    Args:
        doi: The DOI string (not a URL), e.g. ``"10.1000/xyz123"``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        The decoded JSON response body on HTTP 200, otherwise None. Failures
        (network error, non-200 status, body that is not valid JSON) are
        reported with ``print`` and never raised.
    """
    headers = {
        'Accept': 'application/vnd.citationstyles.csl+json'
    }
    # Percent-encode the DOI so characters such as '#' or '?' stay part of
    # the path instead of being read as a fragment or query string.
    url = f"https://doi.org/{quote(doi, safe='/')}"
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        print(f'Error fetching DOI metadata: {e}')
        return None

    if response.status_code != 200:
        print(f'Failed to fetch metadata for DOI {doi}: HTTP {response.status_code}')
        return None

    try:
        return response.json()
    except ValueError as e:
        # JSON decode errors are ValueError subclasses; depending on the
        # requests version they may not derive from RequestException.
        print(f'Error fetching DOI metadata: {e}')
        return None

def _format_author_names(authors):
    """Return the display names from a list of CSL-JSON author records."""
    names = []
    for author in authors or []:
        if not isinstance(author, dict):
            continue
        given = author.get('given') or ''
        family = author.get('family') or ''
        full_name = f"{given} {family}".strip()
        if not full_name:
            # Presumably institutional authors carry a single 'literal'
            # (or 'name') field instead of given/family - TODO confirm.
            full_name = str(author.get('literal') or author.get('name') or '').strip()
        if full_name:
            names.append(full_name)
    return names


def _apply_metadata(entry, metadata):
    """Copy available fields from *metadata* into *entry*; return True if any were set."""
    changed = False

    title = metadata.get('title')
    if title:
        entry['schema:headline'] = title
        changed = True

    author_names = _format_author_names(metadata.get('author'))
    if author_names:
        entry['schema:author'] = ' and '.join(author_names)
        changed = True

    abstract = metadata.get('abstract')
    if abstract:
        entry['schema:description'] = abstract
        changed = True

    published = metadata.get('published')
    date_parts = published.get('date-parts') if isinstance(published, dict) else None
    if date_parts and date_parts[0]:
        # Only the first component of the first date-parts item is stored.
        entry['schema:datePublished'] = date_parts[0][0]
        changed = True

    return changed


def update_json_file(file_path):
    """Enrich the DOI entries of one JSON file with fetched metadata.

    Reads the JSON document at *file_path* and looks at its
    ``schema:subjectOf`` value, which may be a list of objects or a single
    object. For every object whose ``@id`` is a doi.org URL, metadata is
    fetched and the fields that are actually present are written to
    ``schema:headline``, ``schema:author``, ``schema:description`` and
    ``schema:datePublished``. Fields missing from the metadata are left as
    they were, so existing values are not replaced by empty strings.

    The file is rewritten (4-space indent) and a message is printed only if
    at least one entry was changed.

    Args:
        file_path: Path of the JSON file to update in place.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be read or written.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not isinstance(data, dict):
        # A top-level array or scalar has no 'schema:subjectOf' key.
        return

    subject_of = data.get('schema:subjectOf') or []
    if isinstance(subject_of, dict):
        # A single object instead of a list; the wrapper list still holds
        # the same dict, so edits land in ``data``.
        subject_of = [subject_of]
    elif not isinstance(subject_of, list):
        return

    updated = False
    for entry in subject_of:
        if not isinstance(entry, dict):
            continue
        doi_url = entry.get('@id')
        if not isinstance(doi_url, str) or not doi_url:
            continue
        if not is_doi(doi_url):
            continue
        doi = get_doi_from_url(doi_url)
        if not doi:
            continue
        metadata = fetch_doi_metadata(doi)
        if isinstance(metadata, dict) and _apply_metadata(entry, metadata):
            updated = True

    if updated:
        # Serialize before opening for writing so a serialization error
        # cannot leave a truncated file behind.
        serialized = json.dumps(data, indent=4)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        print(f'Updated file: {file_path}')

def process_folder(folder_path):
    """Run ``update_json_file`` on every ``.json`` entry directly inside *folder_path*.

    Entries are taken from ``os.listdir`` in the order it returns them;
    subfolders are not descended into.
    """
    json_names = [name for name in os.listdir(folder_path) if name.endswith('.json')]
    for name in json_names:
        update_json_file(os.path.join(folder_path, name))

if __name__ == '__main__':
    # Entry point: runs only when this code is executed directly, not when
    # it is imported as a module.
    # The folder is given as a relative path, so it is resolved against the
    # current working directory.
    folder_path = 'BatteryTypeJson'  # Replace with your folder path
    process_folder(folder_path)


Failed to fetch metadata for DOI XFbauY8BADpm3NYud5hb: HTTP 400
Failed to fetch metadata for DOI VFajuo8BADpm3NYuJp/C: HTTP 404
Failed to fetch metadata for DOI -lYzuo8BADpm3NYu1Zo1: HTTP 400
Failed to fetch metadata for DOI bFaEuo8BADpm3NYu552a: HTTP 400
Updated file: BatteryTypeJson/A123_AMP20M1HD-A.json
Updated file: BatteryTypeJson/Kokam_SLPB120255255.json
Updated file: BatteryTypeJson/LG_Chem_E66A.json
Updated file: BatteryTypeJson/LG_Chem_E63.json
Updated file: BatteryTypeJson/Murata_US21700VTC6A.json
Updated file: BatteryTypeJson/Kokam_SLPB125255255H.json
Failed to fetch metadata for DOI hFanuo8BADpm3NYuYJ/i: HTTP 404
Updated file: BatteryTypeJson/Thunder-Sky_WB-LYP40AHA.json
Updated file: BatteryTypeJson/LG_Chem_INR18650_MJ1.json
Updated file: BatteryTypeJson/Quallion_LLC_QL015KA.json
Updated file: BatteryTypeJson/Kokam_SLPB50106100.json
Updated file: BatteryTypeJson/Lishen_LR2170SA.json
Updated file: BatteryTypeJson/Saft_MP176065.json
Updated file: BatteryTypeJson/Kokam_SLPB78