In [2]:
import pandas as pd
import json
from os import walk

In [3]:
def process_json_to_df(file_path):
    try:
        # Read JSON file
        with open(file_path, 'r') as file:
            data = json.load(file)
        
        abstracts_data = data.get('abstracts-retrieval-response', {})
        coredata = abstracts_data.get('coredata', {})
        
        # Helper function to safely get joined values
        def safe_join(data_dict, key_path, subkey=None):
            try:
                if not data_dict.get(key_path):
                    return None
                if subkey:
                    return ", ".join([item.get(subkey, '') for item in data_dict[key_path]]) or None
                return ", ".join(data_dict[key_path]) or None
            except Exception:
                return None

        # Helper function to safely get author names
        def get_author_names():
            try:
                authors = abstracts_data.get('authors', {}).get('author', [])
                if not authors:
                    return None
                names = []
                for author in authors:
                    preferred_name = author.get('preferred-name', {})
                    if preferred_name and 'ce:indexed-name' in preferred_name:
                        names.append(preferred_name['ce:indexed-name'])
                return ", ".join(names) if names else None
            except Exception:
                return None

        # Helper function to safely get keywords
        def get_keywords():
            try:
                keywords = abstracts_data.get('authkeywords', {}).get('author-keyword', [])
                if not keywords:
                    return None
                keyword_list = [key.get('$', '') for key in keywords]
                return ", ".join(keyword_list) if any(keyword_list) else None
            except Exception:
                return None

        # Helper function to safely get subjects
        def get_subjects():
            try:
                subjects = abstracts_data.get('subject-areas', {}).get('subject-area', [])
                if not subjects:
                    return None
                subject_list = [subject.get('@code', '') for subject in subjects]
                return ", ".join(subject_list) if any(subject_list) else None
            except Exception:
                return None
        
        # Create a dictionary with the data
        paper_data = {
            'Title': coredata.get('dc:title') or None,
            'Publisher': coredata.get('dc:publisher') or None,
            'Publish_date': coredata.get('prism:coverDate') or None,
            'Authors': get_author_names(),
            'Keywords': get_keywords(),
            'Subjects': get_subjects()
        }
        
        return paper_data
    
    except Exception as e:
        print(f"Error processing file {file_path}: {str(e)}")
        # Return empty dictionary with None values
        return {
            'Title': None,
            'Publisher': None,
            'Publish_date': None,
            'Authors': None,
            'Keywords': None,
            'Subjects': None
        }

# Rest of your code remains the same
base_path = r'C:\Users\USER\Desktop\DSDE_project\Project\2019\2019'
all_data = []
f = []
for (dirpath, dirnames, filenames) in walk(r'C:\Users\USER\Desktop\DSDE_project\Project\2019'):
    f.extend(filenames)
    break
for i in range(len(f)):
    file_number = str(i).zfill(5)
    file_path = base_path + file_number
    
    if os.path.exists(file_path):
        paper_data = process_json_to_df(file_path)
        all_data.append(paper_data)
    
    if i % 100 == 0:
        print(f"Processed {i} files...")

df = pd.DataFrame(all_data)

NameError: name 'os' is not defined

In [None]:
df

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6
0,Identifying nursing research priorities in Vie...,Sciendomacvetrev@fvm.ukim.edu.mk,2019-12-31,"Aungsuroch Y., Nhu X.H.T., Linh T.T.K., Polsoo...","Delphi method, nursing, research priorities, V...","2900, 3304"
1,Evaluation of the sciatic nerve location regar...,Via Medica,2019-12-30,"Wan-Ae-Loh P., Huanmanop T., Agthong S., Chent...","Greater trochanter, Ischial tuberosity, Locali...","2702, 2722"
2,Breast conserving surgery versus modified radi...,Sciendomacvetrev@fvm.ukim.edu.mk,2019-12-30,Biomedicine A.,,1300
3,Clinical outcomes and surgical preferences for...,Sciendomacvetrev@fvm.ukim.edu.mk,2019-12-30,"Vongsaisuwon M., Pongpirul K., Chatamara K.","breast-conserving surgery, mastectomy, retrosp...",1300
4,Common orthopedic problems in the neonate: A c...,Sciendomacvetrev@fvm.ukim.edu.mk,2019-12-30,"Sirisabya A., Tooptakong T., Limpaphayom N.","foot deformities, fracture, hip dysplasia, inc...",1300
...,...,...,...,...,...,...
3077,Forecasting equilibrium quantity and price on ...,Kasetsart University Research and Development ...,2019-01-01,"Arunwarakorn S., Suthiwartnarueput K., Porncha...","Equilibrium price, Equilibrium quantity, Natur...",3300
3078,Top income shares and inequality: Evidences fr...,Kasetsart University Research and Development ...,2019-01-01,Vanitcharearnthum V.,"Gini coefficient, Income inequality, Pareto di...",3300
3079,Using the contemplative teaching method to enh...,Kasetsart University Research and Development ...,2019-01-01,Khayankij S.,"Aesthetic experience, awareness, Contemplative...",3300
3080,Looks can be deceiving: Body image dissatisfac...,De Gruyterpeter.golla@degruyter.com,2019-01-01,"Pawijit Y., Likhitsuwan W., Ludington J., Pisi...","Anxiety, Body image, Late adolescence, Thailand","2735, 2739"


In [None]:
df.isnull().sum()

Title             1
Publisher         1
Publish_date      0
Authors           0
Keywords        602
Subjects          0
dtype: int64

In [None]:
import requests

# Define the API URL and the DOI
url = 'https://api.elsevier.com/content/search/affiliation?query=af-id(60090656)'

# Define your API key (replace with your actual key)
api_key = 'your_api_key_here'

# Define the headers with the API key
headers = {
    'X-ELS-APIKey': 'faa5274eb341aee1f3c7877160d899de',
    'X-ELS-Insttoken': '2a99851e7ea868948a570d488a0ab64b'
}

# Send the request to the API
response = requests.get(url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Print the response data in JSON format
    print(response.json())
else:
    # Print error message
    print(f"Error {response.status_code}: {response.text}")


{'search-results': {'opensearch:totalResults': '1', 'opensearch:startIndex': '0', 'opensearch:itemsPerPage': '1', 'opensearch:Query': {'@role': 'request', '@searchTerms': 'af-id(60090656)', '@startPage': '0'}, 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/search/affiliation?start=0&count=25&query=af-id%2860090656%29', '@type': 'application/json'}, {'@_fa': 'true', '@ref': 'first', '@href': 'https://api.elsevier.com/content/search/affiliation?start=0&count=25&query=af-id%2860090656%29', '@type': 'application/json'}], 'entry': [{'@_fa': 'true', 'link': [{'@_fa': 'true', '@ref': 'self', '@href': 'https://api.elsevier.com/content/affiliation/affiliation_id/60090656'}, {'@_fa': 'true', '@ref': 'search', '@href': 'https://api.elsevier.com/content/search/scopus?query=af-id%2860090656%29'}, {'@_fa': 'true', '@ref': 'scopus-affiliation', '@href': 'https://www.scopus.com/affil/profile.uri?afid=60090656&partnerID=HzOxMe3b&origin=inward'}], 'prism:url': 'http

In [None]:
import pybliometrics

pybliometrics.scopus.init()

from pybliometrics.scopus import AbstractRetrieval

x = AbstractRetrieval('10.1007/978-3-319-98485-8_15')
print(x.title)

Public health and international epidemiology for radiology
