In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time


In [8]:
# # Setup Chrome driver

# chrome_options = webdriver.ChromeOptions()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--no-sandbox')
# chrome_options.add_argument('--disable-dev-shm-usage')
# driver = webdriver.Chrome(options=chrome_options)

: 

In [3]:

def get_author_profile(researcher_id):
    """Scrape a SINTA author profile page and print its key fields.

    Starts a headless Chrome session, loads the profile page for
    ``researcher_id`` and prints the author name, affiliation, SINTA score
    (only when one was found), subjects and page count. The same browser
    session is then passed to ``get_all_scopus_documents``. The browser is
    closed when the function exits, whether or not scraping succeeded.

    Args:
        researcher_id: SINTA author identifier interpolated into the profile URL.
    """
    # Headless Chrome configuration
    options = webdriver.ChromeOptions()
    for flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
        options.add_argument(flag)
    browser = webdriver.Chrome(options=options)

    try:
        profile_url = f"https://sinta.kemdikbud.go.id/authors/profile/{researcher_id}/"
        browser.get(profile_url)

        # Block (for at most 10 seconds) until a <body> element is present
        body_present = EC.presence_of_element_located((By.TAG_NAME, 'body'))
        WebDriverWait(browser, 10).until(body_present)

        # Parse the rendered page once and reuse it for every extractor
        profile_soup = BeautifulSoup(browser.page_source, 'html.parser')

        print(f"Author Name: {extract_author_name(profile_soup)}")
        print(f"Affiliation: {extract_affiliation(profile_soup)}")

        score = extract_sinta_score(profile_soup)
        if score:
            print(f"SINTA Score Overall: {score}")

        print(f"Subjects: {', '.join(extract_subjects(profile_soup))}")

        page_count = extract_total_pages(profile_soup)
        if page_count:
            print(f"Total Pages: {page_count}")
            get_all_scopus_documents(browser, researcher_id, page_count)
    finally:
        # Always release the browser process
        browser.quit()


In [4]:

def extract_author_name(soup):
    """Return the stripped text of the first ``<h3>`` element in ``soup``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The heading text with surrounding whitespace removed, or None when
        the page has no ``<h3>`` element (instead of raising AttributeError).
    """
    heading = soup.find('h3')
    if heading is None:
        return None
    return heading.get_text(strip=True)

def extract_affiliation(soup):
    """Return the author's affiliation name, or None when no link is found.

    Every anchor whose href contains 'affiliations' is considered. An anchor
    whose href contains 'affiliations/profile' is preferred; when there is
    none, the first matching anchor is used, which is what the previous
    implementation always returned.

    NOTE(review): a recorded run printed "Affiliation: Affiliations", i.e. the
    first match was a link labelled "Affiliations" rather than an institution.
    The 'affiliations/profile' pattern is presumed to identify the institution
    link -- confirm against the live page markup.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        The stripped link text, or None when no anchor matches (instead of
        raising AttributeError).
    """
    links = soup.find_all('a', href=lambda x: x and 'affiliations' in x)
    if not links:
        return None

    for link in links:
        # Prefer a link to a specific affiliation over a generic listing link.
        if 'affiliations/profile' in (link.get('href') or ''):
            return link.get_text(strip=True)

    # Fall back to the first match to stay compatible with the old behaviour.
    return links[0].get_text(strip=True)

def extract_sinta_score(soup):
    """Return the stripped text of the first ``div.pr-num`` element, or None.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        The element's text with surrounding whitespace removed, or None when
        no such element is found.
    """
    score_tag = soup.find('div', class_='pr-num')
    return score_tag.get_text(strip=True) if score_tag else None

def extract_subjects(soup):
    """Return the author's subject labels as a list of stripped strings.

    Looks first for ``<li class="subject-list">`` elements. When none exist,
    falls back to the ``<li>`` items nested inside ``<ul class="subject-list">``
    elements, so the result is unchanged whenever the first selector matches.

    NOTE(review): a recorded run printed an empty "Subjects:" line, which
    suggests the class may sit on the enclosing list rather than on each
    item -- confirm against the live page markup.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        list: Subject texts in document order; empty when nothing matches.
    """
    items = soup.find_all('li', class_='subject-list')
    if not items:
        # Fallback: the class is on the <ul>, the subjects are its <li> items.
        items = [
            item
            for subject_ul in soup.find_all('ul', class_='subject-list')
            for item in subject_ul.find_all('li')
        ]
    return [item.get_text(strip=True) for item in items]

def extract_total_pages(soup):
    """Return the highest page number shown in the pagination bar.

    Reads the ``a.page-link`` label of every ``li.page-item`` inside
    ``ul.pagination.justify-content-center`` and returns the largest label
    that parses as an integer. Labels that are not numbers are ignored, so
    the result does not depend on where (or whether) previous/next controls
    appear in the list.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find`` method.

    Returns:
        int: The largest numeric page label, or 1 when there is no pagination
        bar or it contains no numeric label.
    """
    pagination = soup.find('ul', class_='pagination justify-content-center')
    if not pagination:
        return 1

    page_numbers = []
    for item in pagination.find_all('li', class_='page-item'):
        link = item.find('a', class_='page-link')
        if not link:
            continue
        try:
            page_numbers.append(int(link.get_text(strip=True)))
        except ValueError:
            # Navigation controls carry non-numeric labels; skip them.
            continue

    return max(page_numbers, default=1)

def get_all_scopus_documents(driver, researcher_id, total_pages):
    """Collect, print and return every Scopus publication title for an author.

    Visits pages 1..total_pages of the author's ``view=scopus`` listing with
    the given browser session, gathers the titles found on each page via
    ``extract_publications`` and prints them numbered from 1.

    Args:
        driver: An open Selenium WebDriver session; it is reused, not closed.
        researcher_id: SINTA author identifier interpolated into the page URL.
        total_pages: Number of listing pages to visit; below 1 visits none.

    Returns:
        list: The collected titles in page order (empty when none were found).
        Earlier versions returned None; callers that ignore the result are
        unaffected.
    """
    publications = []
    for page_num in range(1, total_pages + 1):
        page_url = f"https://sinta.kemdikbud.go.id/authors/profile/{researcher_id}/?page={page_num}&view=scopus"
        driver.get(page_url)

        # Block (for at most 10 seconds) until a <body> element is present
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, 'body')))

        # Parse the rendered page and collect its publication titles
        page_soup = BeautifulSoup(driver.page_source, 'html.parser')
        publications.extend(extract_publications(page_soup))

    for idx, title in enumerate(publications, 1):
        print(f"Publication {idx}: {title}")

    # Hand the data back so callers can process it, not just read the printout.
    return publications

def extract_publications(soup):
    """Return the stripped text of every ``div.ar-title`` element in ``soup``.

    Args:
        soup: Parsed page exposing a BeautifulSoup-style ``find_all`` method.

    Returns:
        list: One string per matching element, in document order.
    """
    titles = []
    for title_div in soup.find_all('div', class_='ar-title'):
        titles.append(title_div.get_text(strip=True))
    return titles


In [5]:

# Run the scraper for a single SINTA author and print the results.
# The id is a string because it is only interpolated into the profile URLs.
researcher_id = "5986966"
get_author_profile(researcher_id)


Author Name: AJI SURAJI
Affiliation: Affiliations
SINTA Score Overall: 2.326
Subjects: 
Total Pages: 1
Publication 1: Effects of overloading commercial vehicles on road design life
Publication 2: Identification of Road Surface Defects Using Multiclass Support Vector Machine
Publication 3: Moving Car Observation (MCO) for Road Surface Defect Identification Using GPS Video
Publication 4: Smart Route Choice Based on Google Maps Application in Urban Road Network
Publication 5: Correlation of Expansive Soil and Road Pavement Conditions Using Data Mining from GIS Portal
Publication 6: Application of the Synthetic Over-Sampling Method to Increase the Sensitivity of Algorithm Classification for Class Imbalance in Small Spatial Datasets
Publication 7: Empirical Study of a Spatial Analysis for Prone Road Traffic Accident Classification based on MCDM Method
Publication 8: ACCIDENT RISK ANALYSIS OF ROAD GEOMETRIC COMPONENTS USING FUNCTIONAL WORTHINESS APPROACH
Publication 9: ANALYSIS OF INTERCITY 

In [6]:
import requests
from bs4 import BeautifulSoup

# SINTA author profile URL
url = "https://sinta.kemdikbud.go.id/authors/profile/5986966/"

try:
    # Send the request. Without a timeout, requests waits indefinitely on a
    # stalled server; a timeout error is a RequestException and is handled below.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise for 4xx/5xx responses

    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

    # Look for the pagination element
    pagination = soup.find("ul", class_="pagination justify-content-center")

    if pagination:
        print("Halaman ini memiliki elemen pagination.")

        # Count the numbered pagination entries. Whitespace around the label
        # is stripped first, otherwise str.isdigit() rejects entries such as "\n1\n".
        pages = pagination.find_all("li", class_="page-item")
        total_pages = sum(1 for page in pages if page.get_text(strip=True).isdigit())
        print(f"Total halaman publikasi: {total_pages}")
    else:
        print("Halaman ini tidak memiliki elemen pagination.")

except requests.exceptions.RequestException as e:
    print(f"Gagal mengakses halaman: {e}")


Halaman ini tidak memiliki elemen pagination.
