# In [1]:
import requests
from bs4 import BeautifulSoup
import json

# Function to get publication date from the article detail page
def get_publication_date(url, timeout=30):
    """Fetch an article detail page and return its publication date text.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        The stripped text of the first element matching
        ``div.citation_title p.citation_publication_date``, or
        ``"Unknown"`` when the page has no such element.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    date_element = soup.select_one("div.citation_title p.citation_publication_date")
    if date_element is None:
        return "Unknown"
    return date_element.get_text(strip=True)

# Function to get authors from the article detail page
def get_authors(url, timeout=30):
    """Fetch an article detail page and return the listed author names.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        A list with the stripped text of every ``p.citation_author``
        element, or ``["Unknown"]`` when the page has none.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    authors = [
        element.get_text(strip=True)
        for element in soup.select("p.citation_author")
    ]
    # An empty list means no author markup was found on the page.
    return authors or ["Unknown"]

# Function to get volume and date from the article detail page
def get_volume_and_date(url, timeout=30):
    """Fetch an article detail page and return its volume and date text.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        A dict with keys ``"Volume"`` and ``"Date"`` holding the stripped
        text of the first ``div.citation_volume`` and ``div.citation_date``
        elements. A missing element yields ``"Unknown"`` for its key.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or_unknown(selector):
        # Each field is optional on the page, so fall back independently.
        element = soup.select_one(selector)
        if element is None:
            return "Unknown"
        return element.get_text(strip=True)

    return {
        "Volume": text_or_unknown("div.citation_volume"),
        "Date": text_or_unknown("div.citation_date"),
    }

# Function to get abstract from the article detail page
def get_abstract(url, timeout=30):
    """Fetch an article detail page and return its abstract text.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        The stripped text of the first ``p.abs`` element, or
        ``"No Abstract Available"`` when the page has no such element.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    abstract_element = soup.select_one("p.abs")
    if abstract_element is None:
        return "No Abstract Available"
    return abstract_element.get_text(strip=True)

# Function to get number of downloads from the article detail page
def get_no_of_downloads(url, timeout=30):
    """Fetch an article detail page and return its download-count text.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        The stripped text of the third ``<p>`` directly inside
        ``div.entry.entry_re``, or ``"Not Available"`` when no element
        matches. The text is returned as found; it is not parsed into a
        number.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # The selector is positional, so it relies on the page's paragraph order.
    download_element = soup.select_one("div.entry.entry_re > p:nth-of-type(3)")
    if download_element is None:
        return "Not Available"
    return download_element.get_text(strip=True)

# Function to get PDF link from the article detail page
def get_pdf_link(url, timeout=30):
    """Fetch an article detail page and return the href of its PDF anchor.

    Args:
        url: Address of the article detail page.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without it a stalled connection would block forever.

    Returns:
        The ``href`` attribute of the first ``a.hvk_ajax`` element exactly
        as written in the page, or ``"No PDF Available"`` when the anchor
        is missing or carries no ``href``.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    pdf_anchor = soup.select_one("a.hvk_ajax")
    if pdf_anchor is None:
        return "No PDF Available"
    # Tag.get falls back to the default only when the attribute is absent.
    return pdf_anchor.get('href', "No PDF Available")

# Function to parse articles from a volume page
def _scrape_article_details(link, timeout):
    """Download one article detail page and extract every detail field.

    The page is fetched and parsed a single time; all fields are read from
    that one parse tree.
    """
    response = requests.get(link, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    def text_or(selector, default):
        element = soup.select_one(selector)
        if element is None:
            return default
        return element.get_text(strip=True)

    authors = [
        element.get_text(strip=True)
        for element in soup.select("p.citation_author")
    ]
    pdf_anchor = soup.select_one("a.hvk_ajax")
    if pdf_anchor is None:
        pdf_link = "No PDF Available"
    else:
        pdf_link = pdf_anchor.get('href', "No PDF Available")

    # Key order matches the order in which fields are written to the output.
    return {
        "Publication_Date": text_or(
            "div.citation_title p.citation_publication_date", "Unknown"
        ),
        "Authors": authors or ["Unknown"],
        "Volume": text_or("div.citation_volume", "Unknown"),
        "Date": text_or("div.citation_date", "Unknown"),
        "Abstract": text_or("p.abs", "No Abstract Available"),
        "No_of_Downloads": text_or(
            "div.entry.entry_re > p:nth-of-type(3)", "Not Available"
        ),
        "PDF_Link": pdf_link,
    }


def parse_articles_from_volume(url, timeout=30):
    """Scrape a volume page and the detail page of every article it lists.

    Each ``div.col-md-12 > a`` anchor on the volume page is treated as one
    article. Its detail page is downloaded once and all fields are
    extracted from that single response, rather than re-downloading the
    page for every field.

    Args:
        url: Address of the volume page.
        timeout: Seconds to wait on each HTTP request before ``requests``
            gives up. Without it a stalled connection would block forever.

    Returns:
        A list of dicts, one per article, with the keys
        ``Article_Title``, ``Article_Details_Link``, ``Publication_Date``,
        ``Authors``, ``Volume``, ``Date``, ``Abstract``,
        ``No_of_Downloads`` and ``PDF_Link``. Fields missing from the
        detail page carry placeholder strings.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    article_data = []
    for anchor in soup.select("div.col-md-12 > a"):
        link = anchor.get('href')
        if not link:
            # An anchor without a target has no detail page to fetch.
            continue

        record = {
            "Article_Title": anchor.get_text(strip=True),
            "Article_Details_Link": link,
        }
        record.update(_scrape_article_details(link, timeout))
        article_data.append(record)

    return article_data

# Main URL to scrape
main_url = "https://mjcs-ikma.com/jr-current_issue/"
# Bound the wait so a stalled server cannot hang the whole run.
response = requests.get(main_url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')

# Extract current volume title dynamically
current_volume_title_element = soup.select_one("h2.section-title > span")
current_volume_title = (
    current_volume_title_element.get_text(strip=True)
    if current_volume_title_element
    else "Current Volume"
)

# Parse the current volume page
current_volume_data = parse_articles_from_volume(main_url)

# Find all volume links
volume_links = soup.select("div.wp-widget-group__inner-blocks ul li a")

# List to hold parsed data; this is the same list object as
# current_volume_data, so the extend() calls below grow both names.
data = current_volume_data

for volume_link in volume_links:
    volume_url = volume_link.get('href')
    volume_title = volume_link.get_text(strip=True)

    # Anchors without a target cannot be scraped.
    if volume_url:
        volume_data = parse_articles_from_volume(volume_url)
        data.extend(volume_data)

# Convert to JSON and save to a file. ensure_ascii=False writes non-ASCII
# characters verbatim, so the file must be opened as UTF-8 explicitly;
# the platform default encoding may be unable to represent them.
with open('mjcs_ikma_articles.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=4, ensure_ascii=False)

# Print the data
print(json.dumps(data, indent=4, ensure_ascii=False))