In [2]:
import re
import requests
import xml.etree.ElementTree as ET

# --- Sample Input References ---
# One reference per line: a bracketed index, a tab, then the citation text.
# Every sample entry ends with a "doi: <DOI>." field; extract_doi() relies on
# that "doi:" label, and the main loop splits this string on newlines.
input_references = """[1]	R. L. Siegel, K. D. Miller, N. S. Wagle, and A. Jemal, “Cancer statistics, 2023,” CA. Cancer J. Clin., vol. 73, no. 1, pp. 17–48, Jan. 2023, doi: 10.3322/caac.21763.
[2]	K. V. Sriram and R. H. Havaldar, “Analytical review and study on object detection techniques in the image,” Int. J. Model. Simul. Sci. Comput., vol. 12, no. 05, p. 2150031, Oct. 2021, doi: 10.1142/S1793962321500318.
[3]	L. Fan, H. Zhao, H. Zhao, H. Hu, and Z. Wang, “Survey of target detection based on deep convolutional neural networks,” Opt. Precis. Eng., vol. 28, no. 5, pp. 1152–1164, 2020, doi: 10.3788/ope.20202805.1152.
[4]	P. Viola and M. Jones, “Robust real-time face detection,” in Proceedings Eighth IEEE International Conference on Computer Vision. ICCV 2001, Vancouver, BC, Canada: IEEE Comput. Soc, 2001, pp. 747–747, doi: 10.1109/ICCV.2001.937709.
[5]	N. Dalal and B. Triggs, “Histograms of Oriented Gradients for Human Detection,” in 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR’05), San Diego, CA, USA: IEEE, 2005, pp. 886–893, doi: 10.1109/CVPR.2005.177.
[6]	M. Hussain, “YOLO-v1 to YOLO-v8, the Rise of YOLO and Its Complementary Nature toward Digital Manufacturing and Industrial Defect Detection,” Machines, vol. 11, no. 7, p. 677, Jun. 2023, doi: 10.3390/machines11070677.
[7]	G. Sharma, R. Dave, J. Sanadya, P. Sharma, and K. K. Sharma, “Various types and management of breast cancer: An overview,” J. Adv. Pharm. Technol. Res., vol. 1, no. 2, p. 109, 2010, doi: 10.4103/2231-4040.72251.
[8]	C. Li et al., “YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications,” Sep. 07, 2022, arXiv: arXiv:2209.02976, doi: 10.48550/arXiv.2209.02976.
[9]	S. Zahia, D. Sierra-Sosa, B. Garcia-Zapirain, and A. Elmaghraby, “Tissue classification and segmentation of pressure injuries using convolutional neural networks,” Comput. Methods Programs Biomed., vol. 159, pp. 51–58, Jun. 2018, doi: 10.1016/j.cmpb.2018.02.018.
[10]	X. Sun, X. Wang, J. Liu, and H. Huang, “Classic YOLO Series Target Detection Algorithms and Their Application in Breast Cancer Detection,” J. Comput. Syst. Appl., vol. 32, no. 12, pp. 52–62, 2023, doi: 10.15888/j.cnki.csa.009351.
[11]	F. Prinzi, M. Insalaco, A. Orlando, S. Gaglio, and S. Vitabile, “A Yolo-Based Model for Breast Cancer Detection in Mammograms,” Cogn. Comput., vol. 16, no. 1, pp. 107–120, Jan. 2024, doi: 10.1007/s12559-023-10189-6.
[12]	P. K. Samanta, A. Basuli, N. K. Rout, and G. Panda, “Improved Breast Cancer Detection from Ultrasound Images Using YOLOv8 Model,” in 2023 IEEE 3rd International Conference on Applied Electromagnetics, Signal Processing, & Communication (AESPC), Bhubaneswar, India: IEEE, Nov. 2023, pp. 1–6, doi: 10.1109/AESPC59761.2023.10390341.
[13]	H. Gui et al., “FS-YOLOv9: A Frequency and Spatial Feature-Based YOLOv9 for Real-time Breast Cancer Detection,” Acad. Radiol., Oct. 2024, doi: 10.1016/j.acra.2024.09.048.
[14]	L. Zheng et al., “Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena,” Dec. 24, 2023, arXiv: arXiv:2306.05685, doi: 10.48550/arXiv.2306.05685.
[15]	A. Y. Yuan et al., “Hybrid deep learning network for vascular segmentation in photoacoustic imaging,” Biomed. Opt. Express, vol. 11, no. 11, p. 6445, Nov. 2020, doi: 10.1364/BOE.409246.
[16]	W. Al-Dhabyani, M. Gomaa, H. Khaled, and A. Fahmy, “Dataset of breast ultrasound images,” Data Brief, vol. 28, p. 104863, Feb. 2020, doi: 10.1016/j.dib.2019.104863.
"""


# --- Function to Extract DOI from a Reference ---
def extract_doi(reference):
    """Return the DOI found in a citation string, or ``None`` if there is none.

    Two spellings are recognised:

    * a labelled field, ``doi: 10.1000/xyz`` (case-insensitive, optional
      whitespace after the colon);
    * a resolver URL, ``https://doi.org/10.1000/xyz`` (also ``http`` and
      ``dx.doi.org``), either on its own or after a ``doi:`` label.

    A resolver URL prefix is removed so the result is always the bare DOI,
    and trailing sentence punctuation (``.``, ``,``, ``;``) is stripped.

    Args:
        reference: One citation as plain text.

    Returns:
        The bare DOI string, or ``None`` when no DOI is present.
    """
    resolver_prefix = r"https?://(?:dx\.)?doi\.org/"

    match = re.search(r"doi:\s*(\S+)", reference, re.IGNORECASE)
    if match is None:
        # No "doi:" label; fall back to a bare resolver URL.
        match = re.search(resolver_prefix + r"(\S+)", reference, re.IGNORECASE)
    if match is None:
        return None

    # "doi: https://doi.org/10.x/y" captures the whole URL; keep only the DOI.
    doi = re.sub("^" + resolver_prefix, "", match.group(1), flags=re.IGNORECASE)
    doi = doi.rstrip(".,;")
    # A capture made only of punctuation is not a DOI.
    return doi or None

# --- Function to Query CrossRef API Using DOI ---
def get_crossref_details(doi, timeout=10):
    """Fetch the CrossRef metadata record for a DOI.

    The lookup is best-effort: every failure is reported with ``print`` and
    turned into an empty dict, so callers only need a truthiness check.

    Args:
        doi: Bare DOI string, e.g. ``"10.3322/caac.21763"``.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.

    Returns:
        The ``"message"`` object of the CrossRef response, or ``{}`` when the
        request fails, returns a non-200 status, or the body is not a JSON
        object.
    """
    # Imported locally so this function does not depend on edits to the
    # file-level import block.
    from urllib.parse import quote

    # DOIs may contain characters that are not valid in a URL path; keep "/"
    # literal because it separates the DOI prefix from the suffix.
    encoded_doi = quote(doi, safe="/")
    url = f"https://api.crossref.org/works/{encoded_doi}"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"Exception while querying DOI {doi}: {e}")
        return {}

    if response.status_code != 200:
        print(f"Error: DOI {doi} returned status code {response.status_code}")
        return {}

    try:
        payload = response.json()
    except ValueError as e:
        # The body was not valid JSON despite the 200 status.
        print(f"Exception while querying DOI {doi}: {e}")
        return {}

    if not isinstance(payload, dict):
        return {}
    return payload.get("message", {})

# --- Function to Query arXiv API Using arXiv ID ---
def _arxiv_entry_to_details(entry, ns):
    """Convert one Atom ``<entry>`` element into a CrossRef-shaped dict."""
    # Collapse internal whitespace: Atom text nodes may be wrapped over
    # several lines, which would otherwise leak newlines into the title.
    raw_title = entry.findtext("atom:title", default="", namespaces=ns) or ""
    title = " ".join(raw_title.split()) or "No title found"

    author_list = []
    for author in entry.findall("atom:author", ns):
        name = (author.findtext("atom:name", default="", namespaces=ns) or "").strip()
        # arXiv gives a single display name, so it is stored whole under
        # "family" with an empty "given" to match the CrossRef author shape.
        author_list.append({"given": "", "family": name})

    published = (entry.findtext("atom:published", default="", namespaces=ns) or "").strip()
    year = published.split("-")[0] if published else "Not available"

    return {
        "title": [title],
        "author": author_list,
        "published-online": {"date-parts": [[year]]},
        "container-title": ["arXiv"],
        "page": "N/A",
    }


def get_arxiv_details(doi, timeout=10):
    """Fetch metadata for an arXiv DOI from the arXiv API.

    The arXiv identifier is taken to be everything after the last
    ``"arxiv."`` in the lower-cased DOI (``10.48550/arXiv.2209.02976`` gives
    ``2209.02976``). The result mimics the subset of the CrossRef record that
    the formatting functions read.

    The lookup is best-effort: failures are reported with ``print`` and
    turned into an empty dict.

    Args:
        doi: DOI string; DOIs that do not contain ``"arxiv."`` yield ``{}``
            without any network access.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        A dict with ``title``, ``author``, ``published-online``,
        ``container-title`` and ``page`` keys, or ``{}`` when the DOI is not
        an arXiv DOI or the lookup fails.
    """
    doi_lower = doi.lower()
    if "arxiv." not in doi_lower:
        return {}

    arxiv_id = doi_lower.split("arxiv.")[-1]
    if not arxiv_id:
        print(f"No arXiv ID found in DOI: {doi}")
        return {}

    try:
        # id_list is the documented parameter for fetching papers by ID.
        response = requests.get(
            "https://export.arxiv.org/api/query",
            params={"id_list": arxiv_id},
            timeout=timeout,
        )
    except requests.RequestException as e:
        print(f"Exception while querying arXiv for DOI {doi}: {e}")
        return {}

    if response.status_code != 200:
        print(f"Error: arXiv API for DOI {doi} returned status code {response.status_code}")
        return {}

    try:
        root = ET.fromstring(response.content)
    except ET.ParseError as e:
        print(f"Exception while querying arXiv for DOI {doi}: {e}")
        return {}

    ns = {"atom": "http://www.w3.org/2005/Atom"}
    entry = root.find("atom:entry", ns)
    if entry is None:
        print(f"No entry found for arXiv ID: {arxiv_id}")
        return {}

    # The arXiv API reports errors as an entry whose <id> points at
    # ".../api/errors#..."; do not present that as a paper.
    entry_id = entry.findtext("atom:id", default="", namespaces=ns) or ""
    if "/api/errors" in entry_id:
        print(f"No entry found for arXiv ID: {arxiv_id}")
        return {}

    return _arxiv_entry_to_details(entry, ns)

# --- Function to Determine DOI Type and Query the Appropriate API ---
def get_doi_details(doi):
    """Look up metadata for a DOI, trying arXiv first when it applies.

    A DOI containing "arxiv" (case-insensitive) is sent to
    get_arxiv_details(); if that yields nothing, or the DOI is not an arXiv
    one, the CrossRef lookup is used instead.

    Returns whatever the chosen lookup returns (an empty dict on failure).
    """
    looks_like_arxiv = "arxiv" in doi.lower()
    arxiv_result = get_arxiv_details(doi) if looks_like_arxiv else None
    if arxiv_result:
        return arxiv_result
    # Either not an arXiv DOI, or the arXiv lookup came back empty.
    return get_crossref_details(doi)

# --- Helper Function to Extract Publication Year ---
def get_pub_year(details):
    """Return the publication year from a metadata record.

    The date fields are consulted in a fixed order of preference
    (print date, online date, issued, created). The first one whose
    "date-parts" holds a truthy leading value wins, and that value is
    returned unchanged. "Not available" is returned if none qualifies.
    """
    preferred_fields = ("published-print", "published-online", "issued", "created")
    for field in preferred_fields:
        if field not in details:
            continue
        parts = details[field].get("date-parts", [[]])
        if not parts:
            continue
        leading = parts[0]
        # Skip empty date lists and placeholder years such as None.
        if leading and leading[0]:
            return leading[0]
    return "Not available"

# --- Formatting Functions ---

_SEPARATOR = "------------------------------------------------------"


def _first_or_default(details, key, default):
    """Return the first item of the list stored under *key*, else *default*.

    Covers both a missing key and a key mapped to an empty list; indexing
    ``[0]`` on the latter would raise ``IndexError``.
    """
    values = details.get(key)
    return values[0] if values else default


def _format_authors(authors):
    """Render the author list as the text printed after ``Authors:``.

    Each author dict contributes "given family". Empty parts are skipped so
    that a record with only a family name (as built for arXiv entries) has no
    leading space. Authors that carry only a ``name`` key are shown by that
    name. With four or more authors only the first is shown, followed by the
    "et al." hint.
    """
    names = []
    for author in authors or []:
        parts = [(author.get("given") or "").strip(), (author.get("family") or "").strip()]
        full_name = " ".join(part for part in parts if part)
        if not full_name:
            full_name = (author.get("name") or "").strip()
        if full_name:
            names.append(full_name)

    if not names:
        return "Not available"
    if len(names) >= 4:
        return f"{names[0]} et al. (use 'et al.' for 4 or more authors)"
    return ", ".join(names)


def _print_common_fields(heading, source_label, doi, details):
    """Print the header, title, authors, year and source shared by all layouts."""
    print(f"\n--- {heading} Details for DOI: {doi} ---")
    print("Title:", _first_or_default(details, "title", "No title found"))
    print("Authors:", _format_authors(details.get("author", [])))
    print("Publication Year:", get_pub_year(details))
    print(source_label, _first_or_default(details, "container-title", "Not available"))


def print_journal_details(doi, details):
    """Print a journal-article record.

    Args:
        doi: DOI string shown in the header and in the ``DOI:`` line.
        details: Metadata dict in the CrossRef "message" shape.
    """
    _print_common_fields("Journal Article", "Source (Journal):", doi, details)
    print("Volume:", details.get("volume", "Not available"))
    print("Issue:", details.get("issue", "Not available"))
    # If "article-number" exists, use that; otherwise, use "page"
    if "article-number" in details:
        print("Article No:", details["article-number"])
    else:
        print("Page:", details.get("page", "Not available"))
    print("DOI:", doi)
    print("Publisher:", details.get("publisher", "Not available"))
    print(_SEPARATOR)


def print_conference_details(doi, details):
    """Print a conference-paper record, including event name and location.

    Args:
        doi: DOI string shown in the header and in the ``DOI:`` line.
        details: Metadata dict in the CrossRef "message" shape.
    """
    _print_common_fields("Conference Paper", "Source (Proceedings):", doi, details)
    print("Page:", details.get("page", "Not available"))
    event = details.get("event") or {}
    print("Event Name:", event.get("name", "Not available"))
    print("Event Location:", event.get("location", "Not available"))
    print("DOI:", doi)
    print("Publisher:", details.get("publisher", "Not available"))
    print(_SEPARATOR)


def print_arxiv_details(doi, details):
    """Print an arXiv-preprint record.

    Args:
        doi: DOI string shown in the header and in the ``DOI:`` line.
        details: Metadata dict in the CrossRef "message" shape.
    """
    _print_common_fields("arXiv Preprint", "Source:", doi, details)
    print("Page:", details.get("page", "Not available"))
    print("DOI:", doi)
    print(_SEPARATOR)

# --- Main Processing ---
# One non-empty, whitespace-trimmed reference per input line.
references_list = [ref.strip() for ref in input_references.strip().split("\n") if ref.strip()]

for ref in references_list:
    doi = extract_doi(ref)
    if not doi:
        print("No DOI found in reference:")
        print(ref)
        continue

    details = get_doi_details(doi)
    if not details:
        print(f"Details not found for DOI: {doi}")
        continue

    # "type" may be absent (arXiv records built locally have none) or null.
    doi_type = (details.get("type") or "").lower()
    # "container-title" may be missing or an empty list; indexing [0] on an
    # empty list would raise IndexError and abort the whole run.
    container_titles = details.get("container-title") or [""]
    source_name = (container_titles[0] or "").lower()

    if doi_type == "proceedings-article":
        print_conference_details(doi, details)
    elif doi_type == "journal-article":
        print_journal_details(doi, details)
    elif "arxiv" in doi.lower() or "arxiv" in source_name:
        print_arxiv_details(doi, details)
    else:
        # Default to journal details if type is unknown
        print_journal_details(doi, details)



--- Journal Article Details for DOI: 10.3322/caac.21763 ---
Title: Cancer statistics, 2023
Authors: Rebecca L. Siegel et al. (use 'et al.' for 4 or more authors)
Publication Year: 2023
Source (Journal): CA: A Cancer Journal for Clinicians
Volume: 73
Issue: 1
Page: 17-48
DOI: 10.3322/caac.21763
Publisher: Wiley
------------------------------------------------------

--- Journal Article Details for DOI: 10.1142/S1793962321500318 ---
Title: Analytical review and study on object detection techniques in the image
Authors: K. V. Sriram, R. H. Havaldar
Publication Year: 2021
Source (Journal): International Journal of Modeling, Simulation, and Scientific Computing
Volume: 12
Issue: 05
Page: 2150031
DOI: 10.1142/S1793962321500318
Publisher: World Scientific Pub Co Pte Lt
------------------------------------------------------
Error: DOI 10.3788/ope.20202805.1152 returned status code 404
Details not found for DOI: 10.3788/ope.20202805.1152

--- Conference Paper Details for DOI: 10.1109/ICCV.2001