<a href="https://colab.research.google.com/github/nunoandrade80-cmd/Citation-counter/blob/main/Citation_counter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import time
import pandas as pd
from google.colab import files

print("Please upload your text file containing PMIDs (either comma-separated or one per line).")

# 1. Prompt user to upload file.
# The returned object is used as a mapping of file name -> raw file bytes.
uploaded = files.upload()

# Only one file is expected. If several are uploaded, the last one iterated
# is the one whose content is kept.
pmid_content = None
file_name = None
for file_name, raw_bytes in uploaded.items():
    # 'utf-8-sig' decodes ordinary UTF-8 exactly like 'utf-8' but also drops a
    # leading byte-order mark. Without this, a file saved as "UTF-8 with BOM"
    # would yield a first PMID starting with '\ufeff', which can never match.
    pmid_content = raw_bytes.decode('utf-8-sig')
    print(f"User uploaded file: '{file_name}'")

if pmid_content is None:
    print("No file was uploaded. Exiting.")
else:
    # 2. Parse PMIDs from the uploaded file content.
    # Commas and every kind of whitespace (spaces, tabs, LF, CRLF) are treated
    # as interchangeable separators. Picking a single separator based on
    # whether a comma appears anywhere would leave IDs glued together in mixed
    # layouts (a comma-separated list wrapped over several lines, for example).
    pmids_list = pmid_content.replace(',', ' ').split()

    print(f"Total PMIDs parsed: {len(pmids_list)}")
    if len(pmids_list) > 5:
        print(f"First 5 parsed PMIDs: {pmids_list[:5]}")
    else:
        print(f"Parsed PMIDs: {pmids_list}")

    # 3. Define API URL and initialize data storage.
    # The query is restricted to SRC:MED (PubMed/MEDLINE) so that the first
    # result is the PubMed record for this PMID rather than a record from
    # another Europe PMC source that happens to share the same external ID.
    EUROPE_PMC_API_BASE_URL = (
        "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
        "?query=EXT_ID:{}%20AND%20SRC:MED&resulttype=core&format=json"
    )
    # Upper bound (seconds) for each request; without a timeout a stalled
    # connection would block the notebook cell indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30
    citation_data = []

    # 4. Fetch citation counts for each PMID.
    # A single Session reuses the underlying connection across requests.
    with requests.Session() as session:
        for pmid in pmids_list:
            api_url = EUROPE_PMC_API_BASE_URL.format(pmid)
            try:
                response = session.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS)
                response.raise_for_status()  # Raise an exception for HTTP errors (4xx or 5xx)
                data = response.json()

                # An absent or empty result list means the PMID is unknown to
                # the service; that is reported as 'N/A', not as an error.
                results = data.get('resultList', {}).get('result') or []
                citation_count = results[0].get('citedByCount') if results else None
                display_count = citation_count if citation_count is not None else 'N/A'

                citation_data.append({'PMID': pmid, 'Citation Count': display_count})
                print(f"Fetched citation count for PMID {pmid}: {display_count}")

            except requests.exceptions.RequestException as e:
                print(f"Error fetching data for PMID {pmid}: {e}")
                citation_data.append({'PMID': pmid, 'Citation Count': 'Error'})
            except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
                # Covers a body that is not valid JSON as well as a payload
                # whose shape is not the expected nested dict/list, so one bad
                # response cannot abort the run and discard collected rows.
                print(f"Error parsing JSON for PMID {pmid}: {e}")
                citation_data.append({'PMID': pmid, 'Citation Count': 'Parsing Error'})

            time.sleep(0.2)  # Introduce a small delay to avoid rate limiting

    print("\nFinished fetching all citation counts.")

    # 5. Create DataFrame and save to Excel.
    # Columns are named explicitly so the workbook still has its header row
    # when no PMIDs were parsed.
    df_citations = pd.DataFrame(citation_data, columns=['PMID', 'Citation Count'])
    output_filename = 'citations.xlsx'
    df_citations.to_excel(output_filename, index=False)

    print(f"\nDataFrame created (first 5 rows):\n{df_citations.head()}")
    print(f"Citation data saved to '{output_filename}'.")

    # 6. Download the Excel file
    files.download(output_filename)
    print(f"'{output_filename}' has been downloaded.")