In [1]:
!pip install requests pandas




In [2]:
import requests
import pandas as pd


In [3]:
def fetch_articles(query, timeout=10):
    """Search PubMed for ``query`` and return the matching PubMed IDs.

    Sends an ESearch request to the NCBI E-utilities endpoint and pulls the
    ``idlist`` out of the JSON reply.

    Args:
        query: Search term, forwarded to ESearch as the ``term`` parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list: The IDs found in ``esearchresult.idlist``. An empty list is
        returned when the request fails, the server answers with a non-200
        status, or the reply carries no ``idlist``.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    # Hand the term to ``params`` so requests percent-encodes it; splicing it
    # into the URL by hand lets characters such as ``&`` or ``#`` cut the
    # query short or inject extra parameters.
    params = {"db": "pubmed", "term": query, "retmode": "json"}
    try:
        response = requests.get(f"{base_url}esearch.fcgi", params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts are reported the same way as a
        # bad HTTP status instead of aborting the notebook.
        print("Error fetching articles:", exc)
        return []
    if response.status_code != 200:
        print("Error fetching articles:", response.status_code)
        return []
    data = response.json()
    # An error reply may lack ``idlist``; fall back to "no results" rather
    # than raising KeyError.
    return data.get("esearchresult", {}).get("idlist", [])

# Example usage
# Run one sample search. ``article_ids`` is kept as a notebook-level name
# because the article-details cell further down reads it.
query = "biotech research"
article_ids = fetch_articles(query)
print("Fetched PubMed IDs:", article_ids)


Fetched PubMed IDs: ['40051483', '40051141', '40050970', '40050842', '40050768', '40050521', '40050434', '40050409', '40049766', '40049403', '40049207', '40049206', '40049185', '40048691', '40047735', '40047724', '40047259', '40046028', '40046014', '40045789']


In [4]:
def fetch_article_details(article_ids, timeout=10):
    """Fetch ESummary records for the given PubMed IDs.

    Args:
        article_ids: A single ID or an iterable of IDs; each one may be a
            string or an int. ``None`` or an empty iterable means "nothing
            to look up".
        timeout: Seconds to wait for the server before giving up.

    Returns:
        dict: The parsed JSON reply. An empty dict is returned when there
        are no IDs, the request fails, or the server answers with a
        non-200 status.
    """
    # A bare string would otherwise be joined character by character
    # ("123" -> "1,2,3"), so treat a lone str/int as one ID.
    if isinstance(article_ids, (str, int)):
        article_ids = [article_ids]
    # str() lets callers pass integer IDs; ``','.join`` rejects non-strings.
    id_list = [str(article_id) for article_id in (article_ids or [])]
    if not id_list:
        # Avoid sending a request with an empty ``id=`` parameter.
        return {}

    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    params = {"db": "pubmed", "id": ",".join(id_list), "retmode": "json"}
    try:
        response = requests.get(f"{base_url}esummary.fcgi", params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Connection problems and timeouts are reported the same way as a
        # bad HTTP status instead of aborting the notebook.
        print("Error fetching article details:", exc)
        return {}
    if response.status_code != 200:
        print("Error fetching article details:", response.status_code)
        return {}
    return response.json()

# Example usage
# Look up the records for the IDs produced by the earlier search cell and
# print the reply as returned by fetch_article_details.
article_details = fetch_article_details(article_ids)
print("Article Details:", article_details)


Article Details: {'header': {'type': 'esummary', 'version': '0.3'}, 'result': {'uids': ['40051483', '40051141', '40050970', '40050842', '40050768', '40050521', '40050434', '40050409', '40049766', '40049403', '40049207', '40049206', '40049185', '40048691', '40047735', '40047724', '40047259', '40046028', '40046014', '40045789'], '40051483': {'uid': '40051483', 'pubdate': '2024 Dec', 'epubdate': '2024 Dec 2', 'source': 'Psychedelic Med (New Rochelle)', 'authors': [{'name': 'Haggarty C', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Molla H', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Glazer J', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Tare I', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Rains A', 'authtype': 'Author', 'clusterid': ''}, {'name': 'de Wit H', 'authtype': 'Author', 'clusterid': ''}, {'name': 'Lee R', 'authtype': 'Author', 'clusterid': ''}], 'lastauthor': 'Lee R', 'title': 'Low-Dose LSD Alters Early and Late Event-Related Potentials to Emotional Faces

In [5]:
def filter_authors(authors, company_keywords=None):
    """Return the authors whose affiliation contains a company-style keyword.

    Args:
        authors: Iterable of author dicts. The affiliation text is read from
            each dict's ``'affiliation'`` key. ``None`` is treated as an
            empty collection.
        company_keywords: Optional iterable of substrings to look for.
            Defaults to ``"Pharma"``, ``"Biotech"``, ``"Inc."`` and ``"LLC"``.

    Returns:
        list: The author dicts (same objects, original order) whose
        affiliation contains at least one keyword. Matching is a
        case-sensitive substring test.
    """
    if company_keywords is None:
        company_keywords = ("Pharma", "Biotech", "Inc.", "LLC")
    else:
        # Materialise once so a generator argument survives being scanned
        # again for every author.
        company_keywords = tuple(company_keywords)

    company_authors = []
    for author in authors or []:
        # ``or ""`` covers both a missing key and an explicit None value;
        # ``keyword in None`` would raise TypeError.
        affiliation = author.get("affiliation") or ""
        if any(keyword in affiliation for keyword in company_keywords):
            company_authors.append(author)
    return company_authors


In [6]:
def save_to_csv(data, filename="output.csv"):
    """Write ``data`` to a CSV file and print where it was saved.

    Args:
        data: Anything ``pd.DataFrame`` accepts, e.g. a list of dicts.
        filename: Destination path; defaults to ``"output.csv"``.
    """
    # Build the frame and serialise it in one chain; the row index is left
    # out of the file.
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"Data saved to {filename}")

# Example dataset for testing
# One hand-written record; save_to_csv is called without a filename, so it
# is written to the default "output.csv".
sample_data = [
    {"PubMed ID": "12345678", "Title": "Innovative Biotech Research", 
     "Publication Date": "2025-01-15", "Author Affiliation": "BioPharma Inc., New York, USA"}
]
save_to_csv(sample_data)


Data saved to output.csv
