# Title
[]()

In [1]:

import sys
sys.path.append('../src')
sys.path.append(r"/home/silvhua/custom_python")
from silvhua import *

In [69]:
# set the option to wrap text within cells
pd.set_option('display.max_colwidth', 200)
# pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', 20)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Initialize

In [2]:
api_key = os.getenv('api_ncbi') # Pubmed API key
result_dict = dict()

# Iteration 1

In [3]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 1
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, verbose=False, additional_search_params=None
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'"{re.sub(r"not", "", query)}"'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        try:
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            self.responses_dict[self.iteration] = response_dict
            result_dict = self.get_article_data_by_title()
            self.results_dict[self.iteration] = result_dict
            self.iteration += 1
            results = pd.DataFrame(result_dict).transpose()
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self):
        try:
            result_dict = {}
            record_strings_list = self.batch_retrieve_citation(self.responses_dict[self.iteration])
            self.record_strings_dict[self.iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)

        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict[self.iteration]}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_dict

    def batch_retrieve_citation(self, response_dict):
        result_list = []
        messages = []
        try:
            id_list = response_dict['esearchresult']['idlist']
            self.PMIDs_dict[self.iteration] = id_list
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{response_dict}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }

        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''
        
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': MeshHeadingList
        }
    
iteration = 1
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2)

2024-04-03 14:30:09,078 - Pubmed_API - INFO:
Search term: "("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))"

No results found.



In [5]:
iteration = 1.1
query = '("resistance train*"[All Fields]) AND (AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2)

2024-04-03 14:34:34,405 - Pubmed_API - INFO:
Search term: "("resistance train*"[All Fields]) AND (AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))"

No results found.



In [6]:
iteration = 1.2
query = '("resistance train*")'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2)

2024-04-03 14:35:07,969 - Pubmed_API - INFO:
Search term: "("resistance train*")"

No results found.



In [7]:
iteration = 1.2
query = 'resistance train*'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2)

2024-04-03 14:35:35,164 - Pubmed_API - INFO:
Search term: "resistance train*"

2024-04-03 14:35:35,897 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38568258', '38565633']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Is there evidence for the asymmetrical transfe...,PURPOSE: The literature predominantly addresse...,European journal of applied physiology,"Vickie Wong, Jun Seob Song, Yujiro Yamada, Ryo...",2024,,,,,,10.1007/s00421-024-05472-9,38568258,
1,Effect of resistance training plus enriched pr...,This study aimed to determine the effects of r...,Scientific reports,"Majid Mohabbat, Hamid Arazi",2024,,14.0,1.0,7744.0,,10.1038/s41598-024-58462-4,38565633,


In [8]:
iteration = 1.3
query = '"resistance train*"'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2)

2024-04-03 14:35:59,787 - Pubmed_API - INFO:
Search term: ""resistance train*""

2024-04-03 14:36:00,439 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38568258', '38567973']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Is there evidence for the asymmetrical transfe...,PURPOSE: The literature predominantly addresse...,European journal of applied physiology,"Vickie Wong, Jun Seob Song, Yujiro Yamada, Ryo...",2024,,,,,,10.1007/s00421-024-05472-9,38568258,
1,Are train horns improving road safety? Road us...,: Train horns are used as a control at railway...,Ergonomics,"Gr&#xe9;goire S Larue, Danielle Villoresi, Son...",2024,,,,1.0,12.0,10.1080/00140139.2024.2333965,38567973,


# iteration

In [10]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 1
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, additional_search_params=None, ids_only=False
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'{re.sub(r"not", "", query)}'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        try:
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            self.responses_dict[self.iteration] = response_dict
            if ids_only==False:
                result_dict = self.get_article_data_by_title()
                self.results_dict[self.iteration] = result_dict
                self.iteration += 1
                results = pd.DataFrame(result_dict).transpose()
            else:
                results = response_dict
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self):
        try:
            result_dict = {}
            record_strings_list = self.batch_retrieve_citation(self.responses_dict[self.iteration])
            self.record_strings_dict[self.iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)

        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict[self.iteration]}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_dict

    def batch_retrieve_citation(self, response_dict):
        result_list = []
        messages = []
        try:
            id_list = response_dict['esearchresult']['idlist']
            self.PMIDs_dict[self.iteration] = id_list
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{response_dict}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }
        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': MeshHeadingList
        }
    
iteration = 2.1
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
ids_list = result_dict[iteration].search_article(query, retmax=2, ids_only=True)
ids_list

2024-04-03 14:46:28,832 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))



{'header': {'type': 'esearch', 'version': '0.3'},
 'esearchresult': {'count': '2537',
  'retmax': '2',
  'retstart': '0',
  'idlist': ['38563729', '38563037'],
  'translationset': [{'from': 'y_10[Filter]', 'to': '"last 10 years"[dp]'},
   {'from': 'meta-analysis[Filter]', 'to': 'meta-analysis [PT]'},
   {'from': 'review[Filter]', 'to': 'review [PT]'}],
  'querytranslation': '"resistance train*"[All Fields] AND ("2014/04/03 00:00":"3000/01/01 05:00"[Date - Publication] AND ("meta analysis"[Publication Type] OR "review"[Publication Type] OR "systematic review"[Filter]))'}}

# Iteration 2.2

In [12]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 1
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, additional_search_params=None, ids_only=False
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'{re.sub(r"not", "", query)}'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        try:
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            id_list = response_dict['esearchresult']['idlist']
            self.PMIDs_dict[self.iteration] = id_list
            self.responses_dict[self.iteration] = response_dict
            if ids_only==False:
                result_dict = self.get_article_data_by_title()
                self.results_dict[self.iteration] = result_dict
                self.iteration += 1
                results = pd.DataFrame(result_dict).transpose()
            else:
                results = id_list
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self):
        try:
            result_dict = {}
            record_strings_list = self.batch_retrieve_citation()
            self.record_strings_dict[self.iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)

        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict[self.iteration]}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_dict

    def batch_retrieve_citation(self, response_dict):
        result_list = []
        messages = []
        try:
            id_list = self.PMIDs_dict.get(self.iteration)
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{response_dict}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }
        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': MeshHeadingList
        }
    
iteration = 2.2
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
ids_list = result_dict[iteration].search_article(query, retmax=2, ids_only=True)
ids_list

2024-04-03 14:57:07,071 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))



['38563729', '38563037']

In [13]:
df = result_dict[iteration].get_article_data_by_title()

2024-04-03 14:57:55,977 - Pubmed_API - ERROR:
Response: 
['38563729', '38563037']
	An error occurred on line 86 in /tmp/ipykernel_2364/2201311683.py: Pubmed_API.batch_retrieve_citation() missing 1 required positional argument: 'response_dict'



# Iteration 2.3

In [14]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 1
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, additional_search_params=None, ids_only=False, 
        verbose=True
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'{re.sub(r"not", "", query)}'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        messages = []
        try:
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            id_list = response_dict['esearchresult']['idlist']
            messages.append(f'{len(id_list)} PMIDs found.')
            if verbose==True:
                messages.append(f'{id_list}')
            self.PMIDs_dict[self.iteration] = id_list
            self.responses_dict[self.iteration] = response_dict
            if ids_only==False:
                result_dict = self.get_article_data_by_title()
                self.results_dict[self.iteration] = result_dict
                self.iteration += 1
                results = pd.DataFrame(result_dict).transpose()
            else:
                results = id_list
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self):
        try:
            result_dict = {}
            record_strings_list = self.batch_retrieve_citation()
            self.record_strings_dict[self.iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)

        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict[self.iteration]}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_dict

    def batch_retrieve_citation(self):
        result_list = []
        messages = []
        try:
            id_list = self.PMIDs_dict.get(self.iteration)
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{self.response_dict.get(self.iteration)}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }
        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': MeshHeadingList
        }
    
iteration = 2.3
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
ids_list = result_dict[iteration].search_article(query, retmax=2, ids_only=True)
df = result_dict[iteration].get_article_data_by_title()
df

2024-04-03 15:03:20,091 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))

2024-04-03 15:03:21,031 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38563729', '38563037']



{0: {'pubmed_title': 'Feasibility and Usefulness of Repetitions-In-Reserve Scales for Selecting Exercise Intensity: A Scoping Review.',
  'abstract': 'The intensity of resistance training (RT) exercise is an important consideration for determining relevant health and performance-related outcomes. Yet, current objective exercise intensity measures present concerns in terms of viability or cost. In response to these concerns, repetition-in-reserve (RIR) scales may represent an adequate method of measuring and regulating intensity. However, no recent review has focused on how RIR scales have been used for this purpose in prior research. We prepared the present scoping review to analyze the feasibility and usefulness of RIR scales in selecting RT intensity. We conducted a systematic search in PubMed, SPORTDiscus, PsycINFO, and ClinicalTrials.gov databases (last search date April 2023) for experimental and non-experimental studies that utilized an RIR scale to measure proximity to failure i

# Iteration 2.4

In [19]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 0
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, additional_search_params=None, ids_only=False, 
        verbose=True
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'{re.sub(r"not", "", query)}'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        messages = []
        try:
            self.iteration += 1
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            id_list = response_dict['esearchresult']['idlist']
            messages.append(f'{len(id_list)} PMIDs found.')
            if verbose==True:
                messages.append(f'{id_list}')
            self.PMIDs_dict[self.iteration] = id_list
            self.responses_dict[self.iteration] = response_dict
            if ids_only==False:
                results = self.get_article_data_by_title()
            else:
                results = id_list
            self.logger.info('\n'.join(messages))
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self, iteration=None):
        result_df = pd.DataFrame()
        try:
            result_dict = {}
            iteration = self.iteration if iteration == None else iteration
            record_strings_list = self.batch_retrieve_citation(iteration)
            self.record_strings_dict[iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)
            self.results_dict[iteration] = result_dict
            result_df = pd.DataFrame(result_dict).transpose()
        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict.get(iteration)}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_df

    def batch_retrieve_citation(self, iteration):
        result_list = []
        messages = []
        try:
            id_list = self.PMIDs_dict.get(iteration)
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{self.responses_dict.get(iteration)}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }
        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': MeshHeadingList
        }
    
iteration = 2.4
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
ids_list = result_dict[iteration].search_article(query, retmax=2, ids_only=True)
df = result_dict[iteration].get_article_data_by_title()
df

2024-04-03 15:24:09,986 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))



2024-04-03 15:24:10,634 - Pubmed_API - INFO:
2 PMIDs found.
['38563729', '38563037']

2024-04-03 15:24:10,640 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38563729', '38563037']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Feasibility and Usefulness of Repetitions-In-R...,The intensity of resistance training (RT) exer...,Perceptual and motor skills,"Vasco Bastos, S&#xe9;rgio Machado, Diogo S Tei...",2024,,,,315125241241785,,10.1177/00315125241241785,38563729,
1,Differences in the Impact of Various Types of ...,"BACKGROUND: Irisin, a myokine that is responsi...",International journal of preventive medicine,"Atefe Torabi, Jalil Reisi, Mehdi Kargarfard, M...",2024,,15.0,,11,,10.4103/ijpvm.ijpvm_76_23,38563037,


In [22]:
result_dict[iteration].responses_dict

{1: {'header': {'type': 'esearch', 'version': '0.3'},
  'esearchresult': {'count': '2537',
   'retmax': '2',
   'retstart': '0',
   'idlist': ['38563729', '38563037'],
   'translationset': [{'from': 'y_10[Filter]', 'to': '"last 10 years"[dp]'},
    {'from': 'meta-analysis[Filter]', 'to': 'meta-analysis [PT]'},
    {'from': 'review[Filter]', 'to': 'review [PT]'}],
   'querytranslation': '"resistance train*"[All Fields] AND ("2014/04/03 00:00":"3000/01/01 05:00"[Date - Publication] AND ("meta analysis"[Publication Type] OR "review"[Publication Type] OR "systematic review"[Filter]))'}}}

In [24]:
iteration = 2.41
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
result_dict[iteration + 0.001] = result_dict[iteration].search_article(query, retmax=2, ids_only=False)
result_dict[iteration + 0.001]

2024-04-03 15:26:34,873 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))

2024-04-03 15:26:35,714 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38563729', '38563037']

2024-04-03 15:26:37,223 - Pubmed_API - INFO:
2 PMIDs found.
['38563729', '38563037']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Feasibility and Usefulness of Repetitions-In-R...,The intensity of resistance training (RT) exer...,Perceptual and motor skills,"Vasco Bastos, S&#xe9;rgio Machado, Diogo S Tei...",2024,,,,315125241241785,,10.1177/00315125241241785,38563729,
1,Differences in the Impact of Various Types of ...,"BACKGROUND: Irisin, a myokine that is responsi...",International journal of preventive medicine,"Atefe Torabi, Jalil Reisi, Mehdi Kargarfard, M...",2024,,15.0,,11,,10.4103/ijpvm.ijpvm_76_23,38563037,


## 2.42

In [29]:
iteration = 2.42
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration] = Pubmed_API()
result_dict[iteration].search_article(query, retmax=2, ids_only=False)

2024-04-03 15:29:54,760 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))



2024-04-03 15:29:55,529 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38563729', '38563037']

2024-04-03 15:29:56,677 - Pubmed_API - INFO:
2 PMIDs found.
['38563729', '38563037']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Feasibility and Usefulness of Repetitions-In-R...,The intensity of resistance training (RT) exer...,Perceptual and motor skills,"Vasco Bastos, S&#xe9;rgio Machado, Diogo S Tei...",2024,,,,315125241241785,,10.1177/00315125241241785,38563729,
1,Differences in the Impact of Various Types of ...,"BACKGROUND: Irisin, a myokine that is responsi...",International journal of preventive medicine,"Atefe Torabi, Jalil Reisi, Mehdi Kargarfard, M...",2024,,15.0,,11,,10.4103/ijpvm.ijpvm_76_23,38563037,


In [31]:
result_dict[iteration].PMIDs_dict

{1: ['38563729', '38563037']}

In [32]:
query = '("resistance train*"[All Fields])'
result_dict[iteration].search_article(query, retmax=2, ids_only=False)

2024-04-03 15:30:44,040 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields])

2024-04-03 15:30:44,592 - Pubmed_API - INFO:
Extracting these 2 PMIDs: ['38568258', '38565633']

2024-04-03 15:30:45,777 - Pubmed_API - INFO:
2 PMIDs found.
['38568258', '38565633']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Is there evidence for the asymmetrical transfe...,PURPOSE: The literature predominantly addresse...,European journal of applied physiology,"Vickie Wong, Jun Seob Song, Yujiro Yamada, Ryo...",2024,,,,,,10.1007/s00421-024-05472-9,38568258,
1,Effect of resistance training plus enriched pr...,This study aimed to determine the effects of r...,Scientific reports,"Majid Mohabbat, Hamid Arazi",2024,,14.0,1.0,7744.0,,10.1038/s41598-024-58462-4,38565633,


In [34]:
query = '("resistance train*"[All Fields]) AND ((y_1[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
result_dict[iteration].search_article(query, ids_only=False)

2024-04-03 15:31:41,653 - Pubmed_API - INFO:
Search term: ("resistance train*"[All Fields]) AND ((y_1[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))

2024-04-03 15:31:42,400 - Pubmed_API - INFO:
Extracting these 5 PMIDs: ['38563729', '38563037', '38561438', '38559546', '38549168']

2024-04-03 15:31:44,977 - Pubmed_API - INFO:
5 PMIDs found.
['38563729', '38563037', '38561438', '38559546', '38549168']



Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings
0,Feasibility and Usefulness of Repetitions-In-R...,The intensity of resistance training (RT) exer...,Perceptual and motor skills,"Vasco Bastos, S&#xe9;rgio Machado, Diogo S Tei...",2024,,,,315125241241785,,10.1177/00315125241241785,38563729,
1,Differences in the Impact of Various Types of ...,"BACKGROUND: Irisin, a myokine that is responsi...",International journal of preventive medicine,"Atefe Torabi, Jalil Reisi, Mehdi Kargarfard, M...",2024,,15.0,,11,,10.4103/ijpvm.ijpvm_76_23,38563037,
2,Differences in Biomechanical Determinants of A...,BACKGROUND: Change of direction (COD) movement...,Sports medicine - open,"Thomas A Donelon, Jamie Edwards, Mathew Brown,...",2024,,10.0,1.0,29,,10.1186/s40798-024-00701-z,38561438,
3,The Effectiveness and Optimal Dose of Resistan...,A subgroup of patients with low back pain (LBP...,Cureus,"Valerio Barbari, Maria M Carbone, Lorenzo Stor...",2024,,16.0,3.0,e57278,,10.7759/cureus.57278,38559546,
4,Sex differences in the physiological responses...,BACKGROUND: Heart disease is one of the leadin...,"BMC sports science, medicine &amp; rehabilitation","J Bouakkar, T J Pereira, H Johnston, M Pakosh,...",2024,,16.0,1.0,74,,10.1186/s13102-024-00867-9,38549168,


In [35]:
result_dict[iteration].PMIDs_dict

{1: ['38563729', '38563037'],
 2: ['38568258', '38565633'],
 3: ['38563729', '38563037', '38561438', '38559546', '38549168']}

In [36]:
result_dict[iteration].responses_dict

{1: {'header': {'type': 'esearch', 'version': '0.3'},
  'esearchresult': {'count': '2537',
   'retmax': '2',
   'retstart': '0',
   'idlist': ['38563729', '38563037'],
   'translationset': [{'from': 'y_10[Filter]', 'to': '"last 10 years"[dp]'},
    {'from': 'meta-analysis[Filter]', 'to': 'meta-analysis [PT]'},
    {'from': 'review[Filter]', 'to': 'review [PT]'}],
   'querytranslation': '"resistance train*"[All Fields] AND ("2014/04/03 00:00":"3000/01/01 05:00"[Date - Publication] AND ("meta analysis"[Publication Type] OR "review"[Publication Type] OR "systematic review"[Filter]))'}},
 2: {'header': {'type': 'esearch', 'version': '0.3'},
  'esearchresult': {'count': '19573',
   'retmax': '2',
   'retstart': '0',
   'idlist': ['38568258', '38565633'],
   'translationset': [],
   'querytranslation': '"resistance train*"[All Fields]'}},
 3: {'header': {'type': 'esearch', 'version': '0.3'},
  'esearchresult': {'count': '369',
   'retmax': '5',
   'retstart': '0',
   'idlist': ['38563729', '

In [37]:
result_dict[iteration].results_dict

{1: {0: {'pubmed_title': 'Feasibility and Usefulness of Repetitions-In-Reserve Scales for Selecting Exercise Intensity: A Scoping Review.',
   'abstract': 'The intensity of resistance training (RT) exercise is an important consideration for determining relevant health and performance-related outcomes. Yet, current objective exercise intensity measures present concerns in terms of viability or cost. In response to these concerns, repetition-in-reserve (RIR) scales may represent an adequate method of measuring and regulating intensity. However, no recent review has focused on how RIR scales have been used for this purpose in prior research. We prepared the present scoping review to analyze the feasibility and usefulness of RIR scales in selecting RT intensity. We conducted a systematic search in PubMed, SPORTDiscus, PsycINFO, and ClinicalTrials.gov databases (last search date April 2023) for experimental and non-experimental studies that utilized an RIR scale to measure proximity to fail

In [65]:
result_dict[iteration].responses_dict[1]

{'header': {'type': 'esearch', 'version': '0.3'},
 'esearchresult': {'count': '2537',
  'retmax': '2',
  'retstart': '0',
  'idlist': ['38563729', '38563037'],
  'translationset': [{'from': 'y_10[Filter]', 'to': '"last 10 years"[dp]'},
   {'from': 'meta-analysis[Filter]', 'to': 'meta-analysis [PT]'},
   {'from': 'review[Filter]', 'to': 'review [PT]'}],
  'querytranslation': '"resistance train*"[All Fields] AND ("2014/04/03 00:00":"3000/01/01 05:00"[Date - Publication] AND ("meta analysis"[Publication Type] OR "review"[Publication Type] OR "systematic review"[Filter]))'}}

In [40]:
result_dict[iteration].record_strings_dict[1][0]

'<?xml version="1.0" ?>\n<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2024//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_240101.dtd">\n<PubmedArticleSet>\n<PubmedArticle><MedlineCitation Status="Publisher" Owner="NLM" IndexingMethod="Automated"><PMID Version="1">38563729</PMID><DateRevised><Year>2024</Year><Month>04</Month><Day>02</Day></DateRevised><Article PubModel="Print-Electronic"><Journal><ISSN IssnType="Electronic">1558-688X</ISSN><JournalIssue CitedMedium="Internet"><PubDate><Year>2024</Year><Month>Apr</Month><Day>02</Day></PubDate></JournalIssue><Title>Perceptual and motor skills</Title><ISOAbbreviation>Percept Mot Skills</ISOAbbreviation></Journal><ArticleTitle>Feasibility and Usefulness of Repetitions-In-Reserve Scales for Selecting Exercise Intensity: A Scoping Review.</ArticleTitle><Pagination><StartPage>315125241241785</StartPage><MedlinePgn>315125241241785</MedlinePgn></Pagination><ELocationID EIdType="doi" ValidYN="Y">10.1177/00315125

In [88]:
def extract_pubmed_details(record_string):
    """
    Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
    """
    authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
    formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

    # Extract publication year
    publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
    publication_year = publication_year.group(1) if publication_year else ''
    publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
    publication_month = publication_month.group(1) if publication_month else ''

    # Extract article title
    article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
    article_title = article_title.group(1) if article_title else ''

    # Extract journal title
    journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
    journal_title = journal_title.group(1) if journal_title else ''

    # Extract journal volume
    journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
    journal_volume = journal_volume.group(1) if journal_volume else ''

    # Extract journal issue
    journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
    journal_issue = journal_issue.group(1) if journal_issue else ''

    # Extract start page
    start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
    start_page = start_page.group(1) if start_page else ''

    # Extract end page
    end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
    end_page = end_page.group(1) if end_page else ''

    # Extract ELocationID
    doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
    doi = doi.group(1) if doi else ''

    # Extract PMID
    pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
    pmid = pmid.group(1) if pmid else ''

    abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
    # self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
    if len(abstract_matches) > 1:
        cleaned_abstract_sections = []
        for match in abstract_matches:
            clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
            clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
            cleaned_abstract_sections.append(clean_match)
            
        abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
    else:
        abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
        
    # Extract MeshHeadingList
    MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
    MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''

    # Extract MeshHeading text
    MeshHeadings = re.findall(
        r'<MeshHeading><DescriptorName.?(?:UI=".*?")>(.*?)</DescriptorName></MeshHeading>', MeshHeadingList
        )
    # Estract Mesh QualifierName
    MeshQualifiers = re.findall(
        r'<QualifierName.?(?:UI=".*?")>(.*?)</QualifierName>', MeshHeadingList
        )
    mesh_headings = []
    pattern = r'<MeshHeading><DescriptorName.*?>(.*?)</DescriptorName>(<QualifierName.*?>.*?</QualifierName>)?</MeshHeading>'
    # pattern = r'<MeshHeading><DescriptorName.?(?:UI=".*?")>(.*?)</DescriptorName>(?:<QualifierName.*?>(.*?)</QualifierName>)?</MeshHeading>'
    matches = re.findall(pattern, MeshHeadingList)
    print(f'mesh headings: {matches}')
    for match in matches:
        heading = match[0]
        if match[1]:
            # heading += f" / {match[1]}"
            
            # Estract Mesh QualifierName
            MeshQualifiers = re.findall(
                r'<QualifierName.?(?:UI=".*?")>(.*?)</QualifierName>', match[1]
                )
            print(f'mesh qualifiers: {MeshQualifiers}')
            for qualifier in MeshQualifiers:
                heading = f"{match[0]} / {qualifier}"
                mesh_headings.append(heading)
        else:
            mesh_headings.append(heading)

    # Extract keyword
    Keyword_List = re.search(r'<KeywordList.*?>(.*?)</KeywordList>', record_string)
    Keyword_List = Keyword_List.group(1) if Keyword_List else ''
    Keywords = re.findall(
        r'<Keyword.*?>(.*?)</Keyword>', Keyword_List
        )
    # Extract MajorTopic text
    MajorTopics = re.findall(
        r'<[^>]*MajorTopicYN="Y"[^>]*>([^<]+)<\/[^>]+>', record_string
        )
    # Extract Publication Type
    PublicationTypeList = re.search(r'<PublicationTypeList.*?>(.*?)</PublicationTypeList>', record_string)
    PublicationTypeList = PublicationTypeList.group(1) if PublicationTypeList else ''
    PublicationType = re.findall(
        r'<PublicationType.*?>(.*?)</PublicationType>', PublicationTypeList
        )
    return {
        'pubmed_title': article_title,
        'abstract': abstract,
        'journal': journal_title,
        'authors': formatted_authors,
        'year': publication_year,
        'month': publication_month,
        'pub_volume': journal_volume,
        'pub_issue': journal_issue,
        'start_page': start_page,
        'end_page': end_page,
        'doi': doi,
        'pmid': pmid,
        'mesh_headings': mesh_headings,
        'keywords': Keywords,
        'major_topics': MajorTopics,
        'publication_type': PublicationType
    }
record_string = result_dict[iteration].record_strings_dict[1][0]
record_string = '<?xml version="1.0" ?>\n<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2024//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_240101.dtd">\n<PubmedArticleSet>\n<PubmedArticle><MedlineCitation Status="MEDLINE" Owner="NLM" IndexingMethod="Automated"><PMID Version="1">38555895</PMID><DateCompleted><Year>2024</Year><Month>04</Month><Day>02</Day></DateCompleted><DateRevised><Year>2024</Year><Month>04</Month><Day>02</Day></DateRevised><Article PubModel="Print"><Journal><ISSN IssnType="Print">0300-5283</ISSN><JournalIssue CitedMedium="Internet"><Volume>79</Volume><Issue>Suppl 1</Issue><PubDate><Year>2024</Year><Month>Mar</Month></PubDate></JournalIssue><Title>The Medical journal of Malaysia</Title><ISOAbbreviation>Med J Malaysia</ISOAbbreviation></Journal><ArticleTitle>Factors associated with different types of hip fractures among elderly patients a tertiary hospital in Pahang: A retrospective cross-sectional study.</ArticleTitle><Pagination><StartPage>117</StartPage><EndPage>121</EndPage><MedlinePgn>117-121</MedlinePgn></Pagination><Abstract><AbstractText Label="INTRODUCTION" NlmCategory="BACKGROUND">Hip fractures, predominantly due to decreased bone density and falls, significantly impact elderly health, disproportionately affecting women and placing a strain on healthcare resources. This study aims to conduct an indepth epidemiological analysis of hip fracture incidence among the elderly in Pahang, Malaysia, to inform better healthcare strategies.</AbstractText><AbstractText Label="MATERIALS AND METHODS" NlmCategory="METHODS">In this retrospective study, medical records of patients admitted with hip fractures between 2019 and 2021 at Hospital Sultan Haji Ahmad Shah (HoSHAS) in Pahang were analyzed. Data on sociodemographic characteristics, nature of trauma, fracture types, and comorbidities were collected and examined using descriptive and inferential statistics.</AbstractText><AbstractText Label="RESULTS" NlmCategory="RESULTS">Among 3856 Orthopaedic Department admissions at HoSHAS (2019-2021), 296 hip fracture cases were identified, predominantly in women (71.3%), Malay ethnicity (75.3%), and aged 71-80 (38.5%). Intertrochanteric femur fractures were prevalent (62.8%). Unintentional falls accounted for 94.9% of cases. Logistic regression showed age and gender as significant predictors of femoral neck fractures. Specifically, Chinese seniors were 1.96 times more likely, and women over 65 were 1.95 times more likely to suffer these fractures. Notably, the absence of comorbidities increased the risk by 3.41 times (p &lt; 0.05).</AbstractText><AbstractText Label="CONCLUSION" NlmCategory="CONCLUSIONS">With increased longevity among Malaysian citizen, the number of hip fracture cases are growing and leading to other health-related problems such as disability, depression, and cardiovascular. Various preventive interventions for osteoporosis and falls should be implemented to reduce the incidence of hip fractures among older adults.</AbstractText></Abstract><AuthorList CompleteYN="Y"><Author ValidYN="Y"><LastName>Mohd Yusoff</LastName><ForeName>R</ForeName><Initials>R</Initials><AffiliationInfo><Affiliation>Hospital Pengajar Universiti Sultan Zainal Abidin Kampus Gong Badak, Kuala Terengganu, Terengganu, Malaysia.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Mulud</LastName><ForeName>Z A</ForeName><Initials>ZA</Initials><AffiliationInfo><Affiliation>Universiti Teknologi MARA, Faculty of Health Sciences, Centre for Nursing Studies, Puncak Alam Campus, Selangor, Malaysia. zamzaliza@uitm.edu.my.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Mohammadnezhad</LastName><ForeName>M</ForeName><Initials>M</Initials><AffiliationInfo><Affiliation>University of Bradford Richmond Rd, School of Nursing and Healthcare Leadership, Bradford, UK.</Affiliation></AffiliationInfo></Author></AuthorList><Language>eng</Language><PublicationTypeList><PublicationType UI="D016428">Journal Article</PublicationType></PublicationTypeList></Article><MedlineJournalInfo><Country>Malaysia</Country><MedlineTA>Med J Malaysia</MedlineTA><NlmUniqueID>0361547</NlmUniqueID><ISSNLinking>0300-5283</ISSNLinking></MedlineJournalInfo><CitationSubset>IM</CitationSubset><MeshHeadingList><MeshHeading><DescriptorName UI="D000368" MajorTopicYN="N">Aged</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D006801" MajorTopicYN="N">Humans</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D005260" MajorTopicYN="N">Female</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D012189" MajorTopicYN="N">Retrospective Studies</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D003430" MajorTopicYN="N">Cross-Sectional Studies</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D062606" MajorTopicYN="N">Tertiary Care Centers</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D012307" MajorTopicYN="N">Risk Factors</DescriptorName></MeshHeading><MeshHeading><DescriptorName UI="D006620" MajorTopicYN="Y">Hip Fractures</DescriptorName><QualifierName UI="Q000453" MajorTopicYN="N">epidemiology</QualifierName><QualifierName UI="Q000150" MajorTopicYN="N">complications</QualifierName></MeshHeading></MeshHeadingList></MedlineCitation><PubmedData><History><PubMedPubDate PubStatus="medline"><Year>2024</Year><Month>4</Month><Day>2</Day><Hour>6</Hour><Minute>46</Minute></PubMedPubDate><PubMedPubDate PubStatus="pubmed"><Year>2024</Year><Month>4</Month><Day>1</Day><Hour>0</Hour><Minute>42</Minute></PubMedPubDate><PubMedPubDate PubStatus="entrez"><Year>2024</Year><Month>3</Month><Day>31</Day><Hour>19</Hour><Minute>42</Minute></PubMedPubDate></History><PublicationStatus>ppublish</PublicationStatus><ArticleIdList><ArticleId IdType="pubmed">38555895</ArticleId></ArticleIdList></PubmedData></PubmedArticle></PubmedArticleSet>'
# record_string = '<?xml version="1.0" ?>\n<!DOCTYPE PubmedArticleSet PUBLIC "-//NLM//DTD PubMedArticle, 1st January 2024//EN" "https://dtd.nlm.nih.gov/ncbi/pubmed/out/pubmed_240101.dtd">\n<PubmedArticleSet>\n<PubmedArticle><MedlineCitation Status="Publisher" Owner="NLM" IndexingMethod="Automated"><PMID Version="1">38558168</PMID><DateRevised><Year>2024</Year><Month>04</Month><Day>01</Day></DateRevised><Article PubModel="Print-Electronic"><Journal><ISSN IssnType="Electronic">1945-7197</ISSN><JournalIssue CitedMedium="Internet"><PubDate><Year>2024</Year><Month>Apr</Month><Day>01</Day></PubDate></JournalIssue><Title>The Journal of clinical endocrinology and metabolism</Title><ISOAbbreviation>J Clin Endocrinol Metab</ISOAbbreviation></Journal><ArticleTitle>Breast adiposity: Menopausal status impact and its influence on glycemic and anthropometric metabolic parameters.</ArticleTitle><ELocationID EIdType="pii" ValidYN="Y">dgae205</ELocationID><ELocationID EIdType="doi" ValidYN="Y">10.1210/clinem/dgae205</ELocationID><Abstract><AbstractText Label="CONTEXT" NlmCategory="BACKGROUND">Ectopic fat depots are related to the deregulation of energy homeostasis, leading to diseases related to obesity and metabolic syndrome (MS). Despite significant changes in body composition over women\'s lifespan, little is known about the role of breast adipose tissue (BrAT) and its possible utilization as an ectopic fat depot in women of different menopausal statuses.</AbstractText><AbstractText Label="OBJECTIVE" NlmCategory="OBJECTIVE">We aimed to assess the relationship between BrAT and metabolic glycemic and lipid profiles and body composition parameters in adult women.</AbstractText><AbstractText Label="METHODS" NlmCategory="METHODS">In this cross-sectional study, we enrolled adult women undergoing routine mammograms to perform history and physical examination, body composition assessment, semi-automated assessment of breast adiposity (BA) from mammograms and fasting blood collection for biochemical analysis. Correlations and multivariate regression analysis were used to examine the associations of the BA with metabolic and body composition parameters.</AbstractText><AbstractText Label="RESULTS" NlmCategory="RESULTS">Of the 101 participants included in the final analysis, 76.2% were in menopause, and 23.8% were in premenopause. The BA was positively related with fasting plasma glucose, glycated hemoglobin, homeostasis model assessment of insulin resistance, body mass index, waist circumference, body fat percentage, abdominal visceral and subcutaneous fat when adjusted for age among women in postmenopause. Also, the BA was an independent predictor of hyperglycemia and metabolic syndrome. These associations were not present among women in premenopause.</AbstractText><AbstractText Label="CONCLUSION" NlmCategory="CONCLUSIONS">The BA was related to different adverse body composition and metabolic factors in women in postmenopause. The results suggest that there might be a relevant BrAT endocrine role during menopause, whose mechanisms are yet to be clarified, which thus opens up research perspectives on the subject as well as on clinical settings.</AbstractText><CopyrightInformation>&#xa9; The Author(s) 2024. Published by Oxford University Press on behalf of the Endocrine Society. All rights reserved. For commercial re-use, please contact reprints@oup.com for reprints and translation rights for reprints. All other permissions can be obtained through our RightsLink service via the Permissions link on the article page on our site&#x2014;for further information please contact journals.permissions@oup.com.</CopyrightInformation></Abstract><AuthorList CompleteYN="Y"><Author ValidYN="Y"><LastName>Limberger Nedel</LastName><ForeName>Barbara</ForeName><Initials>B</Initials><Identifier Source="ORCID">0000-0001-6776-4564</Identifier><AffiliationInfo><Affiliation>Graduate Program in Medical Sciences: Endocrinology, Department of Internal Medicine, Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Garcia Madure</LastName><ForeName>Michelle</ForeName><Initials>M</Initials><AffiliationInfo><Affiliation>Faculty of Nutrition and Food Sciences, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Guaresi</LastName><ForeName>Silvia</ForeName><Initials>S</Initials><AffiliationInfo><Affiliation>Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Soares Machado</LastName><ForeName>Maria Elisa</ForeName><Initials>ME</Initials><AffiliationInfo><Affiliation>Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Madrid de Bittencourt</LastName><ForeName>Marcelo</ForeName><Initials>M</Initials><AffiliationInfo><Affiliation>Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Nobrega Chagas</LastName><ForeName>Nathalia</ForeName><Initials>N</Initials><AffiliationInfo><Affiliation>Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, Brazil.</Affiliation></AffiliationInfo></Author><Author ValidYN="Y"><LastName>Gerchman</LastName><ForeName>Fernando</ForeName><Initials>F</Initials><Identifier Source="ORCID">0000-0001-5873-9498</Identifier><AffiliationInfo><Affiliation>Graduate Program in Medical Sciences: Endocrinology, Department of Internal Medicine, Faculty of Medicine, Universidade Federal do Rio Grande do Sul (UFRGS), Porto Alegre, RS, Brazil.</Affiliation></AffiliationInfo><AffiliationInfo><Affiliation>Division of Endocrinology and Metabolism, Hospital de Cl&#xed;nicas de Porto Alegre (HCPA), Porto Alegre, RS, Brazil.</Affiliation></AffiliationInfo></Author></AuthorList><Language>eng</Language><PublicationTypeList><PublicationType UI="D016428">Journal Article</PublicationType></PublicationTypeList><ArticleDate DateType="Electronic"><Year>2024</Year><Month>04</Month><Day>01</Day></ArticleDate></Article><MedlineJournalInfo><Country>United States</Country><MedlineTA>J Clin Endocrinol Metab</MedlineTA><NlmUniqueID>0375362</NlmUniqueID><ISSNLinking>0021-972X</ISSNLinking></MedlineJournalInfo><CitationSubset>IM</CitationSubset><KeywordList Owner="NOTNLM"><Keyword MajorTopicYN="N">breast adipose tissue</Keyword><Keyword MajorTopicYN="N">breast adiposity</Keyword><Keyword MajorTopicYN="N">ectopic fat</Keyword><Keyword MajorTopicYN="N">mammograms</Keyword><Keyword MajorTopicYN="N">menopausal status</Keyword><Keyword MajorTopicYN="N">metabolic homeostasis</Keyword></KeywordList></MedlineCitation><PubmedData><History><PubMedPubDate PubStatus="received"><Year>2023</Year><Month>10</Month><Day>19</Day></PubMedPubDate><PubMedPubDate PubStatus="revised"><Year>2024</Year><Month>3</Month><Day>20</Day></PubMedPubDate><PubMedPubDate PubStatus="accepted"><Year>2024</Year><Month>3</Month><Day>26</Day></PubMedPubDate><PubMedPubDate PubStatus="medline"><Year>2024</Year><Month>4</Month><Day>1</Day><Hour>18</Hour><Minute>42</Minute></PubMedPubDate><PubMedPubDate PubStatus="pubmed"><Year>2024</Year><Month>4</Month><Day>1</Day><Hour>18</Hour><Minute>42</Minute></PubMedPubDate><PubMedPubDate PubStatus="entrez"><Year>2024</Year><Month>4</Month><Day>1</Day><Hour>16</Hour><Minute>26</Minute></PubMedPubDate></History><PublicationStatus>aheadofprint</PublicationStatus><ArticleIdList><ArticleId IdType="pubmed">38558168</ArticleId><ArticleId IdType="doi">10.1210/clinem/dgae205</ArticleId><ArticleId IdType="pii">7638462</ArticleId></ArticleIdList></PubmedData></PubmedArticle></PubmedArticleSet>'
extract_pubmed_details(record_string)

mesh headings: [('Aged', ''), ('Humans', ''), ('Female', ''), ('Retrospective Studies', ''), ('Cross-Sectional Studies', ''), ('Tertiary Care Centers', ''), ('Risk Factors', ''), ('Hip Fractures', '<QualifierName UI="Q000453" MajorTopicYN="N">epidemiology</QualifierName><QualifierName UI="Q000150" MajorTopicYN="N">complications</QualifierName>')]
mesh qualifiers: ['epidemiology', 'complications']


{'pubmed_title': 'Factors associated with different types of hip fractures among elderly patients a tertiary hospital in Pahang: A retrospective cross-sectional study.',
 'abstract': 'INTRODUCTION: Hip fractures, predominantly due to decreased bone density and falls, significantly impact elderly health, disproportionately affecting women and placing a strain on healthcare resources. This study aims to conduct an indepth epidemiological analysis of hip fracture incidence among the elderly in Pahang, Malaysia, to inform better healthcare strategies.<br>MATERIALS AND METHODS: In this retrospective study, medical records of patients admitted with hip fractures between 2019 and 2021 at Hospital Sultan Haji Ahmad Shah (HoSHAS) in Pahang were analyzed. Data on sociodemographic characteristics, nature of trauma, fracture types, and comorbidities were collected and examined using descriptive and inferential statistics.<br>RESULTS: Among 3856 Orthopaedic Department admissions at HoSHAS (2019-202

# Iteration 3
* Extract mesh headings and major topics
* Updating logging messages

In [89]:
import sys
sys.path.append(r"/home/silvhua/custom_python")
import os
import pandas as pd
import string
import re
import requests
# from article_processing import create_text_dict_from_folder
# from orm_summarize import *
api_key = os.getenv('api_ncbi') # Pubmed API key

import sys
import os
import requests
from Custom_Logger import *

class Pubmed_API:
    def __init__(self, api_key=os.getenv('api_ncbi'), logger=None, logging_level=logging.INFO):
        self.api_key = api_key
        self.logger = create_function_logger('Pubmed_API', logger, level=logging_level)
        self.iteration = 0
        self.responses_dict = {}
        self.results_dict = {}
        self.PMIDs_dict = {}
        self.record_strings_dict = {}

    def search_article(self, query, query_tag=None, publication=None, reldate=None, retmax=None,
        systematic_only=False, review_only=False, additional_search_params=None, ids_only=False, 
        verbose=True
        ):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        response = {}
        results = pd.DataFrame()
        search_term = f'{re.sub(r"not", "", query)}'  # Remove 'not' since it will be treated as a boolean
        if query_tag:
            search_term += f'{query_tag}'
        if publication:
            search_term = f'AND {publication} [ta]'
        if systematic_only:
            search_term += ' AND systematic[sb]'
        elif review_only:
            search_term += ' AND (systematic[sb] OR review[pt])'
        params = {
            'db': 'pubmed',
            'term': search_term,
            'retmax': 5,
            'retmode': 'json',
            'datetype': 'edat',
        }
        if reldate:
            params['reldate'] = reldate
        if retmax:
            params['retmax'] = retmax
        if additional_search_params:
            params.update(additional_search_params)
        self.logger.info(f'Search term: {search_term}')
        messages = []
        try:
            self.iteration += 1
            response = requests.get(base_url, params=params)
            response_dict = response.json()
            id_list = response_dict['esearchresult']['idlist']
            messages.append(f'{len(id_list)} PMIDs found.')
            if verbose==True:
                messages.append(f'{id_list}')
            self.PMIDs_dict[self.iteration] = id_list
            self.responses_dict[self.iteration] = response_dict
            if ids_only==False:
                results = self.get_article_data_by_title()
            else:
                results = id_list
            self.logger.info('\n'.join(messages))
        except Exception as error:
            error_messages = []
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        
        return results

    def get_article_data_by_title(self, iteration=None):
        result_df = pd.DataFrame()
        try:
            result_dict = {}
            iteration = self.iteration if iteration == None else iteration
            record_strings_list = self.batch_retrieve_citation(iteration)
            self.record_strings_dict[iteration] = record_strings_list
            for index, record_string in enumerate(record_strings_list):
                result_dict[index] = self.extract_pubmed_details(record_string)
            self.results_dict[iteration] = result_dict
            result_df = pd.DataFrame(result_dict).transpose()
        except Exception as error:
            error_messages = []
            error_messages.append(f'Response: \n{self.PMIDs_dict.get(iteration)}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            message = f'\tAn error occurred on line {lineno} in {filename}: {error}'
            error_messages.append(message)
            self.logger.error('\n'.join(error_messages))
        return result_df

    def batch_retrieve_citation(self, iteration):
        result_list = []
        messages = []
        try:
            id_list = self.PMIDs_dict.get(iteration)
            if id_list:
                self.logger.info(f'Extracting these {len(id_list)} PMIDs: {id_list}')
                for index, id in enumerate(id_list):
                    result_list.append(self.retrieve_citation(id).decode('utf-8'))
                    current_index, current_id = index+1, id
            else:
                self.logger.warning(f'No results found.')
        except Exception as error:
            messages.append(f'Response: \n{self.responses_dict.get(iteration)}')
            exc_type, exc_obj, tb = sys.exc_info()
            file = tb.tb_frame
            lineno = tb.tb_lineno
            filename = file.f_code.co_filename
            messages.append(f'\tAn error occurred on line {lineno} in {filename}: {error}')
            messages.append(f'Article {current_index} [{current_id}] not found.')
        return result_list

    def retrieve_citation(self, article_id):
        base_url = f'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
        if self.api_key:
            base_url += f'&api_key={self.api_key}'
        params = {
            'db': 'pubmed',
            'id': article_id
        }
        response = requests.get(base_url, params=params)
        return response.content

    def extract_pubmed_details(self, record_string):
        """
        Helper function called by `pubmed_details_by_title` to parse article metadata from PubMed database.
        """
        authors = re.findall(r'<Author ValidYN="Y".*?><LastName>(.*?)</LastName><ForeName>(.*?)</ForeName>', record_string)
        formatted_authors = ', '.join(['{} {}'.format(author[1], author[0]) for author in authors])

        # Extract publication year
        publication_year = re.search(r'<PubDate><Year>(\d{4})</Year>', record_string)
        publication_year = publication_year.group(1) if publication_year else ''
        publication_month = re.search(r'<PubDate>.*?<Month>(Aug)</Month>.*?</PubDate>', record_string)
        publication_month = publication_month.group(1) if publication_month else ''

        # Extract article title
        article_title = re.search(r'<ArticleTitle>(.*?)</ArticleTitle>', record_string)
        article_title = article_title.group(1) if article_title else ''

        # Extract journal title
        journal_title = re.search(r'<Title>(.*?)</Title>', record_string)
        journal_title = journal_title.group(1) if journal_title else ''

        # Extract journal volume
        journal_volume = re.search(r'<Volume>(.*?)</Volume>', record_string)
        journal_volume = journal_volume.group(1) if journal_volume else ''

        # Extract journal issue
        journal_issue = re.search(r'<Issue>(.*?)</Issue>', record_string)
        journal_issue = journal_issue.group(1) if journal_issue else ''

        # Extract start page
        start_page = re.search(r'<StartPage>(.*?)</StartPage>', record_string)
        start_page = start_page.group(1) if start_page else ''

        # Extract end page
        end_page = re.search(r'<EndPage>(.*?)</EndPage>', record_string)
        end_page = end_page.group(1) if end_page else ''

        # Extract ELocationID
        doi = re.search(r'<ELocationID.*?EIdType="doi".*?>(.*?)</ELocationID>', record_string)
        doi = doi.group(1) if doi else ''

        # Extract PMID
        pmid = re.search(r'<PMID.*?>(.*?)</PMID>', record_string)
        pmid = pmid.group(1) if pmid else ''

        abstract_matches = re.findall(r'(<AbstractText.*?>.*?</AbstractText>)', record_string)
        # self.logger.debug(f'Number of abstract sections: {len(abstract_matches)}')
        if len(abstract_matches) > 1:
            cleaned_abstract_sections = []
            for match in abstract_matches:
                clean_match = re.sub(r'<AbstractText.*?((?:Label=".*")?.*?>.*)</AbstractText>', r'\1', match)
                clean_match = re.sub(r'(?: Label="(.*?)")?.*?>(.*)', r'\1: \2', clean_match)
                cleaned_abstract_sections.append(clean_match)
                
            abstract = ''.join([f'{group}<br>' for group in cleaned_abstract_sections])
        else:
            abstract = re.sub(r'<AbstractText.*?>(.*?)</AbstractText>', r'\1', abstract_matches[0])  if abstract_matches else ''
            
        # Extract MeshHeadingList
        MeshHeadingList = re.search(r'<MeshHeadingList>(.*?)</MeshHeadingList>', record_string)
        MeshHeadingList = MeshHeadingList.group(1) if MeshHeadingList else ''

        # Estract MeshHeading text and any QualifierName
        MeshQualifiers = re.findall(
            r'<QualifierName.?(?:UI=".*?")>(.*?)</QualifierName>', MeshHeadingList
            )
        mesh_headings = []
        pattern = r'<MeshHeading><DescriptorName.*?>(.*?)</DescriptorName>(<QualifierName.*?>.*?</QualifierName>)?</MeshHeading>'
        matches = re.findall(pattern, MeshHeadingList)
        # print(f'mesh headings: {matches}')
        for match in matches:
            heading = match[0]
            if match[1]: # Estract Mesh QualifierName                
                MeshQualifiers = re.findall(
                    r'<QualifierName.?(?:UI=".*?")>(.*?)</QualifierName>', match[1]
                    )
                print(f'mesh qualifiers: {MeshQualifiers}')
                for qualifier in MeshQualifiers:
                    heading = f"{match[0]} / {qualifier}"
                    mesh_headings.append(heading)
            else:
                mesh_headings.append(heading)

        # Extract keyword
        Keyword_List = re.search(r'<KeywordList.*?>(.*?)</KeywordList>', record_string)
        Keyword_List = Keyword_List.group(1) if Keyword_List else ''
        Keywords = re.findall(
            r'<Keyword.*?>(.*?)</Keyword>', Keyword_List
            )
        # Extract MajorTopic text
        MajorTopics = re.findall(
            r'<[^>]*MajorTopicYN="Y"[^>]*>([^<]+)<\/[^>]+>', record_string
            )
        # Extract Publication Type
        PublicationTypeList = re.search(r'<PublicationTypeList.*?>(.*?)</PublicationTypeList>', record_string)
        PublicationTypeList = PublicationTypeList.group(1) if PublicationTypeList else ''
        PublicationType = re.findall(
            r'<PublicationType.*?>(.*?)</PublicationType>', PublicationTypeList
            )
        return {
            'pubmed_title': article_title,
            'abstract': abstract,
            'journal': journal_title,
            'authors': formatted_authors,
            'year': publication_year,
            'month': publication_month,
            'pub_volume': journal_volume,
            'pub_issue': journal_issue,
            'start_page': start_page,
            'end_page': end_page,
            'doi': doi,
            'pmid': pmid,
            'mesh_headings': mesh_headings,
            'keywords': Keywords,
            'major_topics': MajorTopics,
            'publication_type': PublicationType
        }
    
iteration = 3
query = '("resistance train*"[All Fields]) AND ((y_10[Filter]) AND (meta-analysis[Filter] OR review[Filter] OR systematicreview[Filter]))'
query = 'Factors associated with different types of hip fractures among elderly patients a tertiary hospital in Pahang: A retrospective cross-sectional study'
result_dict[iteration] = Pubmed_API()
ids_list = result_dict[iteration].search_article(query, retmax=2, ids_only=True)
df = result_dict[iteration].get_article_data_by_title()
df

2024-04-04 00:19:16,071 - Pubmed_API - INFO:
Search term: Factors associated with different types of hip fractures among elderly patients a tertiary hospital in Pahang: A retrospective cross-sectional study

2024-04-04 00:19:17,425 - Pubmed_API - INFO:
1 PMIDs found.
['38555895']

2024-04-04 00:19:17,438 - Pubmed_API - INFO:
Extracting these 1 PMIDs: ['38555895']



mesh qualifiers: ['epidemiology', 'complications']


Unnamed: 0,pubmed_title,abstract,journal,authors,year,month,pub_volume,pub_issue,start_page,end_page,doi,pmid,mesh_headings,keywords,major_topics,publication_type
0,Factors associated with different types of hip fractures among elderly patients a tertiary hospital in Pahang: A retrospective cross-sectional study.,"INTRODUCTION: Hip fractures, predominantly due to decreased bone density and falls, significantly impact elderly health, disproportionately affecting women and placing a strain on healthcare resou...",The Medical journal of Malaysia,"R Mohd Yusoff, Z A Mulud, M Mohammadnezhad",2024,,79,Suppl 1,117,121,,38555895,"[Aged, Humans, Female, Retrospective Studies, Cross-Sectional Studies, Tertiary Care Centers, Risk Factors, Hip Fractures / epidemiology, Hip Fractures / complications]",[],[Hip Fractures],[Journal Article]


# *End of Page*