In [2]:
import pandas as pd

In [12]:
from pymed import PubMed


# Create a PubMed object that GraphQL can use to query
# Note that the parameters are not required but kindly requested by PubMed Central
# https://www.ncbi.nlm.nih.gov/pmc/tools/developers/
pubmed = PubMed(tool="MyTool", email="suzanna.thompson@duke.edu")

# Create a GraphQL query in plain text
query = "tms"


# Execute the query against the API
results = pubmed.query(query, max_results=5)

# Loop over the retrieved articles
for article in results:

    # Print the type of object we've found (can be either PubMedBookArticle or PubMedArticle)
    print(article["doi"])

    # Print a JSON representation of the object
    my_dict = article.toDict()


TypeError: 'PubMedArticle' object is not subscriptable

In [29]:
pd.DataFrame.from_dict(my_dict)['doi']

0    10.5606/tftrd.2023.11585
1    10.5606/tftrd.2023.11585
2    10.5606/tftrd.2023.11585
3    10.5606/tftrd.2023.11585
Name: doi, dtype: object

In [4]:
results

<itertools.chain at 0x7f6efed0ef80>

In [8]:
%pip install pubmed

Collecting pubmed
  Downloading pubmed-0.0.2.post0.zip (5.4 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting pyquery (from pubmed)
  Downloading pyquery-2.0.0-py3-none-any.whl (22 kB)
Collecting lxml>=2.1 (from pyquery->pubmed)
  Obtaining dependency information for lxml>=2.1 from https://files.pythonhosted.org/packages/3c/d2/11533f0bc47ff4d828a20cfb702f3453fe714bd5b475fcdc8cec6e6b7dcf/lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata
  Downloading lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.8 kB)
Collecting cssselect>=1.2.0 (from pyquery->pubmed)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)
Downloading lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl (7.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m55.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hBuilding wheels for collected packages: pubmed
  Building wheel for pubmed (setup.py) ... [?25ldone
[?25h  Created wheel for pubmed: fi

In [11]:
from pubmed import PubMed

# Initialize PubMed object
pubmed = PubMed(tool="MyTool", email="my@email.com")

# Search for articles on a specific topic
results = pubmed.query("tms", max_results=5)

for article in results:
    print(f"Title: {article['doi']}")
    print(f"Abstract: {article['abstract']}")

    # Access the full text of the article
    full_text = pubmed.get_full_text(article)



SyntaxError: Missing parentheses in call to 'print'. Did you mean print(...)? (literature.py, line 59)

In [10]:
%pip install biopython

Collecting biopython
  Downloading biopython-1.81-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: biopython
Successfully installed biopython-1.81
Note: you may need to restart the kernel to use updated packages.


In [11]:
from Bio import Entrez

In [12]:
def search(query):
    Entrez.email = 'your.email@example.com'
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='20',
                            retmode='xml',
                            term=query)
    results = Entrez.read(handle)
    return results

In [13]:
def fetch_details(id_list):
    ids = ','.join(id_list)
    Entrez.email = 'your.email@example.com'
    handle = Entrez.efetch(db='pubmed',
                           retmode='xml',
                           id=ids)
    results = Entrez.read(handle)
    return results

In [14]:
if __name__ == '__main__':
    results = search('fever')
    id_list = results['IdList']
    papers = fetch_details(id_list)
    for i, paper in enumerate(papers['PubmedArticle']):
         print("{}) {}".format(i+1, paper['MedlineCitation']['Article']['ArticleTitle']))


1) The management of fever in children.
2) Neurogenic Fever.
3) Fever--an update.
4) Fever.
5) Diagnosis and management of undifferentiated fever in children.
6) Fever in the postoperative patient.
7) Drug fever.
8) Postoperative fever.
9) Fever in the tropics: the ultimate clinical challenge?
10) Fever in the elderly.
11) Fever in immunocompromised hosts.
12) Antipyresis and fever.
13) Fever in the returning traveler.
14) Fever: causes and consequences.
15) Fever of unknown origin.
16) Fever.
17) [Drug-induced fever].
18) Fever control in the neuro-ICU: why, who, and when?
19) A review of the physiology of fever in birds.
20) Fever in the neurosurgical patient.


In [15]:
# Pretty print the first paper in full to observe its structure
import json
print(json.dumps(papers['PubmedArticle'][0], indent=2))

{
  "MedlineCitation": {
    "OtherAbstract": [],
    "GeneralNote": [],
    "KeywordList": [],
    "SpaceFlightMission": [],
    "OtherID": [],
    "CitationSubset": [
      "IM"
    ],
    "PMID": "35822579",
    "DateCompleted": {
      "Year": "2022",
      "Month": "11",
      "Day": "10"
    },
    "DateRevised": {
      "Year": "2022",
      "Month": "11",
      "Day": "10"
    },
    "Article": {
      "ELocationID": [
        "10.23736/S2724-5276.22.06680-0"
      ],
      "Language": [
        "eng"
      ],
      "ArticleDate": [
        {
          "Year": "2022",
          "Month": "07",
          "Day": "13"
        }
      ],
      "Journal": {
        "ISSN": "2724-5780",
        "JournalIssue": {
          "Volume": "74",
          "Issue": "5",
          "PubDate": {
            "Year": "2022",
            "Month": "Oct"
          }
        },
        "Title": "Minerva pediatrics",
        "ISOAbbreviation": "Minerva Pediatr (Torino)"
      },
      "ArticleTitle": "T

In [18]:
import urllib 
import urllib2
import sys

def fetchByQuery(query,days):
    Entrez.email = "xxx" # you must give NCBI an email address
    searchHandle=Entrez.esearch(db="pmc", reldate=days, term=query, usehistory="y")
    searchResults=Entrez.read(searchHandle)
    searchHandle.close()
    webEnv=searchResults["WebEnv"]
    queryKey=searchResults["QueryKey"]
    batchSize=10
    try:
        fetchHandle = Entrez.efetch(db="pmc", retmax=100, retmode="xml", webenv=webEnv, query_key=queryKey)
        data=fetchHandle.read()
        fetchHandle.close()
        return data
    except:
        return None

days=100 #looking for papers in the last 100 days
termList=["yeast","Saccharomyces"] 

query=" AND ".join(termList)
xml_data=fetchByQuery(query,days)
if xml_data==None: 
    print(80*"*"+"\n")
    print("This search returned no hits")

else:
    f=open("pmcXml.txt" ,"w")
    f.write(xml_data)
    f.close()

ModuleNotFoundError: No module named 'urllib2'

In [31]:
%pip install elsapy

Collecting elsapy
  Downloading elsapy-0.5.1-py3-none-any.whl (12 kB)
Installing collected packages: elsapy
Successfully installed elsapy-0.5.1
Note: you may need to restart the kernel to use updated packages.


In [14]:

from elsapy.elsclient import ElsClient
from elsapy.elsprofile import ElsAuthor, ElsAffil
from elsapy.elsdoc import FullDoc, AbsDoc
from elsapy.elssearch import ElsSearch
import json

In [15]:
con_file = open("/workspaces/Wikistim-Summarization/config.json")
config = json.load(con_file)
con_file.close()

In [16]:
client = ElsClient(config['apikey'])

In [17]:
my_auth = ElsAuthor(
        uri = 'https://api.elsevier.com/content/author/author_id/7004367821')

In [18]:
if my_auth.read(client):
    print ("my_auth.full_name: ", my_auth.full_name)
    my_auth.write()
else:
    print ("Read author failed.")

Read author failed.


In [19]:
my_aff = ElsAffil(affil_id = '60101411')
if my_aff.read(client):
    print ("my_aff.name: ", my_aff.name)
    my_aff.write()
else:
    print ("Read affiliation failed.")

Read affiliation failed.


In [20]:
scp_doc = AbsDoc(scp_id = 84872135457)
if scp_doc.read(client):
    print ("scp_doc.title: ", scp_doc.title)
    scp_doc.write()   
else:
    print ("Read document failed.")

scp_doc.title:  Control of somatic tissue differentiation by the long non-coding RNA TINCR


In [47]:
pii_doc = FullDoc(sd_pii = 'S1674927814000082')
if pii_doc.read(client):
    print ("pii_doc.title: ", pii_doc.title)
    pii_doc.write()   
else:
    print ("Read document failed.")

pii_doc.title:  Establishing the fair allocation of international aviation carbon emission rights


In [42]:
doi_doc = FullDoc(doi = '10.1016/S1525-1578(10)60571-5')
if doi_doc.read(client):
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()   
else:
    print ("Read document failed.")

doi_doc.title:  Sensitive Sequencing Method for KRAS Mutation Detection by Pyrosequencing 


In [44]:
print ("Load documents (Y/N)?")
s = input('--> ')

if (s == "y" or s == "Y"):

    ## Read all documents for example author, then write to disk
    if my_auth.read_docs(client):
        print ("my_auth.doc_list has " + str(len(my_auth.doc_list)) + " items.")
        my_auth.write_docs()
    else:
        print ("Read docs for author failed.")

    ## Read all documents for example affiliation, then write to disk
    if my_aff.read_docs(client):
        print ("my_aff.doc_list has " + str(len(my_aff.doc_list)) + " items.")
        my_aff.write_docs()
    else:
        print ("Read docs for affiliation failed.")

Load documents (Y/N)?
Read docs for author failed.
Read docs for affiliation failed.


In [45]:
auth_srch = ElsSearch('authlast(keuskamp)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

HTTPError: HTTP 401 Error from https://api.elsevier.com/content/search/author?query=authlast%28keuskamp%29
and using headers {'X-ELS-APIKey': 'ec35f663ecf6501d66550e2b2d63591b', 'User-Agent': 'elsapy-v0.5.1', 'Accept': 'application/json'}:
{"service-error":{"status":{"statusCode":"AUTHORIZATION_ERROR","statusText":"The requestor is not authorized to access the requested view or fields of the resource"}}}

In [62]:
from pymed import PubMed
pubmed = PubMed(tool="MyTool", email="suzanna.thompson@duke.edu")

# Create a GraphQL query in plain text
query = "tms[Title]"
results = pubmed.query(query, max_results=2)

In [63]:
for article in results:
    # Print a JSON representation of the object
    article.toPandas()

AttributeError: 'PubMedArticle' object has no attribute 'toPandas'

In [69]:
import pandas as pd
from pymed import PubMed

# Initialize an empty list to store dictionaries
data = []

pubmed = PubMed(tool="MyTool", email="suzanna.thompson@duke.edu")

# Create a GraphQL query in plain text
query = "tms[Title]"
results = pubmed.query(query, max_results=200)

for article in results:
    title = article.title
    abstract = article.abstract
    doi = article.doi
    pub_date = article.publication_date

    # Append a dictionary to the list
    data.append({"Title": title, "Abstract": abstract, "DOI": doi, "Publication Date": pub_date})

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(data)

# Display the combined DataFrame
df



Unnamed: 0,Title,Abstract,DOI,Publication Date
0,TMS must not harm participants: guidelines for...,Transcranial magnetic stimulation (TMS) can mo...,10.1080/17588928.2023.2259553,2023-09-14
1,TMS over the pre-SMA enhances semantic cogniti...,The continuous decline of executive abilities ...,10.1016/j.brs.2023.09.009,2023-09-14
2,Semi-TMS: an efficient regularization-oriented...,Although Convolutional Neural Networks (CNN) a...,10.1088/1361-6560/acf90f,2023-09-13
3,Reflecting the causes of variability of EEG re...,"Recently, Fong et al. published EEG responses ...",10.1016/j.neuroimage.2023.120368,2023-09-12
4,Repetitive paired-pulse TMS increases motor co...,Transcranial magnetic stimulation (TMS) over p...,10.1093/cercor/bhad315,2023-09-10
...,...,...,...,...
195,Exploring the spatial resolution of TMS-EEG co...,The use of TMS-EEG coupling as a neuroimaging ...,10.1016/j.neuroimage.2022.119419,2022-07-02
196,The Patho-Neurophysiological Basis and Treatme...,"Focal laryngeal dystonia (LD) is a rare, idiop...",10.3390/jcm11123453\n10.1016/S1388-2457(00)005...,2022-06-25
197,Assessing the Capabilities of Transcranial Mag...,The brain tumor is frequently related to sever...,10.2147/NDT.S359855\n10.3171/2009.12.FOCUS0926...,2022-06-24
198,Identification and verification of a 'true' TM...,The concurrent combination of transcranial mag...,10.1016/j.jneumeth.2022.109651,2022-06-18


In [36]:
type(results)

itertools.chain

In [39]:
results[1].toJSON()

TypeError: 'itertools.chain' object is not subscriptable

In [35]:
for article in results:
    df = pd.DataFrame(article.toJSON())
    df

In [1]:
%pip install tabula

Collecting tabula
  Downloading tabula-1.0.5.tar.gz (9.5 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: tabula
  Building wheel for tabula (setup.py) ... [?25ldone
[?25h  Created wheel for tabula: filename=tabula-1.0.5-py3-none-any.whl size=10587 sha256=2d8afc4aaa67b2a4d6ef44e3d279906cb2793617f5ff46fe70156b0dda1d79de
  Stored in directory: /home/codespace/.cache/pip/wheels/3c/5d/6a/bd13fe26e73e2170dfea1f6ccb69a69e824735e04b73192b39
Successfully built tabula
Installing collected packages: tabula
Successfully installed tabula-1.0.5
Note: you may need to restart the kernel to use updated packages.


In [5]:
%pip install PdfReader


Collecting PdfReader
  Downloading pdfreader-0.1.12.tar.gz (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting bitarray>=1.1.0 (from PdfReader)
  Obtaining dependency information for bitarray>=1.1.0 from https://files.pythonhosted.org/packages/5b/81/938e947b54e6e1eee683c069fb19d35ee0c292fc91d7b10eaab84611439b/bitarray-2.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading bitarray-2.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (32 kB)
Collecting pycryptodome>=3.9.9 (from PdfReader)
  Obtaining dependency information for pycryptodome>=3.9.9 from https://files.pythonhosted.org/packages/40/88/89623388754162ddb82c62814ccbfdbfcc8ed9bd6d5f7412d2479bdca3a7/pycryptodome-3.18.0-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading pycryptodome-3.18.

In [8]:
import PyPDF2

# Specify the path to your PDF file
pdf_file_path = "/workspaces/Wikistim-Summarization/1-s2.0-S1877065715000792.pdf"

with open(pdf_file_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    
    concatenated_text = ""
    
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num] 
        page_text = page.extract_text()
        concatenated_text += page_text
    
    pdf_file.close()

# Now you can use concatenated_text as needed
print(concatenated_text)


with open('/workspaces/Wikistim-Summarization/00_source_data/first_data_pull_test/1-s2.0-S1877065715000792', 'w') as f:
    f.write(concatenated_text)

Update  article
Basic  principles  of  transcranial  magnetic  stimulation  (TMS)  and
repetitive  TMS  (rTMS)
Wanalee  Klomjaia,  Rose  Katzb,c,  Alexandra  Lackmy-Valle ´eb,*
aFaculty  of  Physical  Therapy,  Mahidol  University,  73170  Nakonpathom,  Thailand
bUPMC  Universite ´Paris  06,  CNRS,  Inserm,  laboratoire  d’imagerie  biome ´dicale,  Sorbonne  universite ´s,  75013  Paris,  France
cService  de  me´decine  physique  et  re´adaptation,  groupe  hospitalier  Pitie´-Salpe ˆtrie`re,  AP–HP,  France
1.  Introduction
Over  the  past  decades,  neuroscience  researchers  have  beneﬁted
from  technical  advancements  in  non-invasive  brain  stimulation  in
humans.  Transcranial  magnetic  stimulation  (TMS)  is  one  method
used  to  deliver  electrical  stimuli  through  the  scalp  in  conscious
humans.  In  general,  single-pulse  TMS  (including  paired-pulse  TMS)
is  used  to  explore  brain  functioning,  whereas  repetitive  TMS
(rTMS)  is  used  to  induce  changes  in 

In [1]:
import PyPDF2

# Specify the path to your PDF file
pdf_file_path = "/workspaces/Wikistim-Summarization/1-s2.0-B9780444640321000370-main.pdf"

with open(pdf_file_path, 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    
    concatenated_text = ""
    
    for page_num in range(len(pdf_reader.pages)):
        page = pdf_reader.pages[page_num] 
        page_text = page.extract_text()
        concatenated_text += page_text
    
    pdf_file.close()

# Now you can use concatenated_text as needed
print(concatenated_text)


with open('/workspaces/Wikistim-Summarization/00_source_data/first_data_pull_test/1-s2.0-B9780444640321000370-main.', 'w') as f:
    f.write(concatenated_text)

Chapter 37
Transcranial magnetic stimulation
JEAN-PASCAL LEFAUCHEUR1,2*
1Excitabilit /C19e Nerveuse et Th /C19erapeutique, Facult /C19ed eM /C19edecine de Cr /C19eteil, Cr /C19eteil, France
2Service de Physiologie—Explorations Fonctionnelles, H ^opital Henri Mondor, Cr /C19eteil, France
Abstract
This review presents the neurophysiologic principles and clinical applications of transcranial magnetic
stimulation (TMS) and other related techniques of noninvasive cortical stimulation. TMS can serve variouspurposes for diagnosis or treatment. Regarding diagnosis, TMS is mainly dedicated to the recording ofmotor evoked potentials (MEPs). MEP recording allows investigation of corticospinal conduction timeand cortical motor control in clinical practice. Especially when using image-guided neuronavigationmethods, MEP recording is a reliable method to perform functional mapping of muscle representationwithin the motor cortex. Using various types of paired-pulse paradigms, TMS allows the assessment

In [1]:
# Import library
import PyPDF2
import pandas as pd

# Create a file object for a pdf file.
pdfFile = open("/workspaces/Wikistim-Summarization/1-s2.0-S1877065715000792.pdf", "rb")

# Create an object for reading a file.
pdfReader = PyPDF2.PdfReader(pdfFile)

# Get the number of pages in the pdf file.
numPages = len(pdfReader.pages)

text_holder = pd.DataFrame()
text_string = str('Start: ')


pdfReader.extract_text()

# for i in range (0, numPages): 
#     page = pdfReader.pages[i]
#     text = page.extract_text()
#     text_string = text + text_string



# # Print the number of pages and text data.
# print("Number of Pages:", numPages)

# print("Text:\n"+text_string)

AttributeError: 'PdfReader' object has no attribute 'extract_text'