In [1]:
#pip install mysqlclient

In [14]:
import json
from Bio import Entrez
from Bio import Medline

import os

# pubmed

In [15]:
# NCBI asks E-utilities clients to identify themselves with a contact e-mail.
Entrez.email = "tariqf549@gmail.com"
# Connectivity check: fetch and parse the E-utilities info document.
handle = Entrez.einfo() # or esearch, efetch, ...
try:
    record = Entrez.read(handle)
finally:
    # Close the handle even when parsing the response fails.
    handle.close()

In [16]:
# Sub-queries, one PubMed search per entry.
querylist = (
    'clinical trial[Title/Abstract]',
    'italy[Title/Abstract]',
    'netherlands[Title/Abstract]',
    'case control study',
    'epidemiology',
    'mortality',
    '(treatment[All Fields] OR drug[All Fields] OR intervention[All Fields] OR recovery[All Fields])',
)

In [17]:
#myterm = MainTerm + """ AND trial[Title/Abstract] """

In [18]:
# JSON file in which the accumulated search results are stored.
resultsfilename = 'results.json'

In [23]:
# Resume from a previous run when a results file exists; otherwise start empty.
jsonfilename = resultsfilename
if os.path.isfile(jsonfilename):
    # The context manager closes the file as soon as it has been parsed,
    # instead of leaving the open handle for the garbage collector.
    with open(jsonfilename) as fp:
        mainDict = json.load(fp)
else:
    mainDict = {}


In [24]:
def add2dict(dataresults , Q):
    """Merge MEDLINE records into the module-level ``mainDict``.

    Parameters
    ----------
    dataresults : iterable of str
        Raw MEDLINE record texts whose leading ``PMID`` tag was removed by the
        caller's split; the tag is re-attached here before parsing.
    Q : str
        Query term that produced these hits. It is stored in the ``'Tag'``
        list of every record it matched.

    Returns
    -------
    None
        ``mainDict`` is updated in place: known PMIDs only gain ``Q`` in their
        tag list (at most once), unknown PMIDs get a new entry.
    """
    for hit in dataresults:
        # Put the 'PMID' tag back so the text is a parseable MEDLINE record.
        parse_res = Medline.read(('PMID' + hit).split('\n'))

        PMID = parse_res['PMID']
        if PMID in mainDict:
            print('PMID exists')
            # setdefault: an entry loaded from an older results file may not
            # have a 'Tag' list yet; create it instead of raising KeyError.
            tags = mainDict[PMID].setdefault('Tag', [])
            if Q not in tags:
                tags.append(Q)
            continue

        # Every field is treated as optional: a record without a title or a
        # publication date gets a placeholder instead of raising KeyError and
        # aborting the whole download.
        mainDict[PMID] = {
            'PMID': PMID,
            'Title': parse_res.get('TI', ''),
            'JournalName': parse_res.get('JT', ''),
            'Modification Date': parse_res.get('LR', ''),
            'Publication Date': parse_res.get('DP', ''),
            'Abstract': parse_res.get('AB', 'NA'),
            'Link': 'https://www.ncbi.nlm.nih.gov/pubmed/{}'.format(PMID),
            'Tag': [Q],
        }


In [25]:
def search_function (MyTerms, startD , endD, batch_size=10):
    """Search PubMed for COVID-19 papers matching ``MyTerms`` and store the hits.

    The fixed coronavirus/COVID-19 main term is combined with ``MyTerms`` and
    the publication-date range using ``AND``. Matching records are downloaded
    in MEDLINE text format, appended to ``corona_<term>_papers.txt`` and merged
    into ``mainDict`` through ``add2dict``.

    Parameters
    ----------
    MyTerms : str
        Additional PubMed query fragment. The text before the first ``[`` is
        also used in the output file name.
    startD, endD : str
        Bounds of the publication-date range, inserted as ``[PDat]`` values.
    batch_size : int, optional
        Number of records requested per ``efetch`` call (default 10).

    Returns
    -------
    None
    """
    Entrez.email = "tariqf549@gmail.com"
    MainTerm = """(((("coronavirus"[MeSH Terms] OR "coronavirus"[All Fields]) AND ("COVID-19"[All Fields] OR "severe acute respiratory syndrome coronavirus 2"[Supplementary Concept] OR "severe acute respiratory syndrome coronavirus 2"[All Fields] OR "2019-nCoV"[All Fields] OR "SARS-CoV-2"[All Fields] OR "2019nCoV"[All Fields] ))))"""

    #MainTerm = '"COVID-19"[All Fields]'
    DateRange = '"{}"[PDat] : "{}"[PDat]'.format(startD , endD)
    myterm = MainTerm + ' AND ' + MyTerms + ' AND ' + DateRange
    print(myterm)

    # usehistory="y" keeps the result set on the server so the batches below
    # can page through it via WebEnv / QueryKey.
    search_handle = Entrez.esearch(
        db="pubmed", term=myterm,  datetype="pdat", usehistory="y" , sort = 'relevance'
    )
    try:
        search_results = Entrez.read(search_handle)
    finally:
        search_handle.close()
    count = int(search_results["Count"])
    print("Found %i results" % count)

    # 'with' closes the output file even if a download or parse step raises.
    with open("corona_{}_papers.txt".format(MyTerms.split('[')[0]), "w") as out_handle:
        for start in range(0, count, batch_size):
            end = min(count, start + batch_size)
            print("Going to download record %i to %i" % (start + 1, end))
            fetch_handle = Entrez.efetch(
                db="pubmed",
                rettype="medline",
                retmode="text",
                retstart=start,
                retmax=batch_size,
                webenv=search_results["WebEnv"],
                query_key=search_results["QueryKey"],
            )
            try:
                data = fetch_handle.read()
            finally:
                fetch_handle.close()

            # Save the raw text first so a parsing error cannot lose the batch.
            out_handle.write(data)

            # Each record starts with a 'PMID' line; the chunk before the
            # first one holds no record and is dropped.
            dataresults = data.split('\nPMID')[1:]
            add2dict(dataresults , MyTerms)

In [26]:
# Run every sub-query over the same publication-date window.
date_window = ('2020/01/01', '2020/03/26')
for subquery in querylist:
    search_function(subquery, *date_window)

(((("coronavirus"[MeSH Terms] OR "coronavirus"[All Fields]) AND ("COVID-19"[All Fields] OR "severe acute respiratory syndrome coronavirus 2"[Supplementary Concept] OR "severe acute respiratory syndrome coronavirus 2"[All Fields] OR "2019-nCoV"[All Fields] OR "SARS-CoV-2"[All Fields] OR "2019nCoV"[All Fields] )))) AND clinical trial[Title/Abstract] AND "2020/01/01"[PDat] : "2020/03/26"[PDat]
Found 5 results
Going to download record 1 to 5
(((("coronavirus"[MeSH Terms] OR "coronavirus"[All Fields]) AND ("COVID-19"[All Fields] OR "severe acute respiratory syndrome coronavirus 2"[Supplementary Concept] OR "severe acute respiratory syndrome coronavirus 2"[All Fields] OR "2019-nCoV"[All Fields] OR "SARS-CoV-2"[All Fields] OR "2019nCoV"[All Fields] )))) AND italy[Title/Abstract] AND "2020/01/01"[PDat] : "2020/03/26"[PDat]
Found 18 results
Going to download record 1 to 10
Going to download record 11 to 18
(((("coronavirus"[MeSH Terms] OR "coronavirus"[All Fields]) AND ("COVID-19"[All Fields] O

PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 311 to 320
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 321 to 330
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 331 to 340
PMID exists
PMID exists
Going to download record 341 to 350
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 351 to 360
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 361 to 370
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 371 to 380
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 381 to 390
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
PMID exists
Going to download record 391 to 400
PMID exists
PMID exists
PMID exists
Going to download record 401 to 410
PMID exists
PMID exists
PMID exists
PMID exists
PMID

In [37]:

# Flatten the PMID-keyed dictionary into a plain list of record dicts.
Output = list(mainDict.values())

In [38]:
# Preview only the first few records: displaying the whole list floods the
# notebook with every abstract.
Output[:5]


[{'Title': 'Severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) and coronavirus disease-2019 (COVID-19): The epidemic and the challenges.',
  'JournalName': 'International journal of antimicrobial agents',
  'Modification Date': '20200325',
  'Publication Date': '2020 Mar',
  'Abstract': 'The emergence of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2; previously provisionally named 2019 novel coronavirus or 2019-nCoV) disease (COVID-19) in China at the end of 2019 has caused a large global outbreak and is a major public health issue. As of 11 February 2020, data from the World Health Organization (WHO) have shown that more than 43 000 confirmed cases have been identified in 28 countries/regions, with >99% of cases being detected in China. On 30 January 2020, the WHO declared COVID-19 as the sixth public health emergency of international concern. SARS-CoV-2 is closely related to two bat-derived severe acute respiratory syndrome-like coronaviruses, bat-SL-CoVZC45 a

# mainDict is keyed by PMID, not by query term, so indexing it with the query
# string raises KeyError. Select the records tagged with the clinical-trial
# query instead, and build the selection before opening the file so a failure
# cannot leave a truncated ct.json behind.
ct_tag = 'clinical trial[Title/Abstract]'
ct_records = {
    pmid: rec for pmid, rec in mainDict.items()
    if ct_tag in rec.get('Tag', [])
}
with open('ct.json' , 'w') as fp:
    json.dump(ct_records, fp)
    

In [27]:
# Write the accumulated records to the results file as JSON.
with open(resultsfilename, 'w') as results_file:
    results_file.write(json.dumps(mainDict))

In [None]:
#pip install mysql-connector-python   (this is the package that provides the `mysql.connector` module)

In [10]:
import os

import mysql.connector

# Connection settings are read from the environment so real credentials never
# have to be written into the notebook; the fallbacks are the original
# placeholder values.
mydb = mysql.connector.connect(
  host=os.environ.get("MYSQL_HOST", "localhost"),
  user=os.environ.get("MYSQL_USER", "yourusername"),
  passwd=os.environ.get("MYSQL_PASSWORD", "yourpassword")
)
mycursor = mydb.cursor()

# IF NOT EXISTS keeps the cell re-runnable: a plain CREATE DATABASE fails
# once the database has been created.
mycursor.execute("CREATE DATABASE IF NOT EXISTS mydatabase")
print(mydb)

ModuleNotFoundError: No module named 'mysql'

In [None]:
# Prints "Hello World!" between an ANSI SGR sequence (codes 44;33) and a reset.
print("\033[44;33m" "Hello World!" "\033[m")


# google

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import quote_plus

query = '"covid-19"  italy'
# Percent-encode the query so quotes, spaces and characters such as '&' or '#'
# cannot corrupt the rest of the URL.
url =  'https://scholar.google.com/scholar?start=0&q='+ quote_plus(query) + '&hl=en&scisbd=1&as_sdt=1,5&as_vis=1&ie=UTF-8&oe=UTF-8&hl=en&btnG=Search'

# The timeout keeps the cell from hanging indefinitely; raise_for_status turns
# an HTTP error response into an exception instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.text

# Name the parser explicitly: the generic 'html' makes Beautiful Soup pick
# whichever parser happens to be installed.
page = BeautifulSoup(content, 'html.parser')
results = []
for entry in page.find_all("h3", attrs={"class": "gs_rt"}):
    link = entry.a
    # entry.a is None when the heading contains no <a> tag; skip such entries
    # instead of failing with AttributeError.
    if link is None or not link.has_attr('href'):
        continue
    results.append({"title": link.text, "url": link['href']})

In [None]:
# For every element with class "gs_rs": print its first space-delimited token,
# then the full text, then a blank separator line.
for snippet in page.find_all(attrs={"class": "gs_rs"}):
    snippet_text = snippet.get_text()
    first_token = snippet_text.partition(' ')[0]
    print(first_token, snippet_text, "", sep="\n")

In [None]:
# Show the first scraped result; an empty scrape displays nothing instead of
# raising IndexError.
results[0] if results else None