In [2]:
import requests
from bs4 import BeautifulSoup
import csv

def RequestGetResPaperWebsite(research_paper_url):
    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
    headers = { 'User-Agent': user_agent }

    r = requests.get(research_paper_url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')

    return soup

def WriteSciPaperToCsv(article_title, article_url, article_text):
    with open('sci_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])

def WritePseudoPaperToCsv(article_title, article_url, article_text):
    with open('pseudo_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])        

        
def DownloadNaturePapers():
    soup = RequestGetResPaperWebsite('https://www.nature.com/subjects/health-sciences')

    latest_articles = soup.find('div', {'id': 'latest-research'}).find('ul', {'class': 'ma0 clean-list'}).find_all('li', {'class': 'pb40'})

    for article in latest_articles:
        article_title = str(article.find('h3').text).strip()
        article_url = 'https://www.nature.com' + str(article.find('h3').find('a').get('href'))

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = soup.find('div', {'id': 'Abs1-content'}).text

        WriteSciPaperToCsv(article_title, article_url, article_text)

def DownloadScienceMagPapers():
    soup = RequestGetResPaperWebsite('https://stm.sciencemag.org')

    articles = soup.select('h3.highwire-cite-title-wrapper.media__headline')

    for article in articles:
        article_title = article.text
        article_url = 'https://stm.sciencemag.org' + article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = soup.find('div', {'class': 'abstract'}).find('p').text

        WriteSciPaperToCsv(article_title, article_url, article_text)

def DownloadNejmPapers():
    soup = RequestGetResPaperWebsite('https://www.nejm.org/medical-articles/original-article')

    articles = soup.find('ul', {'class': 'o-results'})

    for article in articles:
        article_url = 'https://www.nejm.org' + article.find('a', {'class': 'm-result__link'}).get('href')
        article_title = article.find('a', {'class': 'm-result__link'}).find('strong', {'class': 'm-result__title'}).text

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        if soup.find('section', {'id': 'article_body'}):
            abstract = soup.find('section', {'id': 'article_body'})
        elif soup.find('section', {'id': 'article_Abstract'}):
            abstract = soup.find('section', {'id': 'article_Abstract'})

        article_text = ''

        for text_elem in abstract.findChildren():
            article_text += text_elem.text + ' '

        WriteSciPaperToCsv(article_title, article_url, article_text)

def DownloadLancetPapers():
    soup = RequestGetResPaperWebsite('https://www.thelancet.com/online-first-research')

    articles = soup.find_all('h4', {'class': 'title'})

    for article in articles:
        article_title = article.text
        article_url = 'https://www.thelancet.com' + article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = ''

        if soup.find('div', {'class': 'article__body'}):
            if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}):
                if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'}):

                    abstract = soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'})

                    for text_elem in abstract.findChildren():
                        article_text += text_elem.text + ' '

                    print(article_text)

        WriteSciPaperToCsv(article_title, article_url, article_text)

def DownloadElsevierPapers():
    soup = RequestGetResPaperWebsite('https://www.journals.elsevier.com/journal-of-acupuncture-and-meridian-studies/recent-articles')

    articles = soup.find_all('div', {'class': 'pod-listing-header'})

    for article in articles:
        article_title = article.find('a').find('span').text
        article_url = article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = ''

        if soup.find('div', {'id': 'abstracts'}):
            abstract = soup.find('div', {'id': 'abstracts'}).select('div.abstract.author')

            for section in abstract:
                for div in section.findChildren():
                    for text_elem in div.findChildren():
                        article_text += text_elem.text + ' '

        WritePseudoPaperToCsv(article_title, article_url, article_text)

def DownloadScienceDirectPapers():
    soup = RequestGetResPaperWebsite('https://www.sciencedirect.com/journal/homeopathy/vol/106/issue/4')

    articles = soup.find_all('li', {'class': 'js-article-list-item'})

    for article in articles:
        if article.find('span', {'class': 'js-article-subtype'}):
            article_url = 'https://www.sciencedirect.com' + article.find('a', {'class': 'article-content-title'}).get('href')
            article_title = article.find('a', {'class': 'article-content-title'}).text

            print('Requesting ' + article_url + '...')

            soup = RequestGetResPaperWebsite(article_url)

            article_text = ''

            if soup.find('div', {'id': 'abstracts'}):
                abstract = soup.find('div', {'id': 'abstracts'}).select('div.abstract.author')

                for section in abstract:
                    for div in section.findChildren():
                        for text_elem in div.findChildren():
                            article_text += text_elem.text + ' '

            WritePseudoPaperToCsv(article_title, article_url, article_text)

#DownloadNaturePapers()
#DownloadScienceMagPapers()
#DownloadNejmPapers()
#DownloadLancetPapers()
#DownloadElsevierPapers()
#DownloadScienceDirectPapers()



In [43]:
#def WriteNatSciPaperToCsv(article_title, article_url, article_text):
    with open('nat_sci_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])


#def DownloadNaturePapers():
    soup = RequestGetResPaperWebsite('https://www.nature.com/subjects/health-sciences')

    latest_articles = soup.find('div', {'id': 'latest-research'}).find('ul', {'class': 'ma0 clean-list'}).find_all('li', {'class': 'pb40'})

    for article in latest_articles:
        article_title = str(article.find('h3').text).strip()
        article_url = 'https://www.nature.com' + str(article.find('h3').find('a').get('href'))

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = soup.find('div', {'id': 'Abs1-content'}).text

        WriteNatSciPaperToCsv(article_title, article_url, article_text)
        
DownloadNaturePapers()

Requesting https://www.nature.com/articles/s41598-020-61994-0...
Requesting https://www.nature.com/articles/s41467-020-15397-4...
Requesting https://www.nature.com/articles/s41439-020-0095-1...
Requesting https://www.nature.com/articles/s41430-020-0617-4...
Requesting https://www.nature.com/articles/s41598-020-62709-1...
Requesting https://www.nature.com/articles/s41598-020-62586-8...


In [41]:
# 58 in the hole
# Modified version
def DownloadNaturePapers():
    soup = RequestGetResPaperWebsite('https://www.nature.com/search?article_type=protocols%2Cresearch%2Creviews&subject=health-sciences&page=58')
    # finds the article                this class is above all articles.        also above all articles              and this above each article
    #latest_articles = soup.find('div', {'class': 'latest-research'}).find('ul', {'class': 'ma0 clean-list'}).find_all('li', {'class': 'pb40'})
    # modified for the search page
    latest_articles = soup.find('div', {'class': 'pl20 mq640-pr20 hide-overflow'}).find('ol', {'class': 'clean-list'}).find_all('li', {'class': 'mb20 pb20 cleared'})
    #                                                                                                                  li class="mb20 pb20 cleared"
    
    # for each article found, do the following
    for article in latest_articles:
        # find the title and convert it into a string
        article_title = str(article.find('h2').text).strip()
        #article_url = 'https://www.nature.com' + str(article.find('h3').find('a').get('href'))

        # modified
        article_url = 'https://www.nature.com' + str(article.find('h2').find('a').get('href'))
        
        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = soup.find('div', {'id': 'Abs1-content'}).text

        WriteSciPaperToCsv(article_title, article_url, article_text)
        
DownloadNaturePapers()

Requesting https://www.nature.com/articles/s41598-020-59099-9...
Requesting https://www.nature.com/articles/s41576-019-0205-4...
Requesting https://www.nature.com/articles/s41591-019-0734-6...
Requesting https://www.nature.com/articles/s41467-019-14161-7...
Requesting https://www.nature.com/articles/s41598-020-59153-6...
Requesting https://www.nature.com/articles/s41594-020-0374-z...
Requesting https://www.nature.com/articles/s41598-020-59106-z...
Requesting https://www.nature.com/articles/s41375-020-0729-9...


AttributeError: 'NoneType' object has no attribute 'text'

In [14]:
# final url used was https://www.nejm.org/medical-articles/original-article#qs=%3Farticletype%3Doriginal-article%26requestType%3Dajax%26viewClass%3D%26page%3D40%26manualFilterParam%3DcontentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_firstDelimiter
# final page was 21 ie 781-800 papers. 

def WriteNEJMSciPaperToCsv(article_title, article_url, article_text):
    with open('sci_NEJM_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])

def DownloadNejmPapers():
    soup = RequestGetResPaperWebsite('https://www.nejm.org/medical-articles/original-article#qs=%3Farticletype%3Doriginal-article%26requestType%3Dajax%26viewClass%3D%26page%3D40%26manualFilterParam%3DcontentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_delimiter_contentAge_firstDelimiter')

    articles = soup.find('ul', {'class': 'o-results'})

    for article in articles:
        article_url = 'https://www.nejm.org' + article.find('a', {'class': 'm-result__link'}).get('href')
        article_title = article.find('a', {'class': 'm-result__link'}).find('strong', {'class': 'm-result__title'}).text

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        if soup.find('section', {'id': 'article_body'}):
            abstract = soup.find('section', {'id': 'article_body'})
        elif soup.find('section', {'id': 'article_Abstract'}):
            abstract = soup.find('section', {'id': 'article_Abstract'})

        article_text = ''

        for text_elem in abstract.findChildren():
            article_text += text_elem.text + ' '
            
        WriteNEJMSciPaperToCsv(article_title, article_url, article_text)
            
DownloadNejmPapers()

Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1912484...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1910215...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1914347...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1914433...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1916038...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1915922...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1915925...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1916370...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1916374...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1915103...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1913808...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1915152...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa1915928...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa2000052...
Requesting https://www.nejm.org/doi/full/10.1056/NEJMoa2005412...
Requesting

# Using Science Direct for Real Science

In [None]:
def GetScienceDirectPaperArticleText(article_url):
    soup = RequestGetResPaperWebsite(article_url)

    article_text = ''

    if soup.find('div', {'id': 'abstracts'}):
        abstract = soup.find('div', {'id': 'abstracts'}).select('div.abstract.author')

        for section in abstract:
            for div in section.findChildren():
                for text_elem in div.findChildren():
                    article_text += text_elem.text + ' '

    return article_text

def DownloadScienceDirectPapersFromSearch():
    soup = RequestGetResPaperWebsite('')

    articles = soup.find_all('div', {'class': 'result-item-content'})

    for article in articles:
        article_title = article.find('h2').text
        article_url = 'https://www.sciencedirect.com' + article.find('h2').find('a').get('href')

        print('Requesting ' + article_url + '...')

        article_text = GetScienceDirectPaperArticleText(article_url)

        WriteSciPaperToCsv(article_title, article_url, article_text)

DownloadScienceDirectPapersFromSearch()

In [6]:
def WriteLancetSciPaperToCsv(article_title, article_url, article_text):
    with open('sci_lancet_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])

def DownloadLancetPapers():
    soup = RequestGetResPaperWebsite('https://www.thelancet.com/online-first-research')

    articles = soup.find_all('h4', {'class': 'title'})

    for article in articles:
        article_title = article.text
        article_url = 'https://www.thelancet.com' + article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = ''

        if soup.find('div', {'class': 'article__body'}):
            if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}):
                if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'}):

                    abstract = soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'})

                    for text_elem in abstract.findChildren():
                        article_text += text_elem.text + ' '

                    print(article_text)

        WriteLancetSciPaperToCsv(article_title, article_url, article_text)

DownloadLancetPapers()

Requesting https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(20)30554-7/fulltext...
Background Catheter-based renal denervation has significantly reduced blood pressure in previous
         studies. Following a positive pilot trial, the SPYRAL HTN-OFF MED (SPYRAL Pivotal)
         trial was designed to assess the efficacy of renal denervation in the absence of antihypertensive
         medications.
       Methods In this international, prospective, single-blinded, sham-controlled trial, done at
         44 study sites in Australia, Austria, Canada, Germany, Greece, Ireland, Japan, the
         UK, and the USA, hypertensive patients with office systolic blood pressure of 150
         mm Hg to less than 180 mm Hg were randomly assigned 1:1 to either a renal denervation
         or sham procedure. The primary efficacy endpoint was baseline-adjusted change in 24-h
         systolic blood pressure and the secondary efficacy endpoint was baseline-adjusted
         change in off

Background An unmet clinical need remains for an effective tetravalent dengue vaccine suitable
         for all age groups, regardless of serostatus. We assessed the immunogenicity and safety
         of three different dose schedules of a tetravalent dengue vaccine (TAK-003) over a
         48-month period in children living in dengue-endemic countries.
       Methods We did a large, phase 2, double-blind, placebo-controlled trial at three sites in
         the Dominican Republic, Panama, and the Philippines. Healthy participants aged 2–17
         years were randomly assigned 1:2:5:1 using an interactive web response system with
         stratification by age to receive either a two-dose primary series (days 1 and 91),
         one primary dose (day 1), one primary dose plus booster (days 1 and 365), or placebo.
         Participants and relevant study personnel were masked to the random assignment until
         completion of the study at month 48. To maintain masking, TAK-003 recip

Background Artemisinin and partner-drug resistance in Plasmodium falciparum are major threats to malaria control and elimination. Triple artemisinin-based combination therapies (TACTs), which combine existing co-formulated ACTs with a second partner drug that is slowly eliminated, might provide effective treatment and delay emergence of antimalarial drug resistance. Plasmodium falciparum Methods In this multicentre, open-label, randomised trial, we recruited patients with uncomplicated P falciparum malaria at 18 hospitals and health clinics in eight countries. Eligible patients were aged 2–65 years, with acute, uncomplicated P falciparum malaria alone or mixed with non-falciparum species, and a temperature of 37·5°C or higher, or a history of fever in the past 24 h. Patients were randomly assigned (1:1) to one of two treatments using block randomisation, depending on their location: in Thailand, Cambodia, Vietnam, and Myanmar patients were assigned to either dihydroartemisinin–piperaqu

Background Congenital Zika syndrome causes a spectrum of neurological symptoms with varying effects
         on function that require different therapeutic strategies. To date, this spectrum
         of effects and its clinical implications have not been completely described. We describe
         the neurological examination findings in toddlers and preschoolers, including predominant
         symptom complexes and comorbidities.
       Methods This study is a case-series neurological evaluation of 75 children with congenital
         Zika syndrome in Campina Grande, Brazil. The study is part of a cohort of children
         with congenital Zika syndrome that started in 2015 and is still ongoing. Children
         with Zika virus infection detected during pregnancy (mothers exhibited rash and were
         followed and diagnosed by fetal ultrasound abnormalities or RT-PCR) or through microcephaly
         screening after birth, using Intergrowth 21 guidelines, were selected by laborato

Background Oesophageal cancer is a common and often fatal cancer that has two main histological subtypes: oesophageal squamous cell carcinoma and oesophageal adenocarcinoma. Updated statistics on the incidence and mortality of oesophageal cancer, and on the disability-adjusted life-years (DALYs) caused by the disease, can assist policy makers in allocating resources for prevention, treatment, and care of oesophageal cancer. We report the latest estimates of these statistics for 195 countries and territories between 1990 and 2017, by age, sex, and Socio-demographic Index (SDI), using data from the Global Burden of Diseases, Injuries, and Risk Factors Study 2017 (GBD). Methods We used data from vital registration systems, vital registration-samples, verbal autopsy records, and cancer registries, combined with relevant modelling, to estimate the mortality, incidence, and burden of oesophageal cancer from 1990 to 2017. Mortality-to-incidence ratios (MIRs) were estimated and fed into a Caus

Background Gastro-oesophageal reflux disease is a common chronic ailment that causes uncomfortable symptoms and increases the risk of oesophageal adenocarcinoma. We aimed to report the burden of gastro-oesophageal reflux disease in 195 countries and territories between 1990 and 2017, using data from the Global Burden of Diseases, Injuries, and Risk Factors Study (GBD) 2017. Methods We did a systematic review to identify measurements of the prevalence of gastro-oesophageal reflux disease in geographically defined populations worldwide between 1990 and 2017. These estimates were analysed with DisMod-MR, a Bayesian mixed-effects meta-regression tool that incorporates predictive covariates and adjustments for differences in study design in a geographical cascade of models. Fitted values for broader geographical units inform prior distributions for finer geographical units. Prevalence was estimated for 195 countries and territories. Reports of the frequency and severity of symptoms among in

Background Crohn's disease is a risk factor for colorectal cancer (CRC). However, available studies
         reflect older treatment and surveillance strategies, and most have assessed risks
         for incident CRC without taking surveillance and lead-time bias into account. Such
         biases can be accounted for by assessing CRC incidence by tumour stage and CRC mortality
         by tumour stage. We aimed to assess rates of incident CRC and CRC mortality among
         patients with Crohn's disease compared with the general population.
       Methods For this nationwide register-based cohort study, we used International Classification
         of Disease codes in national patient registers and pathology reports to identify incident
         cases of Crohn's disease. In Denmark we searched for incident cases between January,
         1977, and December, 2011, and in Sweden between January, 1969, and December, 2017.
         For each patient with Crohn's disease, we identified up 

Background GRECCAR 2 was the first multicentre, randomised trial to compare local excision with
         total mesorectal excision in downstaged low rectal cancer. Encouraging oncological
         results were noted at 3 years' follow-up but needed to be corroborated with longer
         follow-up. In this study, we aimed to report the 5-year oncological outcomes, including
         local recurrence, metastatic disease, and survival.
       Methods Patients age 18 years and older with T2T3 low rectal cancer, of maximum size 4 cm,
         who were clinically good responders after chemoradiotherapy (residual tumour ≤2 cm)
         were randomly assigned before surgery to either local excision or total mesorectal
         excision. Randomisation was centralised and not stratified and used permuted blocks
         of size eight. In the local excision group, a completion total mesorectal excision
         was performed if pathological tumour stage was ypT2–3. The primary objective of this


Background Multiple myeloma is an incurable haematological malignancy, representing over 10%
         of haematological cancers in the USA. We did a phase 1–2 study of melflufen and dexamethasone
         in patients with relapsed and refractory multiple myeloma to determine the maximum
         tolerated dose of melflufen and to investigate its safety and efficacy.
       Methods We did a multicentre, international, dose-confirmation and dose-expansion, open-label,
         phase 1–2 study in seven centres in the USA and Europe. Eligible patients were aged
         18 years or older, had relapsed and refractory multiple myeloma, had received two
         or more previous lines of therapy (including lenalidomide and bortezomib), were refractory
         to their last line of therapy, and had an Eastern Cooperative Oncology Group performance
         status of 2 or less. In phase 1, patients received an intravenous infusion of melflufen
         at 15 mg, 25 mg, 40 mg, or 55 mg for 30 m

Background Short-course preventive therapy with 12 doses of once-weekly rifapentine (900 mg)
         plus isoniazid (900 mg) could greatly improve tuberculosis control, especially in
         areas with high co-endemicity with HIV. However, a small previous trial of such therapy
         with dolutegravir in healthy, HIV-negative adults was halted early after two of the
         four patients developed serious adverse events. Because of the potential use of this
         therapy, and variable safety outcomes of tuberculosis drugs seen in patients with
         and without HIV, we aimed to characterise safety, pharmacokinetics, and virological
         suppression in adults who are HIV positive.
       Methods DOLPHIN was a phase 1/2, single-arm trial done at The Aurum Institute (Tembisa Clinical
         Research Site, Tembisa, South Africa), with pharmacokinetic visits done at VxPharma
         (Pretoria, South Africa). Adults (≥18 years) with HIV infection and undetectable viral
   

Background Current efficacy studies of a mosaic HIV-1 prophylactic vaccine require four vaccination
         visits over one year, which is a complex regimen that could prove challenging for
         vaccine delivery at the community level, both for recipients and clinics. In this
         study, we evaluated the safety, tolerability, and immunogenicity of shorter, simpler
         regimens of trivalent Ad26.Mos.HIV expressing mosaic HIV-1 Env/Gag/Pol antigens combined
         with aluminium phosphate-adjuvanted clade C gp140 protein.
       Methods We did this randomised, double-blind, placebo-controlled phase 1 trial (IPCAVD010/HPX1002)
         at Beth Israel Deaconess Medical Center in Boston, MA, USA. We included healthy, HIV-uninfected
         participants (aged 18–50 years) who were considered at low risk for HIV infection
         and had not received any vaccines in the 14 days before study commencement. We randomly
         assigned participants via a computer-generated ran

Background A monovalent, parenteral, subunit rotavirus vaccine was well tolerated and immunogenic in adults in the USA and in toddlers and infants in South Africa, but elicited poor responses against heterotypic rotavirus strains. We aimed to evaluate safety and immunogenicity of a trivalent vaccine formulation (P2-VP8-P[4],[6],[8]). Methods A double-blind, randomised, placebo-controlled, dose-escalation, phase 1/2 study was done at three South African research sites. Healthy adults (aged 18–45 years), toddlers (aged 2–3 years), and infants (aged 6–8 weeks, ≥37 weeks' gestation, and without previous receipt of rotavirus vaccination), all without HIV infection, were eligible for enrolment. In the dose-escalation phase, adults and toddlers were randomly assigned in blocks (block size of five) to receive 30 μg or 90 μg of vaccine, or placebo, and infants were randomly assigned in blocks (block size of four) to receive 15 μg, 30 μg, or 90 μg of vaccine, or placebo. In the expanded phase, i

Background In the face of rapidly changing data, a range of case fatality ratio estimates for coronavirus disease 2019 (COVID-19) have been produced that differ substantially in magnitude. We aimed to provide robust estimates, accounting for censoring and ascertainment biases. Methods We collected individual-case data for patients who died from COVID-19 in Hubei, mainland China (reported by national and provincial health commissions to Feb 8, 2020), and for cases outside of mainland China (from government or ministry of health websites and media reports for 37 countries, as well as Hong Kong and Macau, until Feb 25, 2020). These individual-case data were used to estimate the time between onset of symptoms and outcome (death or discharge from hospital). We next obtained age-stratified estimates of the case fatality ratio by relating the aggregate distribution of cases to the observed cumulative deaths in China, assuming a constant attack rate by age and adjusting for demography and age-

Background In December, 2019, coronavirus disease 2019 (COVID-19) caused by severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) emerged in Wuhan, China. The number of affected pregnant women is increasing, but scarce information is available about the clinical features of COVID-19 in pregnancy. This study aimed to clarify the clinical features and obstetric and neonatal outcomes of pregnant patients with COVID-19. Methods In this retrospective, single-centre study, we included all pregnant women with COVID-19 who were admitted to Tongji Hospital in Wuhan, China. Clinical features, treatments, and maternal and fetal outcomes were assessed. Findings Seven patients, admitted to Tongji Hospital from Jan 1, to Feb 8, 2020, were included in our study. The mean age of the patients was 32 years (range 29–34 years) and the mean gestational age was 39 weeks plus 1 day (range 37 weeks to 41 weeks plus 2 days). Clinical manifestations were fever (six [86%] patients), cough (one [14%] pati

Background Since the coronavirus disease 2019 outbreak began in the Chinese city of Wuhan on Dec 31, 2019, 68 imported cases and 175 locally acquired infections have been reported in Singapore. We aimed to investigate options for early intervention in Singapore should local containment (eg, preventing disease spread through contact tracing efforts) be unsuccessful. Methods We adapted an influenza epidemic simulation model to estimate the likelihood of human-to-human transmission of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) in a simulated Singaporean population. Using this model, we estimated the cumulative number of SARS-CoV-2 infections at 80 days, after detection of 100 cases of community transmission, under three infectivity scenarios (basic reproduction number [R0] of 1·5, 2·0, or 2·5) and assuming 7·5% of infections are asymptomatic. We first ran the model assuming no intervention was in place (baseline scenario), and then assessed the effect of four interventio

Background The clinical and epidemiological significance of HIV-associated Mycobacterium tuberculosis bloodstream infection (BSI) is incompletely understood. We hypothesised that M tuberculosis BSI prevalence has been underestimated, that it independently predicts death, and that sputum Xpert MTB/RIF has suboptimal diagnostic yield for M tuberculosis BSI. Mycobacterium tuberculosis M tuberculosis M tuberculosis Methods We did a systematic review and individual patient data (IPD) meta-analysis of studies performing routine mycobacterial blood culture in a prospectively defined patient population of people with HIV aged 13 years or older. Studies were identified through searching PubMed and Scopus up to Nov 10, 2018, without language or date restrictions and through manual review of reference lists. Risk of bias in the included studies was assessed with an adapted QUADAS-2 framework. IPD were requested for all identified studies and subject to harmonised inclusion criteria: age 13 years 

Background Carbapenem-resistant Enterobacterales (CRE) are a global threat. We aimed to describe
         the clinical and molecular characteristics of Centers for Disease Control and Prevention
         (CDC)-defined CRE in the USA.
       Methods CRACKLE-2 is a prospective, multicentre, cohort study. Patients hospitalised in 49
         US hospitals, with clinical cultures positive for CDC-defined CRE between April 30,
         2016, and Aug 31, 2017, were included. There was no age exclusion. The primary outcome
         was desirability of outcome ranking (DOOR) at 30 days after index culture. Clinical
         data and bacteria were collected, and whole genome sequencing was done. This trial
         is registered with 
         ClinicalTrials.gov, number 
         NCT03646227.
       ClinicalTrials.gov NCT03646227 Findings 1040 patients with unique isolates were included, 449 (43%) with infection and 591
         (57%) with colonisation. The CDC-defined CRE admission rate was 57 

Background Tuberculosis remains a global health challenge, with early diagnosis key to its reduction. Face-mask sampling detects exhaled Mycobacterium tuberculosis. We aimed to investigate bacillary output from patients with pulmonary tuberculosis and to assess the potential of face-mask sampling as a diagnostic method in active case-finding. Mycobacterium tuberculosis Methods We did a 24-h longitudinal study in patients from three hospitals in Pretoria, South Africa, with microbiologically confirmed pulmonary tuberculosis. Patients underwent 1 h of face-mask sampling eight times over a 24-h period, with contemporaneous sputum sampling. M tuberculosis was detected by quantitative PCR. We also did an active case-finding pilot study in inhabitants of an informal settlement near Pretoria. We enrolled individuals with symptoms of tuberculosis on the WHO screening questionnaire. Participants provided sputum and face-mask samples that were tested with the molecular assay Xpert MTB/RIF Ultra.

Background We did a phase 2 trial of pembrolizumab in patients with non-small-cell lung cancer
         (NSCLC) or melanoma with untreated brain metastases to determine the activity of PD-1
         blockade in the CNS. Interim results were previously published, and we now report
         an updated analysis of the full NSCLC cohort.
       Methods This was an open-label, phase 2 study of patients from the Yale Cancer Center (CT,
         USA). Eligible patients were at least 18 years of age with stage IV NSCLC with at
         least one brain metastasis 5–20 mm in size, not previously treated or progressing
         after previous radiotherapy, no neurological symptoms or corticosteroid requirement,
         and Eastern Cooperative Oncology Group performance status less than two. Modified
         Response Evaluation Criteria in Solid Tumors (mRECIST) criteria was used to evaluate
         CNS disease; systemic disease was not required for participation. Patients were treated
        

Background Fibroblast growth factor receptor (
         FGFR) 2 gene alterations are involved in the pathogenesis of cholangiocarcinoma. Pemigatinib
         is a selective, potent, oral inhibitor of FGFR1, 2, and 3. This study evaluated the
         safety and antitumour activity of pemigatinib in patients with previously treated,
         locally advanced or metastatic cholangiocarcinoma with and without 
         FGFR2 fusions or rearrangements.
       FGFR FGFR2 Methods In this multicentre, open-label, single-arm, multicohort, phase 2 study (FIGHT-202),
         patients aged 18 years or older with disease progression following at least one previous
         treatment and an Eastern Cooperative Oncology Group (ECOG) performance status of 0–2
         recruited from 146 academic or community-based sites in the USA, Europe, the Middle
         East, and Asia were assigned to one of three cohorts: patients with 
         FGFR2 fusions or rearrangements, patients with other 
         F

Background Prenatal and perinatal insults are implicated in the aetiopathogenesis of psychotic
         disorders but the consistency and magnitude of their associations with psychosis have
         not been updated for nearly two decades. The aim of this systematic review and meta-analysis
         was to provide a comprehensive and up-to-date synthesis of the evidence on the association
         between prenatal or perinatal risk and protective factors and psychotic disorders.
       Methods In this systematic review and meta-analysis, we searched the Web of Science database
         for articles published up to July 20, 2019. We identified cohort and case-control
         studies examining the association (odds ratio [OR]) between prenatal and perinatal
         factors and any International Classification of Diseases (ICD) or Diagnostic and Statistical
         Manual of Mental Disorders (DSM) non-organic psychotic disorder with a healthy comparison
         group. Other inclusion 

Background Tachycardia and atrial fibrillation frequently occur in patients being treated for
         sepsis or septic shock and have a poor prognosis. Treatments for tachyarrhythmias
         are often ineffective or contraindicated in this setting. We aimed to investigate
         the efficacy and safety of landiolol, an ultra-short-acting β-blocker, for treating
         sepsis-related tachyarrhythmias.
       Methods We did a multicentre, open-label, randomised controlled trial at 54 hospitals in Japan.
         Patients admitted to the intensive care units who received conventional treatment
         for sepsis, according to clinical guidelines for the management of sepsis, and who
         subsequently developed a tachyarrhythmia, were enrolled. The main inclusion criteria
         were 20 years of age or older, diagnosis of sepsis according to Third International
         Consensus Definitions for Sepsis and Septic Shock criteria, administration of catecholamine
         necess

Background Respiratory syncytial virus (RSV) infection during infancy is suggested to cause long-term
         wheeze. In turn, wheeze has been associated with bacterial dysbiosis of the respiratory
         tract. We investigated the effects of RSV prophylaxis with palivizumab in otherwise
         healthy preterm infants on respiratory microbiota composition at 1 year and 6 years
         of age.
       Methods In a multicentre, single-blind, randomised, placebo-controlled trial (the MAKI trial),
         infants born between 32–35 weeks of gestation, in one university and in 15 regional
         hospitals in the the Netherlands, were randomly assigned (1:1) to receive palivizumab
         or placebo during the RSV season of their first year of life. Intramuscular injections
         of palivizumab 15 mg/kg or placebo were given during one RSV season: either from Oct
         1, or from discharge from the neonatal unit until March 10 (minimun of 2 and maximum
         of 5 injections

Background Accurate prediction of exacerbation risk enables personalised care for patients with
         chronic obstructive pulmonary disease (COPD). We developed and validated a generalisable
         model to predict individualised rate and severity of COPD exacerbations.
       Methods In this risk modelling study, we pooled data from three COPD trials on patients with
         a history of exacerbations. We developed a mixed-effect model to predict exacerbations
         over 1 year. Severe exacerbations were those requiring inpatient care. Predictors
         were history of exacerbations, age, sex, body-mass index, smoking status, domiciliary
         oxygen therapy, lung function, symptom burden, and current medication use. Evaluation
         of COPD Longitudinally to Identify Predictive Surrogate End-points (ECLIPSE), a multicentre
         cohort study, was used for external validation.
       Results The development dataset included 2380 patients, 1373 (58%) of whom were me

Background Gefapixant is a P2X3 receptor antagonist that has shown promise for the treatment
         of refractory and unexplained chronic cough. The aim of this study was to evaluate
         the efficacy of gefapixant compared with placebo after 12 weeks of treatment for refractory
         chronic cough or unexplained chronic cough.
       Methods We did a 12-week, phase 2b, randomised, double-blind, placebo-controlled study in
         patients with refractory chronic cough or unexplained chronic cough aged 18–80 years
         who were recruited from 44 primarily outpatient pulmonologist or allergist sites in
         the UK and the USA. Eligible patients had refractory or unexplained chronic cough
         lasting 1 year or longer, no radiographic chest abnormality, and 40 mm or more on
         a 100-mm cough severity visual analogue scale at enrolment. Patients were randomly
         assigned to receive placebo or one of three doses (7·5 mg, 20 mg, or 50 mg) of oral
         g

Background The observation that patients with idiopathic pulmonary fibrosis (IPF) can have higher
         than normal expiratory flow rates at low lung volumes led to the conclusion that the
         airways are spared in IPF. This study aimed to re-examine the hypothesis that airways
         are spared in IPF using a multiresolution imaging protocol that combines multidetector
         CT (MDCT), with micro-CT and histology.
       Methods This was a retrospective cohort study comparing explanted lungs from patients with
         severe IPF treated by lung transplantation with a cohort of unused donor (control)
         lungs. The donor control lungs had no known lung disease, comorbidities, or structural
         lung injury, and were deemed appropriate for transplantation on review of the clinical
         files. The diagnosis of IPF in the lungs from patients was established by a multidisciplinary
         consensus committee according to existing guidelines, and was confirmed by

Background The DNA released into the bloodstream by malignant tumours· called circulating tumour DNA (ctDNA), is often a small fraction of total cell-free DNA shed predominantly by hematopoietic cells and is therefore challenging to detect. Understanding the biological properties of ctDNA is key to the investigation of its clinical relevance as a non-invasive marker for cancer detection and monitoring. Methods We selected 40 plasma DNA samples of pancreatic cancer cases previously reported to carry a KRAS mutation at the ‘hotspot’ codon 12 and re-screened the cell-free DNA using a 4-size amplicons strategy (57 bp, 79 bp, 167 bp and 218 bp) combined with ultra-deep sequencing in order to investigate whether amplicon lengths could impact on the capacity of detection of ctDNA, which in turn could provide inference of ctDNA and non-malignant cell-free DNA size distribution. KRAS Findings Higher KRAS amplicon size (167 bp and 218 bp) was associated with lower detectable cell-free DNA mutant

Background A novel coronavirus disease (COVID-19) outbreak due to the severe respiratory syndrome coronavirus (SARS-CoV-2) infection occurred in China in late December 2019. Facemask wearing with proper hand hygiene is considered an effective measure to prevent SARS-CoV-2 transmission, but facemask wearing has become a social concern due to the global facemask shortage. China is the major facemask producer in the world, contributing to 50% of global production. However, a universal facemask wearing policy would put an enormous burden on the facemask supply. Methods We performed a policy review concerning facemasks using government websites and mathematical modelling shortage analyses based on data obtained from the National Health Commission (NHC), the Ministry of Industry and Information Technology (MIIT), the Centre for Disease Control and Prevention (CDC), and General Administration of Customs (GAC) of the People's Republic of China. Three scenarios with respect to wearing facemasks

Background Menstrual tampons are widely used in western countries. Indirect evidence suggests that tampon misuse could be associated with an increased risk of menstrual toxic shock syndrome (MTSS). The aim of this study was to determine what characteristics of tampon use are associated with increased risk of menstrual toxic shock syndrome (MTSS). Methods A nationwide, case-control study in France, was conducted with women that use tampons with MTSS diagnoses according to the CDC diagnostic criteria (n = 55, from January 2011, to December 2017) and a control group of women with no MTSS history (n = 126, from February to December 2017). Information regarding tampon use during a 6-month period was collected. Associations between tampon use and MTSS were assessed using logistic regression models stratified by residential area. n n Findings Compared to controls, women diagnosed with MTSS more frequently reported maximum tampon wear of >6 h (62% vs. 41%; P = 0.02), overnight tampon use (77% 

Background Sexual harassment of women in academic medicine may impede advancement and productivity. This study analyzes the longitudinal effects of sexual harassment on academic advancement and productivity among women. Methods We undertook a longitudinal analysis to predict effects of sexual harassment reported in 1995 on career outcomes measured in 2012–13, among a sample of women in academic medicine (N = 1273) recruited from 24 U.S. medical schools. Measures included survey data from 1995 on sexual harassment (predictor), and 2012–2013 data on retention in academic medicine, rank, leadership positions, and refereed publications (outcomes), captured from surveys and public records. We used multivariable models to test effects of sexual harassment on study outcomes, adjusting for socio-demographics, employment-related variables, and gender discrimination. N Findings In 1995, 54% of women reported any workplace sexual harassment, and 32% of women reported severe harassment (e.g., thre

Background Bullying victimisation is a global public health problem that has been predominantly studied in high income countries. This study aimed to estimate the population level prevalence of bullying victimisation and its association with peer and parental supports amongst adolescents across low and middle income to high income countries (LMIC HICs). Methods Data were drawn from the Global School-based Student Health Survey of school children aged 12–17 years, between 2003 and 2015, in 83 LMIC HICs in the six World Health Organization (WHO) regions. We estimated the weighted prevalence of bullying victimisation at country, region and global level. We used multiple binary logistic regression models to estimate the adjusted association of age, gender, socioeconomic status, and parental support and peer support, and country level variables (GDP and government expenditure on education) with adolescent bullying victimisation. Findings Of the 317,869 adolescents studied, 151,036 (48%) wer

In [9]:
# Trying now with search page.
# changed 'h4' to 'h2'

def WriteLancetSciPaperToCsv(article_title, article_url, article_text):
    with open('sci_lancet_research_search_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])

def DownloadLancetPapers():
    soup = RequestGetResPaperWebsite('https://www.thelancet.com/action/doSearch?journalCode=&searchText1=the&occurrences1=all&op1=and&searchText2=for&occurrences2=all&seriesISSNFltraddfilter=2352-3964&seriesISSNFltraddfilter=2589-5370&seriesISSNFltraddfilter=0140-6736&seriesISSNFltraddfilter=2352-4642&seriesISSNFltraddfilter=2213-8587&seriesISSNFltraddfilter=2589-7500&seriesISSNFltraddfilter=2468-1253&seriesISSNFltraddfilter=2214-109X&seriesISSNFltraddfilter=2352-3026&seriesISSNFltraddfilter=2352-3018&seriesISSNFltraddfilter=1473-3099&seriesISSNFltraddfilter=2666-5247&seriesISSNFltraddfilter=1474-4422&seriesISSNFltraddfilter=1470-2045&seriesISSNFltraddfilter=2542-5196&seriesISSNFltraddfilter=2215-0366&seriesISSNFltraddfilter=2468-2667&seriesISSNFltraddfilter=2213-2600&seriesISSNFltraddfilter=2665-9913&artLblFltraddfilter=Research&date=range&dateRange=6m&searchAttempt=&searchType=advanced&doSearch=Search&rows=100&startPage=1#navigation')

    articles = soup.find_all('h2', {'class': 'title'})

    for article in articles:
        article_title = article.text
        article_url = 'https://www.thelancet.com' + article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        article_text = ''

        if soup.find('div', {'class': 'article__body'}):
            if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}):
                if soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'}):

                    abstract = soup.find('div', {'class': 'article__body'}).find('div', {'class': 'container'}).find('section').find('div', {'class': 'section-paragraph'})

                    for text_elem in abstract.findChildren():
                        article_text += text_elem.text + ' '

                    print(article_text)

        WriteLancetSciPaperToCsv(article_title, article_url, article_text)

DownloadLancetPapers()

Requesting https://www.thelancet.com/scopus/S2214-109X(20)30024-3...
Requesting https://www.thelancet.com/scopus/S2214-109X(20)30025-5...
Requesting https://www.thelancet.com/scopus/S2214-109X(20)30003-6...
Requesting https://www.thelancet.com/scopus/S2214-109X(20)30004-8...
Requesting https://www.thelancet.com/scopus/S1474-4422(20)30004-1...
Requesting https://www.thelancet.com/scopus/S2542-5196(20)30027-9...
Requesting https://www.thelancet.com/scopus/S2542-5196(20)30037-1...
Requesting https://www.thelancet.com/scopus/S2542-5196(20)30055-3...
Requesting https://www.thelancet.com/scopus/S2352-4642(20)30023-7...
Requesting https://www.thelancet.com/scopus/S2352-4642(20)30021-3...
Requesting https://www.thelancet.com/scopus/S2214-109X(20)30074-7...
Requesting https://www.thelancet.com/scopus/S1470-2045(20)30027-9...
Requesting https://www.thelancet.com/scopus/S1470-2045(20)30011-5...


KeyboardInterrupt: 

# Pseudoscience Scraping Code

In [87]:
def WritePseudoSciPaperToCsv(article_title, article_url, article_text):
    with open('pseudosci_research_papers.csv', mode='a', encoding='utf8', newline='') as research_papers_file:
        res_paper_writer = csv.writer(research_papers_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        res_paper_writer.writerow([article_title, article_url, article_text])

In [112]:
# so far, have used ~400 from sciencedirect homeopathy, meridian studies, ~300 chiropractic



def GetScienceDirectPaperArticleText(article_url):
    soup = RequestGetResPaperWebsite(article_url)

    article_text = ''

    if soup.find('div', {'id': 'abstracts'}):
        abstract = soup.find('div', {'id': 'abstracts'}).select('div.abstract.author')

        for section in abstract:
            for div in section.findChildren():
                for text_elem in div.findChildren():
                    article_text += text_elem.text + ' '

    return article_text

def DownloadScienceDirectPapersFromSearch():
    soup = RequestGetResPaperWebsite('https://www.sciencedirect.com/search/advanced?pub=Journal%20of%20Chiropractic%20Medicine&cid=274165&articleTypes=FLA%2CREV&show=100&offset=300')

    articles = soup.find_all('div', {'class': 'result-item-content'})

    for article in articles:
        article_title = article.find('h2').text
        article_url = 'https://www.sciencedirect.com' + article.find('h2').find('a').get('href')

        print('Requesting ' + article_url + '...')

        article_text = GetScienceDirectPaperArticleText(article_url)

        WritePseudoSciPaperToCsv(article_title, article_url, article_text)

DownloadScienceDirectPapersFromSearch()

Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600640...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600615...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600597...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707601037...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707601001...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707601025...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707601013...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707601049...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600925...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600949...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600937...
Requesting https://www.sciencedirect.com/science/article/pii/S0899346707600950...
Requesting https

In [122]:
def DownloadLiebertPubPapers():
    soup = RequestGetResPaperWebsite('https://www.liebertpub.com/action/doSearch?AllField=design&ContentItemType=research-article&SeriesKey=acm&startPage=3&pageSize=100')

    articles = soup.find_all('h5', {'class': 'meta__title'})

    for article in articles:
        article_title = article.text
        article_url = 'https://www.liebertpub.com' + article.find('a').get('href')

        print('Requesting ' + article_url + '...')

        soup = RequestGetResPaperWebsite(article_url)

        if soup.find('div', {'class': 'hlFld-Abstract'}):
            article_text = soup.find('div', {'class': 'hlFld-Abstract'}).text
        else:
            article_text = ''

        WritePseudoSciPaperToCsv(article_title, article_url, article_text)
        
DownloadLiebertPubPapers()

Requesting https://www.liebertpub.com/doi/10.1089/acm.2008.0521...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2016.0251...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2007.7186...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2005.11.681...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2008.0204...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2000.6.7...
Requesting https://www.liebertpub.com/doi/10.1089/107555303322284794...
Requesting https://www.liebertpub.com/doi/10.1089/107555304322849066...
Requesting https://www.liebertpub.com/doi/10.1089/acm.1997.3.55...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2006.12.141...
Requesting https://www.liebertpub.com/doi/10.1089/107555301753393878...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2005.11.179...
Requesting https://www.liebertpub.com/doi/10.1089/acm.2011.0148...
Requesting https://www.liebertpub.com/doi/10.1089/acm.1999.5.437...
Requesting https://www.liebertpub.com/doi

In [123]:
import pandas as pd
pseu = pd.read_csv("pseudosci_research_papers.csv")
pseu.shape

(1623, 3)