In [6]:
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options

class Results(object):
    
    def __init__(self, paperName):
        self.paperName = paperName
        self.resultsTable = None
        
    def getResultsTableData(self):
        if self.resultsTable is None:
            return None
        else:
            spans = self.resultsTable.find_all("span")
            divs = self.resultsTable.find_all("div")
            return spans + divs

def formatForRWDB(s):
    rwBaseURL = 'http://retractiondatabase.org/RetractionSearch.aspx#?ttl%3d'
    # will need to research and expand on this
    s = s.replace(' ', '%2b')
    s = s.replace(':', '%253a')
    return rwBaseURL + s

def getResultsTable(soup):
    # get all the tables from th page
    tables = soup.find_all("tbody")
    # get the table with the results
    resultsTable = [t for t in tables if len(t.find_all("span", attrs = {"class" : "totalItems"})) > 0]
    if len(resultsTable) == 0:
        return None
    else:
        return resultsTable[0]

def getResultsTableData(resultsTable):
    spans = resultsTable.find_all("span")
    divs = resultsTable.find_all("div")
    return spans + divs
    
def queryRWDB(targets):
    resList = []
    for t in targets:
        opts = Options()
        opts.headless = True
        driver = webdriver.Firefox(options = opts)
        driver.get(formatForRWDB(t))
        htmlSrc = driver.page_source
        soup = BeautifulSoup(htmlSrc)
        result = Results(t)
        result.resultsTable = getResultsTable(soup)
        resList.append(result)
        driver.quit()
    return resList

# a known retraction for testing
testTargets = [
    # this is retracted 
    'A review of convalescent plasma transfusion in COVID-19: Old wine reserved for special occasions',
    # this is retracted
    '5G Technology and induction of coronavirus in skin cells',
    # this is not
    'An interactive web-based dashboard to track COVID-19 in real time'
]

# actually query the database
resList = queryRWDB(testTargets)

In [7]:
for res in resList:
    print("For paper:", res.paperName)
    resData = res.getResultsTableData()
    if resData is None:
        print("\tNo retraction found")
    else:
        print("\tRetraction found:")
        for i in resData:
            print("\t\t", i)

For paper: A review of convalescent plasma transfusion in COVID-19: Old wine reserved for special occasions
	Retraction found:
		 <span class="totalItems">1 Item(s) Found</span>
		 <span class="rTitleNotIE">A review of convalescent plasma transfusion in COVID-19: Old wine reserved for special occasions</span>
		 <span class="rSubject">(HSC) Medicine - Immunology; (HSC) Medicine - Infectious Disease; (HSC) Medicine - Rehabilitation/Therapy; </span>
		 <span class="rJournal" style="white-space:normal"><span class="rJournal">Lung India ---</span><span class="rPublisher">Wolters Kluwer - Medknow</span></span>
		 <span class="rJournal">Lung India ---</span>
		 <span class="rPublisher">Wolters Kluwer - Medknow</span>
		 <span class="rInstitution" style="border-top-style: solid; border-top-width: 2px;">Department of Pulmonary Medicine, All India Institute of Medical Sciences, Jodhpur, Rajasthan, India</span>
		 <span class="rNature">10.4103/lungindia.lungindia_350_20</span>
		 <span class="rN

In [12]:
citations = [
    'An interactive web-based dashboard to track COVID-19 in real time',
    'Dying in a Leadership Vacuum',
    'Covid-19: Is a second wave hitting Europe?',
    'Pathophysiology, Transmission, Diagnosis, and Treatment of Coronavirus Disease 2019 (COVID-19)'
    'Prevalence of Asymptomatic SARS-CoV-2 Infection : A Narrative Review',
    'Seroprevalence of Antibodies to SARS-CoV-2 in 10 Sites in the United States',
    'The intersection of viral illnesses: A seasonal influenza epidemic amidst the COVID-19 pandemic',
    'The outlook for diagnostic purposes of the 2019-novel coronavirus disease',
    'COVID-19 diagnostic approaches: different roads to the same destination',
    'Nucleic-acid-amplification tests from respiratory samples for the diagnosis of coronavirus infections: systematic review and meta-analysis',
    'Laboratory Testing Methods for Novel Severe Acute Respiratory Syndrome-Coronavirus-2 (SARS-CoV-2)',
    'Measuring the Serologic Response to Severe Acute Respiratory Syndrome Coronavirus 2: Methods and Meaning',
    'Serology testing in the COVID-19 pandemic response',
    'Testing for SARS-CoV-2 (COVID-19): a systematic review and clinical guide to molecular and serological in-vitro diagnostic assays',
    'Comparative assessment of multiple COVID-19 serological technologies supports continued evaluation of point-of-care lateral flow assays in hospital and community healthcare settings',
    'Evaluation of SARS-CoV-2 serology assays reveals a range of test performance',
    'The Advisory Committee on Immunization Practices’ Interim Recommendation for Use of Pfizer-BioNTech COVID-19 Vaccine - United States, December 2020',
    'An mRNA Vaccine against SARS-CoV-2 - Preliminary Report',
    'Safety and Immunogenicity of SARS-CoV-2 mRNA-1273 Vaccine in Older Adults',
    'Safety and Immunogenicity of Two RNA-Based Covid-19 Vaccine Candidates',
    'Approaches and Challenges in SARS-CoV-2 Vaccine Development',
    'SARS-CoV-2 immunity: review and applications to phase 3 vaccine candidates',
    'Compromised Humoral Functional Evolution Tracks with SARS-CoV-2 Mortality',
    'Kinetics of antibody responses dictate COVID-19 outcome',
    'Meta-analysis of diagnostic performance of serology tests for COVID-19: impact of assay design and post-symptom-onset intervals',
    'Meta-analysis of diagnostic performance of serological tests for SARS-CoV-2 antibodies up to 25 April 2020 and public health implications',
    'Diagnostic accuracy of serological tests for covid-19: systematic review and meta-analysis',
    'Diagnostic Characteristics of Serological-Based COVID-19 Testing: A Systematic Review and Meta-Analysis',
    'Evaluation of diagnostic accuracy of 10 serological assays for detection of SARS-CoV-2 antibodies',
    'EUA Authorized Serology Test Performance',
    'SARS-CoV-2 serology: Test, test, test, but interpret with caution!',
    'Antibody responses to SARS-CoV-2 in patients with COVID-19',
    'Serology characteristics of SARS-CoV-2 infection after exposure and postsymptom onset',
    'Antibody Responses to SARS-CoV-2 in Patients With Novel Coronavirus Disease 2019',
    'Prevalence. StatPearls',
    'The Role of Antibody Testing for SARS-CoV-2: Is There One?',
    'Diagnostic technology for COVID-19: comparative evaluation of antigen and serology-based SARS-CoV-2 immunoassays, and contact tracing solutions for potential use as at-home products',
    'A new method of classifying prognostic comorbidity in longitudinal studies: development and validation',
    'Ultrasensitive high-resolution profiling of early seroconversion in patients with COVID-19',
    'Potent binding of 2019 novel coronavirus spike protein by a SARS coronavirus-specific human monoclonal antibody',
    'Negative Predictive Value, Low Prevalence, and Spectrum Effect: Caution in the Interpretation',
    'Projecting the transmission dynamics of SARS-CoV-2 through the postpandemic period',
    'The Power of Antibody-Based Surveillance',
    'The bumpy road to achieve herd immunity in COVID-19',
    'Evaluation of two commercial and two non-commercial immunoassays for the detection of prior infection to SARS-CoV-2',
    'SARS-CoV-2 mRNA vaccine design enabled by prototype pathogen preparedness',
    'Risks of emergency use authorizations for medical products during outbreak situations: a COVID-19 case study',
    'Cross-reactivity of SARS-CoV-2 with HIV chemiluminescent assay leading to false-positive results',
    'Testing for antibodies to SARS-CoV-2',
    'Racial Disparity of Coronavirus Disease 2019 in African American Communities',
    'Clinical and immunological assessment of asymptomatic SARS-CoV-2 infections',
    'Rapid Decay of Anti-SARS-CoV-2 Antibodies in Persons with Mild Covid-19',
    'Loss of Anti-SARS-CoV-2 Antibodies in Mild Covid-19',
    'Serological follow-up of SARS-CoV-2 asymptomatic subjects',
    'COVID-19 re-infection by a phylogenetically distinct SARS-coronavirus-2 strain confirmed by whole genome sequencing',
    'Reinfection with SARS-CoV-2: Implications for Vaccines',
    'Reinfection of SARS-CoV-2 in an immunocompromised patient: a case report',
    'Targeting SARS-CoV2 Spike Protein Receptor Binding Domain by Therapeutic Antibodies',
    '“Immunity passports” in the context of COVID-19',
    'Immunity Passports’ for SARS-CoV-2: an online experimental study of the impact of antibody test terminology on perceived risk and behaviour',
    'The scientific and ethical feasibility of immunity passports'  
]

In [14]:
print(len(citations))

60


In [9]:

batch1 = citations[0:10]
batch2 = citations[10:20]
batch3 = citations[20:30]
batch4 = citations[30:40]
batch5 = citations[40:50]
batch6 = citations[50:]

batch1 + batch2 + batch3 + batch4 + batch5 + batch6 == citations

allBatches = [batch1, batch2, batch3, batch4, batch5, batch6]

In [10]:
# quicky hacky function to display results in a reasonable way
def displayResults(results):
    for res in results:
        print("\tFor paper:", res.paperName)
        resData = res.getResultsTableData()
        if resData is None:
            print("\t\tNo retraction found")
        else:
            print("*****\t\tPossible retraction found:")
            for i in resData:
                print("\t\t", i)

In [11]:
for batch in allBatches:
    print("processing:")
    for item in batch:
        print("\t", item)
    displayResults(queryRWDB(batch))
print("done")

processing:
	 An interactive web-based dashboard to track COVID-19 in real time
	 Dying in a Leadership Vacuum
	 Covid-19: Is a second wave hitting Europe?
	 Pathophysiology, Transmission, Diagnosis, and Treatment of Coronavirus Disease 2019 (COVID-19)Prevalence of Asymptomatic SARS-CoV-2 Infection : A Narrative Review
	 Seroprevalence of Antibodies to SARS-CoV-2 in 10 Sites in the United States
	 The intersection of viral illnesses: A seasonal influenza epidemic amidst the COVID-19 pandemic
	 The outlook for diagnostic purposes of the 2019-novel coronavirus disease
	 COVID-19 diagnostic approaches: different roads to the same destination
	 Nucleic-acid-amplification tests from respiratory samples for the diagnosis of coronavirus infections: systematic review and meta-analysis
	 Laboratory Testing Methods for Novel Severe Acute Respiratory Syndrome-Coronavirus-2 (SARS-CoV-2)
	For paper: An interactive web-based dashboard to track COVID-19 in real time
		No retraction found
	For paper: 