In [None]:
# Importing required libraries

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Fetch the HTML of the session listing page.
# The filterUrn query parameter restricts the agenda to the
# "Clinical (neuropsychiatry and behavioral neurology)" category.
url = 'https://ep70.eventpilotadmin.com/web/page.php?page=Session&project=AAIC19&id=5172&filterUrn=urn%3Aeventpilot%3Aall%3Aagenda%3Afilter%3Acategoryid%3DClinical+%28neuropsychiatry+and+behavioral+neurology%29'

# requests applies no timeout by default, so pass one explicitly to keep the
# cell from hanging forever on an unresponsive server. raise_for_status()
# surfaces HTTP errors here instead of as a confusing parse failure later.
page = requests.get(url, timeout=30)
page.raise_for_status()

In [None]:
# Parse the listing page and collect the absolute URL of every presentation

# hrefs on the listing page are relative to this prefix
base_url = 'https://ep70.eventpilotadmin.com/web/'

soup = BeautifulSoup(page.content, 'html.parser')

# Each presentation is linked through an <a class="catimg"> element
catalog_anchors = soup.find_all('a', attrs={'class': 'catimg'})
presentation_links = [base_url + anchor['href'] for anchor in catalog_anchors]

print(f'# of presentation links obtained : {len(presentation_links)}')

# of presentation links obtained : 56


In [None]:
# Obtains abstract data for a presentation

def getAbstractData(abst_url):
    """Download an abstract page and return its text from 'Background:' onward.

    Parameters
    ----------
    abst_url : str
        URL of the abstract page.

    Returns
    -------
    str
        Stripped text of the first ``<div class="abstract">`` element, starting
        at the ``'Background:'`` marker. If the marker is absent, the whole
        stripped text is returned. If the page has no such div, an empty
        string is returned.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # Explicit timeout: requests would otherwise wait indefinitely.
    response = requests.get(abst_url, timeout=30)
    response.raise_for_status()
    abst_soup = BeautifulSoup(response.content, 'html.parser')

    abstract_div = abst_soup.find('div', 'abstract')
    if abstract_div is None:
        # No abstract block on the page; report it as empty text rather than
        # failing with AttributeError on None.
        return ''

    text = abstract_div.text.strip()

    # str.find returns -1 when the marker is missing; slicing from -1 would
    # keep only the final character, so fall back to the full text instead.
    marker_index = text.find('Background:')
    if marker_index == -1:
        return text
    return text[marker_index:]

In [None]:
# Scraping the links

def _split_author_block(author_line):
    """Split the author line into (authors, affiliations).

    The affiliations part starts at the first '(1)' marker; when there is no
    such marker the affiliations string is empty. The authors part starts at
    the first occurrence of 'Author', or at the beginning of the line when
    that word is missing.
    """
    # str.find returns -1 when absent; clamp to 0 so a missing 'Author' label
    # does not turn into a slice from the last character.
    author_start = max(author_line.find('Author'), 0)
    aff_start = author_line.find('(1)')

    if aff_start == -1:
        return author_line[author_start:], ''
    return author_line[author_start:aff_start], author_line[aff_start:]


def _scrape_presentation(link):
    """Scrape one presentation page and return its fields as a list.

    The list order is: abstract number, abstract title, date, time, location,
    abstract URL, authors, affiliations, abstract text, category,
    sub-category, session title.

    Uses the notebook-level `base_url` and `getAbstractData`. Local names are
    used for the response and parsed document so the notebook-level `page`
    and `soup` (the listing page) are not overwritten.
    """
    # Explicit timeout: requests would otherwise wait indefinitely.
    detail_page = requests.get(link, timeout=30)
    detail_page.raise_for_status()
    detail_soup = BeautifulSoup(detail_page.content, 'html.parser')

    # Abstract title
    abstract_title = detail_soup.find('li', 'session_detail_title_708').text

    # The first matching tag carries the time, the second the date
    date_time_tags = detail_soup.find_all('div', 'session_detail_day text_bannercolor')
    time_slot = date_time_tags[0].text.strip()
    date = date_time_tags[1].text.strip().replace('\xa0', '')  # removing the &nbsp

    # Location and abstract number share one tag, on separate lines
    location_tag = detail_soup.find('span', 'ui-li-aside session_detail_location')
    location_lines = location_tag.text.strip().split('\n')
    location = location_lines[0]
    abstract_no = location_lines[1]

    # Authors and author's affiliations live on the first line of the description
    author_line = detail_soup.find('div', 'detail_description').text.strip().split('\n')[0]
    authors, affiliations = _split_author_block(author_line)

    # Category and sub-category
    filters = detail_soup.find_all('div', 'filter_value')
    category = filters[0].text
    sub_category = filters[1].text

    # The abstract is on a separate page, linked from the parent of the first
    # 'mediabgicon' div
    abstract_tag = detail_soup.find_all('div', 'mediabgicon')[0].parent
    abst_url = base_url + abstract_tag['href']
    abstract_text = getAbstractData(abst_url)

    # Session title
    session_title = detail_soup.find('div', 'session_title list_cell_title').text

    return [abstract_no, abstract_title, date, time_slot, location, abst_url,
            authors, affiliations, abstract_text, category, sub_category,
            session_title]


presentation_data = []

for link in presentation_links:
    session = _scrape_presentation(link)
    # session[0] is the abstract number, session[1] the abstract title
    print('Extracted ' + session[0] + ' : ' + session[1])
    presentation_data.append(session)

print('\nExtracted all presentation data.')

Extracted P2-265 : The Clinical Characteristics of Cognitive Impairment in Patients with Small Vessel Disease
Extracted P2-266 : A History-Based Computerized Questionnaire for the Diagnosis of Severity and Subtypes of Dementia: Design and Verify
Extracted P2-267 : Moral Emotions in Frontotemporal Dementia
Extracted P2-268 : Validity of the Everyday Cognition (ECog) As a Screening Measure of Subjective Cognitive Decline (SCD)
Extracted P2-269 : Geriatric Depression Scale Item-Level Analysis in Relation to In Vivo Cortical Amyloid and Cerebral Regional Tau in Clinically Normal Older Adults: Findings from the Harvard Aging Brain Study
Extracted P2-270 : Protective Role of Orexinergic System Overexpression on Cognitive Function in Alzheimer Disease Patients
Extracted P2-271 : The Effect of African American Race on Neurobehavioral Outcomes, Structural MRI Indices, and Cerebrospinal Fluid Concentrations of Tau and Aβ in Former National Football League Players
Extracted P2-272 : Prevalence of

In [None]:
# Build a DataFrame from the scraped rows, indexed by abstract number
column_names = [
    'Abstract #',
    'Abstract Title',
    'Date',
    'Time',
    'Location',
    'URL',
    'Authors',
    "Author's Affiliations",
    'Abstract Text',
    'Category',
    'Sub-category',
    'Session Title',
]

df = pd.DataFrame(presentation_data, columns=column_names).set_index("Abstract #")
df.head()

Unnamed: 0_level_0,Abstract Title,Date,Time,Location,URL,Authors,Author's Affiliations,Abstract Text,Category,Sub-category,Session Title
Abstract #,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
P2-265,The Clinical Characteristics of Cognitive Impa...,"Mon, Jul 15 (Pacific)",9:30am - 4:15pm,South Hall GH - Los Angeles Convention Center,https://ep70.eventpilotadmin.com/web/page.php?...,"Author Block: Junliang Yuan Sr., MD, McLean Ho...",,Background: To explore the clinical characteri...,Diagnosis and Prognosis,Clinical (neuropsychiatry and behavioral neuro...,[Posters Mon] Diagnosis and Prognosis： Clinica...
P2-266,A History-Based Computerized Questionnaire for...,"Mon, Jul 15 (Pacific)",9:30am - 4:15pm,South Hall GH - Los Angeles Convention Center,https://ep70.eventpilotadmin.com/web/page.php?...,"Author Block: Pai-Yi Chiu, MD, PhD, Show Chwan...",,Background: It is difficult for non-specialist...,Diagnosis and Prognosis,Clinical (neuropsychiatry and behavioral neuro...,[Posters Mon] Diagnosis and Prognosis： Clinica...
P2-267,Moral Emotions in Frontotemporal Dementia,"Mon, Jul 15 (Pacific)",9:30am - 4:15pm,South Hall GH - Los Angeles Convention Center,https://ep70.eventpilotadmin.com/web/page.php?...,Author Block: Chloé Daigmorte1; Marc Teichmann...,"(1)APHP- Groupe Hospitalier Pitie Salpetriere,...",Background: Frontotemporal dementia (bv-FTD) i...,Diagnosis and Prognosis,Clinical (neuropsychiatry and behavioral neuro...,[Posters Mon] Diagnosis and Prognosis： Clinica...
P2-268,Validity of the Everyday Cognition (ECog) As a...,"Mon, Jul 15 (Pacific)",9:30am - 4:15pm,South Hall GH - Los Angeles Convention Center,https://ep70.eventpilotadmin.com/web/page.php?...,"Author Block: Minji Song, MA1,2; Lee Sun Hwa, ...",(1)Hallym University Chuncheon Sacred Heart Ho...,"Background: Everyday Cognition (ECog, Farias e...",Diagnosis and Prognosis,Clinical (neuropsychiatry and behavioral neuro...,[Posters Mon] Diagnosis and Prognosis： Clinica...
P2-269,Geriatric Depression Scale Item-Level Analysis...,"Mon, Jul 15 (Pacific)",9:30am - 4:15pm,South Hall GH - Los Angeles Convention Center,https://ep70.eventpilotadmin.com/web/page.php?...,"Author Block: Jennifer R Gatchel, MD, PhD1,2,3...","(1)Massachusetts General Hospital, Boston, MA,...",Background: While evidence suggests that depre...,Diagnosis and Prognosis,Clinical (neuropsychiatry and behavioral neuro...,[Posters Mon] Diagnosis and Prognosis： Clinica...


In [None]:
# Export the DataFrame to an Excel workbook and report where it went
fname = 'Conf Web Planner (Output File).xlsx'
df.to_excel(excel_writer=fname)
print(f'Saved to {fname}')

Saved to Conf Web Planner (Output File).xlsx
