In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set up the web driver
driver = webdriver.Chrome()
driver.get("https://info.lse.ac.uk/Staff/Departments-and-Institutes")

# Find and click the department
department = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'Department of Accounting')))
driver.execute_script("arguments[0].scrollIntoView();", department)
department.click()

# Find and click "People"
people = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'People')))
people.click()
people_url = driver.current_url

# Find and click "Academic Faculty"
academic_faculty = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'Academic Faculty')))
driver.execute_script("arguments[0].scrollIntoView();", academic_faculty)
academic_faculty.click()

In [6]:
from bs4 import BeautifulSoup
import requests
url = driver.current_url
r = requests.get(url)
soup = BeautifulSoup(r.content,'lxml')
academic_faculty = soup.find("div", attrs={'class': "accordion__content"})
academic_faculty
text_block = academic_faculty.find_all("div", attrs={'class': "accordion__txt"})

professors = []
for professor in text_block:
    prof_names = professor.find("a", attrs={'class': "sys_0 sys_t0"})
    professors.append(prof_names)

In [7]:
professor_urls = []
for professor in professors:
    url = professor.get("href")
    url = "https://www.lse.ac.uk" + url
    if url not in professor_urls:
        professor_urls.append(url)

In [8]:
professors_dict = {}
professor_name_list = []
professor_prefix_list = []
key_expertise_list = []
professor_title_list = []
languages_list = []
title_list = []
modules_list = []

for one_url in professor_urls:
    
    r_2 = requests.get(one_url)
    soup_2 = BeautifulSoup(r_2.content,'lxml')

    # Get professor prefix and name
    professor = soup_2.find("h1", attrs={'class': 'people__name'})
    professor_prefix = professor.find('span', class_='people__title').text
    professor_name = professor.text.strip("Dr")
    professor_name = professor.text.strip("Professor")
    professor_name_list.append(professor_name)
    professor_prefix_list.append(professor_prefix)

    # Get the key expertise
    key_expertise_locate = soup_2.find('div', class_='peopleContact__method', text="Key Expertise")
    if key_expertise_locate: # if key_expertise_locate exists and doesn't return None
        key_expertise = key_expertise_locate.find_next_sibling('div').text
        key_expertise_list.append(key_expertise)
    else:
        key_expertise_list.append(None)

    # Get the languages
    languages_locate = soup_2.find('div', class_='peopleContact__method', text="Languages")
    if languages_locate:
        languages = languages_locate.find_next_sibling('div').text
        languages_list.append(languages)
    else:
        languages_list.append(None)

    
    # Get professor title
    professor_title = soup_2.find('h2', class_='people__position').text
    professor_title_list.append(professor_title)

    # Get courses that the professor is teaching
    teaching = soup_2.find(name=["h3","h2", "p"], text= ["Teaching", "Teaching:"])
    if teaching:
        module_list = teaching.find_next('ul')
        modules = [li.text for li in module_list.find_all('li')] # extract the items from the <li> elements
        modules = [module.replace('\xa0', " ").strip("\n") for module in modules]
        modules_list.append(modules)
    else:
        modules_list.append(None)

professors_dict["Professor Name"] = professor_name_list
professors_dict["Professor Prefix"] = professor_prefix_list
professors_dict["Key Expertise"] = key_expertise_list
professors_dict["Languages"] = languages_list
professors_dict["Title"] = professor_title_list
professors_dict["Modules"] = modules_list

import pandas as pd
professors_df = pd.DataFrame(professors_dict)

In [9]:
professors_df.to_csv('accounting.csv')

In [10]:
professors_df

Unnamed: 0,Professor Name,Professor Prefix,Key Expertise,Languages,Title,Modules
0,Dr Per Ahblom,Dr,"Accounting & Finance Social Studies, Capital M...","English, Swedish",Assistant Professor of Accounting,[AC200 Accounting Theory and Practice (Current...
1,Alnoor Bhimani,Professor,"Accounting, Management Accounting, Tech Entrep...",English,Professor of Management Accounting,"[AC490 Management Accounting, Decisions and Co..."
2,Dr Jose Carabias Palmei,Dr,"Accounting, Financial Statements Analysis, Ass...",English,Assistant Professor of Accounting,"[AC330 Financial Accounting, Analysis and Valu..."
3,Dr Stefano Cascin,Dr,"Disclosure Regulation, Business Groups, Credit...","English, Italian",Associate Professor of Accounting,"[AC332 Financial Statement, Analysis and Valua..."
4,Dr Maria Correia,Dr,"Credit Markets, Default Prediction, Business G...",English,Associate Professor of Accounting,[AC416 Topics in Financial Reporting]
5,Dr Pascal Frantz,Dr,"Corporate Finance, Mergers & Acquisitions, Pri...","English, French",Lecturer in Accounting and Finance,[AC417 Corporate Financial Disclosure and Inve...
6,Dr Saipriya Kamath,Dr,"Accounting, Audit, Regulation, Innovation, Ris...",English,Assistant Professor of Accounting,[AC100 Elements of Accounting and Finance Curr...
7,Dr Gulnaz Khamidullina,Dr,"Corporate Reporting, Responsible Investment, E...","English, Russian",Assistant Professorial Lecturer,"[AC102 Elements of Financial Accounting, AC103..."
8,Dr Jeroen Koenraadt,Dr,"Disclosure Regulation, Financial Intermediarie...","Dutch, English",Assistant Professor of Accounting,[AC331 Contemporary Issues in Financial Accoun...
9,Dr Ken L,Dr,Valuation Methodologies,English,Associate Professorial Lecturer – MSc Programm...,
