In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Set up the web driver
driver = webdriver.Chrome()
driver.get("https://info.lse.ac.uk/Staff/Departments-and-Institutes")

# Find and click the department
department = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'Department of Finance')))
driver.execute_script("arguments[0].scrollIntoView();", department)
department.click()

# Find and click "People"
people = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'People')))
people.click()
people_url = driver.current_url

# Find and click "Academic Faculty"
academic_faculty = WebDriverWait(driver, 15).until(EC.element_to_be_clickable((By.LINK_TEXT, 'Finance faculty')))
driver.execute_script("arguments[0].scrollIntoView();", academic_faculty)
academic_faculty.click()

In [2]:
from bs4 import BeautifulSoup
import requests
url = driver.current_url
r = requests.get(url)
soup = BeautifulSoup(r.content,'lxml')
academic_faculty = soup.find("div", attrs={'class': "accordion__content"})
academic_faculty
text_block = academic_faculty.find_all("div", attrs={'class': "accordion__txt"})

professors = []
for professor in text_block:
    prof_names = professor.find("a", attrs={'class': "sys_0 sys_t0"})
    professors.append(prof_names)

In [3]:
professor_urls = []
for professor in professors:
    url = professor.get("href")
    url = "https://www.lse.ac.uk" + url
    if url not in professor_urls:
        professor_urls.append(url)

In [4]:
professors_dict = {}
professor_name_list = []
professor_prefix_list = []
key_expertise_list = []
professor_title_list = []
languages_list = []
title_list = []
modules_list = []

for one_url in professor_urls:
    
    r_2 = requests.get(one_url)
    soup_2 = BeautifulSoup(r_2.content,'lxml')

    # Get professor prefix and name
    professor = soup_2.find("h1", attrs={'class': 'people__name'})
    professor_prefix = professor.find('span', class_='people__title').text
    professor_name = professor.text.strip("Dr")
    professor_name = professor.text.strip("Professor")
    professor_name_list.append(professor_name)
    professor_prefix_list.append(professor_prefix)

    # Get the key expertise
    key_expertise_locate = soup_2.find('h2', text="Research Interests")
    if key_expertise_locate: # if key_expertise_locate exists and doesn't return None
        key_expertise = key_expertise_locate.find_next_sibling('p')
        text = key_expertise.get_text(separator='\n')
        key_expertise_inner_list = text.strip().split('\n')
        key_expertise_list.append(key_expertise_inner_list)
    else:
        key_expertise_list.append(None)

    # Get the languages
    languages_locate = soup_2.find('div', class_='peopleContact__method', text="Languages")
    if languages_locate:
        languages = languages_locate.find_next_sibling('div').text
        languages_list.append(languages)
    else:
        languages_list.append(None)

    
    # Get professor title
    professor_title = soup_2.find('h2', class_='people__position').text
    professor_title_list.append(professor_title)

    # Get courses that the professor is teaching
    teaching = soup_2.find(name=["h3","h2", "p"], text= ["Teaching", "Teaching:"])
    if teaching:
        module_list = teaching.find_next('ul')
        modules = [li.text for li in module_list.find_all('li')] # extract the items from the <li> elements
        modules = [module.replace('\xa0', " ").strip("\n") for module in modules]
        modules_list.append(modules)
    else:
        modules_list.append(None)

professors_dict["Professor Name"] = professor_name_list
professors_dict["Professor Prefix"] = professor_prefix_list
professors_dict["Key Expertise"] = key_expertise_list
professors_dict["Languages"] = languages_list
professors_dict["Title"] = professor_title_list
professors_dict["Modules"] = modules_list

import pandas as pd
professors_df = pd.DataFrame(professors_dict)

In [5]:
professors_df

Unnamed: 0,Professor Name,Professor Prefix,Key Expertise,Languages,Title,Modules
0,Dr Ashwini Agrawal,Dr,"[Corporate Finance, Corporate Governance, Labo...",English,Associate Professor of Finance,"[FM304 Applied Corporate Finance, FM414 Corpor..."
1,Ulf Axelson,Professor,"[Corporate Finance, Corporate Governance, Priv...",English,Professor of Finance and Private Equity,"[FM410 Private Equity, FM431L Corporate Financ..."
2,Dr Cynthia Balloch,Dr,"[International Finance, Macroeconomics, Corpor...",English,Assistant Professor of Finance,"[FM421 Applied Corporate Finance, FM473L Finan..."
3,Dr Elisabetta Bertero,Dr,[Sub-sovereign debt; the financial role of the...,English,Lecturer in Finance,"[AC100 Elements of Accounting and Finance, FM1..."
4,Mike Burkart,Professor,"[Corporate Governance, Financial Contracting]",English,Professor of Finance,"[FM436 Financial Economics, FM473M Financial M..."
5,Dr Georgy Chabakauri,Dr,"[Asset Pricing, Portfolio Choice, Risk Managem...",English,Associate Professor of Finance,"[FM300 Corporate Finance, Investments and Fina..."
6,Dr Kim Fe Cram,Dr,,English,Assistant Professor,[FM213 Principles of Finance]
7,Vicente Cuñat,Professor,"[Corporate Finance, Applied Theory, Applied Ec...",English,Professor of Finance,"[FM407 Mergers, Buyouts and Corporate Restruct..."
8,Dr Jon Danielsson,Dr,"[Systemic risk, Financial risk, Econometrics, ...",English,Reader in Finance; Director of the Systemic Ri...,"[FM403 Management and Regulation of Risk, FM44..."
9,Amil Dasgupta,Professor,"[Institutional investors, Shareholder activism...",English,Professor of Finance,"[FM422E Corporate Finance, FM475E Financial Ma..."


In [6]:
professors_df.to_csv("finance.csv")