In [1]:
import requests
from lxml import etree
import pandas as pd

def remove_space_char(s):
    '''
    Remove layout whitespace from a piece of scraped text.

    Newlines and tabs are deleted, every run of two consecutive spaces is
    deleted outright (not collapsed to a single space), and any whitespace
    left at either end is stripped.

    @param s a string (byte string or unicode text)
    @return a new string without unnecessary whitespaces
    '''
    # str.translate(None, chars) only exists for Python 2 byte strings; it
    # raises TypeError for unicode text and for every Python 3 str. Chained
    # replace() calls behave the same on all of those types.
    without_breaks = s.replace('\n', '').replace('\t', '')
    return without_breaks.replace('  ', '').strip()


def get_data():
    '''
    Get courses data from MIT OpenCourseWare.

    Downloads https://ocw.mit.edu/courses/ and pairs each department title
    with the course table found at the same position on the page. Table rows
    that lack one of the expected cells/links are skipped.

    @return a list, e.g. [course_1, course_2, ...]
     course_i is a dictionary with the keys 'course_title', 'course_#',
     'level' and 'department', e.g. {'course_title': 'string', ...}
    @raise requests.exceptions.RequestException if the page cannot be
     downloaded (connection failure, timeout or HTTP error status)
    '''
    # Without a timeout the call can block forever on an unresponsive server.
    res = requests.get("https://ocw.mit.edu/courses/", timeout=30)
    # Fail loudly on 4xx/5xx instead of scraping an error page into an
    # empty result.
    res.raise_for_status()
    # The raw bytes are parsed, so the encoding has to be handed to the
    # parser; setting res.encoding would only influence res.text.
    html_data = etree.fromstring(res.content,
                                 etree.HTMLParser(encoding='utf-8'))
    titles = html_data.xpath('//*[@id="course_wrapper"]//h3[@class="deptTitle"]/a[1]/text()')
    course_lists = html_data.xpath('//*[@id="course_wrapper"]//table[@class="courseList"]')

    ret = []
    # zip() stops at the shorter list, so a title without a matching table
    # (or the reverse) cannot cause an out-of-range lookup.
    for department, course_list in zip(titles, course_lists):
        # Relative XPath on the element replaces rebuilding an absolute path
        # with tree.getpath() for every single row.
        for row in course_list.xpath('./tbody[1]/tr'):
            try:
                course = {
                    'course_title': remove_space_char(
                        row.xpath('./td[2]/a[1]/text()')[0]),
                    'course_#': remove_space_char(
                        row.xpath('./td[1]/a[1]/text()')[0]),
                    'level': remove_space_char(
                        row.xpath('./td[3]/a[1]/text()')[0]),
                    'department': department,
                }
            except IndexError:
                # One of the lookups returned no text node, i.e. the row does
                # not have the expected cell/link layout; skip it. Any other
                # error is a real bug and is allowed to propagate.
                continue
            ret.append(course)
    return ret

# Let pandas display up to 9999 rows before it truncates the rendered table.
pd.set_option('display.max_rows', 9999)
def data_table():
    '''
    Build a table of the scraped courses.
    @return a pandas DataFrame constructed from the list returned by get_data()
    '''
    return pd.DataFrame(get_data())

# Build the course table; as the final expression of the notebook cell its
# value is presumably what gets rendered as the cell output below.
data_table()

Unnamed: 0,course_#,course_title,department,level
0,16.00,Introduction to Aerospace Engineering and Design,Aeronautics and Astronautics,Undergraduate
1,16.00AJ,"Exploring Sea, Space, & Earth: Fundamentals of...",Aeronautics and Astronautics,Undergraduate
2,16.01,"Unified Engineering I, II, III, & IV (Fall 2005)",Aeronautics and Astronautics,Undergraduate
3,16.02,"Unified Engineering I, II, III, & IV (Fall 2005)",Aeronautics and Astronautics,Undergraduate
4,16.03,"Unified Engineering I, II, III, & IV (Fall 2005)",Aeronautics and Astronautics,Undergraduate
5,16.04,"Unified Engineering I, II, III, & IV (Fall 2005)",Aeronautics and Astronautics,Undergraduate
6,16.050,Thermal Energy,Aeronautics and Astronautics,Undergraduate
7,16.06,Principles of Automatic Control,Aeronautics and Astronautics,Undergraduate
8,16.07,Dynamics,Aeronautics and Astronautics,Undergraduate
9,16.100,Aerodynamics,Aeronautics and Astronautics,Undergraduate
