# Scraping data from OERCOMMONS

In [1]:
import requests
from bs4 import BeautifulSoup
from typing import Optional, List

# Helper Functions for Scraping

def get_element_text(element, index: int = 0, default: str = 'N/A') -> str:
    """Return the stripped text of the ``index``-th ``<dd>`` under ``element``.

    Args:
        element: Object exposing ``find_all`` (e.g. a BeautifulSoup tag).
        index: Position of the wanted ``<dd>`` among those found.
        default: Value returned when the text cannot be obtained.

    Returns:
        The whitespace-stripped text, or ``default`` when ``element`` lacks
        the needed attributes, the index is out of range, or the text is
        empty after stripping.
    """
    try:
        dd_tags = element.find_all('dd')
        cleaned = dd_tags[index].text.strip()
    except (AttributeError, IndexError):
        return default
    # An empty string is treated the same as a missing value.
    return cleaned or default

def get_metadata_content(soup, meta_name: str, default: str = 'N/A') -> str:
    """Return the ``content`` of the ``<meta itemprop=meta_name>`` tag.

    Args:
        soup: Parsed document exposing ``find`` (e.g. a BeautifulSoup object).
        meta_name: Value of the ``itemprop`` attribute to look for.
        default: Value returned when the content cannot be obtained.

    Returns:
        The whitespace-stripped ``content`` value, or ``default`` when the
        tag is absent, has no ``content`` attribute, or the content is blank.
    """
    try:
        tag = soup.find('meta', {'itemprop': meta_name})
        value = tag['content'].strip()
    except (AttributeError, KeyError, TypeError):
        # TypeError: no matching tag (None is not subscriptable).
        # KeyError: the tag exists but carries no ``content`` attribute.
        # AttributeError: ``soup`` has no ``find`` or content is not a string.
        return default
    # Blank content is reported as missing, matching get_element_text.
    return value if value else default

# Function to Fetch Course Data

def scrape_course_data(course, base_url: str, timeout: float = 10.0) -> Optional[dict]:
    """Build a metadata record for a single course listing entry.

    Reads the title, link, author, subject, material type and abstract from
    the listing element, then downloads the linked lesson page to collect
    its keywords, creation date and resource URL.

    Args:
        course: Parsed listing element (e.g. a BeautifulSoup tag) for one
            course.
        base_url: Prefix prepended to the link found in the listing.
        timeout: Seconds to wait for the lesson page before the request is
            abandoned; without it a stalled server would block forever.

    Returns:
        A dict with the keys ``title``, ``author``, ``subject``,
        ``material_type``, ``description``, ``keywords``, ``date_created``
        and ``resource_url``; optional fields fall back to ``'N/A'``.
        ``None`` when the entry cannot be processed (for example a missing
        title or link, or a failed request); the error is printed.
    """
    try:
        # Title and link are mandatory: a missing one aborts this entry.
        title = course.find('div', class_='item-title').text.strip()
        link = course.find('a', class_='item-link')['href']
        author = get_element_text(course, index=0)
        subject = get_element_text(course, index=1)
        material_type = get_element_text(course, index=2)

        # Look the abstract up once and reuse the result.
        abstract = course.find('div', class_='abstract')
        description = abstract.text.strip() if abstract is not None else 'N/A'

        # Fetch the lesson page and parse its details.
        lesson_url = base_url + link
        response = requests.get(lesson_url, timeout=timeout)
        response.raise_for_status()
        lesson_soup = BeautifulSoup(response.text, 'html.parser')

        keywords = get_metadata_content(lesson_soup, 'keywords')
        date_created = get_metadata_content(lesson_soup, 'dateCreated')

        # An anchor without ``href`` should not discard the whole record.
        goto_link = lesson_soup.find('a', {'id': 'goto'})
        resource_url = goto_link.get('href', 'N/A') if goto_link is not None else 'N/A'

        return {
            "title": title,
            "author": author,
            "subject": subject,
            "material_type": material_type,
            "description": description,
            "keywords": keywords,
            "date_created": date_created,
            "resource_url": resource_url,
        }
    except Exception as e:
        # Best-effort per-entry boundary: report the problem and skip.
        print(f"Error processing course data: {e}")
        return None