In [28]:
import requests
from bs4 import BeautifulSoup
import re

In [95]:
class Game(object):
    """Hold and parse the data from a single Jeopardy game page.

    The page at ``url`` is downloaded and parsed once, in the constructor;
    ``get_questions`` then reads clues out of the parsed document.
    """

    # Round names accepted by get_questions(); each maps to the element
    # whose id is "<round_name>_round".
    _ROUND_NAMES = ("jeopardy", "double_jeopardy", "final_jeopardy")

    # Locates the correct response inside the *serialized* markup of a clue.
    # The response sits in an <em class="correct_response"> element whose
    # markup is entity-escaped (&lt;em class=&quot;correct_response&quot;&gt;
    # ...&lt;/em&gt;) in the string this class parses.  The unescaped and
    # backslash-quoted spellings are accepted as well -- presumably other
    # page variants use them; TODO confirm against real pages.
    _ANSWER_PATTERN = re.compile(
        r"correct_response"
        r"\\?(?:&quot;|[\"'])\s*(?:&gt;|>)"  # end of class attribute + tag
        r"(.*?)"                              # the response text itself
        r"(?:&lt;|<)/em\s*(?:&gt;|>)",        # the closing </em>
        re.DOTALL,
    )

    def __init__(self, url):
        """Download and parse the game page.

        Args:
          url: (string) address of the game page.  It must contain at least
            one "=": the text after the first "=" (up to the next "=", if
            any) is stored as the game id.

        Raises:
          IndexError: if url contains no "=".
        """
        self._url = url
        self._game_id = url.split("=")[1]
        self._page = requests.get(url)
        self._parsed_page = BeautifulSoup(self._page.content, "lxml")

    def get_questions(self, round_name):
        """Method to return all of the questions and answers for a given round.

        Args:
          round_name: (string) one of jeopardy, double_jeopardy, final_jeopardy.

        Returns:
          list of (question, answer) tuples, unformatted: the question is the
          text of the clue's "clue_text" element and the answer is the raw
          text found between the correct_response tag and its closing </em>
          in the serialized clue (entities are not decoded).  Clue cells that
          have no "clue_text" element are left out, so the list can be
          shorter than the number of cells in the round.

        Raises:
          AssertionError: if round_name is not a known round, or the page
            does not have exactly one table for the round with the expected
            number of clue cells (1 for final_jeopardy, 30 otherwise).
          IndexError: if a clue has text but no correct response is found.
        """
        # NOTE: these stay plain asserts so callers keep seeing the same
        # exception type as before; the messages only make failures readable.
        assert round_name in self._ROUND_NAMES, (
            "unknown round name: %r" % (round_name,))

        tables = self._parsed_page.find_all(id=round_name + "_round")
        assert len(tables) == 1, (
            "expected exactly one %s_round element, found %d"
            % (round_name, len(tables)))

        clues = tables[0].find_all(class_="clue")
        expected_count = 1 if round_name == "final_jeopardy" else 30
        assert len(clues) == expected_count, (
            "expected %d clue cells in %s, found %d"
            % (expected_count, round_name, len(clues)))

        questions = []
        for clue in clues:
            question = clue.find(class_="clue_text")
            if question is None:
                # A cell without clue text (presumably a clue that was never
                # revealed -- confirm against the page markup) has nothing to
                # report; skip it instead of failing on ``None.text``.
                continue
            questions.append((question.text, self._extract_answer(str(clue))))

        return questions

    @classmethod
    def _extract_answer(cls, clue_markup):
        """Return the correct response embedded in one serialized clue.

        Matching the whole span up to the closing </em> keeps responses that
        themselves contain "/" or ";" (for example "AC/DC") intact.

        Args:
          clue_markup: (string) the clue element rendered with str().

        Returns:
          The raw response text, exactly as it appears in clue_markup.

        Raises:
          IndexError: if clue_markup holds no correct_response span.
        """
        match = cls._ANSWER_PATTERN.search(clue_markup)
        if match is None:
            raise IndexError("no correct_response found in clue markup")
        return match.group(1)

In [103]:
class Season(object):
    """Parse the page for one season of Jeopardy.

    The page at ``url`` is downloaded and parsed once, in the constructor;
    ``get_links`` then reads anchors out of the parsed document.
    """

    def __init__(self, url):
        """Download and parse the season page.

        Args:
          url: (string) address of the season page.
        """
        self._url = url
        self._page = requests.get(url)
        self._parsed_page = BeautifulSoup(self._page.content, "lxml")

    def get_links(self):
        """Return the href of every anchor on the page that mentions a game id.

        Returns:
          list of strings, in document order: the "href" value of each <a>
          element whose href contains the substring "game_id" (presumably
          links to individual game pages -- the values are returned as found,
          not resolved against the season URL).
        """
        hrefs = (anchor.get("href")
                 for anchor in self._parsed_page.find_all("a"))
        # An <a> without an href attribute yields None; drop those before the
        # substring test, which would otherwise raise TypeError.
        return [href for href in hrefs if href and "game_id" in href]