**Part 1. Get Shakespeare’s sonnets**

In [None]:
import requests
import json
import nltk
from nltk import PorterStemmer
# Ask PoetryDB for poems with author "Shakespeare" and title "Sonnet".
# The timeout stops the notebook from hanging forever on a stalled connection,
# and raise_for_status() reports an HTTP error here instead of letting an
# error page fail later inside json.loads() with a misleading message.
sonnets_import = requests.get(
    "https://poetrydb.org/author,title/Shakespeare;Sonnet",
    timeout=30,
)
sonnets_import.raise_for_status()
sonnets_text = json.loads(sonnets_import.text)

**Part 2. Convert the list of dictionaries to a list of Sonnet instances**

**Part 3. Add tokenization to your Sonnet class**

**Part 4. Add stemming to the tokenization process**

**Part 6.1 Adding a Document class**

In [None]:
class Document:
    """A text, stored line by line, that can be reduced to stemmed tokens."""

    # Punctuation removed from both ends of every token.
    _PUNCTUATION = ".,':;!?"

    def __init__(self, lines: list[str]):
        """Store the raw text of the document.

        Args:
            lines: The document's text, one string per line.
        """
        self.lines = lines

    def tokenize(self) -> list[str]:
        """Return the document's words as lowercase, stemmed tokens.

        Every line is lowercased and split on whitespace. Punctuation is
        stripped from both ends of each individual word; characters inside
        a word (e.g. the apostrophe in "winter's") are kept. Words that
        consist of punctuation only are dropped.

        Returns:
            The Porter-stemmed tokens, in reading order.
        """
        stemmer = PorterStemmer()
        tokens = []
        for line in self.lines:
            for word in line.lower().split():
                # Strip per word, not per line: stripping the whole line only
                # touches its first and last characters, which would leave
                # tokens such as "self;" or "but," in the result.
                word = word.strip(self._PUNCTUATION)
                if word:
                    tokens.append(stemmer.stem(word))
        return tokens

class Sonnet(Document):
    """A sonnet built from a poem record with "title", "author" and "lines"."""

    def __init__(self, dict):
        """Populate the sonnet from one poem record.

        The record's title is split at the first ": ". The part before it,
        minus its first six characters, is parsed as the integer id
        (titles are expected to look like "Sonnet 13: ..."); the part
        after it becomes the title.

        Args:
            dict: Mapping providing the "title", "author" and "lines" keys.
        """
        heading, *remainder = dict["title"].split(": ", 1)
        number_text = heading[6:]
        self.id = int(number_text)
        self.title = remainder[0]
        self.author = dict["author"]
        super().__init__(dict["lines"])
        self.linecount = len(self.lines)

    def __str__(self):
        """Return the "Sonnet <id>: <title>" header followed by the lines."""
        header = f"Sonnet {self.id}: {self.title}"
        body = "\n".join(self.lines)
        return header + "\n" + body

# Wrap every downloaded poem record in a Sonnet instance.
sonnets = list(map(Sonnet, sonnets_text))

**Part 5. Creating the inverted index (3 points)**

**Part 6.2 Adding a Query class**

**Part 7. Adding the search to the Index**

In [None]:
class Query(Document):
    """A search query, held as a one-line Document so it tokenizes the same way."""

    def __init__(self, query: str):
        """Store the query string as the document's only line.

        Args:
            query: The raw search text.
        """
        single_line = [query]
        super().__init__(single_line)

class Index(dict[str, set[int]]):
    """Inverted index: maps each token to the ids of the documents containing it."""

    def __init__(self, documents: "list[Sonnet]"):
        """Build the index from the given documents.

        Args:
            documents: Objects exposing an ``id`` attribute and a
                ``tokenize()`` method; each one is indexed via ``add``.
        """
        super().__init__()
        self.documents = documents
        # Documents are looked up by id, never by list position: the order of
        # ``documents`` is not guaranteed to follow the ids.
        self._documents_by_id = {}
        for document in documents:
            self.add(document)

    def add(self, document: "Sonnet"):
        """Index one document under each of its tokens.

        Args:
            document: Object exposing ``id`` and ``tokenize()``.

        Returns:
            The index itself, so calls can be chained.
        """
        self._documents_by_id[document.id] = document
        for token in document.tokenize():
            self.setdefault(token, set()).add(document.id)
        return self

    def get_set_for_token(self, token: str) -> set[int]:
        """Return the ids indexed under ``token`` (an empty set if unknown)."""
        return self.get(token, set())

    def search(self, query: "Query") -> list[int]:
        """Find the documents that contain every token of the query.

        The result is also printed through ``pretty_print``.

        Args:
            query: Object exposing ``tokenize()`` and ``lines``.

        Returns:
            The matching document ids in ascending order; an empty list
            when the query yields no tokens.
        """
        postings = [self.get_set_for_token(token) for token in query.tokenize()]
        # set.intersection() raises TypeError when given no sets at all, so a
        # query without tokens (e.g. blank input) is treated as matching nothing.
        matches = sorted(set.intersection(*postings)) if postings else []
        self.pretty_print(matches, query)
        return matches

    def pretty_print(self, ids: list[int], query: "Query"):
        """Print a summary line followed by every matching document.

        Args:
            ids: Ids of the matching documents, in the order to print them.
            query: The query that was searched for; its ``lines`` are echoed.
        """
        id_list = ", ".join(str(doc_id) for doc_id in ids)
        print(f'Your search for "{" ".join(query.lines)}" matched {len(ids)} sonnet(s) ({id_list}):\n')
        for doc_id in ids:
            print(self._documents_by_id[doc_id], "\n")

index = Index(sonnets)

**Part 8. Adding the user interface**


In [None]:
print("Reading sonnets...")


def request_input():
    """Prompt the user for a search string and return what was typed."""
    return input("Search for sonnets ('q' to quit)> ")


# Keep prompting and searching until the user enters exactly "q".
while (user_input := request_input()) != "q":
    index.search(Query(user_input))

Reading sonnets...
Your search for "take give" matched 2 sonnet(s) (37, 77):

Sonnet 13: O! that you were your self; but, love you are
O! that you were your self; but, love you are
No longer yours, than you your self here live:
Against this coming end you should prepare,
And your sweet semblance to some other give:
So should that beauty which you hold in lease
Find no determination; then you were
Yourself again, after yourself's decease,
When your sweet issue your sweet form should bear.
Who lets so fair a house fall to decay,
Which husbandry in honour might uphold,
Against the stormy gusts of winter's day
And barren rage of death's eternal cold?
  O! none but unthrifts. Dear my love, you know,
  You had a father: let your son say so. 

Sonnet 52: So am I as the rich, whose blessed key
So am I as the rich, whose blessed key,
Can bring him to his sweet up-locked treasure,
The which he will not every hour survey,
For blunting the fine point of seldom pleasure.
Therefore are feasts so sol