In [None]:
import fitz
import pandas as pd
from itertools import groupby
from operator import itemgetter

In [None]:
def recover(words, rect):
    """Join neighbouring text fragments inside ``rect`` into whole words.

    Fragments that share the same bottom coordinate (``y1``) are treated as
    one line. Within a line, a fragment is appended to the pending word when
    its left edge lies no further right than the pending word's right edge
    plus a small threshold; otherwise the pending word is emitted and a new
    one is started.

    Args:
        words: iterable of sequences whose first four items are the fragment
            rectangle ``(x0, y0, x1, y1)`` and whose fifth item is the
            fragment text. Presumably the output of PyMuPDF's
            ``page.get_text("words")`` -- confirm against the caller.
        rect: area of interest; a fragment is kept only if
            ``fitz.Rect(fragment[:4]) in rect`` is true.

    Returns:
        A list of ``[x0, y0, x1, y1, word]`` lists, ordered by line bottom
        coordinate and then by left edge.
    """
    # build sublist of words contained in the given rectangle
    mywords = [w for w in words if fitz.Rect(w[:4]) in rect]

    # sort the words by lower line, then by word start coordinate
    mywords.sort(key=itemgetter(3, 0))  # sort by y1, x0 of word rectangle

    # create word groups on same line
    grouped_lines = groupby(mywords, key=itemgetter(3))

    words_out = []  # will be returned

    # iterate through the grouped lines
    # for each line coordinate ("_"), the list of words is given
    for _, words_in_line in grouped_lines:
        for i, w in enumerate(words_in_line):
            if i == 0:  # store first word
                # all four coordinates plus the text must be unpacked here;
                # x1 is needed by the distance check for the next fragment
                x0, y0, x1, y1, word = w[:5]
                continue

            r = fitz.Rect(w[:4])  # word rect

            # compute word distance threshold as 20% of width of 1 letter.
            # should be safe joining text pieces into one word if the gap is
            # shorter than that
            threshold = r.width / len(w[4]) / 5
            if r.x0 <= x1 + threshold:  # join with previous word
                word += w[4]
                x1 = r.x1  # the joined word now ends where this fragment ends
                # NOTE(review): keeps the larger of the two top coordinates;
                # a bounding-box union would take the smaller -- confirm intent
                y0 = max(y0, r.y0)
                continue

            # now have a new word, output previous one
            words_out.append([x0, y0, x1, y1, word])

            # store the new word
            x0, y0, x1, y1, word = w[:5]

        # output word waiting for completion (every group has >= 1 fragment)
        words_out.append([x0, y0, x1, y1, word])
    return words_out

def search_for(text, words):
    """Collect the rectangles of all words that contain ``text``.

    Args:
        text: substring to look for in each word's string (item 4).
        words: iterable of sequences whose first four items are rectangle
            coordinates and whose fifth item is the word string.

    Returns:
        A list of ``fitz.Rect`` objects, one per matching word, in the order
        the words were given.
    """
    # a word matches when ``text`` occurs anywhere inside its string
    return [fitz.Rect(entry[:4]) for entry in words if text in entry[4]]



In [None]:
def say_hello(name):
    """Return a greeting for ``name``.

    Args:
        name: the name to greet; any falsy value (``None``, ``""``) selects
            the generic greeting.

    Returns:
        ``"Hello there!"`` when ``name`` is falsy, otherwise
        ``"Hello, <name>!"``.
    """
    if not name:
        # plain literal: there is nothing to interpolate here
        return "Hello there!"
    return f"Hello, {name}!"

In [None]:
import unittest

class Test(unittest.TestCase):
    def test_should_say_hello(self):
        self.assertEqual(say_hello("Qualified"),
        "Hello, Qualified")