In [1]:
from urllib import request
from urllib.error import HTTPError
import pandas as pd
import tabula
import os
import shutil
import random
import re
from datetime import date, datetime
import pytesseract
from pdf2image import convert_from_path, convert_from_bytes
from tempfile import TemporaryDirectory
from traceback import print_tb

In [2]:
class TorontoHpiReport:
    """Download TRREB MLS Home Price Index PDF reports and export their tables to CSV."""

    # URL prefix; a report's filename is appended to it to form the download URL.
    base_url = 'https://trreb.ca/files/market-stats/home-price-index/'
    # First month that is considered when enumerating the monthly reports.
    earliest_report_date = date(2012, 2, 1)
    # Regex fragment for an index value: three digits with an optional single decimal.
    # Written as a raw string so that `\.` is a regex escape rather than an
    # unrecognized Python string escape.
    index_regexp_str = r'[0-9]{3}(?:\.[0-9])?'
    # Regex fragment: "Toronto", whitespace, one of W/C/E, then any two characters.
    toronto_neighbourhood_regexp_str = r'Toronto\s+[WCE].{2}'
    
    @staticmethod
    def _is_dir(folder_path: str):
        if not os.path.isdir(folder_path):
            if not os.path.exists(folder_path):
                raise FileNotFoundError
            else:
                raise NotADirectoryError
        else:
            return True
    
    def __init__(self, pdf_folder, output_folder):
        if self._is_dir(pdf_folder):
            self.pdf_folder = pdf_folder
        if self._is_dir(output_folder):
            self.output_folder = output_folder
        
        
    def download_missing_reports(self):
        missing_filenames = self.get_missing_report_filenames()
        for filename in missing_filenames:
            self.download_report_if_exists(filename)
            
    def download_report_if_exists(self, report_filename: str):
        "Download one monthly report, if it exists."
        download_url = self.base_url + report_filename
        local_filename = os.path.join(self.pdf_folder, report_filename)
        try:
            with request.urlopen(download_url) as response:
                with open(local_filename, 'wb') as local_file:
                    shutil.copyfileobj(response, local_file)
        except HTTPError:
            pass
        
    def _get_random_report_filenames(self, k :int=1, after :date=None, before :date=None):
        "Get a random report filename, for testing."
        all_months = self.all_reported_months()
        if after is None:
            after = self.earliest_report_date
        if before is None:
            before = datetime.now().date()
        months_after = [month for month in all_months if before > month > after]
        random_months = random.sample(months_after, k=k)
        
        return [self.format_filename(month) for month in random_months]
    
    def all_reported_months(self) -> list:
        "Return a range of date from the start until today."
        return \
            pd.date_range(start=self.earliest_report_date, 
                          end=datetime.now(), 
                          freq='MS')\
            .map(lambda x: x.date())\
            .tolist()

    def all_reported_filenames(self) -> list:
        "Return a range of date from the start until today."
        return [self.format_filename(month) for month in self.all_reported_months()]

    def get_missing_report_filenames(self) -> list:
        "Return any PDF filenames that are currently not in the folder."
        missing_filenames = []
        for report_date in self.all_reported_months():
            filename = self.format_filename(report_date)
            full_file_path = os.path.join(self.pdf_folder, filename)
            if not os.path.isfile(full_file_path):
                missing_filenames += [filename]
        return missing_filenames

    @staticmethod
    def format_filename(report_date: date) -> str:
        "Return the filename for the month and year in a given date."
        return f'TREB_MLS_HPI_Public_Tables_{report_date:%m}{report_date:%y}.pdf'

    def _extract_index_value_from_cell(self, cell: str) -> str:
        m = re.match('^(' + self.index_regexp_str + ')', cell)
        if m:
            return m[1]
        else:
            return cell

    def _get_table_count_in_report(self, report_filename: str) -> int:
        """Return how many tables tabula finds on page 2 of the given report PDF."""
        pdf_path = os.path.join(self.pdf_folder, report_filename)
        with open(pdf_path, 'rb') as pdf_handle:
            page_tables = tabula.read_pdf(pdf_handle, pages=2)
        return len(page_tables)
    
    def _get_city_of_toronto_table_count_in_report(self, report_filename: str) -> int:
        """Return how many tables tabula finds on page 3 of the given report PDF."""
        pdf_path = os.path.join(self.pdf_folder, report_filename)
        with open(pdf_path, 'rb') as pdf_handle:
            page_tables = tabula.read_pdf(pdf_handle, pages=3)
        return len(page_tables)
    
    def extract_city_of_toronto_from_report(self, report_filename: str) -> pd.DataFrame:
        """Extract the data for city of Toronto neighbourhoods in the PDF to a pandas DataFrame.

        Reads page 3 of the report. Uses the module tabula-py if it finds exactly
        one table on that page (relatively clean format). Otherwise, renders the
        page as PNG and uses Tesseract OCR to extract the data, somewhat dirtier.

        Args:
            report_filename: name of a PDF file inside ``self.pdf_folder``.

        Returns:
            A DataFrame whose first column is 'Area', followed by the index
            columns. On the tabula path the index columns are converted to
            float; on the OCR path they hold the raw OCR strings.
        """
        report_full_path = os.path.join(self.pdf_folder, report_filename)
        index_regexp = re.compile('^' + self.index_regexp_str + '$')
        neighbourhood_regexp = re.compile(self.toronto_neighbourhood_regexp_str)
        with open(report_full_path, 'rb') as report_file:
            dfs = tabula.read_pdf(report_file, pages=3, silent=True)
            if len(dfs) == 1:
                # Use tabula to extract the dataframe
                print("1 table found. Using tabula to extract.")
                df = dfs[0]
                # The named (non-"Unnamed") headers become the index column names.
                correct_column_names = ['Area'] + [col for col in df.columns if not col.startswith('Unnamed')]
                # Keep column 0 plus every column whose first row starts with "Index".
                first_row_values = df.loc[0].tolist()
                index_column_indices = [0] + [idx for idx, val in enumerate(first_row_values) if str(val).strip().startswith('Index')]
                index_column_names = [name for idx, name in enumerate(df.columns) if idx in index_column_indices]
                df = df[index_column_names]
                # Drop the first row, which was only used to locate the columns.
                df = df.iloc[1:]
                df.columns = correct_column_names
                df = df.applymap(self._extract_index_value_from_cell)
                for col in correct_column_names[1:]:
                    df[col] = df[col].replace('-', 'NaN').astype(float)

                city_of_toronto_df = df
            else:
                # Use tesseract OCR to extract the dataframe
                print(f"tabula found {len(dfs)} tables. Switching to Tesseract OCR to extract data.")
                report_file.seek(0)
                images = convert_from_bytes(report_file.read(), fmt='png', dpi=600, first_page=3, last_page=3)
                contents = pytesseract.image_to_string(images[0])
                neighbourhoods = []
                # One inner list per column of index values.
                index_values = [[]]
                started_on_index_value_sequence = False
                column_number = 0
                for line in contents.split('\n'):
                    if neighbourhood_regexp.search(line):
                        neighbourhoods.append(line)
                    elif index_regexp.search(line):
                        # A line holding a single index value belongs to the current column.
                        started_on_index_value_sequence = True
                        try:
                            index_values[column_number].append(line)
                        except IndexError:
                            index_values.append([line])
                    elif started_on_index_value_sequence and line.strip() == '':
                        # A blank line after a run of values moves on to the next column.
                        column_number += 1
                        started_on_index_value_sequence = False
                    elif re.search(' ' + self.index_regexp_str + ' ', line):
                        # Unclassified line that still contains an index value: show it.
                        print(line)
                if len(neighbourhoods) == 1:
                    # All rows came back on one line, separated by '|'.
                    neighbourhoods = neighbourhoods[0].split('|')
                    # Fix: strip surrounding whitespace. The previous
                    # `strip(r'\s+')` removed the literal characters '\', 's'
                    # and '+' instead.
                    neighbourhoods = [neighbourhood.strip() for neighbourhood in neighbourhoods]
                if len(index_values) == 1 and len(index_values[0]) == 0:
                    # No stand-alone value lines were found: the values are inline
                    # in each neighbourhood line. Locate the value positions in
                    # the first line and reuse them for every line.
                    cells = neighbourhoods[0].split(' ')
                    index_values_indices = []
                    for position, cell in enumerate(cells):
                        if index_regexp.match(cell) or cell == 'Zion': # OCR registers 275.5 as Zion
                            index_values_indices.append(position)
                    if len(index_values_indices) == 5:
                        for row_number, neighbourhood in enumerate(neighbourhoods):
                            # Fix: raw replacement string, so "1, 234" becomes
                            # "1,234". The previous non-raw '\1,\2' inserted the
                            # control characters \x01 and \x02.
                            neighbourhood = re.sub('([0-9]), ([0-9])', r'\1,\2', neighbourhood)
                            # Hard-coded repair of one specific OCR misread.
                            neighbourhood = re.sub(r'(25)\s+(om)', '255.7', neighbourhood)
                            cells = neighbourhood.split(' ')
                            # The first two cells form the area name.
                            neighbourhoods[row_number] = ' '.join(cells[:2])
                            for k, j in enumerate(index_values_indices):
                                cell = cells[j]
                                try:
                                    index_values[k].append(cell)
                                except IndexError:
                                    index_values.append([cell])
                correct_column_names = ['Area', 'Composite', 'Single-Family Detached', 'Single-Family Attached', 'Townhouse', 'Apartment']
                dataframe_values = {
                    correct_column_names[0]: neighbourhoods
                }
                for i, column in enumerate(correct_column_names[1:]):
                    dataframe_values[column] = index_values[i]
                city_of_toronto_df = pd.DataFrame(columns=correct_column_names, data=dataframe_values)

            return city_of_toronto_df

        
    def export_city_of_toronto_indices_from_report(self, filename: str) -> bool:
        "Export City of Toronto indices to csv from from one PDF file."
        m = re.search(r'_([0-9]{2})([0-9]{2})\.pdf$', filename)
        if m:
            month, year = m.group(1), m.group(2)
            try:
                df = self.extract_city_of_toronto_from_report(filename)
                print(filename, f"Column count: {len(df.columns)}", f"Row count: {len(df)}")
                assert len(df.columns) == 6
                assert len(df) >= 35
                df.to_csv(os.path.join(self.output_folder, f'city-of-toronto-{year}-{month}.csv'), index=False)
                return True
            except Exception as err:
                print(f"Error while processing {filename}")
                print_tb(err.__traceback__)
                return False

    def export_all_city_of_toronto_indices(self):
        "Export all City of Toronto indices to csv."
        successes = []
        failures = []
        filenames = os.listdir(self.pdf_folder)
        print(f"Found {len(filenames)} files.")
        for i, filename in enumerate(filenames):
            print(f"{i+ 1} of {len(filenames)}")
            result = self.export_city_of_toronto_indices_from_report(filename)
            if result:
                successes.append(filename)
            else:
                failures.append(filename)
        with open(os.path.join(self.output_folder, 'failed-filenames-city-of-toronto.txt'), 'w') as failed_filenames:
            for failure in failures:
                failed_filenames.write(failure +'\n')
        print(f"{len(successes)} files processed successfully. {len(failures)} files had errors. List of missing files in: failed-filenames-city-of-toronto.txt")


    def extract_trreb_area_indices_from_report(self, report_filename: str) -> pd.DataFrame:
        """Extract the area index table on page 2 of a report PDF to a pandas DataFrame.

        Uses tabula-py if it finds exactly one table on the page. Otherwise the
        page is rendered as PNG and parsed line by line from Tesseract OCR output.

        Args:
            report_filename: name of a PDF file inside ``self.pdf_folder``.

        Returns:
            A DataFrame with an 'Area' column followed by index columns. On the
            tabula path the index columns are converted to float; on the OCR
            path they hold the raw OCR strings and a row may lack some columns.
        """
        report_full_path = os.path.join(self.pdf_folder, report_filename)
        index_regexp = re.compile('^' + self.index_regexp_str + '$')
        # Raw string: `\.`, `\-` and `\s` are regex escapes, not string escapes.
        region_name_regexp = re.compile(r'[a-z\.\-\sA-Z]+$')
        with open(report_full_path, 'rb') as report_file:
            dfs = tabula.read_pdf(report_file, pages=2, silent=True)
            if len(dfs) == 1:
                print("1 table found. Using tabula to extract.")
                df = dfs[0]
                # The named (non-"Unnamed") headers become the index column names.
                correct_column_names = ['Area'] + [col for col in df.columns if not col.startswith('Unnamed')]
                # Keep column 0 plus every column whose first row starts with "Index".
                first_row_values = df.loc[0].tolist()
                index_column_indices = [0] + [idx for idx, val in enumerate(first_row_values) if str(val).strip().startswith('Index')]
                index_column_names = [name for idx, name in enumerate(df.columns) if idx in index_column_indices]
                df = df[index_column_names]
                # Drop the first row, which was only used to locate the columns.
                df = df.iloc[1:]
                df.columns = correct_column_names
                df = df.applymap(self._extract_index_value_from_cell)
                for col in correct_column_names[1:]:
                    df[col] = df[col].replace('-', 'NaN').astype(float)

                all_trreb_areas_df = df
            else:
                print(f"tabula found {len(dfs)} tables. Switching to Tesseract OCR to extract data.")
                report_file.seek(0)
                images = convert_from_bytes(report_file.read(), fmt='png', dpi=300, first_page=2, last_page=2)
                contents = pytesseract.image_to_string(images[0])
                correct_column_names = ['Area', 'Composite', 'Single-Family Detached', 'Single-Family Attached', 'Townhouse', 'Apartment']
                rows = []
                for line in contents.split('\n'):
                    line = line.replace('|', ' ')
                    # Re-join thousands groups that OCR split as "1, 234".
                    line = re.sub('([0-9]), ([0-9])', r'\1,\2', line)
                    cells = line.split(' ')
                    cells = [cell.strip().strip('.').strip('-') for cell in cells]
                    if len(cells) > 5:
                        alphanumeric_cells = []
                        index_value_cells = []
                        for cell in cells:
                            if region_name_regexp.match(cell):
                                alphanumeric_cells.append(cell)
                            if index_regexp.match(cell):
                                index_value_cells.append(cell)

                        # Lines with fewer than two index values are not table rows.
                        if len(index_value_cells) >= 2:
                            row = {
                                correct_column_names[0]: ' '.join(alphanumeric_cells)
                            }
                            if len(index_value_cells) in (2, 3, 5):
                                # Values fill the index columns left to right;
                                # zip stops after the last available value.
                                for column, value in zip(correct_column_names[1:], index_value_cells):
                                    row[column] = value
                            else:
                                # Unrecognized number of values: show the cells.
                                # NOTE(review): the row is still appended with only
                                # 'Area' set, exactly as before; confirm whether such
                                # rows should be dropped instead.
                                print(cells)
                            rows.append(row)
                all_trreb_areas_df = pd.DataFrame(rows)

            return all_trreb_areas_df


    def export_trreb_area_indices_from_report(self, filename: str) -> bool:
        "Export TRREB area indices to csv from from one PDF file."
        m = re.search(r'_([0-9]{2})([0-9]{2})\.pdf$', filename)
        if m:
            month, year = m.group(1), m.group(2)
            try:
                df = self.extract_trreb_area_indices_from_report(filename)
                print(filename, f"Column count: {len(df.columns)}", f"Row count: {len(df)}")
                assert len(df.columns) == 6
                assert len(df) >= 30
                df.to_csv(os.path.join(self.output_folder, f'all-trreb-areas-{year}-{month}.csv'), index=False)
                return True
            except Exception as err:
                print(f"Error while processing {filename}")
                print_tb(err.__traceback__)
                return False

    def export_all_trreb_areas_indices(self):
        successes = []
        failures = []
        filenames = os.listdir(self.pdf_folder)
        print(f"Found {len(filenames)} files.")
        for i, filename in enumerate(filenames):
            print(f"{i + 1} of {len(filenames)}")
            result = self.export_trreb_area_indices_from_report(filename)
            if result:
                successes.append(filename)
            else:
                failures.append(filename)
        with open(os.path.join(self.output_folder, 'failed-filenames-all-treb-areas.txt'), 'w') as failed_filenames:
            for failure in failures:
                failed_filenames.write(failure +'\n')
        print(f"{len(successes)} files processed successfully. {len(failures)} files had errors. List of missing files in: failed-filenames-all-treb-areas.txt")





In [3]:
# Create the helper for the raw-PDF and raw-data folders; the constructor
# raises if either path is missing or is not a directory.
hpi = TorontoHpiReport(pdf_folder='../01.raw-pdf/', output_folder='../02.raw-data/')
# Builds the filename of the earliest report; the return value is not stored.
hpi.format_filename(hpi.earliest_report_date)
# Try to download every monthly report that is not yet in the PDF folder.
hpi.download_missing_reports()

In [4]:
# Spot-check both exporters on a single report (the 0221 suffix is parsed by the
# exporters as month 02, year 21).
report_filename = 'TREB_MLS_HPI_Public_Tables_0221.pdf'
# Exports the page-2 area table to CSV in the output folder.
hpi.export_trreb_area_indices_from_report(report_filename)
# Exports the page-3 City of Toronto table to CSV in the output folder.
hpi.export_city_of_toronto_indices_from_report(report_filename)

tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '293.1', '1,076,300', '26.72%', '296.1', '1,125,600', '27.74%', '324.8', '802,400', '22.06%', '', '', '', '251.9', '612,400', '3.11%']
['York', 'Region', '309.9', '1,047,700', '17.16%', '318.6', '1,213,300', '20.23%', '932,200', '20.15%', '280.7', '728,500', '11.04%', '260.2', 'Ey', 'sJ', 'Ro', 'tO)', '5.39%']
['King', '274.7', '1,149,300', '22.03%', '276.5', '1,150,200', '22.89%', '270.7', '634,000', '19.04%', '', '', '', '256.5', '701,600', '3.47%']
TREB_MLS_HPI_Public_Tables_0221.pdf Column count: 6 Row count: 31
tabula found 18 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0221.pdf Column count: 6 Row count: 35


True

In [5]:
# Run the TRREB area export over every file in the PDF folder; files that fail
# are listed in failed-filenames-all-treb-areas.txt in the output folder.
hpi.export_all_trreb_areas_indices()

Found 110 files.
1 of 110
1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0113.pdf
2 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0114.pdf Column count: 6 Row count: 2
Error while processing TREB_MLS_HPI_Public_Tables_0114.pdf
3 of 110


  File "<ipython-input-2-c68d9f243969>", line 325, in export_trreb_area_indices_from_report
    assert len(df) >= 30


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0115.pdf Column count: 3 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_0115.pdf
4 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0116.pdf Column count: 6 Row count: 38
5 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0117.pdf Column count: 6 Row count: 38
6 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0118.pdf Column count: 6 Row count: 38
7 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0119.pdf Column count: 6 Row count: 38
8 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '226.0', '829,900', '5.85%', '225.8', '858,300', '5.86%', '261.1', '645,000', '6.57%', '', '', '', '232.7', '565,700', '3.88%']
['King', '224.2', '938,000', '0.99%', '224.8', '935,200', '0.76%', '221.0', '517,600', '0.18%', '', '', '', '236.7', '647,400', '8.48%']
['[Durham', 'Region', '_—_—', '', '249.6', '', '', '583,000', '', '', '6.80%', '', '', '243.1', '', '', '631,700', '', '', '6.25%', '', '', '257.5', '', '', '514,300', '', '', '6.98%', '', '', '2

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0213.pdf
12 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0214.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0214.pdf
13 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0215.pdf Column count: 3 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0215.pdf
14 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0216.pdf Column count: 6 Row count: 38
15 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0217.pdf Column count: 6 Row count: 38
16 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0218.pdf Column count: 6 Row count: 38
17 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0219.pdf Column count: 6 Row count: 38
18 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '231.3', '849,400', '5.96%', '231.7', '880,800', '5.99%', '266.1', '657,400', '7.56%', '', '', '', '244.5', '594,400', '6.03%']
['York', 'Region', '263.5', '889,000', 'eT', '264.0', '1,004,600', '5.60%', '265.4', '767,800', '7m', 'ly/)', '250.8', '644,000', 'EM', 'oh', '7)', 'VX', 'TM', 'I}', '537,300', 'aOR]']
['King', '225.1', '941,700', '1.26%', '224.9', '935,600', '0.76%', '227.4', '532,600', '4.07%', '', '', '', '247.9', '678,100', '9.11%']
TREB_M

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0313.pdf
22 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0314.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0314.pdf
23 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0315.pdf Column count: 3 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_0315.pdf
24 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0316.pdf Column count: 6 Row count: 38
25 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0317.pdf Column count: 6 Row count: 38
26 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0318.pdf Column count: 6 Row count: 38
27 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0319.pdf Column count: 6 Row count: 38
28 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '237.2', '871,100', '10.27%', '237.5', '902,800', '10.36%', '270.9', '669,200', '11.67%', '', '', '', '248.3', '603,600', '6.34%']
['King', '225.7', '944,200', '2.17%', '225.1', '936,400', '2.85%', '229.5', '537,500', '0.44%', '', '', '', '250.2', '684,400', '8.50%']
TREB_MLS_HPI_Public_Tables_0320.pdf Column count: 6 Row count: 31
29 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '305.1', '1,120,400', '28.63%', 

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0413.pdf
32 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0414.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0414.pdf
33 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0415.pdf Column count: 4 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_0415.pdf
34 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0416.pdf Column count: 6 Row count: 38
35 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0417.pdf Column count: 6 Row count: 38
36 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0418.pdf Column count: 6 Row count: 38
37 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0419.pdf Column count: 6 Row count: 38
38 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '236.0', '866,600', '9.36%', '236.5', '899,000', '9.85%', '270.5', '668,200', '9.83%', '', '', '', '251.2', '610,700', '7.44%']
['King', '228.9', '957,600', '0.74%', '228.2', '949,300', '1.64%', '235.9', '552,500', '5.08%', '', '', '', '253.3', '692,800', '9.18%']
TREB_MLS_HPI_Public_Tables_0420.pdf Column count: 6 Row count: 30
39 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '312.8', '1,148,700', '32.54%', '31

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0513.pdf
42 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0514.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0514.pdf
43 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0515.pdf Column count: 3 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0515.pdf
44 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0516.pdf Column count: 6 Row count: 38
45 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0517.pdf Column count: 6 Row count: 38
46 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0518.pdf Column count: 6 Row count: 38
47 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0519.pdf Column count: 6 Row count: 38
48 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '235.7', '865,500', '8.42%', '236.1', '897,500', '8.95%', '269.9', '666,800', '7.83%', '', '', '', '248.5', '604,100', '4.54%']
['York', 'Region', '276.0', '932,000', 'May)', '276.8', '1,052,600', 'Terk', '278.5', '805,700', 'mY', 'av)', 'yA}', '666,700', '7.85%', '257.6', '574,700', '42']
['King', '234.8', '982,400', '1.34%', '234.4', '975,100', '0.77%', '243.5', '570,300', '5.69%', '', '', '', '252.1', '689,500', '6.96%']
TREB_MLS_HPI_Public_Tables_0

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0613.pdf Column count: 4 Row count: 1
Error while processing TREB_MLS_HPI_Public_Tables_0613.pdf
51 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0614.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0614.pdf
52 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0615.pdf Column count: 3 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_0615.pdf
53 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0616.pdf Column count: 6 Row count: 38
54 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0617.pdf Column count: 6 Row count: 38
55 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0618.pdf Column count: 6 Row count: 38
56 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0619.pdf Column count: 6 Row count: 38
57 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '235.8', '865,900', '7.33%', '236.4', '898,600', '7.95%', '270.8', '669,000', '6.66%', '', '', '', '244.0', '593,200', '4.50%']
['King', '234.0', '979,000', '2.36%', '233.9', '973,000', '1.92%', '238.8', '559,300', '6.75%', '', '', '', '247.4', '676,700', '5.37%']
TREB_MLS_HPI_Public_Tables_0620.pdf Column count: 6 Row count: 32
58 of 110
1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0712.pdf
59 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0713.pdf Column count: 4 Row count: 5
Error while processing TREB_MLS_HPI_Public_Tables_0713.pdf
60 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_0714.pdf
61 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0715.pdf Column count: 6 Row count: 38
62 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0716.pdf Column count: 6 Row count: 38
63 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0717.pdf Column count: 6 Row count: 38
64 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0718.pdf Column count: 6 Row count: 38
65 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0719.pdf Column count: 6 Row count: 38
66 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '244.3', '897,100', '11.10%', '246.1', '935,500', '12.17%', '279.0', '689,200', '9.93%', '', '', '', '254.9', '619,700', '8.65%']
['King', '244.9', '1,024,600', '4.97%', '244.9', '1,018,800', '4.66%', '248.2', '581,300', '7.45%', '', '', '', '259.9', '710,900', '10.31%']
['', 'Durham', 'Region', '', '', '_274.5', '', '', '639,700', '', ''

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
['Dufferin', 'County', '146.3', '$335,300', '3.83%', '150.0', '$341,800', '3.59%', '147.4', '—', '300', '3', '—']
TREB_MLS_HPI_Public_Tables_0813.pdf Column count: 4 Row count: 5
Error while processing TREB_MLS_HPI_Public_Tables_0813.pdf
69 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
['169.7', '', '', '$631,000', '8.64%', '174.9', '', '', '$494,100', '8.30%', '', '', '', '163.8', '', '', '$358,800', '7.55%', '156.6', '', '', '$314,500', '4.54%']
TREB_MLS_HPI_Public_Tables_0814.pdf Column count: 4 Row count: 5
Error while processing TREB_MLS_HPI_Public_Tables_0814.pdf
70 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0815.pdf Column count: 6 Row count: 38
71 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0816.pdf Column count: 6 Row count: 38
72 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0817.pdf Column count: 6 Row count: 38
73 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0818.pdf Column count: 6 Row count: 38
74 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0819.pdf Column count: 6 Row count: 38
75 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '247.8', '910,000', '12.03%', '248.7', '945,400', '12.38%', '285.0', '704,100', '12.74%', ':', ':', ':', '251.6', '611,700', '7.98%']
['King', '249.7', '1,044,700', '7.96%', '250.0', '1,040,000', '7.67%', '249.0', '583,200', '8.45%', '', '', '', '256.5', '701,600', '10.23%']
TREB_MLS_HPI_Public_Tables_0820.pdf Column count: 6 Row count: 3

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0913.pdf Column count: 4 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_0913.pdf
78 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0914.pdf Column count: 3 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_0914.pdf
79 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0915.pdf Column count: 6 Row count: 38
80 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0916.pdf Column count: 6 Row count: 38
81 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0917.pdf Column count: 6 Row count: 38
82 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0918.pdf Column count: 6 Row count: 38
83 of 110
Error while processing TREB_MLS_HPI_Public_Tables_0919.pdf
84 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 243, in extract_trreb_area_indices_from_report
    dfs = tabula.read_pdf(report_file, pages=2, silent=True)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\tabula\io.py", line 317, in read_pdf
    raise ValueError(


tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '257.3', '944,900', '15.48%', '259.0', '984,500', '15.63%', '291.4', '719,900', '16.56%', '', '', '', '251.8', '612,100', '6.79%']
['York', 'Region', '284.3', '960,000', '1-97)', '288.0', '1,095,200', '4a', 'EW)', '287.0', '830,300', '12.02%', '268.6', '697,100', 'aM)', 'Vite', '570,500', 'WA)']
['King', '257.9', '1,079,000', '12.62%', '259.0', '1,077,400', '12.80%', '255.2', '597,700', '11.34%', '', '', '', '254.8', '696,900', '8.61%']
TREB_MLS_HPI_Public_Tables_0920.pdf Column count: 6 Row count: 31
85 of 110
1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_1012.pdf
86 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_1013.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_1013.pdf
87 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_1014.pdf Column count: 3 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_1014.pdf
88 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1015.pdf Column count: 6 Row count: 38
89 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1016.pdf Column count: 6 Row count: 38
90 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1017.pdf Column count: 6 Row count: 38
91 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1018.pdf Column count: 6 Row count: 38
92 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1019.pdf Column count: 6 Row count: 38
93 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '262.0', '962,100', '16.03%', '264.3', '1,004,700', '16.28%', '296.3', '732,000', '17.16%', '', '', '', '250.1', '608,000', '9.07%']
['York', 'Region', '285.5', '964,100', 'Vath', '289.5', '1,100,900', 'MVR', 'ty)', '288.9', '835,800', '4', 'VY)', '269.6', '699,700', '4a', 'EW)', 'YAIR}', 'boy', 'o}¢', 'Mol', 'tt)', '9.57%']
['King', '260

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_1113.pdf Column count: 4 Row count: 3
Error while processing TREB_MLS_HPI_Public_Tables_1113.pdf
96 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


tabula found 3 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_1114.pdf Column count: 3 Row count: 4
Error while processing TREB_MLS_HPI_Public_Tables_1114.pdf
97 of 110


  File "<ipython-input-2-c68d9f243969>", line 324, in export_trreb_area_indices_from_report
    assert len(df.columns) == 6


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1115.pdf Column count: 6 Row count: 38
98 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1116.pdf Column count: 6 Row count: 38
99 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1117.pdf Column count: 6 Row count: 38
100 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1118.pdf Column count: 6 Row count: 38
101 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1119.pdf Column count: 6 Row count: 38
102 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Caledon', '264.8', '972,400', '17.38%', '266.9', '1,014,600', '17.89%', '295.5', '730,000', '16.11%', '', '', '', '245.9', '597,800', '4.91%']
['King', '257.4', '1,076,900', '11.82%', '258.6', '1,075,800', '12.14%', '257.2', '602,400', '12.66%', '', '', '', '250.3', '684,600', '5.08%']
TREB_MLS_HPI_Public_Tables_1120.pdf Column count: 6 Row coun

  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
Error while processing TREB_MLS_HPI_Public_Tables_1214.pdf
105 of 110


  File "<ipython-input-2-c68d9f243969>", line 322, in export_trreb_area_indices_from_report
    df = self.extract_trreb_area_indices_from_report(filename)
  File "<ipython-input-2-c68d9f243969>", line 257, in extract_trreb_area_indices_from_report
    df = df.applymap(self._extract_index_value_from_cell)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7844, in applymap
    return self.apply(infer).__finalize__(self, "applymap")
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\frame.py", line 7768, in apply
    return op.get_result()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 185, in get_result
    return self.apply_standard()
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\pandas\core\apply.py", line 276, in apply_standard
    results, res_index = self.apply_series_generator()
  File "C:\Users\Sin

1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1215.pdf Column count: 6 Row count: 38
106 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1216.pdf Column count: 6 Row count: 38
107 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1217.pdf Column count: 6 Row count: 38
108 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1218.pdf Column count: 6 Row count: 38
109 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1219.pdf Column count: 6 Row count: 38
110 of 110
tabula found 17 tables. Switching to Tesseract OCR to extract data.
['Milton', '314.5', '931,800', '17.26%', '316.3', '1,131,000', '19.77%', 'SOR', '810,800', '19.95%', '320.7', '552,000', '18.73%', '291.4', '557,800', '8.37%']
['Caledon', '266.6', '979,000', '18.23%', '269.1', '1,022,900', '18.91%', '298.0', '736,200', '16.18%', '', '', '', '247.3', '601,200', '6.64%']
['King', '256.2', '1,071,900', '13.92%', '

In [449]:
hpi.export_all_city_of_toronto_indices()

Found 110 files.
1 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0113.pdf Column count: 6 Row count: 37
2 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0114.pdf Column count: 6 Row count: 37
3 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0115.pdf Column count: 6 Row count: 37
4 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0116.pdf Column count: 6 Row count: 37
5 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0117.pdf Column count: 6 Row count: 37
6 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0118.pdf Column count: 6 Row count: 37
7 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0119.pdf Column count: 6 Row count: 37
8 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0120.pdf Column count: 6 Row count: 35
9 of 110
tabula found 18 tables. Sw

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0314.pdf Column count: 6 Row count: 37
23 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0315.pdf Column count: 6 Row count: 37
24 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0316.pdf Column count: 6 Row count: 37
25 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0317.pdf Column count: 6 Row count: 37
26 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0318.pdf Column count: 6 Row count: 37
27 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0319.pdf Column count: 6 Row count: 37
28 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
aOR} 275.7 1,209,800 Teh 978,700 se ae WAR TITH 6.68% 625,600 Pal,
TREB_MLS_HPI_Public_Tables_0320.pdf Column count: 6 Row count: 35
29 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
City of Toronto 317.4 1,0

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0414.pdf Column count: 6 Row count: 37
33 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0415.pdf Column count: 6 Row count: 37
34 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0416.pdf Column count: 6 Row count: 37
35 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0417.pdf Column count: 6 Row count: 37
36 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0418.pdf Column count: 6 Row count: 37
37 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0419.pdf Column count: 6 Row count: 37
38 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
TA ty 295.7 985,700 8.24% 289.8 689,900 8.01% 307.6 eM LIT H Pad
City of Toronto 297.5 964,600 10.43% 278.3
TREB_MLS_HPI_Public_Tables_0420.pdf Column count: 6 Row count: 35
39 of 110
tabula found 18 tables. Switching to Tesseract OCR t

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0514.pdf Column count: 6 Row count: 37
43 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0515.pdf Column count: 6 Row count: 37
44 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0516.pdf Column count: 6 Row count: 37
45 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0517.pdf Column count: 6 Row count: 37
46 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0518.pdf Column count: 6 Row count: 37
47 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0519.pdf Column count: 6 Row count: 37
48 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
966,900 9.79% 278.5 7.57% 294.7 982,300 TE Ky ate TH 8.87% 307.6 RRA SITY MP
TREB_MLS_HPI_Public_Tables_0520.pdf Column count: 6 Row count: 35
49 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0612.pdf Column 

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0714.pdf Column count: 6 Row count: 37
61 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0715.pdf Column count: 6 Row count: 37
62 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0716.pdf Column count: 6 Row count: 37
63 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0717.pdf Column count: 6 Row count: 37
64 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0718.pdf Column count: 6 Row count: 37
65 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0719.pdf Column count: 6 Row count: 37
66 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
TREB_MLS_HPI_Public_Tables_0720.pdf Column count: 6 Row count: 35
67 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0812.pdf Column count: 6 Row count: 37
68 of 110
tabula found 2 tables. Switching to Tesserac

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0814.pdf Column count: 6 Row count: 37
70 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0815.pdf Column count: 6 Row count: 37
71 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0816.pdf Column count: 6 Row count: 37
72 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0817.pdf Column count: 6 Row count: 37
73 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0818.pdf Column count: 6 Row count: 37
74 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0819.pdf Column count: 6 Row count: 37
75 of 110
tabula found 18 tables. Switching to Tesseract OCR to extract data.
City of Toronto 299.7 971,800 9.34% 288.8 1,267,200 11.85%
TREB_MLS_HPI_Public_Tables_0820.pdf Column count: 6 Row count: 35
76 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0912.pdf Column count: 6 Row count

  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 195, in extract_city_of_toronto_from_report
    dataframe_values[column] = index_values[i]


1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0914.pdf Column count: 6 Row count: 37
79 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0915.pdf Column count: 6 Row count: 37
80 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0916.pdf Column count: 6 Row count: 37
81 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0917.pdf Column count: 6 Row count: 37
82 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_0918.pdf Column count: 6 Row count: 37
83 of 110
Error while processing TREB_MLS_HPI_Public_Tables_0919.pdf
84 of 110


  File "<ipython-input-444-969ac060c470>", line 207, in export_city_of_toronto_indices_from_report
    df = self.extract_city_of_toronto_from_report(filename)
  File "<ipython-input-444-969ac060c470>", line 113, in extract_city_of_toronto_from_report
    dfs = tabula.read_pdf(report_file, pages=3, silent=True)
  File "C:\Users\Sinan Ozel\GitHub\toronto-msi\.toronto-msi\lib\site-packages\tabula\io.py", line 317, in read_pdf
    raise ValueError(


tabula found 18 tables. Switching to Tesseract OCR to extract data.
971,500 Aa 1,273,800 Va 307.1 1,023,600 11.23% 729,200 11.27% 619,200 6.79%
TREB_MLS_HPI_Public_Tables_0920.pdf Column count: 6 Row count: 35
85 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1012.pdf Column count: 6 Row count: 37
86 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1013.pdf Column count: 6 Row count: 37
87 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1014.pdf Column count: 6 Row count: 37
88 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1015.pdf Column count: 6 Row count: 37
89 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1016.pdf Column count: 6 Row count: 37
90 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1017.pdf Column count: 6 Row count: 37
91 of 110
1 table found. Using tabula to extract.
TREB_MLS_HPI_Public_Tables_1018.pdf Column c