In [1]:
pip install beautifulsoup4 selenium


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install selenium --upgrade


Note: you may need to restart the kernel to use updated packages.


This program is designed to gather property links from a real estate website using automated web browsing tools, specifically a library called Selenium. This lets the program act like a person using a web browser, scrolling through pages, waiting for new content to appear, and then grabbing the links to properties.

The program is smart enough to handle lots of pages efficiently. Instead of going through each page one by one, it can work on several pages at the same time. This is done using something called multithreading, which is like having multiple hands all working together. And if the program runs into any problems, like a page not loading properly, it will try again a few times before moving on.

Once all the property links are collected, the program shows them so you can see the results. This makes it easy to check the work and use those links for other things later on.

In [67]:
import requests

In [14]:
import time
import threading
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import WebDriverException

MAX_RETRIES = 3

def extract_links(driver):
    links = []

    max_scrolls = 20
    scrolls = 0

    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
    time.sleep(20)

    while True:
        try:
            house_links_elements = driver.find_elements(By.CSS_SELECTOR, "a.ember-view.listing-tile-info")

            for link in house_links_elements:
                link_href = link.get_attribute('href')
                if link_href not in links:
                    links.append(link_href)

            if len(links) >= 10:
                break

            driver.execute_script("window.scrollBy(0, 500);")
            time.sleep(3)

            scrolls += 1
            if scrolls >= max_scrolls:
                break

        except WebDriverException:
            return []

    return links

def extract_links_from_pages(driver, start_page, end_page):
    all_links = []
    base_url = 'https://www.realestate.co.nz/residential/rental/auckland/auckland-city?by=oldest'
    driver.execute_script("document.body.style.zoom = '0.5'")
    for i in range(start_page, end_page + 1):
        retries = 0
        success = False
        while retries < MAX_RETRIES and not success:
            try:
                if i > 1:
                    url = f"{base_url}&page={i}"
                else:
                    url = base_url
                driver.get(url)
                page_links = extract_links(driver)
                all_links.extend(page_links)
                print(f"Extracted {len(page_links)} links from page {i}")
                success = True
            except WebDriverException:
                retries += 1
                print(f"Error loading page {i}. Retrying ({retries}/{MAX_RETRIES})...")
                if retries == MAX_RETRIES:
                    print(f"Failed to load page {i} after {MAX_RETRIES} retries. Moving to next page.")

    return all_links

def start_extraction_thread(start_page, end_page):
    options = webdriver.ChromeOptions()
    prefs = {
        "profile.managed_default_content_settings.images": 2,
        "profile.default_content_setting_values.notifications": 2,
        "profile.default_content_setting_values.popups": 2
    }
    options.add_experimental_option("prefs", prefs)
    options.add_argument("--disable-popup-blocking")

    while start_page <= end_page:
        try:
            driver = webdriver.Chrome(options=options)
            driver.set_script_timeout(300)
            links = extract_links_from_pages(driver, start_page, end_page)
            for link in links:
                print(link)
            driver.quit()
            break  # Exit the loop if extraction was successful
        except WebDriverException:
            print("Driver crashed. Restarting...")
            start_page = end_page  # Adjust the start_page if needed

if __name__ == "__main__":
    t1 = threading.Thread(target=start_extraction_thread, args=(1, 20))
    t2 = threading.Thread(target=start_extraction_thread, args=(21, 40))

    t1.start()
    t2.start()

    t1.join()
    t2.join()



Extracted 13 links from page 21
Extracted 11 links from page 1
Extracted 11 links from page 22
Extracted 10 links from page 2
Extracted 10 links from page 3
Extracted 10 links from page 23
Extracted 15 links from page 4
Extracted 13 links from page 24
Extracted 12 links from page 5
Error loading page 25. Retrying (1/3)...
Error loading page 6. Retrying (1/3)...
Error loading page 25. Retrying (2/3)...
Extracted 12 links from page 6
Error loading page 25. Retrying (3/3)...
Failed to load page 25 after 3 retries. Moving to next page.
Extracted 17 links from page 7
Extracted 12 links from page 26
Error loading page 8. Retrying (1/3)...
Error loading page 27. Retrying (1/3)...
Extracted 10 links from page 8
Error loading page 27. Retrying (2/3)...
Extracted 10 links from page 9
Extracted 10 links from page 10
Extracted 10 links from page 27
Extracted 10 links from page 11
Extracted 10 links from page 28
Extracted 11 links from page 29
Extracted 10 links from page 12
Extracted 10 links from

Extracted 10 links from page 38
Extracted 10 links from page 39
Extracted 5 links from page 40
https://www.realestate.co.nz/42435003/residential/rent/2011-11-liverpool-street-auckland-central
https://www.realestate.co.nz/42435098/residential/rent/502-great-north-road-grey-lynn
https://www.realestate.co.nz/42435158/residential/rent/6-magma-crescent-stonefields
https://www.realestate.co.nz/42435172/residential/rent/811-76-wakefield-street-auckland-central
https://www.realestate.co.nz/42435177/residential/rent/30a-dromorne-road-remuera
https://www.realestate.co.nz/42435216/residential/rent/3-365-great-south-road-ellerslie
https://www.realestate.co.nz/42435227/residential/rent/3d-31-scanlan-street-grey-lynn
https://www.realestate.co.nz/42435257/residential/rent/5a-pasteur-place-new-windsor
https://www.realestate.co.nz/42435288/residential/rent/24-paku-lane-mount-wellington
https://www.realestate.co.nz/42435365/residential/rent/a-23-watea-road-sandringham
https://www.realestate.co.nz/424353

In [68]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/links.txt'
response = requests.get(url)
sites = response.text.splitlines()

# Remover qualquer espaço em branco ou caracteres de nova linha no final de cada linha
sites = [site.strip() for site in sites]


In [272]:
quantity_elements = len(sites)
print("Elements:", quantity_elements)

Elements: 389


In [269]:
sites.sort()

In [None]:
sites

In [270]:
sites = [url for url in sites if not url.endswith('rent')]
sites.remove('Extracted 10 links from page 20')


In [271]:
siteurls = sites[:]

In [None]:
quantity_elements = len(sites)
print("Elements:", quantity_elements)

After collecting the links and evaluating their quality, we removed any potentially unreliable links. We ended up with 389 valid elements to be visited.

-----------------------------------------------------------

This script is a web scraper that utilizes Selenium, an automation tool, to navigate and fetch data from specific websites. Its primary purpose is to extract information about properties, such as addresses and prices, from provided URLs and store this data neatly into a structured format.

To maximize efficiency and speed, the script breaks down the list of websites into chunks and processes them simultaneously, using a technique called multithreading. This is akin to deploying multiple virtual browsers all working together. If the program encounters issues while scraping a site, it's designed to retry a couple of times before logging an error and proceeding to the next site.

After all the property information is compiled, the script saves everything into a CSV file for easy viewing and further analysis. This ensures that even large amounts of data are preserved and presented in a user-friendly manner.

In [48]:
import threading
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import pandas as pd

MAX_RETRIES = 2

def initialize_driver():
    options = webdriver.ChromeOptions()
    prefs = {
        "profile.managed_default_content_settings.images": 2,
        "profile.default_content_setting_values.notifications": 2,
        "profile.default_content_setting_values.popups": 2
    }
    options.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(options=options)
    driver.maximize_window()
    return driver

def extract_info_chunk(start_idx, end_idx, driver, info_chunk_list):
    for i in range(start_idx, end_idx):
        url = sites[i]
        info_dict = extract_info((url, driver))
        info_chunk_list.append(info_dict)
        print(info_dict)  

def extract_info(args):
    url, driver = args
    info_dict = {}
    
    retries = 0
    while retries < MAX_RETRIES:
        try:
            driver.get(url)
            print(f"Scraping: {url}")

            address_element = driver.find_element(By.CSS_SELECTOR, "h1[data-test='listing-title']")
            address = address_element.text.strip()
            info_dict["Address"] = address

            price_element = driver.find_element(By.CSS_SELECTOR, "h3.text-xl.font-semibold")
            price = price_element.text.strip()
            info_dict["Price"] = price

            elements = driver.find_elements(By.CSS_SELECTOR, "span.leading-\\[-1\\]")
            max_elements_to_extract = 8
            for i, element in enumerate(elements, start=1):
                if i > max_elements_to_extract:
                    break
                text = element.text.strip()
                info_dict[f"Info_{i}"] = text

            if len(info_dict) >= 6:
                break

        except (NoSuchElementException, TimeoutException) as e:
            retries += 1
            if retries == MAX_RETRIES:
                info_dict["Error"] = str(e)
                break

    return info_dict

if __name__ == "__main__":
   
    drivers = [initialize_driver() for _ in range(3)]
   
    info_list1 = []
    info_list2 = []
    info_list3 = []

    third_length = len(siteurls) // 3

    t1 = threading.Thread(target=extract_info_chunk, args=(0, third_length, drivers[0], info_list1))
    t2 = threading.Thread(target=extract_info_chunk, args=(third_length, 2 * third_length, drivers[1], info_list2))
    t3 = threading.Thread(target=extract_info_chunk, args=(2 * third_length, len(sites), drivers[2], info_list3))

    # iniciating threads
    t1.start()
    t2.start()
    t3.start()

    t1.join()
    t2.join()
    t3.join()

    # joinning results
    info_list = info_list1 + info_list2 + info_list3

    # closing drivers
    for driver in drivers:
        driver.quit()

    df = pd.DataFrame(info_list)
    df.to_csv('scraped_data.csv', index=False)
    print("\nScraping results:")
    print(df)



Scraping: https://www.realestate.co.nz/42438043/residential/rent/11c-athens-road-one-tree-hill
Scraping: https://www.realestate.co.nz/42426231/residential/rent/2-1976-great-north-road-avondale
Scraping: https://www.realestate.co.nz/3745538/residential/rent/20-9a-esplanade-road-mount-eden
{'Address': '20/9A Esplanade Road, Mount Eden, Auckland City', 'Price': '$360 per week', 'Info_1': 'Unit', 'Info_2': '1', 'Info_3': '1', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '11C Athens Road, One Tree Hill, Auckland City', 'Price': '$1,000 per week', 'Info_1': 'House', 'Info_2': '4', 'Info_3': '3', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '2/1976 Great North Road, Avondale, Auckland City', 'Price': '$590 per week', 'Info_1': 'Apartment', 'Info_2': '2', 'Info_3': '1', 'Info_4': '', 'Info_5': '', 'Info_6': '', 'Info_7': '1 storey'}
Scraping: https://www.realestate.co.nz/42187622/residential/rent/46-montrose-street-poi

Scraping: https://www.realestate.co.nz/42438076/residential/rent/108-223a-greenlane-west-epsom
Scraping: https://www.realestate.co.nz/42427859/residential/rent/7-elam-street-parnell
{'Address': '108/223a Greenlane West , Epsom, Auckland City', 'Price': '$770 per week', 'Info_1': 'Apartment', 'Info_2': '2', 'Info_3': '2', 'Info_4': '1', 'Info_5': '1 ensuite', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42214955/residential/rent/4g-16-market-place-viaduct-auckland-central
Scraping: https://www.realestate.co.nz/42438319/residential/rent/6-52-ascot-avenue-remuera
{'Address': '7 Elam Street, Parnell, Auckland City', 'Price': '$1,895 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '3', 'Info_4': '265m2', 'Info_5': '2', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '6/52 Ascot Avenue, Remuera, Auckland City', 'Price': '$690 per week', 'Info_1': 'Townhouse', 'Info_2': '3', 'Info_3': '1', 'Info_4': '2', 'Info_5': '2', 'Info_6': '', 'Info_7': '

Scraping: https://www.realestate.co.nz/42428693/residential/rent/65-selwyn-avenue-mission-bay
Scraping: https://www.realestate.co.nz/42438709/residential/rent/37-tamora-lane-avondale
{'Address': '65 Selwyn Avenue, Mission Bay, Auckland City', 'Price': '$795 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '2', 'Info_4': '2', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '37 Tamora Lane, Avondale, Auckland City', 'Price': '$930 per week', 'Info_1': 'Townhouse', 'Info_2': '4', 'Info_3': '2', 'Info_4': '1', 'Info_5': '1', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Error': 'Message: timeout: Timed out receiving message from renderer: 288.820\n  (Session info: chrome=118.0.5993.70)\nStacktrace:\n0   chromedriver                        0x0000000100864510 chromedriver + 4310288\n1   chromedriver                        0x000000010085c4bc chromedriver + 4277436\n2   chromedriver                        0x000000010048fb6c chromedriver + 293740\n3   chrome

Scraping: https://www.realestate.co.nz/42439020/residential/rent/83-empire-road-epsom
{'Address': '10 Green Lane East, Remuera, Auckland City', 'Price': '$1,250 per week', 'Info_1': 'House', 'Info_2': '5', 'Info_3': '2', 'Info_4': '2', 'Info_5': '4', 'Info_6': 'Non furnished', 'Info_7': '', 'Info_8': ''}
{'Address': '83 Empire Road, Epsom, Auckland City', 'Price': '$1,750 per week', 'Info_1': 'House', 'Info_2': '5', 'Info_3': '1', 'Info_4': '2', 'Info_5': '2', 'Info_6': 'Non furnished', 'Info_7': '2 ensuites', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42430172/residential/rent/23-paget-street-freemans-bay
Scraping: https://www.realestate.co.nz/42439030/residential/rent/3-6-sarsfield-street-herne-bay
Scraping: https://www.realestate.co.nz/42336124/residential/rent/g06-246-khyber-pass-rd-newmarket
{'Address': '23 Paget Street, Freemans Bay, Auckland City', 'Price': '$2,450 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '3', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'I

Scraping: https://www.realestate.co.nz/42366991/residential/rent/33-139-quay-street-auckland-central
Scraping: https://www.realestate.co.nz/42430690/residential/rent/66b-queenstown-road-onehunga
Scraping: https://www.realestate.co.nz/42439743/residential/rent/3-exler-place-avondale
{'Address': '33/139 Quay Street, Auckland Central, Auckland City', 'Price': '$825 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': 'Furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42367499/residential/rent/1-51-aranui-road-mount-wellington
{'Address': '3 Exler Place, Avondale, Auckland City', 'Price': '$1,100 per week', 'Info_1': 'House', 'Info_2': '5', 'Info_3': '2', 'Info_4': '2', 'Info_5': '2', 'Info_6': '1 ensuite', 'Info_7': '', 'Info_8': ''}
{'Address': '1/51 Aranui Road, Mount Wellington, Auckland City', 'Price': '$950 per week', 'Info_1': 'Townhouse', 'Info_2': '4', 'Info_3': '3', 'Info_4': '3', 'Info_5': 'Non furnish

{'Address': '3/285 Point Chevalier Road, Point Chevalier, Auckland City', 'Price': '$980 per week', 'Info_1': 'House', 'Info_2': '4', 'Info_3': '2', 'Info_4': '2', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42431811/residential/rent/5-15-henry-street-avondale
{'Address': '5/15 Henry Street, Avondale, Auckland City', 'Price': '$790 per week', 'Info_1': 'Townhouse', 'Info_2': '3', 'Info_3': '2', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '23E Windsor Street, Parnell, Auckland City', 'Price': '$1,600 per week', 'Info_1': 'Townhouse', 'Info_2': '4', 'Info_3': '4', 'Info_4': 'Non furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42381420/residential/rent/6-winchester-street-grey-lynn
Scraping: https://www.realestate.co.nz/42432225/residential/rent/58-margot-street-epsom
{'Address': '6 Winchester Street, Grey Lynn, Auckland City', '

Scraping: https://www.realestate.co.nz/42440321/residential/rent/4-hadlow-terrace-grey-lynn
{'Address': '4 Hadlow Terrace, Grey Lynn, Auckland City', 'Price': '$830 per week', 'Info_1': 'Apartment', 'Info_2': '2', 'Info_3': '3', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42440335/residential/rent/6-hadlow-terrace-grey-lynn
Scraping: https://www.realestate.co.nz/42387203/residential/rent/2-14-richard-farrell-avenue-remuera
Scraping: https://www.realestate.co.nz/42432639/residential/rent/4a-westwood-terrace-saint-marys-bay
{'Address': '6 Hadlow Terrace, Grey Lynn, Auckland City', 'Price': '$830 per week', 'Info_1': 'House', 'Info_2': '2', 'Info_3': '3', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '2/14 Richard Farrell Avenue, Remuera, Auckland City', 'Price': '$420 per week', 'Info_1': 'House', 'Info_2': '1', 'Info_3': '1', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Inf

Scraping: https://www.realestate.co.nz/42387476/residential/rent/8-hereford-street-freemans-bay
Scraping: https://www.realestate.co.nz/42440447/residential/rent/57-tuarangi-rd-grey-lynn
{'Address': '8 Hereford Street, Freemans Bay, Auckland City', 'Price': '$655 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '57 Tuarangi Rd, Grey Lynn, Auckland City', 'Price': '$1,250 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '2', 'Info_4': '2', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42432832/residential/rent/321-kohimaramara-rd-st-heliers
Scraping: https://www.realestate.co.nz/42387672/residential/rent/25-gibraltar-crescent-parnell
{'Address': '321 Kohimaramara Rd, St Heliers, Auckland City', 'Price': '$1,150 per week', 'Info_1': 'House', 'Info_2': '5', 'Info_3': '2', 'Info_4': '2', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': '

{'Address': '1M/94 Dominion Road, Mount Eden, Auckland City', 'Price': '$520 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': '', 'Info_5': '', 'Info_6': '', 'Info_7': '1 storey'}
Scraping: https://www.realestate.co.nz/42399892/residential/rent/10-277-mt-eden-road-mount-eden
{'Address': '10/277 Mt Eden Road, Mount Eden, Auckland City', 'Price': '$675 per week', 'Info_1': 'Apartment', 'Info_2': '3', 'Info_3': '1', 'Info_4': 'Non furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42433494/residential/rent/c604-176-broadway-newmarket
Scraping: https://www.realestate.co.nz/42400499/residential/rent/31-day-street-auckland-central
Scraping: https://www.realestate.co.nz/42440850/residential/rent/3-9-sentinel-road-herne-bay
{'Address': 'C604/176 Broadway, Newmarket, Auckland City', 'Price': '$600 per week', 'Info_1': 'Apartment', 'Info_2': '2', 'Info_3': '2', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': ''

Exception in thread Thread-46:
Traceback (most recent call last):
  File "/Users/robertoaltran/opt/anaconda3/lib/python3.9/threading.py", line 973, in _bootstrap_inner
    self.run()
  File "/Users/robertoaltran/opt/anaconda3/lib/python3.9/threading.py", line 910, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/52/xsl0gx4d6rnfwvwr1czp017w0000gn/T/ipykernel_85579/3099470490.py", line 24, in extract_info_chunk
  File "/var/folders/52/xsl0gx4d6rnfwvwr1czp017w0000gn/T/ipykernel_85579/3099470490.py", line 39, in extract_info
  File "/Users/robertoaltran/opt/anaconda3/lib/python3.9/site-packages/selenium/webdriver/remote/webelement.py", line 89, in text
    return self._execute(Command.GET_ELEMENT_TEXT)["value"]
  File "/Users/robertoaltran/opt/anaconda3/lib/python3.9/site-packages/selenium/webdriver/remote/webelement.py", line 394, in _execute
    return self._parent.execute(command, params)
  File "/Users/robertoaltran/opt/anaconda3/lib/python3.9/site-packages/sel

{'Address': '602/145 Symonds Street, Eden Terrace, Auckland City', 'Price': '$700 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': 'Furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42433799/residential/rent/72-nelson-street-auckland-central
Scraping: https://www.realestate.co.nz/42401201/residential/rent/4a-prospero-terrace-mount-albert
{'Address': '72 Nelson Street, Auckland Central, Auckland City', 'Price': '$475 per week', 'Info_1': 'Apartment', 'Info_2': '2', 'Info_3': '1', 'Info_4': '', 'Info_5': '', 'Info_6': '', 'Info_7': '7 storey'}
Scraping: https://www.realestate.co.nz/42433827/residential/rent/62-olsen-avenue-hillsborough
{'Address': '4A Prospero Terrace, Mount Albert, Auckland City', 'Price': '$525 per week', 'Info_1': 'Unit', 'Info_2': '1', 'Info_3': '1', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '62 Olsen Avenue, Hillsborough, Auckland C

Scraping: https://www.realestate.co.nz/42407575/residential/rent/35-chesterfield-ave-glendowie
Scraping: https://www.realestate.co.nz/42434557/residential/rent/168b-whitney-street-blockhouse-bay
{'Address': '35 Chesterfield Ave, Glendowie, Auckland City', 'Price': '$1,250 per week', 'Info_1': 'Townhouse', 'Info_2': '3', 'Info_3': '2', 'Info_4': '1', 'Info_5': '1', 'Info_6': 'Non furnished', 'Info_7': '', 'Info_8': ''}
{'Address': '168B Whitney Street, Blockhouse Bay, Auckland City', 'Price': '$745 per week', 'Info_1': 'Townhouse', 'Info_2': '3', 'Info_3': '1', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42434605/residential/rent/26-eaton-road-hillsborough
{'Address': '26 Eaton road, Hillsborough, Auckland City', 'Price': '$850 per week', 'Info_1': 'House', 'Info_2': '4', 'Info_3': '2', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42407600/resid

Scraping: https://www.realestate.co.nz/42435392/residential/rent/26-old-mill-road-grey-lynn
Scraping: https://www.realestate.co.nz/42411102/residential/rent/1-clare-place-mount-wellington
{'Address': '26 Old Mill Road , Grey Lynn, Auckland City', 'Price': '$2,450 per week', 'Info_1': 'House', 'Info_2': '4', 'Info_3': '2', 'Info_4': '284m2', 'Info_5': '2', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '1 Clare Place, Mount Wellington, Auckland City', 'Price': '$690 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '1', 'Info_4': '', 'Info_5': '', 'Info_6': '', 'Info_7': '1 storey'}
Scraping: https://www.realestate.co.nz/42435397/residential/rent/2-6-prebble-place-mission-bay
Scraping: https://www.realestate.co.nz/42411202/residential/rent/18-charlton-avenue-mount-eden
{'Address': '2/6 Prebble Place, Mission Bay, Auckland City', 'Price': '$775 per week', 'Info_1': 'Unit', 'Info_2': '3', 'Info_3': '1', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'A

Scraping: https://www.realestate.co.nz/42416147/residential/rent/7-22-oak-street-royal-oak
{'Address': '7/22 Oak Street, Royal Oak, Auckland City', 'Price': '$320 per week', 'Info_1': 'House', 'Info_2': '1', 'Info_3': '1', 'Info_4': 'Non furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42436212/residential/rent/409-4-dockside-lane-auckland-central
{'Error': 'Message: timeout: Timed out receiving message from renderer: 247.218\n  (Session info: chrome=118.0.5993.70)\nStacktrace:\n0   chromedriver                        0x0000000100864510 chromedriver + 4310288\n1   chromedriver                        0x000000010085c4bc chromedriver + 4277436\n2   chromedriver                        0x000000010048fb6c chromedriver + 293740\n3   chromedriver                        0x0000000100477f54 chromedriver + 196436\n4   chromedriver                        0x0000000100477d60 chromedriver + 195936\n5   chromedriver                        0x0000

Scraping: https://www.realestate.co.nz/42436759/residential/rent/5d-18-cranbrook-place-glendowie
Scraping: https://www.realestate.co.nz/42418729/residential/rent/102-taniwha-street-waiotaiki-bay
{'Address': '5D/18 Cranbrook Place, Glendowie, Auckland City', 'Price': '$755 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '1', 'Info_4': '90m2', 'Info_5': '2', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '102 Taniwha Street, Waiotaiki Bay, Auckland City', 'Price': '$600 per week', 'Info_1': 'House', 'Info_2': '2', 'Info_3': '1', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42436849/residential/rent/52b-king-george-avenue-epsom
Scraping: https://www.realestate.co.nz/42418757/residential/rent/6-960-new-north-road-mount-albert
{'Address': '52B King George Avenue, Epsom, Auckland City', 'Price': '$1,400 per week', 'Info_1': 'House', 'Info_2': '5', 'Info_3': '3', 'Info_4': '2', 'Info_5': '1', 'Info_6': 'Non furnish

{'Address': 'a/1 Hoheria Road, Onehunga, Auckland City', 'Price': '$680 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '1', 'Info_4': '2', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42421471/residential/rent/4h-19-fleet-street-eden-terrace
Scraping: https://www.realestate.co.nz/42437161/residential/rent/2205-32-swanson-street-auckland-central
{'Address': '4H/19 Fleet Street, Eden Terrace, Auckland City', 'Price': '$695 per week', 'Info_1': 'House', 'Info_2': '2', 'Info_3': '1', 'Info_4': '1', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '2205/32 Swanson Street, Auckland Central, Auckland City', 'Price': '$460 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': 'Non furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42421608/residential/rent/104-2-finch-street-morningside
Scraping: https://www.realestate.co.nz/42437182/residenti

Scraping: https://www.realestate.co.nz/42425338/residential/rent/4-183-mt-albert-road-sandringham
{'Address': '19/1 Glenside Crescent, Auckland Central, Auckland City', 'Price': '$750 per week', 'Info_1': 'Townhouse', 'Info_2': '2', 'Info_3': '2', 'Info_4': 'Non furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '4/183 Mt Albert Road, Sandringham, Auckland City', 'Price': '$620 per week', 'Info_1': 'Unit', 'Info_2': '2', 'Info_3': '1', 'Info_4': '1', 'Info_5': '1', 'Info_6': 'Non furnished', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42437997/residential/rent/3-296-ellerslie-panmure-highway-mount-wellington
Scraping: https://www.realestate.co.nz/42425389/residential/rent/6-245-gillies-avenue-epsom
{'Address': '3/296 Ellerslie Panmure Highway, Mount Wellington, Auckland City', 'Price': '$585 per week', 'Info_1': 'Unit', 'Info_2': '2', 'Info_3': '1', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'

Scraping: https://www.realestate.co.nz/42438017/residential/rent/8i-508-queen-street-auckland-central
{'Address': '8I/508 Queen Street, Auckland Central, Auckland City', 'Price': '$400 per week', 'Info_1': 'Apartment', 'Info_2': '1', 'Info_3': '1', 'Info_4': 'Furnished', 'Info_5': '', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
Scraping: https://www.realestate.co.nz/42426148/residential/rent/1a-1-haverstock-sandringham
Scraping: https://www.realestate.co.nz/42438019/residential/rent/1-24-douglas-road-mount-eden
{'Address': '1A/1 Haverstock, Sandringham, Auckland City', 'Price': '$670 per week', 'Info_1': 'House', 'Info_2': '3', 'Info_3': '1', 'Info_4': '1', 'Info_5': '1', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Address': '1/24 Douglas Road, Mount Eden, Auckland City', 'Price': '$670 per week', 'Info_1': 'House', 'Info_2': '2', 'Info_3': '1', 'Info_4': '1', 'Info_5': 'Non furnished', 'Info_6': '', 'Info_7': '', 'Info_8': ''}
{'Error': 'Message: timeout: Timed out receiving message fro

"The script ran smoothly; however, it took about 15 hours to process all 389 links."





In [238]:
import re

url = "https://raw.githubusercontent.com/robertoaltran/Population/main/scraped_data.csv"
df = pd.read_csv(url)
df = df.dropna(subset=['Address'])
temp_filename = 'temp.csv'
df.to_csv(temp_filename, index=False)
with open(temp_filename, 'r') as file:
    content = file.read()
corrected_content = re.sub(r'\n(?=[^"])', ' ', content)
corrected_temp_filename = 'corrected_temp.csv'
with open(corrected_temp_filename, 'w') as file:
    file.write(corrected_content)
df_corrected = pd.read_csv(corrected_temp_filename)

In [254]:
df = df_corrected

In [255]:
def is_numeric(value):
    #Check if the value is numeric.
    if pd.isna(value):
        return False
    try:
        int(value)
        return True
    except ValueError:
        return False

In [256]:
column_rename = {
    'Info_1': 'House Type',
    'Info_2': 'Bedroom',
    'Info_3': 'Bathroom',
    'Info_4': 'Garage',
    'Info_5': 'Other Feature 1',
    'Info_6': 'Other Feature 2',
}

df.rename(columns=column_rename, inplace=True)

# Remover as colunas "Info 8" e "Error"
df.drop(['Info_7', 'Info_8', 'Error'], axis=1, inplace=True)

In [257]:
# Convert 'Price' column to numerical by removing unwanted strings
df['Price'] = df['Price'].astype(str)

df['Price'] = df['Price'].str.replace(r'\$', '', regex=True)
df['Price'] = df['Price'].str.replace(r' per week', '', regex=True)
df['Price'] = df['Price'].str.replace(r',', '', regex=True)
#df['Price'] = pd.to_numeric(df['Price'], errors='coerce')  # Convert to numeric (float)

# Handle numeric values in 'Bedroom', 'Bathroom' and 'Garage'
df['Bedroom'] = df['Bedroom'].apply(lambda x: int(x) if is_numeric(x) else None)
df['Bathroom'] = df['Bathroom'].apply(lambda x: int(x) if is_numeric(x) else None)
mask_non_numeric_garage = ~df['Garage'].apply(is_numeric)
df.loc[mask_non_numeric_garage, ['Garage', 'Other Feature 1', 'Other Feature 2']] = df.loc[mask_non_numeric_garage, ['Other Feature 1', 'Garage', 'Other Feature 2']].values

# Convert the columns to Int64 type
for col in ['Bedroom', 'Bathroom', 'Garage']:
    df[col] = pd.to_numeric(df[col], errors='coerce').astype(pd.Int64Dtype())

# Handle cases where 'Other Feature 2' is numeric but 'Other Feature 1' isn't
mask = df['Other Feature 2'].apply(is_numeric) & ~df['Other Feature 1'].apply(is_numeric)
df.loc[mask, ['Other Feature 1', 'Other Feature 2']] = df.loc[mask, ['Other Feature 2', 'Other Feature 1']].values

# Add up numeric 'Garage' and 'Other Feature 1' values
mask = df['Garage'].apply(is_numeric) & df['Other Feature 1'].apply(is_numeric)
df.loc[mask, 'Garage'] += df.loc[mask, 'Other Feature 1'].astype(int)
df.loc[mask, 'Other Feature 1'] = df.loc[mask, 'Other Feature 2']
df.loc[mask, 'Other Feature 2'] = None

# Handle 'Furniture'
df['Furniture'] = 0
furnished_mask = df['Other Feature 1'].str.contains('Furnished', na=False) | df['Other Feature 2'].str.contains('Furnished', na=False)
df.loc[furnished_mask, 'Furniture'] = 1
df.loc[furnished_mask, ['Other Feature 1', 'Other Feature 2']] = None

non_furnished_mask = df['Other Feature 1'].str.contains('Non furnished', na=False) | df['Other Feature 2'].str.contains('Non furnished', na=False)
df.loc[non_furnished_mask, ['Other Feature 1', 'Other Feature 2']] = None

# Impute missing values in 'House Type'
df['House Type'].fillna('Unknown', inplace=True)

# One-hot encoding for 'House Type'
df = pd.get_dummies(df, columns=['House Type'], prefix='Type')

# Ensure all categories are present and if not, add them
categories = ["House", "Apartment", "Unit", "Townhouse", "Unknown"]
for cat in categories:
    col_name = 'Type_' + cat
    if col_name not in df.columns:
        df[col_name] = 0


In [258]:
df.head(10)

Unnamed: 0,Address,Price,Bedroom,Bathroom,Garage,Other Feature 1,Other Feature 2,Furniture,Type_Apartment,Type_Carpark,Type_House,Type_Studio,Type_Townhouse,Type_Unit,Type_Unknown
0,"20/9A Esplanade Road, Mount Eden, Auckland City",360,1,1,1.0,,,0,0,0,0,0,0,1,0
1,"46 Montrose Street, Point Chevalier, Auckland ...",650,2,1,2.0,,,0,0,0,1,0,0,0,0
2,"1/351 Mount Eden Road, Mount Eden, Auckland City",530,2,1,,,,0,0,0,1,0,0,0,0
3,"64 School Road, Kingsland, Auckland City",750,3,1,2.0,,,0,0,0,1,0,0,0,0
4,"2E/25 Rutland Street, Auckland Central, Auckla...",370,1,1,,,,0,1,0,0,0,0,0,0
5,"4G/16 Market Place Viaduct, Auckland Central, ...",520,1,1,,,,0,1,0,0,0,0,0,0
6,"29/139 Quay Street, Auckland Central, Auckland...",790,1,1,,,,1,1,0,0,0,0,0,0
7,"147 Quay Street, Auckland Central, Auckland City",780,1,1,,,,1,1,0,0,0,0,0,0
8,"174A White Swan Road, Mount Roskill, Auckland ...",695,3,2,3.0,,,0,0,0,1,0,0,0,0
9,"B3/15 Scanlan Street, Grey Lynn, Auckland City",750,2,1,1.0,,,0,0,0,0,0,1,0,0


In [259]:
df['Suburb'] = df['Address'].str.split(',').str[-2].str.strip()


In [260]:
df.head()

Unnamed: 0,Address,Price,Bedroom,Bathroom,Garage,Other Feature 1,Other Feature 2,Furniture,Type_Apartment,Type_Carpark,Type_House,Type_Studio,Type_Townhouse,Type_Unit,Type_Unknown,Suburb
0,"20/9A Esplanade Road, Mount Eden, Auckland City",360,1,1,1.0,,,0,0,0,0,0,0,1,0,Mount Eden
1,"46 Montrose Street, Point Chevalier, Auckland ...",650,2,1,2.0,,,0,0,0,1,0,0,0,0,Point Chevalier
2,"1/351 Mount Eden Road, Mount Eden, Auckland City",530,2,1,,,,0,0,0,1,0,0,0,0,Mount Eden
3,"64 School Road, Kingsland, Auckland City",750,3,1,2.0,,,0,0,0,1,0,0,0,0,Kingsland
4,"2E/25 Rutland Street, Auckland Central, Auckla...",370,1,1,,,,0,1,0,0,0,0,0,0,Auckland Central


In [261]:
unique_values = df['Suburb'].unique()
string_values = [value for value in unique_values if isinstance(value, str)]
sorted_unique_values = sorted(string_values)

for value in sorted_unique_values:
    print(value)

Auckland Central
Avondale
Blockhouse Bay
Eden Terrace
Ellerslie
Epsom
Freemans Bay
Glen Innes
Glendowie
Grafton
Greenlane
Grey Lynn
Herne Bay
Hillsborough
Kingsland
Kohimarama
Meadowbank
Mission Bay
Morningside
Mount Albert
Mount Eden
Mount Roskill
Mount Wellington
New Windsor
Newmarket
One Tree Hill
Onehunga
Otahuhu
Parnell
Point Chevalier
Point England
Ponsonby
Remuera
Royal Oak
Saint Heliers
Saint Johns
Saint Marys Bay
Sandringham
St Heliers
Stonefields
Three Kings
Waiotaiki Bay
Waterview
Westmere


In [220]:
df.to_csv('RealstateRent.csv', index=False)


This program is made to fix and organize data about houses and apartments from an online file.

First, the program gets data from a website link and removes any entries without an address. It saves this data to a file, checks for any mistakes like extra lines, and then corrects them. After making these fixes, it reads the data back and gets ready for more detailed changes.

The main job of the program is to make the property data easy to understand. It gives new names to some columns to make them clearer. It removes unnecessary information and changes the price details to simple numbers by getting rid of symbols and words. The program also checks details like the number of rooms and parking spaces, making sure they are in the right place and shown as numbers. It also looks for houses that come with or without furniture, marking them correctly. At the end, it changes the property type (like "House" or "Apartment") into a format that a computer can easily understand. If any type is missing, the program adds it. Now, the cleaned-up data is ready for more use, like studying or making predictions.

----------------------------------------------------------------

Gathering good data is very important for study. For my school work, I looked into different areas using the police website and nz.statistics. I mainly used the 2018 census for information. Even though it was from 2018, it was updated in 2020, so it's still recent. As a student still learning English, I believe it's key to use trusted data for my research.

In [100]:
pd.set_option('display.max_columns', None)


In [101]:
!pip install pandas openpyxl



In [102]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/2018-SA1-dataset-household-AucklandRegion.csv'
data2 = pd.read_csv(url)

In [103]:
data2 = data2.iloc[:, [0, 1, 4]]
headers = data2.iloc[0]
data2 = data2[1:]
data2.columns = headers
data2.head()

Unnamed: 0,Area_Code,Area_Description,2018.0
1,110200,Okahukura Peninsula,522.0
2,110300,Inlet Kaipara Harbour South,0.0
3,110400,Cape Rodney,1275.0
4,110500,Wellsford,654.0
5,110600,Oceanic Auckland Region East,6.0


### Total households:

This metric shows the number of homes currently being lived in. It can help us gauge housing demand in an area.

In [104]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/2018-SA1-dataset-individual-part-1-AucklandRegion_updated_28-7-20.csv'
data3 = pd.read_csv(url)

In [105]:
data3 = data3.iloc[:, [0, 1, 4] + list(range(61, 78)) + [80] + list(range(91, 94)) + list(range(346, 353))]

In [106]:
headers = data3.iloc[0]
data3 = data3[1:]
data3.columns = headers
data3.head()

Unnamed: 0,Area_Code,Area_Description,2018 Census,0-4 years,5-9 years,10-14 years,15-19 years,20-24 years,25-29 years,30-34 years,35-39 years,40-44 years,45-49 years,50-54 years,55-59 years,60-64 years,65-69 years,70-74 years,75-79 years,80-84 years,Median age(17),Under 15 years,15-29 years,30-64 years,Less than one year,1 year,2 years,3 years,4 years,5-9 years.1,10-19 years
1,110200,Okahukura Peninsula,1491,99,96,111,84,75,72,78,93,87,99,105,162,105,84,51,48,21,41.7,306,234,732,6,3,3,9,3,33,60
2,110300,Inlet Kaipara Harbour South,0,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C
3,110400,Cape Rodney,3525,216,234,237,168,153,168,177,201,204,240,264,312,291,258,198,123,51,45,687,489,1683,27,21,21,18,21,66,138
4,110500,Wellsford,1929,168,150,129,138,105,138,150,123,105,93,114,108,84,99,87,57,42,34.4,447,384,777,12,18,6,18,6,78,78
5,110600,Oceanic Auckland Region East,9,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,62,0,0,6,C,C,C,C,C,C,C



**Census usually resident population count**: 
  The total number of people who live in an area most of the time.

**Age in five year groups**: 
 Grouping people by age, like 0-5 years, 6-10 years, and so on.

**Age in broad groups**: 
 Grouping people by age in larger groups, like children, adults, and seniors.

**Years at usual residence**: 
 How many years a person has lived in their current home.

updated_28-7-20

In [107]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/2018-SA1-dataset-individual-part-2-AucklandRegion.csv'
data4 = pd.read_csv(url)

In [108]:
data4 = data4.iloc[:, [0, 1] + list(range(174, 180)) + list(range(215, 226)) + list(range(264, 270))+ [274]]

In [109]:
headers = data4.iloc[0]
data4 = data4[1:]
data4.columns = headers

data4.head()

Unnamed: 0,Area_Code,Area_Description,No children,One child,Two children,Three children,Four children,Five children,No qualification,Level 1 certificate,Level 2 certificate,Level 3 certificate,Level 4 certificate,Level 5 diploma,Level 6 diploma,Bachelor degree and Level 7 qualification,Post-graduate and honours degrees,Masters degree,Doctorate degree,"$5,000 or less","$5,001 – $10,000","$10,001 – $20,000","$20,001 – $30,000","$30,001 – $50,000","$50,001 – $70,000",Median personal income($)
1,110200,Okahukura Peninsula,108,93,141,90,48,21,225,198,156,135,120,54,45,93,30,15,3,132,57,198,168,288,177,32600
2,110300,Inlet Kaipara Harbour South,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C
3,110400,Cape Rodney,315,156,378,246,120,33,495,357,321,249,315,129,150,333,102,81,39,312,123,504,408,642,393,32100
4,110500,Wellsford,150,87,159,144,75,33,381,219,186,141,132,57,48,90,30,15,0,192,72,321,270,303,195,25400
5,110600,Oceanic Auckland Region East,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,12600


### Feature Relevance for House Rental Price Model:

When predicting house rental prices in New Zealand, it's essential to determine the relevance of potential features. Below is a brief overview:


**Number of Children Born**: Can influence demand for property type and size.

**Education Metrics**: Locations near educational institutions or with a high student population might have specific rental demands.

**Income Metrics**: Areas with higher incomes might exhibit higher rental prices. The source of income can offer additional insights.

**Travel Means to Education**: Indicates transport infrastructure and proximity to educational establishments.

In [110]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/2018-SA1-dataset-individual-part-3a-AucklandRegion.csv'
data5 = pd.read_csv(url)

In [111]:
data5 = data5.iloc[:, [0, 1] + list(range(16, 21)) + list(range(66, 73)) + list(range(156, 172))]

In [112]:
headers = data5.iloc[0]
data5 = data5[1:]
data5.columns = headers
data5.head()

Unnamed: 0,Area_Code,Area_Description,Employed Full time,Employed Part time,Unemployed,Not in the Labour Force,Total stated,Managers,Professionals,Technicians and Trades Workers,Community and Personal Service Workers,Clerical and Administrative Workers,Sales Workers,Machinery Operators and Drivers,Manufacturing,Electricity Gas Water and Waste Services,Construction,Wholesale Trade,Retail Trade,Accommodation and Food Services,Transport Postal and Warehousing,Information Media and Telecommunications,Financial and Insurance Services,Rental Hiring and Real Estate Services,Professional Scientific and Technical Services,Administrative and Support Services,Public Administration and Safety,Education and Training,Health Care and Social Assistance,Arts and Recreation Services
1,110200,Okahukura Peninsula,639,198,39,306,1182,204,93,117,75,66,66,81,72,3,114,42,57,39,42,6,12,15,42,39,24,48,54,9
2,110300,Inlet Kaipara Harbour South,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C
3,110400,Cape Rodney,1329,558,48,903,2838,507,309,246,147,186,147,93,165,6,264,90,126,111,60,27,21,51,156,72,24,141,135,57
4,110500,Wellsford,681,213,66,525,1485,156,96,144,78,87,117,81,90,3,153,45,114,78,54,3,15,6,63,33,15,54,54,12
5,110600,Oceanic Auckland Region East,C,C,C,6,9,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C,C



**Work and Labour Force Status**:
- This shows how many people in an area have jobs. Places with more people working might have higher rent prices because more people need homes.

**Status in Employment**:
- This tells us what kind of jobs people have, like full-time or part-time. Areas with many full-time workers might have higher rents because they have a steady income.

**Occupation by Residence & Workplace Address**:
- This information reveals the types of jobs people do and where they work. Areas with lots of high-paying jobs, like doctors or lawyers, might have higher rents. Also, if people live close to their work, they might be willing to pay more for rent.

**Industry by Residence & Workplace Address**:
- This tells us about the businesses and industries where people work. For instance, areas close to big business hubs might have higher rents. On the other hand, places with seasonal jobs might have different rent patterns.

In [113]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/2018-SA1-dataset-individual-part-3b-AucklandRegion_updated_16-7-20.csv'
data6 = pd.read_csv(url, header = 1)

In [114]:
data6 = data6.iloc[:, [0, 1, 32 , 38, 41, 44, 50, 53, 56]]

In [115]:
data6.drop(0, inplace=True)
data6.reset_index(drop=True, inplace=True)
data6.rename(columns={'Unnamed: 0': 'Area_Code', 'Unnamed: 1': 'Area_Description'}, inplace=True)

In [116]:
data6.head()

Unnamed: 0,Area_Code,Area_Description,Work at home,"Drive a private car, truck or van","Drive a company car, truck or van","Passenger in a car, truck, van or company bus",Train,Bicycle,Walk or jog
0,110200,Okahukura Peninsula,168,315,78,12,0,0,18
1,110300,Inlet Kaipara Harbour South,C,C,C,C,C,C,C
2,110400,Cape Rodney,375,564,129,39,3,6,33
3,110500,Wellsford,57,315,108,39,0,3,81
4,110600,Oceanic Auckland Region East,C,C,C,C,C,C,C


**Main Means of Travel to Work by Residence & Workplace Address**:
- This shows how people get to their jobs, like by car, bus, or walking. Areas with good public transport might have higher rents because it's easier for people to get around. On the other hand, places where most people drive might need homes with parking spaces.

In [117]:
url = 'https://raw.githubusercontent.com/robertoaltran/Population/main/ANZSOC_Full%20Data_data-auckland.csv'  
data8 = pd.read_csv(url)
data8.head()

Unnamed: 0,ANZSOC Division,Year Month,Victimisations,Area Unit,Number of Records,Territorial Authority
0,"Abduction, Harassment and Other Related Offenc...",Dec-22,1,Mangere South.,1,Auckland.
1,"Abduction, Harassment and Other Related Offenc...",Oct-22,1,Royal Oak.,1,Auckland.
2,"Abduction, Harassment and Other Related Offenc...",Dec-22,1,Mascot.,1,Auckland.
3,"Abduction, Harassment and Other Related Offenc...",Dec-22,1,Balmoral.,1,Auckland.
4,"Abduction, Harassment and Other Related Offenc...",Oct-22,1,Drury.,1,Auckland.


In [158]:
data8['Area Unit'] = data8['Area Unit'].str.replace(r'\.$', '', regex=True)

## Auckland - Crime Categories and Incidents:

**Acts Intended to Cause Injury**
This category involves deliberate acts where the intent was to inflict harm or injury to another individual.

**Sexual Assault and Related Offences**
These are offenses that involve any unwanted sexual activity or behavior without the explicit consent of the victim.

**Abduction, Harassment and Other Related Offences Against Personal Liberty**
This category encompasses crimes where a person's freedom or liberty is violated, including kidnapping and harassment.

**Robbery, Extortion and Related Offences**
Such crimes involve forcibly taking something of value from another person, often coupled with threats or violence.

**Unlawful Entry With Intent/Burglary, Break and Enter**
These are offenses where individuals unlawfully enter property, usually with the intent to commit theft or another crime.

**Theft and Related Offences**
This category covers various forms of theft, from petty thefts to more significant robberies, excluding break-ins.


### Source of Data:
The data is derived from the "Victimisation Time and Place" dataset, which offers detailed insights into when and where crimes occurred across Auckland.

In [159]:
data_crimes_2023 = data8[data8['Year Month'].str.contains('23')]
grouped_crimes = data_crimes_2023.groupby(['Area Unit', 'ANZSOC Division'])['Victimisations'].sum().reset_index()
pivot_crimes = grouped_crimes.pivot_table(index='Area Unit', columns='ANZSOC Division', values='Victimisations', fill_value=0).reset_index()

In [160]:
pivot_crimes.head(10)

ANZSOC Division,Area Unit,"Abduction, Harassment and Other Related Offences Against a Person",Acts Intended to Cause Injury,"Robbery, Extortion and Related Offences",Sexual Assault and Related Offences,Theft and Related Offences,"Unlawful Entry With Intent/Burglary, Break and Enter"
0,-29,0,2,0,0,7,1
1,999999,1,66,16,28,204,28
2,Abbotts Park,0,8,4,1,197,50
3,Akarana,0,20,2,1,96,73
4,Albany,1,80,15,4,734,63
5,Algies Bay,0,2,0,0,2,2
6,Ambury,0,5,0,0,34,17
7,Aorere,2,8,1,1,68,31
8,Arahanga,0,12,0,0,32,20
9,Arch Hill,0,6,3,3,92,40


In [161]:
unique_value = pivot_crimes['Area Unit'].unique()
string_values = [value for value in unique_value if isinstance(value, str)]
sorted_unique_value = sorted(string_values)


In [162]:
pivot_crimes['Area Unit'] = pivot_crimes['Area Unit'].str.replace(r' (North|South|East|West|Central)\b', '', regex=True)
pivot_crimes['Area Unit'] = pivot_crimes['Area Unit'].str.replace(r' \(Auckland\)', '', regex=True)

In [163]:
result['Area Unit'] = result['Area Unit'].replace(['Auckland City-Marinas', 'Auckland Harbourside'], 'Auckland')

In [164]:
result = pivot_crimes.groupby('Area Unit').sum().reset_index()

In [124]:
dataframes = [data2, data3, data4, data5, data6]
names = ['data2', 'data3', 'data4', 'data5', 'data6']

for df, name in zip(dataframes, names):
    if 'Area_Description' not in df.columns:
        print(f"{name} doesn't have 'Area_Description' column.")

In [148]:
merged_df = data2.merge(data3, on=['Area_Code', 'Area_Description'], how='outer')
merged_df = merged_df.merge(data4, on=['Area_Code', 'Area_Description'], how='outer')
merged_df = merged_df.merge(data5, on=['Area_Code', 'Area_Description'], how='outer')
merged_df = merged_df.merge(data6, on=['Area_Code', 'Area_Description'], how='outer')

In [149]:
merged_final = merged_df

In [150]:
unique_values = merged_df['Area_Description'].unique()
string_values = [value for value in unique_values if isinstance(value, str)]
sorted_unique_values = sorted(string_values)


In [151]:
merged_final['Area_Description'] = merged_final['Area_Description'].str.replace(r' (North|South|East|West|Central)\b', '', regex=True)
merged_final['Area_Description'] = merged_final['Area_Description'].str.replace(r' \(Auckland\)', '', regex=True)

In [152]:
object_cols = merged_final.select_dtypes(include=['object']).columns
for col in object_cols:
    try:
        _ = pd.to_numeric(merged_final[col], errors='coerce')
    except Exception as e:
        print(f"Error in column {col}: {e}")


Error in column 5-9 years: arg must be a list, tuple, 1-d array, or Series
Error in column 5-9 years: arg must be a list, tuple, 1-d array, or Series


In [153]:
cols = merged_final.columns.tolist()
index_of_duplicate = cols.index("5-9 years")
cols[index_of_duplicate] = "5-9 year"
merged_final.columns = cols

In [154]:
import numpy as np
object_cols_list = object_cols.tolist()

if 'Area_Description' in object_cols_list:
    object_cols_list.remove('Area_Description')

for col in object_cols_list:
    merged_final[col] = merged_final[col].replace('C', np.nan)
    merged_final[col] = pd.to_numeric(merged_final[col], errors='coerce')

In [156]:
merged_final = merged_final.sort_values(by='Area_Description')

merged_final.head(20)

Unnamed: 0,Area_Code,Area_Description,2018.0,2018 Census,0-4 years,5-9 year,10-14 years,15-19 years,20-24 years,25-29 years,30-34 years,35-39 years,40-44 years,45-49 years,50-54 years,55-59 years,60-64 years,65-69 years,70-74 years,75-79 years,80-84 years,Median age(17),Under 15 years,15-29 years,30-64 years,Less than one year,1 year,2 years,3 years,4 years,5-9 years,10-19 years,No children,One child,Two children,Three children,Four children,Five children,No qualification,Level 1 certificate,Level 2 certificate,Level 3 certificate,Level 4 certificate,Level 5 diploma,Level 6 diploma,Bachelor degree and Level 7 qualification,Post-graduate and honours degrees,Masters degree,Doctorate degree,"$5,000 or less","$5,001 – $10,000","$10,001 – $20,000","$20,001 – $30,000","$30,001 – $50,000","$50,001 – $70,000",Median personal income($),Employed Full time,Employed Part time,Unemployed,Not in the Labour Force,Total stated,Managers,Professionals,Technicians and Trades Workers,Community and Personal Service Workers,Clerical and Administrative Workers,Sales Workers,Machinery Operators and Drivers,Manufacturing,Electricity Gas Water and Waste Services,Construction,Wholesale Trade,Retail Trade,Accommodation and Food Services,Transport Postal and Warehousing,Information Media and Telecommunications,Financial and Insurance Services,Rental Hiring and Real Estate Services,Professional Scientific and Technical Services,Administrative and Support Services,Public Administration and Safety,Education and Training,Health Care and Social Assistance,Arts and Recreation Services,Work at home,"Drive a private car, truck or van","Drive a company car, truck or van","Passenger in a car, truck, van or company bus",Train,Bicycle,Walk or jog
170,127200.0,Akoranga,405.0,1167.0,42.0,39,27.0,153.0,105.0,84.0,57.0,45.0,33.0,48.0,39.0,36.0,39.0,33.0,48.0,69.0,87.0,43.9,108.0,342.0,291.0,21.0,18.0,18.0,18.0,12.0,48.0,126.0,237.0,66.0,111.0,87.0,36.0,24.0,147.0,87.0,78.0,195.0,51.0,36.0,75.0,132.0,45.0,36.0,6.0,168.0,78.0,201.0,267.0,192.0,81.0,22100.0,285.0,129.0,51.0,591.0,1056.0,48.0,96.0,45.0,72.0,42.0,60.0,9.0,21.0,3.0,33.0,18.0,45.0,51.0,15.0,9.0,12.0,3.0,51.0,24.0,15.0,27.0,48.0,18.0,12.0,135.0,21.0,9.0,0.0,3.0,36.0
71,117300.0,Albany,162.0,525.0,18.0,12,6.0,93.0,96.0,54.0,57.0,30.0,36.0,30.0,18.0,18.0,21.0,15.0,6.0,3.0,3.0,28.4,39.0,246.0,213.0,45.0,24.0,30.0,18.0,9.0,51.0,72.0,156.0,24.0,30.0,9.0,3.0,3.0,24.0,21.0,27.0,126.0,27.0,9.0,12.0,90.0,21.0,27.0,3.0,144.0,33.0,90.0,39.0,78.0,54.0,17400.0,201.0,90.0,30.0,171.0,489.0,45.0,84.0,21.0,36.0,30.0,42.0,12.0,21.0,0.0,21.0,18.0,48.0,15.0,9.0,9.0,9.0,0.0,36.0,12.0,9.0,36.0,18.0,3.0,6.0,42.0,3.0,6.0,0.0,0.0,3.0
72,117400.0,Albany,882.0,2811.0,126.0,189,171.0,183.0,234.0,156.0,162.0,204.0,171.0,180.0,159.0,144.0,93.0,90.0,138.0,144.0,117.0,39.6,483.0,570.0,1104.0,141.0,159.0,108.0,72.0,60.0,204.0,447.0,318.0,165.0,384.0,177.0,54.0,30.0,306.0,204.0,159.0,216.0,123.0,93.0,117.0,387.0,126.0,135.0,21.0,465.0,117.0,345.0,333.0,420.0,249.0,26100.0,852.0,324.0,72.0,1080.0,2325.0,240.0,300.0,123.0,87.0,141.0,168.0,51.0,93.0,6.0,105.0,102.0,153.0,84.0,30.0,36.0,42.0,39.0,159.0,51.0,36.0,90.0,87.0,15.0,78.0,534.0,138.0,36.0,3.0,6.0,33.0
79,118100.0,Albany,1143.0,3405.0,171.0,186,198.0,222.0,306.0,231.0,243.0,237.0,219.0,264.0,234.0,189.0,195.0,168.0,114.0,81.0,51.0,38.0,555.0,756.0,1578.0,132.0,150.0,117.0,66.0,69.0,312.0,645.0,465.0,234.0,438.0,174.0,42.0,15.0,258.0,201.0,195.0,324.0,171.0,129.0,123.0,537.0,180.0,192.0,30.0,534.0,153.0,381.0,354.0,531.0,375.0,30300.0,1317.0,399.0,99.0,1035.0,2853.0,354.0,459.0,165.0,132.0,231.0,213.0,63.0,120.0,12.0,129.0,153.0,183.0,126.0,63.0,36.0,93.0,39.0,249.0,75.0,66.0,150.0,135.0,36.0,84.0,945.0,240.0,63.0,0.0,9.0,57.0
65,116700.0,Albany Heights,1053.0,3153.0,210.0,192,165.0,189.0,375.0,300.0,312.0,288.0,195.0,186.0,201.0,186.0,153.0,93.0,45.0,33.0,18.0,32.4,567.0,864.0,1521.0,132.0,177.0,114.0,120.0,102.0,393.0,576.0,486.0,234.0,312.0,117.0,30.0,6.0,150.0,147.0,150.0,303.0,120.0,114.0,123.0,585.0,183.0,174.0,15.0,507.0,132.0,291.0,225.0,480.0,363.0,36700.0,1371.0,390.0,111.0,714.0,2586.0,402.0,495.0,162.0,141.0,213.0,210.0,54.0,111.0,9.0,144.0,144.0,231.0,111.0,48.0,48.0,99.0,51.0,285.0,87.0,57.0,138.0,108.0,33.0,72.0,507.0,111.0,33.0,0.0,3.0,21.0
502,160400.0,Alfriston,1059.0,3807.0,243.0,249,237.0,225.0,285.0,309.0,303.0,249.0,198.0,252.0,228.0,216.0,204.0,129.0,129.0,102.0,111.0,36.0,726.0,816.0,1656.0,45.0,54.0,60.0,51.0,48.0,294.0,699.0,408.0,219.0,384.0,240.0,105.0,54.0,561.0,261.0,207.0,297.0,204.0,144.0,138.0,492.0,147.0,99.0,15.0,432.0,144.0,522.0,381.0,582.0,495.0,32300.0,1572.0,336.0,93.0,1077.0,3078.0,363.0,435.0,201.0,138.0,252.0,234.0,147.0,258.0,12.0,171.0,147.0,204.0,105.0,159.0,21.0,54.0,42.0,150.0,87.0,69.0,129.0,162.0,18.0,63.0,693.0,234.0,39.0,21.0,0.0,9.0
17,111900.0,Algies Bay-Scotts Landing,498.0,1185.0,45.0,48,42.0,45.0,27.0,27.0,48.0,33.0,48.0,72.0,60.0,90.0,117.0,135.0,126.0,78.0,51.0,60.2,135.0,99.0,471.0,6.0,12.0,6.0,9.0,9.0,33.0,48.0,111.0,45.0,171.0,111.0,39.0,18.0,189.0,150.0,102.0,60.0,99.0,42.0,72.0,138.0,51.0,39.0,12.0,93.0,42.0,222.0,174.0,204.0,156.0,29500.0,327.0,168.0,24.0,528.0,1050.0,108.0,126.0,57.0,54.0,60.0,39.0,18.0,51.0,3.0,60.0,21.0,48.0,27.0,12.0,12.0,9.0,15.0,60.0,18.0,6.0,42.0,63.0,6.0,72.0,195.0,45.0,12.0,0.0,3.0,6.0
552,165400.0,Anselmi Ridge,855.0,2448.0,138.0,180,159.0,120.0,93.0,87.0,108.0,150.0,171.0,168.0,129.0,123.0,87.0,93.0,144.0,174.0,150.0,45.7,474.0,300.0,939.0,21.0,12.0,15.0,24.0,15.0,96.0,171.0,198.0,93.0,351.0,216.0,102.0,24.0,393.0,228.0,201.0,165.0,171.0,99.0,138.0,228.0,105.0,48.0,0.0,177.0,51.0,303.0,348.0,321.0,264.0,35900.0,873.0,228.0,27.0,840.0,1968.0,276.0,279.0,123.0,99.0,132.0,75.0,57.0,120.0,6.0,123.0,90.0,87.0,33.0,72.0,12.0,45.0,18.0,117.0,39.0,69.0,84.0,84.0,15.0,33.0,105.0,42.0,9.0,0.0,0.0,6.0
243,134500.0,Anzac Avenue,1458.0,2748.0,60.0,15,15.0,96.0,507.0,615.0,453.0,261.0,126.0,138.0,90.0,111.0,93.0,81.0,45.0,24.0,12.0,30.6,90.0,1215.0,1269.0,309.0,270.0,177.0,141.0,126.0,294.0,285.0,858.0,111.0,81.0,33.0,9.0,6.0,90.0,75.0,93.0,264.0,93.0,99.0,99.0,684.0,276.0,288.0,72.0,387.0,177.0,369.0,228.0,504.0,384.0,37200.0,1527.0,363.0,135.0,630.0,2655.0,303.0,711.0,153.0,243.0,213.0,171.0,24.0,63.0,6.0,51.0,78.0,123.0,303.0,48.0,60.0,117.0,51.0,399.0,120.0,75.0,189.0,105.0,60.0,51.0,201.0,48.0,24.0,12.0,9.0,453.0
434,153600.0,Aorere,453.0,2061.0,129.0,201,165.0,183.0,171.0,168.0,144.0,141.0,117.0,138.0,132.0,108.0,75.0,54.0,54.0,51.0,12.0,30.3,495.0,528.0,858.0,9.0,36.0,24.0,18.0,9.0,102.0,327.0,210.0,87.0,147.0,90.0,69.0,30.0,375.0,138.0,138.0,225.0,111.0,78.0,36.0,117.0,21.0,18.0,0.0,288.0,69.0,270.0,201.0,405.0,234.0,27100.0,804.0,171.0,111.0,480.0,1566.0,87.0,123.0,111.0,102.0,129.0,87.0,174.0,168.0,6.0,69.0,75.0,51.0,54.0,120.0,15.0,18.0,15.0,48.0,69.0,60.0,72.0,78.0,9.0,15.0,432.0,57.0,69.0,9.0,3.0,12.0


In [166]:
del merged_final['Area_Code']
merged_final['Area_Description'] = merged_final['Area_Description'].replace(['Auckland Airport', 'Auckland-University'], 'Auckland')

In [135]:
numerical_columns = merged_final.select_dtypes(include=[np.number]).columns
merged_final[numerical_columns] = merged_final[numerical_columns].fillna(0)

In [167]:
merged_final = merged_final.groupby('Area_Description').sum().reset_index()

In [168]:
merged_final = merged_final.merge(pivot_crimes, left_on='Area_Description', right_on='Area Unit', how='outer')


In [171]:
merged_final = merged_final.drop_duplicates(subset='Area_Description', keep='first')

In [172]:
merged_final.head(30)

Unnamed: 0,Area_Description,2018.0,2018 Census,0-4 years,10-14 years,15-19 years,20-24 years,25-29 years,30-34 years,35-39 years,40-44 years,45-49 years,50-54 years,55-59 years,60-64 years,65-69 years,70-74 years,75-79 years,80-84 years,Median age(17),Under 15 years,15-29 years,30-64 years,Less than one year,1 year,2 years,3 years,4 years,5-9 years,10-19 years,No children,One child,Two children,Three children,Four children,Five children,No qualification,Level 1 certificate,Level 2 certificate,Level 3 certificate,Level 4 certificate,Level 5 diploma,Level 6 diploma,Bachelor degree and Level 7 qualification,Post-graduate and honours degrees,Masters degree,Doctorate degree,"$5,000 or less","$5,001 – $10,000","$10,001 – $20,000","$20,001 – $30,000","$30,001 – $50,000","$50,001 – $70,000",Median personal income($),Employed Full time,Employed Part time,Unemployed,Not in the Labour Force,Total stated,Managers,Professionals,Technicians and Trades Workers,Community and Personal Service Workers,Clerical and Administrative Workers,Sales Workers,Machinery Operators and Drivers,Manufacturing,Electricity Gas Water and Waste Services,Construction,Wholesale Trade,Retail Trade,Accommodation and Food Services,Transport Postal and Warehousing,Information Media and Telecommunications,Financial and Insurance Services,Rental Hiring and Real Estate Services,Professional Scientific and Technical Services,Administrative and Support Services,Public Administration and Safety,Education and Training,Health Care and Social Assistance,Arts and Recreation Services,Work at home,"Drive a private car, truck or van","Drive a company car, truck or van","Passenger in a car, truck, van or company bus",Train,Bicycle,Walk or jog,Area Unit,"Abduction, Harassment and Other Related Offences Against a Person",Acts Intended to Cause Injury,"Robbery, Extortion and Related Offences",Sexual Assault and Related Offences,Theft and Related Offences,"Unlawful Entry With Intent/Burglary, Break and Enter"
0,Akoranga,405.0,1167.0,42.0,27.0,153.0,105.0,84.0,57.0,45.0,33.0,48.0,39.0,36.0,39.0,33.0,48.0,69.0,87.0,43.9,108.0,342.0,291.0,21.0,18.0,18.0,18.0,12.0,48.0,126.0,237.0,66.0,111.0,87.0,36.0,24.0,147.0,87.0,78.0,195.0,51.0,36.0,75.0,132.0,45.0,36.0,6.0,168.0,78.0,201.0,267.0,192.0,81.0,22100.0,285.0,129.0,51.0,591.0,1056.0,48.0,96.0,45.0,72.0,42.0,60.0,9.0,21.0,3.0,33.0,18.0,45.0,51.0,15.0,9.0,12.0,3.0,51.0,24.0,15.0,27.0,48.0,18.0,12.0,135.0,21.0,9.0,0.0,3.0,36.0,,,,,,,
1,Albany,2187.0,6741.0,315.0,375.0,498.0,636.0,441.0,462.0,471.0,426.0,474.0,411.0,351.0,309.0,273.0,258.0,228.0,171.0,106.0,1077.0,1572.0,2895.0,318.0,333.0,255.0,156.0,138.0,567.0,1164.0,939.0,423.0,852.0,360.0,99.0,48.0,588.0,426.0,381.0,666.0,321.0,231.0,252.0,1014.0,327.0,354.0,54.0,1143.0,303.0,816.0,726.0,1029.0,678.0,73800.0,2370.0,813.0,201.0,2286.0,5667.0,639.0,843.0,309.0,255.0,402.0,423.0,126.0,234.0,18.0,255.0,273.0,384.0,225.0,102.0,81.0,144.0,78.0,444.0,138.0,111.0,276.0,240.0,54.0,168.0,1521.0,381.0,105.0,3.0,15.0,93.0,Albany,1.0,80.0,15.0,4.0,734.0,63.0
2,Albany Heights,1053.0,3153.0,210.0,165.0,189.0,375.0,300.0,312.0,288.0,195.0,186.0,201.0,186.0,153.0,93.0,45.0,33.0,18.0,32.4,567.0,864.0,1521.0,132.0,177.0,114.0,120.0,102.0,393.0,576.0,486.0,234.0,312.0,117.0,30.0,6.0,150.0,147.0,150.0,303.0,120.0,114.0,123.0,585.0,183.0,174.0,15.0,507.0,132.0,291.0,225.0,480.0,363.0,36700.0,1371.0,390.0,111.0,714.0,2586.0,402.0,495.0,162.0,141.0,213.0,210.0,54.0,111.0,9.0,144.0,144.0,231.0,111.0,48.0,48.0,99.0,51.0,285.0,87.0,57.0,138.0,108.0,33.0,72.0,507.0,111.0,33.0,0.0,3.0,21.0,,,,,,,
3,Alfriston,1059.0,3807.0,243.0,237.0,225.0,285.0,309.0,303.0,249.0,198.0,252.0,228.0,216.0,204.0,129.0,129.0,102.0,111.0,36.0,726.0,816.0,1656.0,45.0,54.0,60.0,51.0,48.0,294.0,699.0,408.0,219.0,384.0,240.0,105.0,54.0,561.0,261.0,207.0,297.0,204.0,144.0,138.0,492.0,147.0,99.0,15.0,432.0,144.0,522.0,381.0,582.0,495.0,32300.0,1572.0,336.0,93.0,1077.0,3078.0,363.0,435.0,201.0,138.0,252.0,234.0,147.0,258.0,12.0,171.0,147.0,204.0,105.0,159.0,21.0,54.0,42.0,150.0,87.0,69.0,129.0,162.0,18.0,63.0,693.0,234.0,39.0,21.0,0.0,9.0,,,,,,,
4,Algies Bay-Scotts Landing,498.0,1185.0,45.0,42.0,45.0,27.0,27.0,48.0,33.0,48.0,72.0,60.0,90.0,117.0,135.0,126.0,78.0,51.0,60.2,135.0,99.0,471.0,6.0,12.0,6.0,9.0,9.0,33.0,48.0,111.0,45.0,171.0,111.0,39.0,18.0,189.0,150.0,102.0,60.0,99.0,42.0,72.0,138.0,51.0,39.0,12.0,93.0,42.0,222.0,174.0,204.0,156.0,29500.0,327.0,168.0,24.0,528.0,1050.0,108.0,126.0,57.0,54.0,60.0,39.0,18.0,51.0,3.0,60.0,21.0,48.0,27.0,12.0,12.0,9.0,15.0,60.0,18.0,6.0,42.0,63.0,6.0,72.0,195.0,45.0,12.0,0.0,3.0,6.0,,,,,,,
5,Anselmi Ridge,855.0,2448.0,138.0,159.0,120.0,93.0,87.0,108.0,150.0,171.0,168.0,129.0,123.0,87.0,93.0,144.0,174.0,150.0,45.7,474.0,300.0,939.0,21.0,12.0,15.0,24.0,15.0,96.0,171.0,198.0,93.0,351.0,216.0,102.0,24.0,393.0,228.0,201.0,165.0,171.0,99.0,138.0,228.0,105.0,48.0,0.0,177.0,51.0,303.0,348.0,321.0,264.0,35900.0,873.0,228.0,27.0,840.0,1968.0,276.0,279.0,123.0,99.0,132.0,75.0,57.0,120.0,6.0,123.0,90.0,87.0,33.0,72.0,12.0,45.0,18.0,117.0,39.0,69.0,84.0,84.0,15.0,33.0,105.0,42.0,9.0,0.0,0.0,6.0,,,,,,,
6,Anzac Avenue,1458.0,2748.0,60.0,15.0,96.0,507.0,615.0,453.0,261.0,126.0,138.0,90.0,111.0,93.0,81.0,45.0,24.0,12.0,30.6,90.0,1215.0,1269.0,309.0,270.0,177.0,141.0,126.0,294.0,285.0,858.0,111.0,81.0,33.0,9.0,6.0,90.0,75.0,93.0,264.0,93.0,99.0,99.0,684.0,276.0,288.0,72.0,387.0,177.0,369.0,228.0,504.0,384.0,37200.0,1527.0,363.0,135.0,630.0,2655.0,303.0,711.0,153.0,243.0,213.0,171.0,24.0,63.0,6.0,51.0,78.0,123.0,303.0,48.0,60.0,117.0,51.0,399.0,120.0,75.0,189.0,105.0,60.0,51.0,201.0,48.0,24.0,12.0,9.0,453.0,,,,,,,
7,Aorere,1962.0,8718.0,648.0,723.0,759.0,780.0,741.0,702.0,630.0,507.0,516.0,480.0,408.0,339.0,243.0,174.0,138.0,66.0,86.7,2199.0,2286.0,3588.0,114.0,141.0,147.0,117.0,93.0,606.0,1236.0,888.0,360.0,540.0,378.0,246.0,123.0,1455.0,570.0,573.0,840.0,474.0,321.0,189.0,540.0,108.0,72.0,6.0,1326.0,345.0,1068.0,750.0,1653.0,954.0,77400.0,3312.0,750.0,366.0,2091.0,6519.0,369.0,516.0,525.0,438.0,498.0,417.0,675.0,633.0,21.0,330.0,312.0,315.0,237.0,456.0,60.0,72.0,57.0,216.0,303.0,207.0,255.0,366.0,39.0,69.0,1653.0,201.0,243.0,45.0,12.0,45.0,Aorere,2.0,8.0,1.0,1.0,68.0,31.0
8,Ararimu,675.0,2124.0,135.0,180.0,132.0,90.0,54.0,105.0,129.0,180.0,198.0,201.0,198.0,126.0,75.0,66.0,48.0,15.0,42.1,486.0,279.0,1137.0,6.0,9.0,6.0,12.0,9.0,39.0,138.0,201.0,99.0,252.0,141.0,42.0,9.0,219.0,192.0,168.0,159.0,201.0,102.0,84.0,207.0,96.0,54.0,9.0,198.0,66.0,174.0,153.0,243.0,282.0,48600.0,1020.0,237.0,33.0,345.0,1638.0,351.0,267.0,174.0,93.0,153.0,69.0,54.0,147.0,9.0,195.0,111.0,72.0,27.0,57.0,9.0,24.0,24.0,114.0,63.0,51.0,105.0,72.0,15.0,147.0,429.0,168.0,18.0,12.0,0.0,12.0,,,,,,,
9,Ardmore,426.0,1386.0,78.0,87.0,75.0,84.0,87.0,87.0,66.0,93.0,114.0,117.0,108.0,90.0,84.0,57.0,27.0,15.0,41.1,273.0,249.0,669.0,12.0,9.0,6.0,3.0,3.0,27.0,66.0,144.0,69.0,129.0,81.0,36.0,12.0,162.0,138.0,114.0,126.0,120.0,57.0,51.0,129.0,39.0,33.0,9.0,138.0,42.0,165.0,114.0,186.0,174.0,40700.0,633.0,150.0,45.0,282.0,1113.0,210.0,150.0,108.0,57.0,96.0,48.0,42.0,72.0,9.0,111.0,66.0,39.0,12.0,45.0,12.0,21.0,24.0,78.0,36.0,30.0,54.0,48.0,27.0,120.0,297.0,99.0,18.0,9.0,0.0,21.0,Ardmore,0.0,8.0,1.0,1.0,58.0,27.0


Unnamed: 0,Address,Price,Info_1,Info_2,Info_3,Info_4,Info_5,Info_6,Info_7,Info_8,Error
0,"20/9A Esplanade Road, Mount Eden, Auckland City",$360 per week,Unit,1.0,1.0,1,,,,,
1,"46 Montrose Street, Point Chevalier, Auckland ...",$650 per week,House,2.0,1.0,2,,,,,
2,"1/351 Mount Eden Road, Mount Eden, Auckland City",$530 per week,House,2.0,1.0,,,,1 Separate toilet,,
3,"64 School Road, Kingsland, Auckland City",$750 per week,House,3.0,1.0,2,,,,,
4,"2E/25 Rutland Street, Auckland Central, Auckla...",$370 per week,Apartment,1.0,1.0,Non furnished,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
291,"16/39 Pitt Steet, Auckland Central, Auckland City",$895 per week,Townhouse,3.0,1.0,2,Non furnished,1 ensuite,,,
292,"203/70 Anzac Avenue, Auckland Central, Aucklan...",$690 per week,Apartment,2.0,1.0,Furnished,,,,,
293,"3/12/91 St Georges Bay Road, Parnell, Auckland...","$1,200 per week",House,3.0,1.0,2,,,,,
294,"3/9 Sentinel Road, Herne Bay, Auckland City",$520 per week,House,1.0,1.0,Furnished,,,,,


In [251]:
df = df_corrected

In [262]:
cols = ['Suburb'] + [col for col in df if col != 'Suburb']
df = df[cols]
df['Suburb'] = df['Suburb'].replace('Auckland Central', 'Auckland')


In [263]:
df.head()

Unnamed: 0,Suburb,Address,Price,Bedroom,Bathroom,Garage,Other Feature 1,Other Feature 2,Furniture,Type_Apartment,Type_Carpark,Type_House,Type_Studio,Type_Townhouse,Type_Unit,Type_Unknown
0,Mount Eden,"20/9A Esplanade Road, Mount Eden, Auckland City",360,1,1,1.0,,,0,0,0,0,0,0,1,0
1,Point Chevalier,"46 Montrose Street, Point Chevalier, Auckland ...",650,2,1,2.0,,,0,0,0,1,0,0,0,0
2,Mount Eden,"1/351 Mount Eden Road, Mount Eden, Auckland City",530,2,1,,,,0,0,0,1,0,0,0,0
3,Kingsland,"64 School Road, Kingsland, Auckland City",750,3,1,2.0,,,0,0,0,1,0,0,0,0
4,Auckland,"2E/25 Rutland Street, Auckland Central, Auckla...",370,1,1,,,,0,1,0,0,0,0,0,0


In [264]:
df = df.merge(merged_final, left_on='Suburb', right_on='Area_Description', how='left')

In [266]:
df.drop('Area_Description', axis=1, inplace=True)
df = df.sort_values(by='Suburb')

In [267]:
df.head()

Unnamed: 0,Suburb,Address,Price,Bedroom,Bathroom,Garage,Other Feature 1,Other Feature 2,Furniture,Type_Apartment,Type_Carpark,Type_House,Type_Studio,Type_Townhouse,Type_Unit,Type_Unknown,2018.0,2018 Census,0-4 years,10-14 years,15-19 years,20-24 years,25-29 years,30-34 years,35-39 years,40-44 years,45-49 years,50-54 years,55-59 years,60-64 years,65-69 years,70-74 years,75-79 years,80-84 years,Median age(17),Under 15 years,15-29 years,30-64 years,Less than one year,1 year,2 years,3 years,4 years,5-9 years,10-19 years,No children,One child,Two children,Three children,Four children,Five children,No qualification,Level 1 certificate,Level 2 certificate,Level 3 certificate,Level 4 certificate,Level 5 diploma,Level 6 diploma,Bachelor degree and Level 7 qualification,Post-graduate and honours degrees,Masters degree,Doctorate degree,"$5,000 or less","$5,001 – $10,000","$10,001 – $20,000","$20,001 – $30,000","$30,001 – $50,000","$50,001 – $70,000",Median personal income($),Employed Full time,Employed Part time,Unemployed,Not in the Labour Force,Total stated,Managers,Professionals,Technicians and Trades Workers,Community and Personal Service Workers,Clerical and Administrative Workers,Sales Workers,Machinery Operators and Drivers,Manufacturing,Electricity Gas Water and Waste Services,Construction,Wholesale Trade,Retail Trade,Accommodation and Food Services,Transport Postal and Warehousing,Information Media and Telecommunications,Financial and Insurance Services,Rental Hiring and Real Estate Services,Professional Scientific and Technical Services,Administrative and Support Services,Public Administration and Safety,Education and Training,Health Care and Social Assistance,Arts and Recreation Services,Work at home,"Drive a private car, truck or van","Drive a company car, truck or van","Passenger in a car, truck, van or company bus",Train,Bicycle,Walk or jog,Area Unit,"Abduction, Harassment and Other Related Offences Against a Person",Acts Intended to Cause Injury,"Robbery, Extortion and Related Offences",Sexual Assault and Related Offences,Theft and Related Offences,"Unlawful Entry With Intent/Burglary, Break and Enter"
86,Auckland,"9/3 Eden Crescent, Auckland Central, Auckland ...",620,2,1,1.0,,,0,1,0,0,0,0,0,0,213.0,735.0,66.0,33.0,39.0,69.0,75.0,69.0,60.0,36.0,45.0,60.0,57.0,39.0,24.0,9.0,12.0,3.0,64.8,141.0,180.0,366.0,21.0,24.0,9.0,3.0,15.0,39.0,33.0,81.0,30.0,36.0,15.0,21.0,3.0,69.0,42.0,45.0,60.0,54.0,18.0,21.0,72.0,27.0,18.0,3.0,78.0,30.0,87.0,69.0,150.0,96.0,66800.0,351.0,69.0,21.0,153.0,594.0,57.0,90.0,45.0,42.0,51.0,33.0,42.0,45.0,3.0,33.0,18.0,30.0,42.0,48.0,9.0,6.0,9.0,36.0,21.0,21.0,33.0,21.0,12.0,30.0,120.0,39.0,9.0,0.0,3.0,36.0,Auckland,1.0,262.0,42.0,11.0,1628.0,219.0
222,Auckland,"2B/2 White Street, Auckland Central, Auckland ...",Auckland Central insights,2,1,,,,1,1,0,0,0,0,0,0,213.0,735.0,66.0,33.0,39.0,69.0,75.0,69.0,60.0,36.0,45.0,60.0,57.0,39.0,24.0,9.0,12.0,3.0,64.8,141.0,180.0,366.0,21.0,24.0,9.0,3.0,15.0,39.0,33.0,81.0,30.0,36.0,15.0,21.0,3.0,69.0,42.0,45.0,60.0,54.0,18.0,21.0,72.0,27.0,18.0,3.0,78.0,30.0,87.0,69.0,150.0,96.0,66800.0,351.0,69.0,21.0,153.0,594.0,57.0,90.0,45.0,42.0,51.0,33.0,42.0,45.0,3.0,33.0,18.0,30.0,42.0,48.0,9.0,6.0,9.0,36.0,21.0,21.0,33.0,21.0,12.0,30.0,120.0,39.0,9.0,0.0,3.0,36.0,Auckland,1.0,262.0,42.0,11.0,1628.0,219.0
109,Auckland,"406/138 Anzac Ave, Auckland Central, Auckland ...",320,1,1,,1 ensuite,,0,1,0,0,0,0,0,0,213.0,735.0,66.0,33.0,39.0,69.0,75.0,69.0,60.0,36.0,45.0,60.0,57.0,39.0,24.0,9.0,12.0,3.0,64.8,141.0,180.0,366.0,21.0,24.0,9.0,3.0,15.0,39.0,33.0,81.0,30.0,36.0,15.0,21.0,3.0,69.0,42.0,45.0,60.0,54.0,18.0,21.0,72.0,27.0,18.0,3.0,78.0,30.0,87.0,69.0,150.0,96.0,66800.0,351.0,69.0,21.0,153.0,594.0,57.0,90.0,45.0,42.0,51.0,33.0,42.0,45.0,3.0,33.0,18.0,30.0,42.0,48.0,9.0,6.0,9.0,36.0,21.0,21.0,33.0,21.0,12.0,30.0,120.0,39.0,9.0,0.0,3.0,36.0,Auckland,1.0,262.0,42.0,11.0,1628.0,219.0
110,Auckland,"121/72 Nelson St , Auckland Central, Auckland ...",470,2,1,,,,0,1,0,0,0,0,0,0,213.0,735.0,66.0,33.0,39.0,69.0,75.0,69.0,60.0,36.0,45.0,60.0,57.0,39.0,24.0,9.0,12.0,3.0,64.8,141.0,180.0,366.0,21.0,24.0,9.0,3.0,15.0,39.0,33.0,81.0,30.0,36.0,15.0,21.0,3.0,69.0,42.0,45.0,60.0,54.0,18.0,21.0,72.0,27.0,18.0,3.0,78.0,30.0,87.0,69.0,150.0,96.0,66800.0,351.0,69.0,21.0,153.0,594.0,57.0,90.0,45.0,42.0,51.0,33.0,42.0,45.0,3.0,33.0,18.0,30.0,42.0,48.0,9.0,6.0,9.0,36.0,21.0,21.0,33.0,21.0,12.0,30.0,120.0,39.0,9.0,0.0,3.0,36.0,Auckland,1.0,262.0,42.0,11.0,1628.0,219.0
221,Auckland,"72 Nelson Street, Auckland Central, Auckland City",375,1,1,,,,0,1,0,0,0,0,0,0,213.0,735.0,66.0,33.0,39.0,69.0,75.0,69.0,60.0,36.0,45.0,60.0,57.0,39.0,24.0,9.0,12.0,3.0,64.8,141.0,180.0,366.0,21.0,24.0,9.0,3.0,15.0,39.0,33.0,81.0,30.0,36.0,15.0,21.0,3.0,69.0,42.0,45.0,60.0,54.0,18.0,21.0,72.0,27.0,18.0,3.0,78.0,30.0,87.0,69.0,150.0,96.0,66800.0,351.0,69.0,21.0,153.0,594.0,57.0,90.0,45.0,42.0,51.0,33.0,42.0,45.0,3.0,33.0,18.0,30.0,42.0,48.0,9.0,6.0,9.0,36.0,21.0,21.0,33.0,21.0,12.0,30.0,120.0,39.0,9.0,0.0,3.0,36.0,Auckland,1.0,262.0,42.0,11.0,1628.0,219.0


In [273]:
df.to_csv('prediction.csv', index=False)