<a href="https://colab.research.google.com/github/tchintchie/whgscraper/blob/master/soup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [96]:
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup as bs
import pandas as pd

In [97]:
def simple_get(url, timeout=30):
    """
    Fetch `url` with an HTTP GET request and return the raw response body.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Passed straight to `requests.get`. Without a timeout a stalled
        server would block this call (and the notebook cell) indefinitely.
        Defaults to 30 seconds; pass None to wait without limit.

    Returns
    -------
    bytes or None
        `resp.content` (undecoded bytes) when `is_good_response` accepts the
        response. None when the response is rejected or when a
        `RequestException` is raised; in the latter case the error is
        reported through `log_error`.
    """
    try:
        # closing() guarantees the streamed connection is released even
        # when the response is rejected and its body is never read.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None

    except RequestException as e:
        log_error(f'Error during requests to {url} : {str(e)}')
        return None

In [98]:

def is_good_response(resp):
    """
    Return True if the response looks like a successful HTML page.

    A response qualifies when its status code is 200 and its Content-Type
    header contains "html" (case-insensitive). A response that carries no
    Content-Type header at all is treated as not-HTML and yields False.

    Parameters
    ----------
    resp : object
        Anything exposing `status_code` and a mapping-like `headers`
        attribute with a `.get()` method.

    Returns
    -------
    bool
    """
    # .get() rather than [...]: a missing header must mean "not HTML",
    # not a KeyError escaping to the caller.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type

In [99]:

def log_error(e):
    """
    Report an error.

    The value is written to standard output with print(); replace the
    body if errors should go somewhere else (a log file, a logger, ...).
    """
    print(e)

In [100]:
# Landing page of the listing site to scrape.
url = "https://www.oehboersen.at"
html = simple_get(url)
# simple_get returns None when the request fails or the response is not
# HTML; stop here with a clear message instead of handing None to the
# parser in a later cell.
if html is None:
    raise RuntimeError(f"Could not fetch HTML from {url}")

In [101]:
# Parse the downloaded page with the html5lib parser; `oeh` is the
# notebook-level soup object that the later cells query.
oeh = bs(html, "html5lib")

In [102]:
# String content of the page's <title> element.
# NOTE(review): no later cell shown here reads this variable.
title = oeh.title.string

In [103]:
# Collect every <div class="property-content"> element and show the first
# one as the cell output to inspect its markup.
rows = oeh.find_all("div", class_="property-content")
rows[0]

<div class="property-content">
		
				<div class="row visible-xs visible-sm">
			<span class="type">2-Zimmer Wohnung</span> <span>·</span> <span class="status">Miete</span>		</div>
		
		<div class="row headlines">
			<a href="https://www.oehboersen.at/property/2-zimmer-wohnung-ueber-den-daechern-der-stadt/" tabindex="0"><h3 class="title col-sm-8">2.Zimmer.Wohnung.Über den Dächern der Stadt</h3> </a>
			<div class="price-tag col-sm-4"><h4 class="price text-primary">€ 597,- <span class="BKInfo">(inkl. BK)</span></h4>
								<h4 class="flaeche">63 m<sup>2</sup></h4>
							</div>
		</div>
		
				
		<div class="row">
			<a href="https://www.oehboersen.at/property/2-zimmer-wohnung-ueber-den-daechern-der-stadt/"><h4 class="address col-sm-8"><strong>Altstadt</strong><br/><br/></h4></a>
			
		</div>
		

			
				<div class="property-meta clearfix">
							<div>
					<div class="meta-title"><i class="fa fa-expand"></i></div>
					<div class="meta-data" data-toggle="tooltip" title="Fläche des Mi

In [129]:
def scrape(soup=None):
    """
    Extract one table row per `div.property-content` element of a parsed page.

    Parameters
    ----------
    soup : BeautifulSoup-like object, optional
        Parsed document to search; it must provide
        `find_all("div", class_=...)`. When omitted, the notebook-level
        `oeh` object is used, so the existing `scrape()` call keeps working.

    Returns
    -------
    pandas.DataFrame
        Columns `title`, `type`, `price`, `bk`, `area`, one row per matching
        div. A field whose element is not found in a div is filled with the
        string "NA" instead of raising AttributeError.
    """
    if soup is None:
        # Fall back to the page parsed earlier in the notebook.
        soup = oeh

    def _text_or_na(node):
        """Return the node's text, or "NA" when the lookup found nothing."""
        return "NA" if node is None else node.text

    records = []
    for div in soup.find_all("div", class_="property-content"):
        price_tag = div.find("h4", class_="price text-primary")
        # The price is the first node inside the <h4>, i.e. whatever comes
        # before the nested BKInfo <span>. `.next_element` is the documented
        # name for the legacy `.next` alias. str() stores a plain string in
        # the frame rather than a node that is still tied to the parse tree.
        if price_tag is None or price_tag.next_element is None:
            price = "NA"
        else:
            price = str(price_tag.next_element)

        records.append({
            "title": _text_or_na(div.find("h3", class_="title col-sm-8")),
            "type": _text_or_na(div.find("span", class_="type")),
            "price": price,
            "bk": _text_or_na(div.find("span", class_="BKInfo")),
            "area": _text_or_na(
                div.find("div", attrs={"title": "Fläche des Mietgegenstandes"})
            ),
        })

    # Passing `columns` keeps the column set and order fixed even when no
    # div matched and `records` is empty.
    return pd.DataFrame(records, columns=["title", "type", "price", "bk", "area"])

In [130]:
# Run the scraper; the returned DataFrame is displayed as the cell output.
scrape()

Unnamed: 0,title,type,price,bk,area
0,2.Zimmer.Wohnung.Über den Dächern der Stadt,2-Zimmer Wohnung,"€ 597,-",(inkl. BK),63 m2
1,Gemütliche Bleibe in Sportlicher 3er WG,Zimmer in 3er WG,"€ 280,-",(inkl. BK),8 m2
2,Roomie in 5er WG,Zimmer in 5er WG,"€ 430,-",(inkl. BK),13.5 m2
3,Schöne 3 Zimmerwohnung in Pradl als 3er WG an ...,3-Zimmer Wohnung,"€ 1200,-",(inkl. BK),64 m2
4,"Zwischenmiete August+September, 22qm Zimmer im...",Zimmer in 3er WG,"€ 440,-",(inkl. BK),
5,1-Zi.-Wohnung mit Westbalkon in Innsbruck,Wohnung,"€ 720,-",(inkl. BK),35 m2
6,„Zimmer in Veggi-WG ;-) in Arzl“,Zimmer in 3er WG,"€ 410,-",(inkl. BK),18 m2
7,Zimmer in 2er WG fürs Wintersemester 2020/21,Zimmer in 2er WG,"€ 480,-",(inkl. BK),20 m2
8,Helle 2- Zimmerwohnung mit Balkon – Uninähe,2-Zimmer Wohnung,"€ 1080,-",(inkl. BK),55 m2
9,12 qm-Zimmer im grünen Saggen (5er-WG),Zimmer in WG,"€ 328,50",(inkl. BK),12 m2
