# Web Scraping - Hamburg

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import re
import urllib.parse

In [2]:
# Entry page of the rental listings for Hamburg on immobilo.de.
website = "https://www.immobilo.de/mieten/hamburg"

In [3]:
# Download the first results page. Without an explicit timeout requests waits
# for as long as the server keeps the connection open, so a stalled server
# would block the notebook indefinitely.
response = requests.get(website, timeout=30)
# Echo the HTTP status so a failed download is visible before parsing.
response.status_code

200

In [4]:
# Parse the downloaded HTML with the built-in parser, then collect one
# container <div> per listing on the page.
soup = BeautifulSoup(response.content, features='html.parser')
results = soup.find_all('div', class_='item__main-info clearfix')

In [5]:
# Title: text of the first listing's headline link, surrounding whitespace removed.
title_link = results[0].find('a', class_='js-item-title-link ci-search-result__link')
title_link.get_text().strip()

'Apartment in Hamburg, Lindleystraße'

In [6]:
# Location: text of the first listing's locality element, whitespace-trimmed.
locality_div = results[0].find('div', attrs={'class': 'item__locality'})
locality_div.get_text().strip()

'20539 Hamburg'

In [7]:
# Rooms: text of the rooms spec with the trailing ' Zi.' unit characters trimmed.
rooms_text = results[0].find('div', class_='item__spec item-spec-rooms').get_text()
rooms_text.rstrip(' Zi.')

'1'

In [8]:
# Area: text of the area spec, flattened to one line, with the unit removed.
area_text = results[0].find('div', {'class': 'item__spec item-spec-area'}).get_text().strip().replace('\n', '')
# Remove the unit as a whole suffix. str.rstrip(' m2') treats its argument as
# a set of characters, so it would also strip trailing '2's that belong to
# the number itself (e.g. '82 m2' -> '8').
re.sub(r'\s*m2$', '', area_text)

'84'

In [9]:
# Price: drop non-breaking spaces and the trailing euro sign, then convert the
# German number format (dot = thousands, comma = decimals) to a plain decimal string.
price_text = results[0].find('div', class_='item__spec item-spec-price').get_text()
price_text = price_text.replace('\xa0', '').rstrip(' €')
price_text.replace('.', '').replace(',', '.')

'2900.00'

In [10]:
# Price type: whitespace-trimmed text of the note shown with the price.
price_type_div = results[0].find('div', class_='small text-muted item-spec-price-type')
price_type_div.get_text().strip()

'Kaltmiete, zzgl. NK'

In [11]:
# Apartment type: find the text node that carries the 'Immobilientyp:' label
# and keep only the value that follows it.
# `string=` is the current name of the argument formerly called `text=`.
apartment_type_text = results[0].find(string=re.compile('Immobilientyp:')).strip().replace('\n', '')
# Drop the label as a whole prefix. str.lstrip('Immobilientyp:') treats its
# argument as a set of characters and could also eat leading letters of the
# value when no whitespace separates it from the label.
if apartment_type_text.startswith('Immobilientyp:'):
    apartment_type_text = apartment_type_text[len('Immobilientyp:'):]
apartment_type_text.lstrip()

'Sonstiges, Sonstiges Wohnen'

In [12]:
# Relative link: href attribute of the first listing's headline link.
listing_link = results[0].find('a', class_='js-item-title-link ci-search-result__link')
relative_url = listing_link.get('href')

In [13]:
# Site root that the listing hrefs are resolved against.
root_url = 'https://www.immobilo.de'
# Resolve the href against the root instead of concatenating strings: urljoin
# yields the same URL for a '/path' href, and also stays correct when the href
# is already absolute or lacks the leading slash.
combine_url = urllib.parse.urljoin(root_url, relative_url)

In [14]:
# Iteration through multiple pages
#
# Requests every result page for Hamburg and appends one entry per listing to
# the parallel lists below, so index k in each list describes the same
# listing. A field that is absent from a listing is stored as '' to keep the
# lists aligned.

FIRST_PAGE = 1
LAST_PAGE = 215  # last result page requested
REQUEST_TIMEOUT = 30  # seconds before a stalled request is abandoned
LISTINGS_URL = 'https://www.immobilo.de/mieten/hamburg?page='
TITLE_LINK_CLASS = 'js-item-title-link ci-search-result__link'
APARTMENT_TYPE_LABEL = 'Immobilientyp:'
# Compiled once here instead of once per listing inside the loop.
APARTMENT_TYPE_PATTERN = re.compile(APARTMENT_TYPE_LABEL)

title_list = []
location_list = []
room_list = []
area_list = []
price_list = []
price_type_list = []
apartment_type_list = []
relative_link_list = []


def _strip_suffix(text, suffix):
    """Return text without a trailing suffix and without surrounding whitespace.

    Unlike str.rstrip(suffix), which strips any trailing characters contained
    in its argument, this removes the suffix only as a whole, so '82 m2'
    becomes '82' rather than '8'.
    """
    text = text.strip()
    if suffix and text.endswith(suffix):
        text = text[:-len(suffix)]
    return text.strip()


def _strip_prefix(text, prefix):
    """Return text without a leading prefix and without surrounding whitespace.

    Unlike str.lstrip(prefix), the prefix is removed only as a whole.
    """
    text = text.strip()
    if prefix and text.startswith(prefix):
        text = text[len(prefix):]
    return text.strip()


def _element_text(result, tag, css_class):
    """Return the text of the first matching element inside result, or ''."""
    node = result.find(tag, {'class': css_class})
    return '' if node is None else node.get_text()


def _parse_listing(result):
    """Extract the fields of one listing container as a dict.

    Every value is a cleaned string; a field whose element is missing from
    the listing is returned as ''.
    """
    title_link = result.find('a', {'class': TITLE_LINK_CLASS})
    # `string=` is the current name of the argument formerly called `text=`.
    type_node = result.find(string=APARTMENT_TYPE_PATTERN)

    rooms = _element_text(result, 'div', 'item__spec item-spec-rooms')
    area = _element_text(result, 'div', 'item__spec item-spec-area')
    price = _element_text(result, 'div', 'item__spec item-spec-price')

    if type_node is None:
        apartment_type = ''
    else:
        apartment_type = _strip_prefix(type_node.strip().replace('\n', ''), APARTMENT_TYPE_LABEL)

    return {
        'title': '' if title_link is None else title_link.get_text().strip(),
        'location': _element_text(result, 'div', 'item__locality').strip(),
        'rooms': _strip_suffix(rooms, 'Zi.'),
        'area': _strip_suffix(area.strip().replace('\n', ''), 'm2'),
        # German number format: '.' separates thousands, ',' the decimals.
        'price': _strip_suffix(price.replace('\xa0', ''), '€').replace('.', '').replace(',', '.'),
        'price_type': _element_text(result, 'div', 'small text-muted item-spec-price-type').strip(),
        'apartment_type': apartment_type,
        'relative_link': '' if title_link is None else title_link.get('href'),
    }


for i in range(FIRST_PAGE, LAST_PAGE + 1):

    website = LISTINGS_URL + str(i)

    # A single unreachable page should not abort the whole run: report it and
    # move on to the next page.
    try:
        response = requests.get(website, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as error:
        print('Skipping page', i, '- request failed:', error)
        continue

    # An error page contains no listings; make the gap visible instead of
    # silently parsing it.
    if not response.ok:
        print('Skipping page', i, '- HTTP status', response.status_code)
        continue

    soup = BeautifulSoup(response.content, 'html.parser')

    results = soup.find_all('div', {'class': 'item__main-info clearfix'})

    # Loop through results
    for result in results:
        listing = _parse_listing(result)

        title_list.append(listing['title'])
        location_list.append(listing['location'])
        room_list.append(listing['rooms'])
        area_list.append(listing['area'])
        price_list.append(listing['price'])
        price_type_list.append(listing['price_type'])
        apartment_type_list.append(listing['apartment_type'])
        relative_link_list.append(listing['relative_link'])

In [15]:
# Resolve every scraped href against the site root to get absolute URLs.
url_list = [urllib.parse.urljoin(root_url, link) for link in relative_link_list]

In [16]:
# Assemble the scraped columns into one table; the lists are parallel, so
# each row describes a single listing.
columns = {
    'Title': title_list,
    'Location': location_list,
    'Rooms': room_list,
    'Area': area_list,
    'Price': price_list,
    'Price_type': price_type_list,
    'Apartment_type': apartment_type_list,
    'Link': url_list,
}
df = pd.DataFrame(columns)

In [17]:
# Write the table to an Excel workbook, leaving out the DataFrame's row index.
output_file = 'Apartment_Hamburg.xlsx'
df.to_excel(output_file, index=False)