In [1]:
import os

import googlemaps
import pandas as pd
import requests
from bs4 import BeautifulSoup

### Set your preferences here
- Rent: maximum rent you are willing to pay (in EUR)
- Travelling time: maximum travel time to the campus by public transport (in minutes)

Note that this notebook only outputs the matching offer IDs from the Munich student union website; you need to check all remaining details of each offer yourself.

Run the cells one after another, from top to bottom, to get correct results.

In [8]:
# Change these default values. Later cells read these names when filtering the offers.

rent_limit = 600  # maximum acceptable rent, in EUR
travelling_limit = 50  # maximum public-transport travel time, in minutes

# Password for getting the housing details.
# It is read from the STWM_PASSWORD environment variable instead of being written
# into the notebook, so the credential is not leaked when the notebook is shared or
# committed. The value is an empty string when the variable is not set.
password_werk = os.environ.get("STWM_PASSWORD", "")


In [9]:
# URL of the page that lists the rooms for rent
url = "https://www.studierendenwerk-muenchen-oberbayern.de/en/accommodation/private-accommodation-service/rooms-for-rent/"

# Fetch the page. The timeout keeps the cell from hanging forever if the server
# stops responding; raise_for_status() turns HTTP 4xx/5xx answers into exceptions.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the page content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

# Find the offers table; fail with a clear message if the page layout changed
table = soup.find('table', class_='c-stwm-privatzimmervermittlung__table')
if table is None:
    raise RuntimeError(
        "Offers table (class 'c-stwm-privatzimmervermittlung__table') not found "
        f"on {url}; the page layout may have changed."
    )

# All rows except the first one, which is the header
rows = table.find_all('tr')[1:]

# One list per offer, in the column order used for the DataFrame below
data = []

for row in rows:
    columns = row.find_all('td')

    # Skip rows that do not carry the six expected cells
    if len(columns) < 6:
        continue

    offer = columns[0].get_text(strip=True)

    # The address cell separates its parts with <br> tags; stripped_strings yields
    # each already-stripped text fragment, which are joined with single spaces.
    city_area_street = ' '.join(columns[1].stripped_strings)

    room_type = columns[2].get_text(strip=True)
    rent = columns[3].get_text(strip=True)
    num_of_rooms = columns[4].get_text(strip=True)
    size = columns[5].get_text(strip=True)

    data.append([offer, city_area_street, room_type, rent, num_of_rooms, size])


# Creating a DataFrame (all values are still the raw text scraped from the page)
data_housing = pd.DataFrame(data, columns=["Offer", "City area/Street", "Room Type", "Rent (€)", "Number of Rooms", "Size (m²)"])

In [10]:
# Cleaning the rent for use.
# Work on a copy so that data_housing keeps the raw scraped strings. With a plain
# alias the raw frame was overwritten with numbers, and re-running this cell then
# failed because the .str accessor does not work on a numeric column.
data = data_housing.copy()

# Drop the currency sign, remove '.' and turn the ',' into a '.' before parsing.
# regex=False makes every pattern a literal string on all pandas versions
# (otherwise '.' could be interpreted as the regex "any character").
rent_text = (
    data['Rent (€)']
    .str.replace('€', '', regex=False)
    .str.replace('.', '', regex=False)
    .str.replace(',', '.', regex=False)
    .str.strip()
)

# Values that still cannot be parsed become NaN instead of raising
data['Rent (€)'] = pd.to_numeric(rent_text, errors='coerce')

In [11]:
# Filtering: keep offers at or below the rent limit that are not sublets.
within_budget = data['Rent (€)'] <= rent_limit
# na=False: a missing room type counts as "not a sublet" instead of producing a
# NaN in the mask, which would make the ~ negation below fail.
is_sublet = data['Room Type'].str.contains('Subletting', case=False, na=False)
filtered_data = data[within_budget & ~is_sublet]

# The count is computed outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
options_available = filtered_data["Offer"].count()
print(f"Total Options Avaliable : {options_available}")

Total Options Avaliable : 14


In [12]:
def clean_address(address):
    """Insert a space before 'Str.' and 'Weg' in ``address``.

    The substitutions run in order: 'Str.' -> ' Str.', 'Weg' -> ' Weg', and finally
    every pair of spaces is replaced by a single space (one pass), which removes the
    double space created when the word was already preceded by a space.

    Args:
        address: Address text as a string.

    Returns:
        The address with the substitutions applied.
    """
    substitutions = (
        ('Str.', ' Str.'),
        ('Weg', ' Weg'),
        ('  ', ' '),
    )
    cleaned = address
    for needle, replacement in substitutions:
        cleaned = cleaned.replace(needle, replacement)
    return cleaned

def convert_time_to_numeric(time_str):
    """Convert a duration text such as '42 mins' into a whole number of minutes.

    Texts made of ``<number> <unit>`` pairs are summed, where the unit starts with
    'day', 'hour' or 'min' (case-insensitive): '1 hour 5 mins' -> 65. Any other text
    falls back to the integer value of its first word ('42' -> 42).

    Args:
        time_str: Duration text. ``None`` and other non-string values are accepted.

    Returns:
        The duration in minutes as an ``int``, or ``None`` when ``time_str`` is not a
        string, is blank, or does not start with a number (e.g. 'No route found').
    """
    # A failed lookup yields None rather than text; treat it as "no value"
    if not isinstance(time_str, str):
        return None

    tokens = time_str.split()
    if not tokens:
        return None

    unit_minutes = (('day', 24 * 60), ('hour', 60), ('min', 1))

    # Try to read the whole text as "<number> <unit>" pairs
    total_minutes = 0
    fully_parsed = len(tokens) % 2 == 0
    if fully_parsed:
        for amount, unit in zip(tokens[0::2], tokens[1::2]):
            factor = next(
                (minutes for prefix, minutes in unit_minutes if unit.lower().startswith(prefix)),
                None,
            )
            if factor is None or not amount.isdecimal():
                fully_parsed = False
                break
            total_minutes += int(amount) * factor
    if fully_parsed:
        return total_minutes

    # Fallback for any other shape: the leading integer, if there is one
    try:
        return int(tokens[0])
    except ValueError:
        return None

In [13]:
# Google Maps Time Avaliability
# The API key is taken from the GOOGLE_MAPS_API_KEY environment variable so that a
# real key never has to be pasted into (and saved with) the notebook. When the
# variable is not set, the previous placeholder value is used.
gmaps_api = os.environ.get("GOOGLE_MAPS_API_KEY", "your_api")
campus_address = "Geschwister-Scholl-Platz 1, 80539 München, Germany"

# Initiating gMaps
gmaps = googlemaps.Client(key=gmaps_api)

# Working table for the travel-time lookup: the rent-filtered offers with shorter
# column names. The index of filtered_data is carried over.
data_final = pd.DataFrame({
    "Offer": filtered_data["Offer"],
    "Rent": filtered_data["Rent (€)"],
    "Address": filtered_data["City area/Street"],
    "Type": filtered_data["Room Type"],
    "Size": filtered_data["Size (m²)"],
})

# Using gmaps
def get_travel_time(address):
    """Look up the transit travel time between the campus and ``address``.

    Sends one distance-matrix request through the module-level ``gmaps`` client with
    ``campus_address`` as origin, ``address`` as destination and mode "transit".

    Args:
        address: Destination address text.

    Returns:
        The duration text of the first result element when its status is 'OK',
        the string "No route found" for any other status, or ``None`` when the
        request or the reading of the response raised an exception (the error is
        printed).
    """
    try:
        response = gmaps.distance_matrix(origins=campus_address, destinations=address, mode="transit")
        # Single origin and single destination: only the first element is relevant
        element = response['rows'][0]['elements'][0]
        if element['status'] != 'OK':
            return "No route found"
        return element['duration']['text']
    except Exception as error:
        print(f"Error retrieving travel time for address '{address}': {error}")
        return None

# Apply functions to DataFrame: tidy each address, then look up its travel time.
# The cleaned addresses are only needed for the lookup, so they stay in a separate
# Series and data_final ends up with just the added 'Travel Time' column.
lookup_addresses = data_final['Address'].apply(clean_address)
data_final['Travel Time'] = lookup_addresses.apply(get_travel_time)

In [14]:
# The time filtering
# Drop offers whose travel-time text mentions hours, together with offers whose
# lookup returned no text at all. na=True marks missing values for removal; without
# it the mask contains NaN and the ~ negation raises a TypeError.
# NOTE: excluding every "hour" result assumes travelling_limit is below 60 minutes.
travel_text = data_final['Travel Time']
has_hours_or_missing = travel_text.str.contains('hour', case=False, na=True)

# .copy() gives an independent frame, so the column assignment below does not
# trigger SettingWithCopyWarning and data_final keeps its original text values.
final = data_final[~has_hours_or_missing].copy()

# Text that does not start with a number (e.g. "No route found") becomes NaN and
# therefore never satisfies the comparison with the limit.
final['Travel Time'] = pd.to_numeric(
    final['Travel Time'].apply(convert_time_to_numeric), errors='coerce'
)

# Total Offers
# Computed outside the f-string: reusing double quotes inside a double-quoted
# f-string is a SyntaxError on Python versions before 3.12.
offers_within_limit = final.loc[final['Travel Time'] < travelling_limit, 'Offer'].count()
print(f"Total Offers are : {offers_within_limit} ")

Total Offers are : 9 


In [15]:
# Offers whose travel time is below the configured limit (shown as the cell output)
final.loc[final["Travel Time"] < travelling_limit]

Unnamed: 0,Offer,Rent,Address,Type,Size,Travel Time
1,60907,600.0,Harlaching Grünwalderstrasse 227a,Separate room,15.0,42
2,60906,500.0,Neuried Kraillinger Weg,House,40.0,44
3,60905,390.0,Kirchheim Dr.-Johanna-Decker-Str.,Room in a shared flat,13.0,49
4,60904,470.0,Ramersdorf Balanstrasse,Separate room,20.0,49
5,60902,500.0,Thalkirchen Pogner Str.,Separate room,15.0,18
7,60900,600.0,Maxvorstadt Agnesstraße,Separate room,25.0,22
13,60894,420.0,Aubing Streitbergstrasse,Room in a shared flat,11.0,43
25,60872,200.0,Berg am Laim Plankensteinstr.,Room in a shared flat,18.5,38
26,60871,600.0,Schwabing Gunezrainerstr.,Separate room,21.0,14
