# Comparison of rent and sale prices

## 0) Imports

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import glob

%load_ext nb_black
%matplotlib inline

# Default size (width, height) applied to every matplotlib figure.
plt.rcParams["figure.figsize"] = (15, 10)
# Show floating point values with two decimals in pandas output.
pd.set_option("display.precision", 2)

<IPython.core.display.Javascript object>

## 1) Data cleaning

#### load data:

In [2]:
def _latest_file(pattern):
    """Return the lexicographically last path matching ``pattern``.

    Raises:
        FileNotFoundError: if no file matches, instead of the opaque
            IndexError that indexing an empty list would produce.
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"no data file matches {pattern!r}")
    return matches[-1]


# NOTE(review): taking the last sorted name presumably selects the newest
# export (assumes file names carry a sortable date) - confirm.
rent_filepath = _latest_file("../data/mieten*")
sale_filepath = _latest_file("../data/kaufen*")

# The files are semicolon-separated. low_memory=False makes pandas infer each
# column's dtype from the whole file rather than chunk by chunk.
rent_df = pd.read_csv(rent_filepath, sep=";", low_memory=False)
sale_df = pd.read_csv(sale_filepath, sep=";", low_memory=False)

<IPython.core.display.Javascript object>

#### remove unnecessary prefixes:

In [3]:
def _strip_prefixes(column_name):
    """Remove the "obj_", "ga_" and "geo_" markers from a column name.

    The markers are removed in that order and wherever they occur in the
    name, not only at its start.
    """
    for marker in ("obj_", "ga_", "geo_"):
        column_name = column_name.replace(marker, "")
    return column_name


# Apply the same renaming to both tables so they share column names.
rent_df.columns = [_strip_prefixes(col) for col in rent_df.columns]
sale_df.columns = [_strip_prefixes(col) for col in sale_df.columns]

<IPython.core.display.Javascript object>

#### remove duplicates:

In [4]:
# A listing is identified by its scoutId; keep the first row seen for each id.
rent_df = rent_df.drop_duplicates(subset=["scoutId"], keep="first")
sale_df = sale_df.drop_duplicates(subset=["scoutId"], keep="first")

<IPython.core.display.Javascript object>

#### remove columns with missing values:

In [5]:
# Keep only the columns in which every single row has a value.
rent_complete_cols = rent_df.columns[rent_df.notna().all()]
rent_df = rent_df[rent_complete_cols]

sale_complete_cols = sale_df.columns[sale_df.notna().all()]
sale_df = sale_df[sale_complete_cols]

<IPython.core.display.Javascript object>

## 2) Feature engineering

#### add links:

In [6]:
# Each listing page lives under this base URL followed by its scoutId.
EXPOSE_URL = "https://www.immobilienscout24.de/expose/"

for listings in (rent_df, sale_df):
    # scoutId is converted to text so it can be appended to the URL.
    listings["link"] = EXPOSE_URL + listings["scoutId"].astype(str)

<IPython.core.display.Javascript object>

#### add relative prices:

In [7]:
# Price per square metre of living space.
# A living space of 0 is treated as missing: dividing by it would give inf
# (or NaN for 0/0), and inf is not removed by dropna(), so it would distort
# the medians per zip code and the sorted listings computed from these columns.
rent_df["rent_m2"] = rent_df["baseRent"] / rent_df["livingSpace"].replace(
    0, np.nan
)
sale_df["price_m2"] = sale_df["purchasePrice"] / sale_df["livingSpace"].replace(
    0, np.nan
)

<IPython.core.display.Javascript object>

#### add return on investment based on median rents per zipCode:

In [8]:
# Median rent per square metre for every zip code that has rental listings.
zip_median_rent = (
    rent_df.groupby("zipCode")["rent_m2"].median().rename("rent_m2_zipCode")
)

# Attach that median to each sale listing through its zip code; listings in
# zip codes without rental data get NaN.
sale_df = sale_df.join(zip_median_rent, on="zipCode")

# Twelve months of the zip code's median rent relative to the asking price,
# both expressed per square metre.
sale_df = sale_df.assign(
    yearly_ROI=sale_df["rent_m2_zipCode"] * 12 / sale_df["price_m2"]
)

<IPython.core.display.Javascript object>

#### add combined dataframe with median rents and prices:

In [9]:
# Column used to bucket the listings.
group = "zipCode"

rent_by_group = rent_df.groupby(group)
sale_by_group = sale_df.groupby(group)

# (output column name, per-group statistic) pairs, in output column order.
summary_parts = [
    ("median_rent_m2", rent_by_group["rent_m2"].median()),
    ("num_rent", rent_by_group["rent_m2"].size()),
    ("median_price_m2", sale_by_group["price_m2"].median()),
    ("num_sale", sale_by_group["price_m2"].size()),
    ("median_yearly_ROI", sale_by_group["yearly_ROI"].median()),
]

# Line the statistics up side by side, then keep only the groups for which
# every statistic is available.
combined_df = pd.concat(
    [stat for _, stat in summary_parts],
    axis=1,
    keys=[label for label, _ in summary_parts],
).dropna()

<IPython.core.display.Javascript object>

#### select features:

In [10]:
# Columns shown for both kinds of listing, in display order.
shared_select_columns = ["livingSpace", "noRooms", "zipCode", "regio2", "link"]

# Rental view: price columns first, then the shared description columns.
rent_select_columns = ["rent_m2", "baseRent"] + shared_select_columns
rent_select_df = rent_df.loc[:, rent_select_columns].copy()

# Sale view: return and price columns first, then the shared columns.
sale_select_columns = [
    "yearly_ROI",
    "price_m2",
    "purchasePrice",
] + shared_select_columns
sale_select_df = sale_df.loc[:, sale_select_columns].copy()

<IPython.core.display.Javascript object>

## 3) Results

#### group overview:

In [11]:
# A group is shown only if it has strictly more than min_num rental listings
# and strictly more than min_num sale listings.
min_num = 5
# Column that the overview is ranked by, highest value first.
combined_filt_ord = "median_yearly_ROI"

enough_rent = combined_df["num_rent"].gt(min_num)
enough_sale = combined_df["num_sale"].gt(min_num)
combined_filt = enough_rent & enough_sale

combined_df.loc[combined_filt].sort_values(by=combined_filt_ord, ascending=False)

Unnamed: 0_level_0,median_rent_m2,num_rent,median_price_m2,num_sale,median_yearly_ROI
zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4639,4.66,10.0,25.40,8.0,2.20
47169,6.20,79.0,142.25,20.0,0.52
6369,4.50,8.0,406.88,10.0,0.13
38700,6.41,16.0,663.27,20.0,0.12
38644,6.36,8.0,710.53,17.0,0.11
...,...,...,...,...,...
83700,16.04,16.0,12426.92,28.0,0.02
82418,10.79,9.0,8465.59,10.0,0.02
7589,5.00,8.0,3933.01,14.0,0.02
28757,5.31,14.0,4472.42,8.0,0.01


<IPython.core.display.Javascript object>

#### group single:

In [12]:
# Group (zip code) whose summary row is displayed.
combined_arg = 72074
# Looking up a single index label returns that row as a Series.
combined_df.loc[combined_arg, :]

median_rent_m2         15.32
num_rent               13.00
median_price_m2      4736.84
num_sale                5.00
median_yearly_ROI       0.04
Name: 72074, dtype: float64

<IPython.core.display.Javascript object>

#### apartments for rent:

In [13]:
# Column to filter on, the values to keep, and the column to rank by.
rent_filt_col = "zipCode"
rent_filt_arg = [72074]
# Must name a column of rent_select_df; the rent per square metre column is
# called "rent_m2".
rent_filt_ord = "rent_m2"

rent_filt = rent_select_df[rent_filt_col].isin(rent_filt_arg)
# Rank by the configured column (cheapest first) so that changing
# rent_filt_ord actually changes the ordering.
rent_select_df[rent_filt].sort_values(rent_filt_ord).head(20)

Unnamed: 0,rent_m2,baseRent,livingSpace,noRooms,zipCode,regio2,link
41628,12.5,900.0,72.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117139032
45614,13.49,985.0,73.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117026980
25107,13.55,865.0,63.85,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117067170
91096,13.64,300.0,22.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/65446426
87752,13.78,620.0,45.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/90113522
47727,14.29,400.0,28.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116987357
6413,15.32,950.0,62.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117408623
41795,15.56,560.0,36.0,1.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117129297
4157,15.57,475.0,30.5,1.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/94758279
84196,15.79,300.0,19.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/104538924


<IPython.core.display.Javascript object>

#### apartments for sale:

In [14]:
# Column to filter on, the values to keep, and the column to rank by.
sale_filt_col = "zipCode"
sale_filt_arg = [72074]
sale_filt_ord = "yearly_ROI"

sale_filt = sale_select_df[sale_filt_col].isin(sale_filt_arg)
# Rank by the configured column (highest first) so that changing
# sale_filt_ord actually changes the ordering.
sale_select_df[sale_filt].sort_values(sale_filt_ord, ascending=False).head(50)

Unnamed: 0,yearly_ROI,price_m2,purchasePrice,livingSpace,noRooms,zipCode,regio2,link
23874,0.06,3266.67,245000.0,75.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117116996
13553,0.04,4614.86,341500.0,74.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116920085
4100,0.04,4736.84,360000.0,76.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116571440
11077,0.04,5148.39,798000.0,155.0,4.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117501062
11177,0.03,5572.73,613000.0,110.0,3.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117071241


<IPython.core.display.Javascript object>