# Comparison of rent and sale prices

## 0) Imports

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
import glob

%load_ext nb_black
%matplotlib inline

# Notebook-wide display defaults: large figures, two decimals in rendered frames.
plt.rc("figure", figsize=(15, 10))
pd.options.display.precision = 2

<IPython.core.display.Javascript object>

## 1) Data Engineering

### 1.0) Data cleaning

#### load data:

In [2]:
def _latest_export(pattern):
    """Return the lexicographically last path matching ``pattern``.

    NOTE(review): this presumably selects the newest export because the file
    names carry a sortable date suffix -- confirm against the data folder.

    Raises:
        FileNotFoundError: if no file matches ``pattern`` (instead of the
            bare ``IndexError`` that indexing an empty list would produce).
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"no data file matches {pattern!r}")
    return matches[-1]


rent_filepath = _latest_export("../data/mieten*")
sale_filepath = _latest_export("../data/kaufen*")

# The exports are semicolon-separated; low_memory=False makes pandas read each
# file in one pass instead of inferring dtypes chunk by chunk.
rent_df = pd.read_csv(rent_filepath, sep=";", low_memory=False)
sale_df = pd.read_csv(sale_filepath, sep=";", low_memory=False)

<IPython.core.display.Javascript object>

#### remove unnecessary prefixes:

In [3]:
COLUMN_PREFIXES = ("obj_", "ga_", "geo_")


def strip_column_prefixes(name, prefixes=COLUMN_PREFIXES):
    """Remove every leading occurrence of ``prefixes`` from a column name.

    Only the start of the name is touched, so a name such as ``obj_mega_x``
    becomes ``mega_x``; a plain ``str.replace`` would also delete the ``ga_``
    in the middle. Stacked prefixes (``geo_obj_x``) are stripped repeatedly
    until none is left.

    Args:
        name: original column label.
        prefixes: non-empty prefix strings to strip.

    Returns:
        The column label without any of the leading prefixes.
    """
    stripped = True
    while stripped:
        stripped = False
        for prefix in prefixes:
            if name.startswith(prefix):
                name = name[len(prefix):]
                stripped = True
    return name


rent_df.columns = [strip_column_prefixes(x) for x in rent_df.columns]
sale_df.columns = [strip_column_prefixes(x) for x in sale_df.columns]

<IPython.core.display.Javascript object>

#### remove duplicates:

In [4]:
# Keep only the first row seen for each scoutId.
rent_df = rent_df[~rent_df["scoutId"].duplicated(keep="first")]
sale_df = sale_df[~sale_df["scoutId"].duplicated(keep="first")]

<IPython.core.display.Javascript object>

#### remove columns with missing values:

In [5]:
# Keep a column only if every one of its values is present.
rent_complete_columns = rent_df.columns[rent_df.notnull().all()]
rent_df = rent_df[rent_complete_columns]

sale_complete_columns = sale_df.columns[sale_df.notnull().all()]
sale_df = sale_df[sale_complete_columns]

<IPython.core.display.Javascript object>

### 1.1) Feature engineering

#### add links:

In [6]:
# Each listing's expose page is the base URL followed by its scoutId.
expose_base_url = "https://www.immobilienscout24.de/expose/"

for listings_df in (rent_df, sale_df):
    listings_df["link"] = expose_base_url + listings_df["scoutId"].astype(str)

<IPython.core.display.Javascript object>

#### add relative prices:

In [7]:
# Price per unit of living space: base rent for rentals, purchase price for sales.
rent_df["rent_m2"] = rent_df["baseRent"].div(rent_df["livingSpace"])
sale_df["price_m2"] = sale_df["purchasePrice"].div(sale_df["livingSpace"])

<IPython.core.display.Javascript object>

#### add return on investment (ROI) based on median rents per zipCode:

In [8]:
# Median rent per m2 for every zip code that has at least one rental listing.
median_rent_by_zip = (
    rent_df.groupby("zipCode")["rent_m2"].median().rename("rent_m2_zipCode")
)

# Drop the column left behind by a previous run of this cell first; otherwise
# re-executing the cell makes join() raise on the overlapping column name.
# Sale listings in zip codes without any rental listing get NaN here.
sale_df = sale_df.drop(columns=median_rent_by_zip.name, errors="ignore").join(
    median_rent_by_zip, on="zipCode"
)

# Yearly ROI as a fraction: twelve months of the zip code's median rent per m2
# divided by the listing's purchase price per m2.
sale_df["yearly_ROI"] = sale_df["rent_m2_zipCode"] * 12 / sale_df["price_m2"]

<IPython.core.display.Javascript object>

#### add combined dataframe with median rents and prices:

In [9]:
group = "zipCode"

# Group each listing type once and reuse the groupby objects for all statistics.
rent_by_group = rent_df.groupby(group)
sale_by_group = sale_df.groupby(group)

# Output column label -> per-group statistic, listed in the final column order.
group_stats = {
    "median_rent_m2": rent_by_group["rent_m2"].median(),
    "num_rent": rent_by_group["rent_m2"].size(),
    "median_price_m2": sale_by_group["price_m2"].median(),
    "num_sale": sale_by_group["price_m2"].size(),
    "median_yearly_ROI": sale_by_group["yearly_ROI"].median(),
}

# Align the statistics side by side on the group key, then discard every group
# that has an infinite or missing value in any column.
combined_df = (
    pd.concat(list(group_stats.values()), axis=1, keys=list(group_stats))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

<IPython.core.display.Javascript object>

#### select features:

In [10]:
# Columns shown for both listing types, placed after the price columns.
shared_select_columns = ["livingSpace", "noRooms", "zipCode", "regio2", "link"]

rent_select_columns = ["rent_m2", "baseRent", *shared_select_columns]
rent_select_df = rent_df.loc[:, rent_select_columns].copy()

sale_select_columns = [
    "yearly_ROI",
    "price_m2",
    "purchasePrice",
    *shared_select_columns,
]
sale_select_df = sale_df.loc[:, sale_select_columns].copy()

<IPython.core.display.Javascript object>

## 2) Results

### 2.0) Apartments by region

#### regions overview:

In [19]:
# A region is shown only if it has strictly more than `min_num` rental listings
# and strictly more than `min_num` sale listings.
min_num = 1
combined_filt_ord = "median_yearly_ROI"

combined_filt = combined_df[["num_rent", "num_sale"]].gt(min_num).all(axis=1)
combined_df.loc[combined_filt].sort_values(by=combined_filt_ord, ascending=False)

Unnamed: 0_level_0,median_rent_m2,num_rent,median_price_m2,num_sale,median_yearly_ROI
zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4639,4.69,13.0,25.37,7.0,2.22
47169,6.22,80.0,142.26,21.0,0.52
7973,4.98,67.0,155.37,3.0,0.38
6648,5.39,3.0,553.79,2.0,0.37
6749,5.51,133.0,464.14,2.0,0.24
...,...,...,...,...,...
7589,5.00,8.0,3968.10,11.0,0.02
20354,18.01,3.0,14608.70,3.0,0.01
40212,11.52,2.0,10499.04,2.0,0.01
80539,27.27,11.0,25000.00,7.0,0.01


<IPython.core.display.Javascript object>

#### single region:

In [12]:
# Index label (zip code) of the region to inspect; it must be present in combined_df.
combined_arg = 4639
combined_df.loc[combined_arg, :]

median_rent_m2        4.69
num_rent             13.00
median_price_m2      25.37
num_sale              7.00
median_yearly_ROI     2.22
Name: 4639, dtype: float64

<IPython.core.display.Javascript object>

### 2.1) Apartments for rent

In [34]:
# Filter column, accepted values, and sort key for the rental listing table.
rent_filt_col = "zipCode"
rent_filt_arg = [12489]
rent_ord = "rent_m2"

# Show the 20 matching listings with the lowest value in the sort column.
rent_filt = rent_select_df[rent_filt_col].isin(rent_filt_arg)
rent_select_df.loc[rent_filt].sort_values(by=rent_ord, ascending=True).head(n=20)

Unnamed: 0,rent_m2,baseRent,livingSpace,noRooms,zipCode,regio2,link
7819,7.01,467.5,66.72,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/109208094
24532,9.5,715.0,75.28,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/104916496
57716,9.69,1599.0,165.1,4.0,12489,Berlin,https://www.immobilienscout24.de/expose/116956183
2403,10.38,1399.0,134.75,4.0,12489,Berlin,https://www.immobilienscout24.de/expose/116959624
59602,11.22,717.97,63.99,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/116869830
78002,11.93,674.37,56.53,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/114726475
73530,11.99,1149.0,95.86,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/115452919
81965,12.51,825.0,65.96,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/113471229
63122,13.09,1279.88,97.81,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/116604623
84905,13.5,1699.0,125.86,4.0,12489,Berlin,https://www.immobilienscout24.de/expose/111513335


<IPython.core.display.Javascript object>

### 2.2) Apartments for sale

In [35]:
# Filter column, accepted values, and sort key for the sale listing table.
sale_filt_col = "zipCode"
sale_filt_arg = [12489]
sale_ord = "yearly_ROI"

# Show the 50 matching listings with the highest value in the sort column.
sale_filt = sale_select_df[sale_filt_col].isin(sale_filt_arg)
sale_select_df.loc[sale_filt].sort_values(by=sale_ord, ascending=False).head(n=50)

Unnamed: 0,yearly_ROI,price_m2,purchasePrice,livingSpace,noRooms,zipCode,regio2,link
11252,0.06,2369.34,170000.0,71.75,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/114960018
31683,0.06,2395.29,175000.0,73.06,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/114881500
26452,0.06,2638.52,150000.0,56.85,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/116870264
12728,0.06,2659.05,135000.0,50.77,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/117648770
11250,0.05,3000.0,130500.0,43.5,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/114954791
26440,0.05,3014.18,170000.0,56.4,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/116870259
3268,0.05,3124.54,213000.0,68.17,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/113564857
22947,0.05,3249.21,215000.0,66.17,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/117538579
3137,0.05,3383.12,265000.0,78.33,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/116924411
14729,0.04,4090.91,225000.0,55.0,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/117614342


<IPython.core.display.Javascript object>