# <center>Comparison of rent and sale prices</center>

## 0) Imports

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
import glob

# Load the nb_black extension (presumably what emits the Javascript display
# object seen after every executed cell -- TODO confirm).
%load_ext nb_black
# Render matplotlib figures inside the notebook output.
%matplotlib inline

# Default size (width, height) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (15, 10)
# Display floating point values in frames with a precision of 2.
pd.set_option("display.precision", 2)

<IPython.core.display.Javascript object>

## 1) Data Engineering

### 1.0) Data cleaning

#### load data:

In [2]:
def _latest_file(pattern):
    """Return the lexicographically last path matching ``pattern``.

    Presumably the data files carry a date in their name so that the last
    match is the newest export -- TODO confirm against the files in ../data.

    Raises:
        FileNotFoundError: if no file matches ``pattern`` (instead of the
            opaque ``IndexError`` that indexing an empty list would give).
    """
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"no data file matches {pattern!r}")
    return matches[-1]


rent_filepath = _latest_file("../data/mieten*")
sale_filepath = _latest_file("../data/kaufen*")

# Both exports are semicolon-separated; low_memory=False makes pandas read each
# file in one pass so column dtypes are inferred from the whole column.
rent_df = pd.read_csv(rent_filepath, sep=";", low_memory=False)
sale_df = pd.read_csv(sale_filepath, sep=";", low_memory=False)

<IPython.core.display.Javascript object>

#### remove unnecessary prefixes:

In [3]:
def _strip_source_prefixes(column_name):
    """Remove the "obj_", "ga_" and "geo_" markers from a column name.

    NOTE(review): ``str.replace`` removes every occurrence of a marker, not
    only a leading one -- confirm no column contains one mid-name.
    """
    for marker in ("obj_", "ga_", "geo_"):
        column_name = column_name.replace(marker, "")
    return column_name


# Rename the columns of both frames in place with the same rule.
for frame in (rent_df, sale_df):
    frame.columns = [_strip_source_prefixes(name) for name in frame.columns]

<IPython.core.display.Javascript object>

#### remove duplicates:

In [4]:
# Keep only the first row for every listing id ("scoutId"); later rows with
# the same id are discarded.
rent_df = rent_df.drop_duplicates(subset=["scoutId"], keep="first")
sale_df = sale_df.drop_duplicates(subset=["scoutId"], keep="first")

<IPython.core.display.Javascript object>

#### remove columns with missing values:

In [5]:
# Keep only the columns that have a value in every row; a single missing
# value is enough for a column to be dropped.
rent_complete_columns = rent_df.columns[rent_df.notna().all()]
sale_complete_columns = sale_df.columns[sale_df.notna().all()]

rent_df = rent_df[rent_complete_columns]
sale_df = sale_df[sale_complete_columns]

<IPython.core.display.Javascript object>

### 1.1) Feature engineering

#### add links:

In [6]:
# Every listing page is the expose base URL followed by the listing id.
expose_base_url = "https://www.immobilienscout24.de/expose/"

for frame in (rent_df, sale_df):
    frame["link"] = expose_base_url + frame["scoutId"].astype(str)

<IPython.core.display.Javascript object>

#### add relative prices:

In [7]:
# Listings with a living space of zero (or below) cannot yield a meaningful
# price per m²: dividing by them produces inf/NaN values that distort the
# per-zip-code medians computed afterwards. Mask those areas to NaN so the
# affected listings get a NaN relative price, which median() skips.
rent_living_space = rent_df["livingSpace"].where(rent_df["livingSpace"] > 0)
sale_living_space = sale_df["livingSpace"].where(sale_df["livingSpace"] > 0)

# Base rent per m² of living space.
rent_df["rent_m2"] = rent_df["baseRent"] / rent_living_space
# Purchase price per m² of living space.
sale_df["price_m2"] = sale_df["purchasePrice"] / sale_living_space

<IPython.core.display.Javascript object>

#### add return on investment based on median rents for zipCode

In [8]:
# Median rent per m² for every zip code that has rental listings.
rent_m2_by_zip = (
    rent_df.groupby("zipCode")["rent_m2"].median().rename("rent_m2_zipCode")
)

# Attach the zip-code median rent to every sale listing. A column left over
# from a previous run of this cell is dropped first; otherwise join() raises
# because the column names overlap and the cell could not be re-run.
sale_df = sale_df.drop(columns="rent_m2_zipCode", errors="ignore").join(
    rent_m2_by_zip, on="zipCode"
)

# A price per m² of zero (or below) would turn the ratio into inf; mask it so
# such listings get a NaN ROI instead.
valid_price_m2 = sale_df["price_m2"].where(sale_df["price_m2"] > 0)

# Yearly ROI = twelve months of median zip-code rent per m² / price per m².
sale_df["yearly_ROI"] = sale_df["rent_m2_zipCode"] * 12 / valid_price_m2

<IPython.core.display.Javascript object>

#### add combined dataframe with median rents and prices:

In [9]:
group = "zipCode"

# Group each frame once and reuse the grouped objects for every statistic.
rent_m2_grouped = rent_df.groupby(group)["rent_m2"]
sale_grouped = sale_df.groupby(group)

# One entry per output column, in display order.
region_stats = {
    "median_rent_m2": rent_m2_grouped.median(),
    "num_rent": rent_m2_grouped.size(),
    "median_price_m2": sale_grouped["price_m2"].median(),
    "num_sale": sale_grouped["price_m2"].size(),
    "median_yearly_ROI": sale_grouped["yearly_ROI"].median(),
}

# Align the statistics side by side on the group label, then discard every
# region that has an infinite or missing value in any column.
combined_df = (
    pd.concat(list(region_stats.values()), axis=1, keys=list(region_stats))
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
)

<IPython.core.display.Javascript object>

#### select features:

In [10]:
# Columns shown for rent and sale listings alike.
shared_select_columns = ["livingSpace", "noRooms", "zipCode", "regio2", "link"]

# Rent view: relative and absolute rent first, then the shared columns.
rent_select_columns = ["rent_m2", "baseRent"] + shared_select_columns
rent_select_df = rent_df[rent_select_columns].copy()

# Sale view: ROI, relative and absolute price first, then the shared columns.
sale_select_columns = [
    "yearly_ROI",
    "price_m2",
    "purchasePrice",
] + shared_select_columns
sale_select_df = sale_df[sale_select_columns].copy()

<IPython.core.display.Javascript object>

## 2) Results

### 2.0) Apartments by region

#### regions overview:

In [11]:
min_num = 5
combined_filt_ord = "median_yearly_ROI"

# A region qualifies only if it has strictly more than `min_num` listings
# both for rent and for sale.
enough_rent = combined_df["num_rent"].gt(min_num)
enough_sale = combined_df["num_sale"].gt(min_num)
combined_filt = enough_rent & enough_sale

# Qualifying regions, highest value of the ordering column first.
combined_df.loc[combined_filt].sort_values(combined_filt_ord, ascending=False)

Unnamed: 0_level_0,median_rent_m2,num_rent,median_price_m2,num_sale,median_yearly_ROI
zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4639,4.94,12.0,25.37,7.0,2.34
47169,6.25,63.0,142.25,22.0,0.53
6231,5.32,101.0,421.70,6.0,0.15
6369,4.50,10.0,385.59,8.0,0.14
47229,6.40,24.0,576.92,9.0,0.13
...,...,...,...,...,...
27472,6.45,21.0,4320.45,10.0,0.02
13053,6.25,7.0,4600.00,6.0,0.02
20148,18.66,16.0,13950.00,9.0,0.02
83700,16.74,11.0,12895.17,32.0,0.02


<IPython.core.display.Javascript object>

#### single region:

In [12]:
# Index label of the region to inspect (the frame is indexed by the group key).
combined_arg = 12489

# Show every statistic of that one region.
combined_df.loc[combined_arg, :]

median_rent_m2         12.51
num_rent               13.00
median_price_m2      3212.61
num_sale               18.00
median_yearly_ROI       0.05
Name: 12489, dtype: float64

<IPython.core.display.Javascript object>

### 2.1) Apartments for rent

In [13]:
# Which column to filter on, which values to keep, and the sort column.
rent_filt_col = "zipCode"
rent_filt_arg = [12489]
rent_ord = "rent_m2"

# Boolean mask of the rent listings whose filter column is in the wanted set.
rent_filt = rent_select_df[rent_filt_col].isin(rent_filt_arg)

# The 20 matching listings with the lowest value in the sort column.
rent_matches = rent_select_df.loc[rent_filt]
rent_matches.sort_values(rent_ord).head(20)

Unnamed: 0,rent_m2,baseRent,livingSpace,noRooms,zipCode,regio2,link
14842,7.63,648.95,85.0,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/118535004
24615,8.02,836.0,104.27,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/72903352
45998,9.58,602.58,62.9,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/118525653
95210,10.8,545.94,50.55,1.0,12489,Berlin,https://www.immobilienscout24.de/expose/76766337
53256,11.76,1800.0,153.0,5.0,12489,Berlin,https://www.immobilienscout24.de/expose/118192255
14817,12.0,756.0,63.0,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/118536816
55518,12.51,950.5,76.0,1.0,12489,Berlin,https://www.immobilienscout24.de/expose/118050095
82185,12.51,825.0,65.96,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/113471229
84720,13.5,1699.0,125.86,4.0,12489,Berlin,https://www.immobilienscout24.de/expose/111513335
14790,16.94,1246.78,73.6,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/118542152


<IPython.core.display.Javascript object>

### 2.2) Apartments for sale

In [14]:
# Which column to filter on, which values to keep, and the sort column.
sale_filt_col = "zipCode"
sale_filt_arg = [12489]
sale_ord = "yearly_ROI"

# Boolean mask of the sale listings whose filter column is in the wanted set.
sale_filt = sale_select_df[sale_filt_col].isin(sale_filt_arg)

# The 50 matching listings with the highest value in the sort column.
sale_matches = sale_select_df.loc[sale_filt]
sale_matches.sort_values(sale_ord, ascending=False).head(50)

Unnamed: 0,yearly_ROI,price_m2,purchasePrice,livingSpace,noRooms,zipCode,regio2,link
12189,0.06,2369.34,170000.0,71.75,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/114960018
24158,0.06,2405.89,170000.0,70.66,2.5,12489,Berlin,https://www.immobilienscout24.de/expose/118637931
22949,0.06,2481.73,180000.0,72.53,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/118866628
29412,0.06,2638.52,150000.0,56.85,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/116870264
1251,0.05,2860.5,195000.0,68.17,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/113564857
26739,0.05,2866.36,160000.0,55.82,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/117863760
12177,0.05,3000.0,130500.0,43.5,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/114954791
29428,0.05,3014.18,170000.0,56.4,2.0,12489,Berlin,https://www.immobilienscout24.de/expose/116870259
26184,0.05,3042.11,289000.0,95.0,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/118089849
8932,0.04,3383.12,265000.0,78.33,3.0,12489,Berlin,https://www.immobilienscout24.de/expose/116924411


<IPython.core.display.Javascript object>