# Comparison of rent and sale prices

## 0) Imports

In [1]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
import glob

# Load the nb_black extension (code formatter for executed cells; presumably
# the source of the Javascript display objects emitted after each cell).
%load_ext nb_black
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Default figure size as (width, height) in inches.
plt.rcParams["figure.figsize"] = (15, 10)
# Show floating point numbers with two decimal places in pandas output.
pd.set_option("display.precision", 2)

<IPython.core.display.Javascript object>

## 1) Data Engineering

### 1.0) Data cleaning

#### load data:

In [2]:
def _latest_file(pattern):
    """Return the lexicographically last path matching ``pattern``.

    Raises:
        FileNotFoundError: if no path matches, instead of the bare
            IndexError that indexing an empty list would produce.
    """
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"no data file matches {pattern!r}")
    # max() on the path strings is the same element as sorted(...)[-1].
    return max(matches)


# NOTE(review): picking the last name in sort order presumably selects the
# newest export (e.g. a date embedded in the file name) -- confirm.
rent_filepath = _latest_file("../data/mieten*")
sale_filepath = _latest_file("../data/kaufen*")

# Both exports are semicolon-separated; low_memory=False makes pandas read
# each file in one pass rather than in chunks.
rent_df = pd.read_csv(rent_filepath, sep=";", low_memory=False)
sale_df = pd.read_csv(sale_filepath, sep=";", low_memory=False)

<IPython.core.display.Javascript object>

#### remove unnecessary prefixes:

In [3]:
def _strip_source_markers(column_name):
    """Remove the "obj_", "ga_" and "geo_" markers from a column name.

    ``str.replace`` removes every occurrence of a marker, not only a
    leading one, and the markers are removed in the order listed.
    """
    for marker in ("obj_", "ga_", "geo_"):
        column_name = column_name.replace(marker, "")
    return column_name


# Rename the columns of both frames in place.
for frame in (rent_df, sale_df):
    frame.columns = [_strip_source_markers(name) for name in frame.columns]

<IPython.core.display.Javascript object>

#### remove duplicates:

In [4]:
# Keep only the first row seen for each scoutId.
rent_df = rent_df[~rent_df.duplicated(subset="scoutId")]
sale_df = sale_df[~sale_df.duplicated(subset="scoutId")]

<IPython.core.display.Javascript object>

#### remove columns with missing values:

In [5]:
# Drop every column that contains at least one missing value.
rent_df = rent_df.dropna(axis="columns", how="any")
sale_df = sale_df.dropna(axis="columns", how="any")

<IPython.core.display.Javascript object>

### 1.1) Feature engineering

#### add links:

In [6]:
# URL prefix; each link is this prefix followed by the row's scoutId.
expose_url = "https://www.immobilienscout24.de/expose/"

for frame in (rent_df, sale_df):
    frame["link"] = expose_url + frame["scoutId"].astype(str)

<IPython.core.display.Javascript object>

#### add relative prices:

In [7]:
# Price per unit of living space (presumably m², going by the column names).
# A livingSpace of 0 yields inf/NaN here rather than raising an error.
rent_df["rent_m2"] = rent_df["baseRent"].div(rent_df["livingSpace"])
sale_df["price_m2"] = sale_df["purchasePrice"].div(sale_df["livingSpace"])

<IPython.core.display.Javascript object>

#### add return on investment based on the median rent per zipCode:

In [8]:
# Median rent per m² of the rental listings, indexed by zip code.
g = rent_df.groupby("zipCode")["rent_m2"].median()
g.name = "rent_m2_zipCode"

# Remove a column left over from an earlier run of this cell before joining;
# otherwise re-running the cell fails because DataFrame.join refuses
# overlapping column names. join() already returns a new frame, so no
# explicit copy is needed.
sale_df = sale_df.drop(columns=g.name, errors="ignore").join(g, on="zipCode")

# Twelve months of the zip code's median rent per m² relative to the
# listing's purchase price per m². Zip codes without rental listings give
# NaN; a price_m2 of 0 gives inf.
sale_df["yearly_ROI"] = sale_df["rent_m2_zipCode"] * 12 / sale_df["price_m2"]

<IPython.core.display.Javascript object>

#### add combined dataframe with median rents and prices:

In [9]:
group = "zipCode"

rent_groups = rent_df.groupby(group)
sale_groups = sale_df.groupby(group)

# (output column name, per-group statistic) pairs in output column order.
combined_parts = [
    ("median_rent_m2", rent_groups["rent_m2"].median()),
    ("num_rent", rent_groups["rent_m2"].size()),
    ("median_price_m2", sale_groups["price_m2"].median()),
    ("num_sale", sale_groups["price_m2"].size()),
    ("median_yearly_ROI", sale_groups["yearly_ROI"].median()),
]
combined_labels, combined_stats = zip(*combined_parts)

# Align all statistics on the group key, one column per statistic.
combined_df = pd.concat(
    list(combined_stats), axis=1, keys=list(combined_labels)
)

# Treat infinite values as missing, then keep only the groups for which
# every statistic is available.
combined_df = combined_df.replace([np.inf, -np.inf], np.nan).dropna()

<IPython.core.display.Javascript object>

#### select features:

In [10]:
# Columns that appear, in this order, at the end of both selections.
shared_select_columns = ["livingSpace", "noRooms", "zipCode", "regio2", "link"]

# Rental view: rent figures first, then the shared columns.
rent_select_columns = ["rent_m2", "baseRent"] + shared_select_columns
rent_select_df = rent_df.loc[:, rent_select_columns].copy()

# Sale view: yield and price figures first, then the shared columns.
sale_select_columns = [
    "yearly_ROI",
    "price_m2",
    "purchasePrice",
] + shared_select_columns
sale_select_df = sale_df.loc[:, sale_select_columns].copy()

<IPython.core.display.Javascript object>

## 2) Results

### 2.0) Apartments by region

#### regions overview:

In [11]:
min_num = 5
combined_filt_ord = "median_yearly_ROI"

# A region is shown only if it has more than min_num rental listings and
# more than min_num sale listings (strict comparison).
enough_rent = combined_df["num_rent"].gt(min_num)
enough_sale = combined_df["num_sale"].gt(min_num)
combined_filt = enough_rent & enough_sale

# Highest value of the ordering column first.
combined_df.loc[combined_filt].sort_values(by=combined_filt_ord, ascending=False)

Unnamed: 0_level_0,median_rent_m2,num_rent,median_price_m2,num_sale,median_yearly_ROI
zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4639,4.67,12.0,25.37,7.0,2.21
47169,6.23,81.0,142.26,21.0,0.53
6369,4.50,9.0,406.88,10.0,0.13
38700,6.56,15.0,663.27,22.0,0.12
38644,6.58,8.0,710.53,17.0,0.11
...,...,...,...,...,...
81675,23.07,34.0,15281.92,8.0,0.02
20148,21.00,18.0,13950.00,9.0,0.02
81679,24.38,16.0,17453.40,18.0,0.02
83700,16.45,16.0,12953.30,28.0,0.02


<IPython.core.display.Javascript object>

#### single region:

In [16]:
# Index label of the region to inspect; a label that is not present in
# combined_df raises KeyError.
combined_arg = 72074
combined_df.loc[combined_arg, :]

median_rent_m2         14.38
num_rent               16.00
median_price_m2      5148.39
num_sale                3.00
median_yearly_ROI       0.03
Name: 72074, dtype: float64

<IPython.core.display.Javascript object>

### 2.1) Apartments for rent

In [13]:
rent_filt_col = "zipCode"
rent_filt_arg = [72074]
rent_ord = "rent_m2"

# Rows whose filter column equals one of the requested values, ordered by
# ascending rent_ord; only the first 20 rows are shown.
rent_filt = rent_select_df[rent_filt_col].isin(rent_filt_arg)
(
    rent_select_df.loc[rent_filt]
    .sort_values(by=rent_ord, ascending=True)
    .head(20)
)

Unnamed: 0,rent_m2,baseRent,livingSpace,noRooms,zipCode,regio2,link
32804,10.0,900.0,90.0,3.0,72074,Reutlingen_Kreis,https://www.immobilienscout24.de/expose/117477004
95146,12.29,590.0,48.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/81969323
42834,12.79,1260.0,98.5,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117685253
6989,13.55,865.0,63.85,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117067170
23427,13.64,300.0,22.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/65446426
89087,13.75,880.0,64.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/106303465
47288,14.0,1470.0,105.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117551317
59137,14.29,400.0,28.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116987357
40940,14.47,550.0,38.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117736870
18287,15.57,475.0,30.5,1.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/94758279


<IPython.core.display.Javascript object>

### 2.2) Apartments for sale

In [14]:
sale_filt_col = "zipCode"
sale_filt_arg = [72074]
sale_ord = "yearly_ROI"

# Rows whose filter column equals one of the requested values, ordered by
# descending sale_ord; only the first 50 rows are shown.
sale_filt = sale_select_df[sale_filt_col].isin(sale_filt_arg)
(
    sale_select_df.loc[sale_filt]
    .sort_values(by=sale_ord, ascending=False)
    .head(50)
)

Unnamed: 0,yearly_ROI,price_m2,purchasePrice,livingSpace,noRooms,zipCode,regio2,link
2400,0.04,4736.84,360000.0,76.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116571440
2410,0.03,5148.39,798000.0,155.0,4.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117501062
17342,0.03,5572.73,613000.0,110.0,3.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117071241


<IPython.core.display.Javascript object>