# Comparison of rent and sale prices

## 0) Imports

In [1]:
%load_ext nb_black
import numpy as np

from matplotlib import pyplot as plt

%matplotlib inline
plt.rcParams["figure.figsize"] = (15, 10)

import pandas as pd

pd.set_option("display.precision", 4)

<IPython.core.display.Javascript object>

## 1) Data cleaning

#### load data:

In [2]:
# Snapshot date embedded in the names of the two CSV exports.
date = "2020-04-13"
rent_filepath = f"../data/mieten{date}.csv"
sale_filepath = f"../data/kaufen{date}.csv"

# Both files are read with the same parser settings: semicolon-separated,
# and low_memory=False so pandas parses each file in one pass.
csv_options = {"sep": ";", "low_memory": False}
rent_df = pd.read_csv(rent_filepath, **csv_options)
sale_df = pd.read_csv(sale_filepath, **csv_options)

<IPython.core.display.Javascript object>

#### remove duplicates:

In [3]:
# A listing is identified by its scout id; keep only the first row per id.
listing_id_column = "obj_scoutId"
rent_df = rent_df.drop_duplicates(subset=[listing_id_column])
sale_df = sale_df.drop_duplicates(subset=[listing_id_column])

<IPython.core.display.Javascript object>

#### remove columns with missing values:

In [4]:
# Keep only the columns that are populated in every row. A column survives
# when all of its values are non-null.
rent_complete_columns = rent_df.columns[rent_df.notna().all()]
rent_df = rent_df[rent_complete_columns]

sale_complete_columns = sale_df.columns[sale_df.notna().all()]
sale_df = sale_df[sale_complete_columns]

<IPython.core.display.Javascript object>

## 2) Feature engineering

#### add relative prices:

In [5]:
# Every listing link is this prefix followed by the listing's scout id.
expose_url = "https://www.immobilienscout24.de/expose/"

# Rentals: base rent per unit of living space, plus a link to the listing.
rent_df["rent_m2"] = rent_df["obj_baseRent"].div(rent_df["obj_livingSpace"])
rent_df["link"] = expose_url + rent_df["obj_scoutId"].astype(str)

# Sales: purchase price per unit of living space, plus a link to the listing.
sale_df["price_m2"] = sale_df["obj_purchasePrice"].div(sale_df["obj_livingSpace"])
sale_df["link"] = expose_url + sale_df["obj_scoutId"].astype(str)

<IPython.core.display.Javascript object>

#### add median rents for zip and kreis

In [6]:
# Attach the group-level median of rent_m2 to every rental listing, once per
# grouping key: first by zip code, then by obj_regio2 (the "kreis" level).
rent_median_targets = [
    ("obj_zipCode", "rent_m2_zip"),
    ("obj_regio2", "rent_m2_kreis"),
]
for key_column, median_column in rent_median_targets:
    group_medians = (
        rent_df.groupby(key_column)["rent_m2"].median().rename(median_column)
    )
    # join(on=...) looks each row's key up in the index of the median Series.
    rent_df = rent_df.join(group_medians, on=key_column).copy()

<IPython.core.display.Javascript object>

#### add median prices for zip and kreis

In [7]:
# Attach the group-level median of price_m2 to every sale listing, once per
# grouping key: first by zip code, then by obj_regio2 (the "kreis" level).
price_median_targets = [
    ("obj_zipCode", "price_m2_zip"),
    ("obj_regio2", "price_m2_kreis"),
]
for key_column, median_column in price_median_targets:
    group_medians = (
        sale_df.groupby(key_column)["price_m2"].median().rename(median_column)
    )
    # join(on=...) looks each row's key up in the index of the median Series.
    sale_df = sale_df.join(group_medians, on=key_column).copy()

<IPython.core.display.Javascript object>

#### add return on investment based on the median rent for each zip code

In [8]:
# Median rent per m2 for each zip code, taken from the rental listings and
# attached to every sale listing in the same zip code.
zip_median_rent = (
    rent_df.groupby("obj_zipCode")["rent_m2"].median().rename("rent_m2_zip")
)
sale_df = sale_df.join(zip_median_rent, on="obj_zipCode").copy()

# Yearly ROI = twelve months of the zip-level median rent per m2, divided by
# this listing's own purchase price per m2.
annual_rent_m2 = sale_df["rent_m2_zip"].mul(12)
sale_df["yearly_ROI"] = annual_rent_m2.div(sale_df["price_m2"])

<IPython.core.display.Javascript object>

#### group median rents and prices into new dataframe:

In [9]:
# Parameters of the grouped comparison.
group = "obj_zipCode"  # column whose values define the groups
min_count = 10  # minimum number of listings a group needs to be kept
rent_feature = "rent_m2"  # rental column that is aggregated
sale_feature = "price_m2"  # sale column that is aggregated

# Median and listing count per group. Each table is computed once and then
# filtered, rather than running the same aggregation twice.
rent_group = rent_df.groupby(group)
rent_stats = rent_group[rent_feature].agg(["median", "count"])
rent_filt = rent_stats["count"] >= min_count
rent_group_df = rent_stats[rent_filt]

sale_group = sale_df.groupby(group)
sale_stats = sale_group[sale_feature].agg(["median", "count"])
sale_filt = sale_stats["count"] >= min_count
sale_group_df = sale_stats[sale_filt]

<IPython.core.display.Javascript object>

#### combine rent and sale into new dataframe:

In [10]:
# Put the per-group rent and sale statistics side by side, aligned on the
# group index. Each Series is renamed first so its name becomes the column
# label in the combined frame.
combined_columns = [
    rent_group_df["median"].rename("median_rent_m2"),
    rent_group_df["count"].rename("rent_count"),
    sale_group_df["median"].rename("median_price_m2"),
    sale_group_df["count"].rename("sale_count"),
]
combined_df = pd.concat(combined_columns, axis=1).copy()

# Groups present on only one side have missing values after the alignment;
# drop them so every remaining row has both rent and sale statistics.
combined_df = combined_df.dropna()

<IPython.core.display.Javascript object>

#### compute median return on investment ratios:

In [11]:
# Twelve months of median rent per m2, and the median price per m2, per group.
annual_median_rent_m2 = combined_df["median_rent_m2"] * 12
median_price_m2 = combined_df["median_price_m2"]

# Yearly ROI: share of the price recovered by one year of rent.
combined_df["median_yearly_ROI"] = annual_median_rent_m2 / median_price_m2

# Inverse ratio: the price expressed in years of rent.
combined_df["median_price_in_rents"] = median_price_m2 / annual_median_rent_m2

# Name of the most recently added feature column.
combined_feature = "median_price_in_rents"

<IPython.core.display.Javascript object>

#### select features:

In [12]:
# Columns that appear, in this order, at the end of both selections.
shared_select_columns = [
    "obj_livingSpace",
    "obj_noRooms",
    "obj_zipCode",
    "obj_regio2",
    "link",
]

# Rental view: rent figures first, then the shared listing attributes.
rent_select_columns = [
    "rent_m2",
    "rent_m2_zip",
    "rent_m2_kreis",
    "obj_baseRent",
    *shared_select_columns,
]

# Sale view: ROI and price figures first, then the shared listing attributes.
sale_select_columns = [
    "yearly_ROI",
    "rent_m2_zip",
    "price_m2",
    "price_m2_zip",
    "price_m2_kreis",
    "obj_purchasePrice",
    *shared_select_columns,
]

# Independent copies, so later changes to the selections do not touch the
# full frames.
rent_select_df = rent_df[rent_select_columns].copy()
sale_select_df = sale_df[sale_select_columns].copy()

<IPython.core.display.Javascript object>

## 3) Results

In [13]:
# Rank the groups from highest to lowest median yearly ROI and display them.
ranking_column = "median_yearly_ROI"
combined_df = combined_df.sort_values(by=ranking_column, ascending=False)
combined_df

Unnamed: 0_level_0,median_rent_m2,rent_count,median_price_m2,sale_count,median_yearly_ROI,median_price_in_rents
obj_zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
47169,6.1978,75.0,142.2333,19.0,0.5229,1.9124
38700,6.7241,12.0,653.0612,19.0,0.1236,8.0935
6667,5.1015,183.0,574.4681,17.0,0.1066,9.3839
8393,4.9833,56.0,601.2951,13.0,0.0995,10.0551
8468,4.5455,103.0,565.7143,11.0,0.0964,10.3714
...,...,...,...,...,...,...
18556,5.5625,12.0,3134.7580,12.0,0.0213,46.9627
60327,18.3294,35.0,10450.7268,22.0,0.0210,47.5134
20149,22.1010,16.0,12964.5447,12.0,0.0205,48.8838
81679,25.1235,10.0,17453.4005,12.0,0.0173,57.8920


<IPython.core.display.Javascript object>

#### apartments for rent:

In [17]:
# Settings for the rental table shown below.
rent_filt_col = "obj_zipCode"  # column to filter on
rent_filt_arg = 72074  # value that column must equal
# Column to sort by. The previous value "rent_per_m2" matched no selected
# column and was never used; the sort below now reads this setting.
rent_filt_ord = "rent_m2"

# Boolean mask of the matching listings, then the 20 with the lowest value
# in the sort column.
rent_filt = rent_select_df[rent_filt_col] == rent_filt_arg
rent_select_df[rent_filt].sort_values(rent_filt_ord).head(20)

Unnamed: 0,rent_m2,rent_m2_zip,rent_m2_kreis,obj_baseRent,obj_livingSpace,obj_noRooms,obj_zipCode,obj_regio2,link
87123,11.6667,13.7778,11.6026,700.0,60.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/79889378
35384,12.5,13.7778,11.6026,900.0,72.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117139032
40751,13.4932,13.7778,11.6026,985.0,73.0,3.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117026980
28458,13.5474,13.7778,11.6026,865.0,63.85,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/117067170
88506,13.6364,13.7778,11.6026,300.0,22.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/65446426
85294,13.7778,13.7778,11.6026,620.0,45.0,2.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/90113522
7955,15.5738,13.7778,11.6026,475.0,30.5,1.5,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/94758279
81904,15.7895,13.7778,11.6026,300.0,19.0,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/104538924
87849,19.6209,13.7778,11.6026,590.0,30.07,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/73109515
58579,20.6503,13.7778,11.6026,470.0,22.76,1.0,72074,Tübingen_Kreis,https://www.immobilienscout24.de/expose/116058550


<IPython.core.display.Javascript object>

#### apartments for sale:

In [16]:
# Settings for the sales table shown below.
sale_filt_col = "obj_regio2"  # column to filter on; alternative: "obj_zipCode"
sale_filt_arg = "Duisburg"  # value that column must equal
sale_filt_ord = "yearly_ROI"  # column to sort by, highest first

# Boolean mask of the matching listings, then the 20 with the highest value
# in the sort column. The sort reads sale_filt_ord instead of repeating the
# column name, so changing the setting above takes effect.
sale_filt = sale_select_df[sale_filt_col] == sale_filt_arg
sale_select_df[sale_filt].sort_values(sale_filt_ord, ascending=False).head(20)

Unnamed: 0,yearly_ROI,rent_m2_zip,price_m2,price_m2_zip,price_m2_kreis,obj_purchasePrice,obj_livingSpace,obj_noRooms,obj_zipCode,obj_regio2,link
6315,0.7403,6.1962,100.4367,1096.2963,1264.2602,69000.0,687.0,3.0,47137,Duisburg,https://www.immobilienscout24.de/expose/117056421
19358,0.6991,6.1978,106.383,142.2333,1264.2602,10000.0,94.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056597
19360,0.6991,6.1978,106.383,142.2333,1264.2602,10000.0,94.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056598
19365,0.6952,6.1978,106.9767,142.2333,1264.2602,4600.0,43.0,1.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056600
19374,0.6895,6.1978,107.8652,142.2333,1264.2602,9600.0,89.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056589
19376,0.6893,6.1978,107.8947,142.2333,1264.2602,4100.0,38.0,1.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056599
19368,0.6454,6.1978,115.2381,142.2333,1264.2602,12100.0,105.0,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056593
19375,0.6454,6.1978,115.2381,142.2333,1264.2602,12100.0,105.0,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056590
6202,0.5235,6.1978,142.0611,142.2333,1264.2602,18100.0,127.41,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056344
6200,0.5229,6.1978,142.2239,142.2333,1264.2602,7700.0,54.14,2.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056348


<IPython.core.display.Javascript object>