# Comparison of rent and sale prices

## 0) Imports

In [1]:
%load_ext nb_black
import numpy as np

from matplotlib import pyplot as plt

%matplotlib inline
# Default size (width, height) applied to every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (15, 10)

import pandas as pd

# Display floating-point values in DataFrames with two decimal places.
pd.set_option("display.precision", 2)

<IPython.core.display.Javascript object>

## 1) Data cleaning

#### load data:

In [3]:
# Snapshot date; it is embedded in the name of both CSV files.
date = "2020-04-13"
rent_filepath = f"../data/mieten{date}.csv"
sale_filepath = f"../data/kaufen{date}.csv"

# Both files are parsed with the same settings: semicolon-separated, and read
# in one pass (low_memory=False) rather than in internally chunked pieces.
read_options = dict(sep=";", low_memory=False)
rent_df = pd.read_csv(rent_filepath, **read_options)
sale_df = pd.read_csv(sale_filepath, **read_options)

FileNotFoundError: [Errno 2] File ../data/kaufen2020-04-13.csv does not exist: '../data/kaufen2020-04-13.csv'

<IPython.core.display.Javascript object>

#### remove unnecessary prefixes:

In [None]:
def strip_column_prefixes(columns, prefixes=("obj_", "ga_", "geo_")):
    """Return the column names with the given leading prefixes removed.

    Only text at the very start of a name is removed; stacked prefixes such as
    "obj_geo_x" are peeled off one after another. Text in the middle of a name
    is left alone, so "mega_x" stays "mega_x" instead of becoming "mex".

    Args:
        columns: iterable of column names (strings).
        prefixes: prefixes to strip from the start of each name.

    Returns:
        list of cleaned names, in the same order as ``columns``.
    """
    cleaned = []
    for name in columns:
        stripped = True
        while stripped:
            stripped = False
            for prefix in prefixes:
                # An empty prefix would match forever, so it is skipped.
                if prefix and name.startswith(prefix):
                    name = name[len(prefix) :]
                    stripped = True
        cleaned.append(name)
    return cleaned


rent_df.columns = strip_column_prefixes(rent_df.columns)
sale_df.columns = strip_column_prefixes(sale_df.columns)

#### remove duplicates:

In [None]:
# Keep the first row seen for every scoutId; later repeats are discarded.
rent_df = rent_df[~rent_df.duplicated(subset="scoutId")]
sale_df = sale_df[~sale_df.duplicated(subset="scoutId")]

#### remove columns with missing values:

In [None]:
# A column is kept only if every one of its cells is populated.
rent_df = rent_df.loc[:, rent_df.notnull().all()]
sale_df = sale_df.loc[:, sale_df.notnull().all()]

#### remove rows with unwanted features (TODO: no row filtering is implemented yet)

## 2) Feature engineering

#### add relative prices:

In [None]:
expose_prefix = "https://www.immobilienscout24.de/expose/"

# For each data set: amount per square metre of living space, plus a link to
# the listing built from its scoutId.
for frame, amount_col, ratio_col in (
    (rent_df, "baseRent", "rent_m2"),
    (sale_df, "purchasePrice", "price_m2"),
):
    frame[ratio_col] = frame[amount_col] / frame["livingSpace"]
    frame["link"] = expose_prefix + frame["scoutId"].astype(str)

#### add median rents for zip and kreis

In [None]:
# Attach to every rent listing the median rent per m² of its zip code and of
# its Kreis (column "regio2"). transform("median") returns one value per row,
# aligned on the row index, so no join is needed. Because assign overwrites an
# existing column, the cell can be re-run without the
# "columns overlap but no suffix specified" error that DataFrame.join raises.
rent_df = rent_df.assign(
    rent_m2_zip=rent_df.groupby("zipCode")["rent_m2"].transform("median"),
    rent_m2_kreis=rent_df.groupby("regio2")["rent_m2"].transform("median"),
)

#### add median prices for zip and kreis

In [None]:
# Attach to every sale listing the median price per m² of its zip code and of
# its Kreis (column "regio2"). transform("median") returns one value per row,
# aligned on the row index, so no join is needed. Because assign overwrites an
# existing column, the cell can be re-run without the
# "columns overlap but no suffix specified" error that DataFrame.join raises.
sale_df = sale_df.assign(
    price_m2_zip=sale_df.groupby("zipCode")["price_m2"].transform("median"),
    price_m2_kreis=sale_df.groupby("regio2")["price_m2"].transform("median"),
)

#### add return on investment based on the median rent of each listing's zip code

In [None]:
# Median rent per m² of each zip code, taken from the rent listings.
rent_m2_by_zip = rent_df.groupby("zipCode")["rent_m2"].median()

# Look up the zip-level rent for every sale listing; zip codes without any
# rent listing yield NaN. Using map + assign (rather than DataFrame.join) lets
# the cell be re-run: the column is overwritten instead of raising
# "columns overlap but no suffix specified".
sale_df = sale_df.assign(rent_m2_zip=sale_df["zipCode"].map(rent_m2_by_zip))

# Twelve months of the zip's median rent divided by the listing's own price,
# both per m².
sale_df["yearly_ROI"] = sale_df["rent_m2_zip"] * 12 / sale_df["price_m2"]

#### group median rents and prices into new dataframe:

In [None]:
# Grouping key, and the minimum number of values a group needs in a data set
# to be kept for that data set.
group = "zipCode"
min_count = 10

# Median and non-missing count of rent per m² for every group. The aggregation
# is computed once and reused for both the filter and the filtered table.
rent_group = rent_df.groupby(group)
rent_stats = rent_group["rent_m2"].agg(["median", "count"])
rent_filt = rent_stats["count"] >= min_count
rent_group_df = rent_stats[rent_filt]

# Same for the price per m² of the sale listings.
sale_group = sale_df.groupby(group)
sale_stats = sale_group["price_m2"].agg(["median", "count"])
sale_filt = sale_stats["count"] >= min_count
sale_group_df = sale_stats[sale_filt]

# Align the four series on the group key. A group missing from either filtered
# table has NaN in that table's columns, and dropna removes it, so only groups
# that passed the filter in both data sets remain.
combined_df = pd.concat(
    [
        rent_group_df["median"],
        rent_group_df["count"],
        sale_group_df["median"],
        sale_group_df["count"],
    ],
    axis=1,
    keys=["median_rent_m2", "rent_count", "median_price_m2", "sale_count"],
).dropna()

#### compute median return on investment ratios:

In [None]:
combined_feature = "median_yearly_ROI"

# Twelve months of median rent per m², divided by the median price per m².
yearly_rent_m2 = combined_df["median_rent_m2"] * 12
combined_df[combined_feature] = yearly_rent_m2 / combined_df["median_price_m2"]

#### select features:

In [None]:
# Columns that close both selections, in display order.
shared_columns = ["livingSpace", "noRooms", "zipCode", "regio2", "link"]

# Rent view: per-m² rent figures first, then the raw rent and shared columns.
rent_select_columns = [
    "rent_m2",
    "rent_m2_zip",
    "rent_m2_kreis",
    "baseRent",
    *shared_columns,
]
rent_select_df = rent_df.loc[:, rent_select_columns].copy()

# Sale view: return figure first, then per-m² prices, raw price and shared
# columns.
sale_select_columns = [
    "yearly_ROI",
    "rent_m2_zip",
    "price_m2",
    "price_m2_zip",
    "price_m2_kreis",
    "purchasePrice",
    *shared_columns,
]
sale_select_df = sale_df.loc[:, sale_select_columns].copy()

## 3) Results

In [None]:
# Rank the groups from the highest to the lowest median yearly return, then
# display the ranked table.
combined_df = combined_df.sort_values(by="median_yearly_ROI", ascending=False)
combined_df

#### apartments for rent:

In [None]:
# Filter settings for the rent listings: column to filter on, value to match,
# and column to order the result by.
rent_filt_col = "zipCode"
# NOTE(review): 1069 has only four digits, presumably zip code 01069 whose
# leading zero was lost when the column was parsed as a number. Confirm.
rent_filt_arg = 1069
# Must name a column of rent_select_df; it drives the sort below.
rent_filt_ord = "rent_m2"

rent_filt = rent_select_df[rent_filt_col] == rent_filt_arg
# The 20 matching listings with the lowest value in the ordering column.
rent_select_df[rent_filt].sort_values(rent_filt_ord).head(20)

#### apartments for sale:

In [15]:
# Filter settings for the sale listings: column to filter on, value to match,
# and column to order the result by.
# NOTE(review): the original trailing comments ("obj_zipCode", "Duisburg") look
# like alternative settings, e.g. filtering "regio2" by "Duisburg". Confirm.
sale_filt_col = "zipCode"
sale_filt_arg = 47169
# Must name a column of sale_select_df; it drives the sort below.
sale_filt_ord = "yearly_ROI"

sale_filt = sale_select_df[sale_filt_col] == sale_filt_arg
# The 50 matching listings with the highest value in the ordering column.
sale_select_df[sale_filt].sort_values(sale_filt_ord, ascending=False).head(50)

Unnamed: 0,yearly_ROI,rent_m2_zip,price_m2,price_m2_zip,price_m2_kreis,purchasePrice,livingSpace,noRooms,zipCode,regio2,link
19360,0.7,6.2,106.38,142.23,1264.26,10000.0,94.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056598
19358,0.7,6.2,106.38,142.23,1264.26,10000.0,94.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056597
19365,0.7,6.2,106.98,142.23,1264.26,4600.0,43.0,1.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056600
19374,0.69,6.2,107.87,142.23,1264.26,9600.0,89.0,4.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056589
19376,0.69,6.2,107.89,142.23,1264.26,4100.0,38.0,1.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056599
19375,0.65,6.2,115.24,142.23,1264.26,12100.0,105.0,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056590
19368,0.65,6.2,115.24,142.23,1264.26,12100.0,105.0,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056593
6202,0.52,6.2,142.06,142.23,1264.26,18100.0,127.41,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056344
6200,0.52,6.2,142.22,142.23,1264.26,7700.0,54.14,2.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056348
6203,0.52,6.2,142.23,142.23,1264.26,11400.0,80.15,5.0,47169,Duisburg,https://www.immobilienscout24.de/expose/117056350


<IPython.core.display.Javascript object>

In [16]:
# Show every column of the sale listing stored under index label 27482.
sale_df.loc[27482, :]

timestamp                                             2020-04-13 03:58:40.571308
regio1                                                       Nordrhein_Westfalen
cId                                                                     10545277
newlyConst                                                                     n
balcony                                                                        n
picturecount                                                                   4
scoutId                                                                116053299
hasKitchen                                                                     n
bln                                                          Nordrhein_Westfalen
cellar                                                                         y
purchasePriceRange                                                             1
houseNumber                                                                   18
livingSpace                 

<IPython.core.display.Javascript object>

In [17]:
# Show every column of the sale listing stored under index label 26221.
sale_df.loc[26221, :]

timestamp                                             2020-04-13 03:48:48.758580
regio1                                                       Nordrhein_Westfalen
cId                                                                     15375545
newlyConst                                                                     n
balcony                                                                        y
picturecount                                                                   8
scoutId                                                                116451277
hasKitchen                                                                     n
bln                                                          Nordrhein_Westfalen
cellar                                                                         y
purchasePriceRange                                                             3
houseNumber                                                       no_information
livingSpace                 

<IPython.core.display.Javascript object>