# German real estate listings over time

## 0) Imports

In [12]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import glob

%load_ext nb_black
%matplotlib inline

# Default size (width, height in inches) for every matplotlib figure in this notebook.
plt.rcParams["figure.figsize"] = (15, 10)
# Show floats in rendered DataFrames with two decimal places (display only).
pd.set_option("display.precision", 2)

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

## 1) Data preparation

#### helper functions:

In [17]:
def load_df(filepath):
    """Read one semicolon-separated listings CSV and return a cleaned frame.

    Cleaning steps, in order:
      1. remove the substrings "obj_", "ga_" and "geo_" from every column name,
      2. keep only the first row for each "scoutId",
      3. keep only the columns that contain no missing value.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The de-duplicated listings restricted to fully populated columns.
    """
    listings = pd.read_csv(filepath, sep=";", low_memory=False)

    cleaned_names = []
    for name in listings.columns:
        # str.replace strips the marker wherever it occurs, not only as a prefix
        for marker in ("obj_", "ga_", "geo_"):
            name = name.replace(marker, "")
        cleaned_names.append(name)
    listings.columns = cleaned_names

    listings = listings.drop_duplicates(subset="scoutId")

    # a column survives only if every remaining row has a value in it
    complete_columns = listings.columns[listings.notnull().all()]
    return listings[complete_columns]


def create_combined_df(date, rent_df, sale_df, group):
    """Aggregate rent and sale listings of one snapshot into per-group statistics.

    Parameters
    ----------
    date : str
        Label of the snapshot; it is appended to every output column name.
    rent_df : pandas.DataFrame
        Rent listings with columns "baseRent", "livingSpace", "zipCode" and
        the column named by ``group``.
    sale_df : pandas.DataFrame
        Sale listings with columns "purchasePrice", "livingSpace", "zipCode"
        and the column named by ``group``.
    group : str
        Column to group both frames by.

    Returns
    -------
    pandas.DataFrame
        One row per group value with the columns
        "median_rent_m2: <date>", "num_rent: <date>", "median_price_m2: <date>",
        "num_sale: <date>" and "median_yearly_ROI: <date>". Groups with a
        missing value in any of these columns are dropped.

    Notes
    -----
    The input frames are not modified: the per-listing helper columns are
    added to new frames instead of being written back into the arguments.
    """
    # assign() returns new frames, so the caller's data stays untouched and no
    # SettingWithCopyWarning is raised when the inputs are slices of other frames.
    rent = rent_df.assign(rent_m2=rent_df["baseRent"] / rent_df["livingSpace"])
    sale = sale_df.assign(price_m2=sale_df["purchasePrice"] / sale_df["livingSpace"])

    # Each sale listing is valued against the median rent per m2 of its zip code.
    zip_rent = rent.groupby("zipCode")["rent_m2"].median().rename("rent_m2_zipCode")
    sale = sale.join(zip_rent, on="zipCode")
    # yearly rent (12 months) per m2 relative to the purchase price per m2
    sale["yearly_ROI"] = sale["rent_m2_zipCode"] * 12 / sale["price_m2"]

    rent_groups = rent.groupby(group)["rent_m2"]
    sale_groups = sale.groupby(group)

    combined_df = pd.concat(
        [
            rent_groups.median(),
            rent_groups.size(),
            sale_groups["price_m2"].median(),
            sale_groups["price_m2"].size(),
            sale_groups["yearly_ROI"].median(),
        ],
        axis=1,
        keys=[
            "median_rent_m2: " + date,
            "num_rent: " + date,
            "median_price_m2: " + date,
            "num_sale: " + date,
            "median_yearly_ROI: " + date,
        ],
    )

    # keep only groups that have both rent and sale information
    return combined_df.dropna()

<IPython.core.display.Javascript object>

#### find dates:

In [18]:
# Snapshot dates are taken from the rent file names: ../data/mieten<date>.csv.
# Only "*.csv" files are matched so that stray files starting with "mieten"
# (backups, temp files, ...) cannot produce a truncated, bogus date.
dates = sorted(
    path[path.find("mieten") + len("mieten") : -len(".csv")]
    for path in glob.glob("../data/mieten*.csv")
)

<IPython.core.display.Javascript object>

#### build the time series (one pass per snapshot date):

In [22]:
# Build one wide table: rows are groups (zip codes), columns are the per-date
# statistics produced by create_combined_df for every snapshot in `dates`.
group = "zipCode"
date_frames = []

for date in dates:
    rent_filepath = "../data/mieten" + date + ".csv"
    sale_filepath = "../data/kaufen" + date + ".csv"
    rent_df = load_df(rent_filepath)
    sale_df = load_df(sale_filepath)
    combined_df = create_combined_df(date, rent_df, sale_df, group)
    date_frames.append(combined_df)

# Concatenate once instead of growing time_df inside the loop, which re-copies
# all previously collected columns on every iteration. Groups missing on some
# dates get NaN in those dates' columns (outer join on the index).
time_df = pd.concat(date_frames, axis=1) if date_frames else pd.DataFrame()

<IPython.core.display.Javascript object>

In [25]:
# Optional: uncomment to keep only the groups that have values for every date.
# time_df = time_df.dropna()
time_df

Unnamed: 0_level_0,median_rent_m2: 2020-04-04,num_rent: 2020-04-04,median_price_m2: 2020-04-04,num_sale: 2020-04-04,median_yearly_ROI: 2020-04-04,median_rent_m2: 2020-04-07,num_rent: 2020-04-07,median_price_m2: 2020-04-07,num_sale: 2020-04-07,median_yearly_ROI: 2020-04-07,...,median_rent_m2: 2020-04-16,num_rent: 2020-04-16,median_price_m2: 2020-04-16,num_sale: 2020-04-16,median_yearly_ROI: 2020-04-16,median_rent_m2: 2020-04-19,num_rent: 2020-04-19,median_price_m2: 2020-04-19,num_sale: 2020-04-19,median_yearly_ROI: 2020-04-19
zipCode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1067,11.50,273.0,2135.34,3.0,0.06,11.00,290.0,2135.34,3.0,0.06,...,11.01,288.0,2135.34,3.0,0.06,11.50,275.0,2135.34,3.0,0.06
1069,8.63,90.0,3970.12,14.0,0.03,8.63,75.0,4110.18,18.0,0.03,...,8.63,83.0,3970.12,14.0,0.03,8.98,89.0,3970.24,11.0,0.03
1097,9.50,113.0,2100.00,13.0,0.05,9.43,114.0,2100.00,13.0,0.05,...,9.50,119.0,2169.40,14.0,0.05,9.55,132.0,2079.41,18.0,0.06
1099,9.06,114.0,2945.90,25.0,0.04,9.00,147.0,3104.17,27.0,0.03,...,9.00,153.0,3128.03,30.0,0.03,9.23,135.0,3025.03,32.0,0.04
1109,8.20,26.0,2679.69,12.0,0.04,8.20,22.0,2750.00,13.0,0.04,...,8.09,26.0,2750.00,11.0,0.04,8.29,28.0,2750.00,11.0,0.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99955,6.67,1.0,1109.10,1.0,0.07,6.67,1.0,1109.10,1.0,0.07,...,5.30,2.0,1109.10,1.0,0.06,5.30,2.0,1109.10,1.0,0.06
99974,5.69,154.0,893.06,15.0,0.08,5.69,153.0,918.40,14.0,0.07,...,5.70,162.0,866.82,14.0,0.08,5.70,162.0,866.82,14.0,0.08
99991,5.19,3.0,471.94,1.0,0.13,5.04,2.0,471.94,1.0,0.13,...,5.19,3.0,471.94,1.0,0.13,5.19,3.0,471.94,1.0,0.13
99994,4.44,6.0,754.15,2.0,0.09,4.44,6.0,754.15,2.0,0.09,...,4.44,5.0,754.15,2.0,0.09,4.44,5.0,754.15,2.0,0.09


<IPython.core.display.Javascript object>