### Importing required packages

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import radians, cos, sin, asin, sqrt
import os as os

import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import unary_union, transform
import geoplot as gplt
import geoplot.crs as gcrs
import contextily as ctx
import pyproj

import pyarrow

In [7]:
# Reference point of the central business district (CBD) for each `erg_amd` labour region.
# Each point is the old town hall of the city or, where the region is a district,
# the district administration headquarters.
# NOTE: coordinates are written as Point(latitude, longitude), i.e. x = latitude and
# y = longitude. This is the reverse of the usual shapely (x = lon, y = lat) order;
# `haversine` reads bounds[0] as latitude and bounds[1] as longitude, so keep this order.
labor_cbd = {
    "Düsseldorf": Point (51.22711728567024, 6.7704764810080285),
    "Dortmund": Point (51.51196891948861, 7.465735137553109),
    "Köln": Point (50.9383704300498, 6.958988982887606),
    "Bonn": Point (50.73503728470361, 7.102899338032732),
    "Wuppertal": Point(51.25720839905365, 7.147030279060111),
    "Kleve": Point(51.78986243597705, 6.139172228340007),
    "Aachen": Point(50.77617939993918, 6.083815384434001),
    "Olpe": Point(51.0290892957023, 7.842437784445013),
    "Bochum": Point(51.48237533776603, 7.215204469121292),
    "Münster": Point(51.96168110267523, 7.627936719409362),
    "Borken": Point(51.84629832565887, 6.856618165343862),
    "Bielefeld": Point(52.021106487272874, 8.53506557404487),
    "Minden": Point(52.28904491110588, 8.918139143885831),
    "Höxter": Point(51.77405976264992, 9.382296465563085),
    "Soest": Point(51.57208234093718, 8.10774532825182),
    "Hagen": Point(51.3590612611924, 7.476178297951798),
    "Siegen": Point(50.87471554261671, 8.025504599781803),
    "Flensburg": Point(54.78112058289352, 9.43368318272154),
    "Kiel": Point(54.324349161055245, 10.132771475265042),
    "Lübeck": Point(53.86683849951886, 10.684445801235235),
    "Hamburg": Point(53.55053637354619, 9.992413966295869),
    "Dithmarschen": Point(53.998398215910576, 9.26221515095139),
    "Nordvorpommern": Point(54.315645487935036, 13.090413437466674),
    "Osnabrück": Point(52.277596432503344, 8.0426167689563),
    "Emsland": Point(52.69108709182285, 7.291898910496305),
    "Koblenz": Point(50.36019271249548, 7.59830079140032),
    "Mecklenburgische Seenplatte": Point(53.55753657547626, 13.267629993279508),
    "Hannover": Point(52.36757907151532, 9.73858717873911),
    "Stade": Point(53.60190976749639, 9.476056426015225),
    "Bremerhaven": Point(53.56655274899323, 8.586052866272288),
    "Bremen": Point(53.07614133973519, 8.809096178739113),
    "Südvorpommern": Point(54.095465494584346, 13.38196544993105),
    "Kaiserslautern": Point(49.447695362140124, 7.7684226678988475),
    "Gießen": Point(50.58399865300909, 8.680642400158868),
    "Freyung-Grafenau": Point(48.807750689457464, 13.546026564418153),
    "Frankfurt (Oder)": Point(52.34245499984809, 14.55285088270641),
    "Schwerin": Point(53.633320051434275, 11.417534776794618),
    "Wolfsburg": Point(52.42052858024334, 10.787227749881628),
    "Trier": Point(49.75233644787979, 6.635069655465216),
    "Chemnitz": Point(50.83277156463772, 12.919178607001607),
    "Berlin": Point(52.518417864363485, 13.408789139316893),
    "Lüchow-Dannenberg": Point(52.96823714631543, 11.159116321506783),
    "Altötting": Point(48.22700916312251, 12.677318721578633),
    "Ravensburg": Point(47.78178074437734, 9.615115495938467),
    "Landshut": Point(48.53648423673718, 12.153267868956299),
    "Kassel": Point(51.31230568072729, 9.49273893532578),
    "Hameln": Point(52.1064641740211, 9.360485420264386),
    "Rostock": Point(54.08854541750767, 12.136781838799111),
    "Nürnberg": Point(49.45696309285592, 11.077004632451601),
    "München": Point(48.13753572282894, 11.576132170492698),
    "Jena": Point(50.92873008448338, 11.587185047837472),
    "Hof": Point(50.322008526185854, 11.918394852781205),
    "Würzburg": Point(49.79378723295822, 9.92807730859968),
    "Landau": Point(49.16925783323122, 8.074816093254457),
    "Passau": Point(48.575475619066474, 13.468276837435983),
    "Göttingen": Point(51.532891133251226, 9.934931477176558),
    "Eisenach": Point(50.975018666047085, 10.320424820236628),
    "Braunschweig": Point(52.26518706317302, 10.52650341390602),
    "Goslar": Point(51.905904736262826, 10.42968614117496),
    "Göppingen": Point(48.703505809887865, 9.654627352753714),
    "Stuttgart": Point(48.77503606420815, 9.177901430760599),
    "Fulda": Point(50.5532491293909, 9.678401215310402),
    "Traunstein": Point(47.86921091146352, 12.648180945326857),
    "Celle": Point(52.62433484595192, 10.081743448123177),
    "Bad Kreuznach": Point(49.84937898658614, 7.860570888749339),
    "Weißenburg-Gunzenhausen": Point(49.034902320999315, 10.971036439680041),
    "Schwäbisch Hall": Point(49.112575413851715, 9.738142673700692),
    "Dresden": Point(51.04819280446458, 13.740432144096395),
    "Uelzen": Point(52.96320651623364, 10.559753841174965),
    "Emden": Point(53.366905125803136, 7.206652871151812),
    "Oldenburg": Point(53.13923001663253, 8.21516846816467),
    "Wilhelmshaven": Point(53.52687812144386, 8.110516458360499),
    "Teltow-Fläming": Point(52.08798219714457, 13.17567601204308),
    "Vechta": Point(52.72774741679917, 8.288751018299749),
    "Prignitz": Point(53.079416727793976, 11.854920106722943),
    "Essen": Point(51.458542411196476, 7.016304424566899),
    "Ingolstadt": Point(48.7630970352964, 11.426572481659527),
    "Weilheim-Schongau": Point(47.83897791860396, 11.14465479573185),
    "Bitburg": Point(49.9746920999758, 6.52410901861842),
    "Altenkirchen": Point(50.689479604846674, 7.649155657670099),
    "Heidelberg": Point(49.411772365846296, 8.711839416431182),
    "Halle": Point(51.4825064735239, 11.971578980539862),
    "Mainz": Point(50.002001443480665, 8.276536487710137),
    "Ludwigshafen": Point(49.48110553853744, 8.45013478217706),
    "Darmstadt": Point(49.8728741693272, 8.651653451967146),
    "Donau-Ries": Point(48.85172104439563, 10.488390090773295),
    "Ulm": Point(48.39713510954155, 9.994361483504864),
    "Ansbach": Point(49.302797331803994, 10.572774183957991),
    "Karlsruhe": Point(49.014655444087225, 8.404506819567747),
    "Saarbrücken": Point(49.23459980297805, 6.9968427759786405),
    "Aschaffenburg": Point(49.97387835230972, 9.145654518917652),
    "Amberg": Point(49.44567237464058, 11.85965546816467),
    "Heilbronn": Point(49.14273288774531, 9.21940009699972),
    "Frankfurt": Point(50.111028954442254, 8.689236052749862),
    "Cham": Point(49.219205237327216, 12.665757728849895),
    "Memmingen": Point(47.98715576034545, 10.181262743975003),
    "Suhl": Point(50.61065902639905, 10.693367822527438),
    "Bayreuth": Point(49.947261563056934, 11.575549130237679),
    "Gera": Point(50.87860411040458, 12.081290449674505),
    "Limburg-Weilburg": Point(50.388825830669774, 8.061303729941384),
    "Heidenheim": Point(48.67702671370914, 10.152956483286566),
    "Reutlingen": Point(48.49180713503567, 9.21113365185631),
    "Nordhausen": Point(51.50218022263445, 10.794032778183224),
    "Ortenaukreis": Point(48.466185839487046, 7.941534908993413),
    "Pirmasens": Point(49.1926444655647, 7.60789235385791),
    "Erfurt": Point(50.97827951228787, 11.029490469691774),
    "Waldeck-Frankenberg": Point(51.05880652264637, 8.802291708410097),
    "Coburg": Point(50.255263700302955, 10.958823194304012),
    "Waldshut": Point(47.63792144902148, 8.276401385394987),
    "Bamberg": Point(49.89242756094573, 10.887011048758838),
    "Vulkaneifel": Point(50.202041886221345, 6.818814926045207),
    "Uckermark": Point(53.396415099201995, 13.856302628901238),
    "Pforzheim": Point(48.891457022613515, 8.703141236909238),
    "Kronach": Point(50.241996868977544, 11.325169742803372),
    "Kempten": Point(47.72691946144008, 10.316880243722004),
    "Erlangen": Point(49.59130762330065, 11.008054481938599),
    "Elbe-Elster": Point(51.85761742192571, 13.241771618290773),
    "Freiburg": Point(47.9965876440608, 7.8496267641263415),
    "Saalfeld-Rudolstadt": Point(50.647779498770674, 11.361451185009555),
    "Schweinfurt": Point(50.044908153862416, 10.235048977654966),
    "Lörrach": Point(47.6152246189973, 7.66463050074816),
    "Bautzen": Point(51.181987203086166, 14.423874068391397),
    "Konstanz": Point(47.66062558892475, 9.173600810544482),
    "Böblingen": Point(48.68583266540676, 9.011348183496336),
    "Rottweil": Point(48.168490073858734, 8.62558313812377),
    "Zollernalbkreis": Point(48.273110530626724, 8.851339922217837),
    "Deggendorf": Point(48.83384334172955, 12.962169011421837),
    "Magdeburg": Point(52.13157777761276, 11.640334089331567),
    "Sigmaringen": Point(48.08705197653663, 9.217282387277978),
    "Augsburg": Point(48.36895075420482, 10.898748229347198),
    "Regensburg": Point(49.02049173607306, 12.095643768393286),
    "Märkisch-Oderland": Point(52.53442150094389, 14.382878681954669),
    "Leipzig": Point(51.335960443934816, 12.372089897234963),
    "Oberhavel": Point(52.7565627956842, 13.236311545139271),
    "Potsdam-Mittelmark": Point(52.41185005243036, 13.061622540108594),
    "Cottbus": Point(51.83075623808454, 14.599340072128378),
    "Havelland": Point(52.60494880780431, 12.344015081958336),
    "Ostprignitz-Ruppin": Point(52.92087128699491, 12.802603978389287),
    "Dessau-Roßlau": Point(51.835991375837374, 12.246139945000458),
    "Stendal": Point(52.60626928862782, 11.859487002258335),
    "Unstrut-Hainich": Point(51.23965271909035, 10.482615191165207)  
}

### Haversine function

In [8]:
# Great-circle (haversine) distance between two points on the earth's surface.
def haversine(point1, point2, radius_km=6371):
    """
    Calculate the great-circle distance between two points on the earth.

    Both points must follow this notebook's convention of storing coordinates
    as ``Point(latitude, longitude)`` in decimal degrees: ``bounds[0]`` is read
    as the latitude and ``bounds[1]`` as the longitude.

    Parameters
    ----------
    point1, point2 : object exposing ``.bounds`` (e.g. a shapely ``Point``)
        The two locations, each given as (latitude, longitude) in degrees.
    radius_km : float, optional
        Radius of the sphere in kilometers. Defaults to 6371, the mean
        radius of the earth.

    Returns
    -------
    float
        Distance along the sphere between the two points, in kilometers.
        NaN coordinates propagate to a NaN result.
    """
    lat1, lon1 = point1.bounds[0], point1.bounds[1]
    lat2, lon2 = point2.bounds[0], point2.bounds[1]

    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2

    # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make asin() raise a math domain error. A plain comparison
    # is used instead of min() so that a NaN value of `a` is left untouched.
    if a > 1.0:
        a = 1.0

    c = 2 * asin(sqrt(a))
    return c * radius_km

### Adding the city hall location of each labour region's central business district

In [9]:
# Load the apartment-rent listings from the intermediate dataset folder.
# low_memory=False makes pandas parse the file in one pass instead of in chunks,
# which avoids mixed-dtype inference within a column.
df = pd.read_csv(r"../datasets/Intermediate/ger-3-apartment-rents.csv", low_memory = False)

In [10]:
# PROJ definition of the projected source grid used by the `xvar` / `yvar` columns.
# NOTE(review): these parameters look like ETRS89-LAEA Europe (EPSG:3035) -- confirm.
laea_proj_str = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs"
# PROJ definition of the geographic target system (WGS84 longitude/latitude).
wgs84_proj_str = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"

# Build the transformer once. Constructing a pyproj.Transformer is expensive, and
# doing it inside the per-row function repeated that cost for every listing.
# always_xy=True pins the output order to (longitude, latitude).
_laea_to_wgs84 = pyproj.Transformer.from_crs(laea_proj_str, wgs84_proj_str, always_xy=True)


def transform_coordinates_to_wgs84(row):
    """
    Convert one listing's projected coordinates to a WGS84 point.

    Parameters
    ----------
    row : mapping with keys ``'xvar'`` and ``'yvar'`` (e.g. a DataFrame row)
        Easting and northing of the listing in the LAEA grid.

    Returns
    -------
    shapely.geometry.Point
        The location as ``Point(latitude, longitude)``. The coordinates are
        deliberately stored latitude-first so that they match the order used
        by the ``labor_cbd`` reference points and expected by ``haversine``.
    """
    lon, lat = _laea_to_wgs84.transform(row['xvar'], row['yvar'])
    return Point(lat, lon)


# Apply the transformation function to each row in the DataFrame
df['wgs84_geometry'] = df.apply(transform_coordinates_to_wgs84, axis=1)

# Create a GeoDataFrame from the DataFrame with the transformed points.
# NOTE(review): the points are stored as (lat, lon) while the CRS declares
# longitude/latitude; this is fine for `haversine`, but any geopandas spatial
# operation or plot on this frame would see swapped axes -- confirm before reuse.
gdf = gpd.GeoDataFrame(df, geometry='wgs84_geometry', crs=wgs84_proj_str)

### Creating the GeoDataFrame

In [11]:
# One row per labour region: the region key (`erg_amd`) and its CBD reference point.
cbd_centers = pd.DataFrame(labor_cbd.items(), columns = ['erg_amd', 'city_center'])

# Promote `city_center` to the geometry column directly. The previous
# build-geometry / drop / rename(columns=...) sequence produced the same two
# columns, but renaming the geometry column through `rename` can leave the
# GeoDataFrame's active-geometry name pointing at a column that no longer exists.
cbd_centers = gpd.GeoDataFrame(cbd_centers, geometry = 'city_center', crs = "epsg:4326")

# Attach the CBD reference point to every listing of the same labour region.
# The inner join drops listings whose `erg_amd` has no entry in `labor_cbd`.
gdf  = gdf.merge(cbd_centers,
               how = 'inner',
               on = 'erg_amd')

In [12]:
# Distance in kilometers from each listing to the CBD reference point of its labour region.
# otypes is given explicitly so the call also works when the merge left no rows:
# without it, np.vectorize cannot infer the output type and raises on size-0 inputs.
gdf['distance'] = np.vectorize(haversine, otypes=[float])(gdf['city_center'], gdf['wgs84_geometry'])


In [13]:
# log(1 + distance): the +1 shift keeps the value defined (and equal to 0) for a
# listing located exactly at the reference point. np.log1p computes this directly
# and stays accurate for distances close to zero.
gdf['log_distance'] = np.log1p(gdf['distance'])

In [14]:
# Save the listings, now including the `distance` and `log_distance` columns, to the
# intermediate dataset folder. index=False keeps the row index out of the file.
gdf.to_csv(r"../datasets/Intermediate/ger-3-apartment-rent-coord.csv", index= False)

In [16]:
# Preview the first rows of the merged frame to check the new geometry and distance columns.
gdf.head()

Unnamed: 0.1,Unnamed: 0,kid2019,edate,X.1,X,obid,freiab,plz,mietekalt,mietewarm,...,keller_miss,garten_miss,aufzug_miss,balkon_miss,einbaukueche_miss,bauer_pi_app_rent,wgs84_geometry,city_center,distance,log_distance
0,1,1001,2018-02-01,326063.0,326063.0,102801922.0,nach Vereinbarung,24941,449.0,Other missing,...,0.0,0.0,0.0,0.0,0.0,100.129785,POINT (54.77546 9.42522),POINT (54.78112 9.43368),0.830783,0.604744
1,2,1001,2018-02-01,326264.0,326264.0,102417290.0,ab sofort,24939,320.0,Other missing,...,0.0,0.0,0.0,0.0,0.0,100.129785,POINT (54.79344 9.42497),POINT (54.78112 9.43368),1.479014,0.907861
2,3,1001,2018-02-01,327058.0,327058.0,102836033.0,01.05.2018,24943,225.0,Other missing,...,0.0,0.0,0.0,0.0,0.0,100.129785,POINT (54.78452 9.44063),POINT (54.78112 9.43368),0.584316,0.460153
3,4,1001,2018-02-01,324850.0,324850.0,102946030.0,ab sofort,24937,465.0,Other missing,...,0.0,0.0,0.0,0.0,0.0,100.129785,POINT (54.77539 9.40969),POINT (54.78112 9.43368),1.665588,0.980425
4,5,1001,2018-02-01,325000.0,325000.0,102315598.0,01.04.2018,24939,400.0,Other missing,...,0.0,0.0,0.0,0.0,0.0,100.129785,POINT (54.80235 9.40930),POINT (54.78112 9.43368),2.831281,1.343199
