# Geocoding BMW listing locations (postal codes) using the geoadmin API

## Libraries and settings

In [1]:
# Libraries
import fnmatch
import json
import os
import urllib
import urllib.parse

import folium
import pandas as pd
import requests
from IPython.display import clear_output

# Suppress every warning for the remainder of the notebook
import warnings
warnings.filterwarnings(action='ignore')

# Print the directory that relative file paths are resolved against
print(os.getcwd())

/workspaces/DA_Project/Project/Additional/3


## Geocoding a single address

### Define base url for address search

In [2]:
# Endpoint of the geoadmin search service; the trailing "?" starts the query string
base_url = "https://api3.geo.admin.ch/rest/services/api/SearchServer?"

# Query parameters: the text to search for, the origin to search in, and the search type
parameters = dict(searchText="8400", origins="address", type="locations")

# Encode the parameters as a query string and show the complete request url
query_string = urllib.parse.urlencode(parameters)
print(base_url + query_string)

https://api3.geo.admin.ch/rest/services/api/SearchServer?searchText=8400&origins=address&type=locations


### Server request & response

In [3]:
# Server request; the timeout keeps the cell from hanging on a stalled connection
r = requests.get(f"{base_url}{urllib.parse.urlencode(parameters)}", timeout=10)

# Stop on an HTTP error status instead of trying to parse an error body
r.raise_for_status()

# Get data in json-format
data = r.json()

# Take only the first server response, convert to data frame with relevant infos
df = pd.DataFrame.from_dict(list(data.values())[0][0], orient='columns')

# Select the fields by label rather than by position, so the selection keeps
# working if the API adds or reorders attributes
df.loc[['featureId', 'label', 'lat', 'lon', 'x', 'y'], ['attrs']]

Unnamed: 0,attrs
featureId,1150440_0
label,Kreuzstrasse 1 <b>8400 Winterthur</b>
lat,47.503555
lon,8.72975
x,262222.875
y,697271.5


## Geocoding multiple addresses

### Importing BMW listing data

In [4]:
# Print the directory that relative file paths are resolved against
print(os.getcwd())

# List the CSV files available in that directory
flist = fnmatch.filter(os.listdir('.'), '*.csv')
for csv_name in flist:
    print(csv_name)

# Columns to keep from the prepared data set
selected_columns = [
    'web-scraper-order',
    'model_raw',
    'Location',
    'PLZ',
    'Year',
    'price',
    'km',
    'luxury',
    'competition',
    'xDrive',
    'automat',
    'km_cat',
    'km_per_year',
]

# Read the file, keep the selected columns and at most the first 1000 BMW records
df = (
    pd.read_csv('bmw_data_prepared.csv', sep=',', encoding='utf-8')
      .loc[:, selected_columns]
      .iloc[:1000]
)

# Number of rows and columns
print(df.shape)

# First records
df.head(10)



/workspaces/DA_Project/Project/Additional/3
bmw_data_geocoded.csv
bmw_data_prepared.csv
(990, 13)


Unnamed: 0,web-scraper-order,model_raw,Location,PLZ,Year,price,km,luxury,competition,xDrive,automat,km_cat,km_per_year
0,1705078003-1,X3 xDrive 20d xLine Steptronic,GE,1227,2016,22900,119500.0,0,0,1,1,50-120k,14937.5
1,1705078003-2,540i,BL,4461,2006,13750,124900.0,0,0,0,1,120k+,6938.89
2,1705078003-3,225xe iPerformance Active Tourer Steptronic M ...,BL,4460,2019,28800,28950.0,0,0,0,1,0-50k,5790.0
3,1705078003-4,316i Compact,ZG,6300,2000,1100,248013.0,0,0,0,1,120k+,10333.88
4,1705078003-5,530xi Steptronic,LU,6010,2008,10900,160000.0,0,0,0,1,120k+,10000.0
5,1705078003-6,X5 xDrive 40d Steptronic,TI,6964,2015,33900,125900.0,0,0,1,1,120k+,13988.89
6,1705078003-7,525d Touring,FR,1700,2004,3300,336000.0,0,0,0,1,120k+,16800.0
7,1705078003-8,225i xDrive Active Tourer Steptronic,BE,3400,2017,13500,97000.0,0,0,1,1,50-120k,13857.14
8,1705078003-9,435i Coupé xDrive Sport Line Steptronic,ZH,8157,2014,22950,112500.0,0,0,1,1,50-120k,11250.0
9,1705078003-10,X5 xDrive 48V 40d M Sport Pro Steptronic,VD,1530,2024,112300,,0,0,1,1,,


### Geocoding the postal codes of multiple BMW listings using the geoadmin API

In [5]:
# Define base url
base_url = "https://api3.geo.admin.ch/rest/services/api/SearchServer?"

# Seconds to wait for the server before giving up on a single request
request_timeout = 10

# Placeholder coordinates for postal codes that could not be geocoded
missing = (float('nan'), float('nan'))

# Successful look-ups keyed by postal code, so each distinct code is requested
# only once; failed look-ups are not cached and are retried on the next occurrence
coords_by_plz = {}
failed = []

# Geocode list of addresses (one (lat, lon) tuple per row of df, in row order)
geolocation = []
plz_values = df['PLZ'].astype(str)
for n, i in enumerate(plz_values, start=1):

    print('Geocoding address',
          n,
          'out of',
          len(plz_values),
          ':',
          i)
    clear_output(wait=True)

    if i not in coords_by_plz:
        try:
            # Set up search parameters - address, origins and type
            parameters = {"searchText": i,
                          "origins": "address",
                          "type": "locations",
                         }

            # Server request; raise on timeouts and HTTP error statuses
            r = requests.get(f"{base_url}{urllib.parse.urlencode(parameters)}",
                             timeout=request_timeout)
            r.raise_for_status()

            # Get data
            data = r.json()

            # Take the first hit of the first value in the response (presumably
            # the result list - verify against the API documentation) and read
            # the coordinates by key instead of by position
            attrs = list(data.values())[0][0]['attrs']
            coords_by_plz[i] = (float(attrs['lat']), float(attrs['lon']))

        except (requests.RequestException, ValueError, KeyError,
                IndexError, TypeError, AttributeError):
            # Best effort: network problems, empty result lists and malformed
            # responses yield missing coordinates. KeyboardInterrupt is
            # deliberately not caught so the loop can still be stopped.
            failed.append(i)

    geolocation.append(coords_by_plz.get(i, missing))

# Write lat and lon to df; reuse df's own index so the values align row by row
# even if df does not carry a default 0..n-1 index
df_loc = pd.DataFrame(geolocation,
                      columns=["lat", "lon"],
                      index=df.index)
df['lat'] = df_loc['lat']
df['lon'] = df_loc['lon']

# Report postal codes that could not be geocoded instead of failing silently
if failed:
    print(len(failed), 'look-ups failed for:', sorted(set(failed)))

df.head(5)

Unnamed: 0,web-scraper-order,model_raw,Location,PLZ,Year,price,km,luxury,competition,xDrive,automat,km_cat,km_per_year,lat,lon
0,1705078003-1,X3 xDrive 20d xLine Steptronic,GE,1227,2016,22900,119500.0,0,0,1,1,50-120k,14937.5,46.183849,6.138751
1,1705078003-2,540i,BL,4461,2006,13750,124900.0,0,0,0,1,120k+,6938.89,47.462429,7.835723
2,1705078003-3,225xe iPerformance Active Tourer Steptronic M ...,BL,4460,2019,28800,28950.0,0,0,0,1,0-50k,5790.0,47.467308,7.848388
3,1705078003-4,316i Compact,ZG,6300,2000,1100,248013.0,0,0,0,1,120k+,10333.88,47.186596,8.499633
4,1705078003-5,530xi Steptronic,LU,6010,2008,10900,160000.0,0,0,0,1,120k+,10000.0,47.044533,8.272661


### Plot addresses on map

In [6]:
# Initialise the map
m = folium.Map(location=[47.44, 8.65], zoom_start=10)

# Add lat/lon of addresses. Only rows without coordinates are dropped; a plain
# dropna() would also discard records that merely lack e.g. 'km' or 'km_cat'.
df_sub = df.dropna(subset=['lat', 'lon'])
for row in df_sub[['lat', 'lon', 'PLZ']].itertuples(index=False):
    folium.Marker(location=(row.lat, row.lon),
                  popup=str(row.PLZ)).add_to(m)

# Layer control
folium.LayerControl().add_to(m)

# Plot map
m

### Save data to file

In [7]:
# Persist the geocoded data frame as a comma-separated UTF-8 file without the index column
df.to_csv(path_or_buf='bmw_data_geocoded.csv', index=False, sep=",", encoding='utf-8')

### Jupyter notebook --footer info-- (please always provide this at the end of each submitted notebook)

In [8]:
import os
import platform
import socket
from datetime import datetime
from platform import python_version

# Collect the footer lines first, then emit them in one go
rule = '-----------------------------------'
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
footer_lines = [
    rule,
    os.name.upper(),
    f"{platform.system()} | {platform.release()}",
    f"Datetime: {timestamp}",
    f"Python Version: {python_version()}",
    rule,
]
print("\n".join(footer_lines))

-----------------------------------
POSIX
Linux | 6.2.0-1018-azure
Datetime: 2024-01-13 12:26:59
Python Version: 3.10.13
-----------------------------------
