In [1]:
import pandas as pd
import requests
import time

from pathlib import Path
# Load the town table written by the previous pipeline step (step 3).
p = Path.home().joinpath("projects", "okapi", "towns", "towns_step3.csv")
df = pd.read_csv(p)

# Function to get coordinates for a city name
def get_coordinates(city_name, country="Germany", timeout=10):
    """Look up the latitude/longitude of a city via the Nominatim search API.

    Parameters
    ----------
    city_name : str
        Name of the city to geocode.
    country : str, optional
        Country appended to the free-form query to disambiguate the city
        name. Defaults to ``"Germany"``.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up, so that a
        single stalled connection cannot block a long batch run.

    Returns
    -------
    tuple
        ``(lat, lon)`` as floats for the first search hit, or
        ``(None, None)`` when the name is missing/blank, nothing is found,
        or the request fails. Failures are printed, never raised, so a
        batch loop keeps going.
    """
    # Missing names (None / NaN coming out of pandas) or blank strings cannot
    # be geocoded; skip the network round trip instead of querying "nan".
    if not isinstance(city_name, str) or not city_name.strip():
        return None, None

    try:
        # Pass the query through `params` so requests URL-encodes it; names
        # containing '&', '#', '+' etc. would otherwise corrupt the query string.
        response = requests.get(
            "https://nominatim.openstreetmap.org/search",
            params={"q": f"{city_name},{country}", "format": "json", "limit": 1},
            headers={'User-Agent': 'CityCoordinatesLookup/1.0'},
            timeout=timeout,
        )
        # Surface HTTP errors (429 rate limiting, 5xx) with a clear message
        # instead of an opaque JSON decoding failure.
        response.raise_for_status()
        data = response.json()

        if not data:
            return None, None

        lat = float(data[0]['lat'])
        lon = float(data[0]['lon'])
        print(city_name, lat, lon)
        return lat, lon
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller continue.
        print(f"Error getting coordinates for {city_name}: {e}")
        return None, None

# Nominatim's usage policy allows at most one request per second; going
# faster risks HTTP 429 responses or a ban of this client.
NOMINATIM_MIN_INTERVAL_S = 1.0

# Geocode every town, collecting the results in plain lists first so the
# new columns can be created in one step with a proper float dtype.
latitudes = []
longitudes = []
for city in df['stadt_name']:
    lat, lon = get_coordinates(city)
    latitudes.append(lat)
    longitudes.append(lon)
    # Sleep to respect the API rate limit
    time.sleep(NOMINATIM_MIN_INTERVAL_S)

# float64 columns: failed lookups (None) become NaN instead of leaving the
# columns as generic `object` dtype.
df['latitude'] = pd.Series(latitudes, index=df.index, dtype='float64')
df['longitude'] = pd.Series(longitudes, index=df.index, dtype='float64')

df

Berlin 52.510885 13.3989367
Hamburg 53.550341 10.000654
München 48.1371079 11.5753822
Köln 50.938361 6.959974
Frankfurt am Main 50.1106444 8.6820917
Stuttgart 48.7784485 9.1800132
Düsseldorf 51.2254018 6.7763137
Leipzig 51.3406321 12.3747329
Dortmund 51.5142273 7.4652789
Essen 51.4582235 7.0158171
Bremen 53.0758196 8.8071646
Dresden 51.0493286 13.7381437
Hannover 52.3744779 9.7385532
Nürnberg 49.453872 11.077298
Duisburg 51.434999 6.759562
Bochum 51.4818111 7.2196635
Wuppertal 51.264018 7.1780374
Bielefeld 52.0191005 8.531007
Bonn 50.7352621 7.1024635
Münster 51.9625101 7.6251879
Mannheim 49.4892913 8.4673098
Karlsruhe 49.0068705 8.4034195
Augsburg 48.3690341 10.8979522
Wiesbaden 50.0820384 8.2416556
Mönchengladbach 51.1947131 6.4353792
Gelsenkirchen 51.5110321 7.0960124
Aachen 50.776351 6.083862
Braunschweig 52.2646577 10.5236066
Chemnitz 50.8323531 12.918914
Kiel 54.3227085 10.135555
Halle (Saale) 51.4824354 11.9712985
Magdeburg 52.1315889 11.6399609
Freiburg im Breisgau 47.9960901 7

Unnamed: 0,flaeche_km2,pop_insgesamt,pop_maennlich,pop_weiblich,pop_density,stadt_name,latitude,longitude
0,891.12,3782202,1860115,1922087,4244,Berlin,52.510885,13.398937
1,755.09,1910160,936740,973420,2530,Hamburg,53.550341,10.000654
2,310.70,1510378,734925,775453,4861,München,48.137108,11.575382
3,405.02,1087353,527728,559625,2685,Köln,50.938361,6.959974
4,248.31,775790,382226,393564,3124,Frankfurt am Main,50.110644,8.682092
...,...,...,...,...,...,...,...,...
2054,8.25,615,303,312,75,Ziegenrück,50.614321,11.650713
2055,23.70,521,259,262,22,Schnackenburg,53.036474,11.564476
2056,8.67,482,253,229,56,Neumark,51.078964,11.245972
2057,15.73,463,237,226,29,Ummerstadt,50.259334,10.811659


In [6]:
# Persist the geocoded town table as pipeline step 4 (row index not written).
p = Path.home().joinpath("projects", "okapi", "towns", "towns_step4.csv")
df.to_csv(p, index=False)

In [8]:
# Keep only towns with at least 90,000 inhabitants (independent copy of the
# rows) and save them as pipeline step 5.
is_large_town = df['pop_insgesamt'] >= 90000
df90 = df.loc[is_large_town].copy()
p = Path.home().joinpath("projects", "okapi", "towns", "towns_step5_more_than_90k.csv")
df90.to_csv(p, index=False)