# Get Geodata from Wikipedia

## Example 1: Get Nuclear Power Plants (in service)

https://en.wikipedia.org/wiki/List_of_nuclear_power_stations

In [None]:
import pandas as pd

# Download the Wikipedia page and parse its HTML tables; the result is
# indexed like a list below (df[1], df[2]), one entry per table found.
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_nuclear_power_stations")

# Print how many tables were found, to help pick the right index.
print(len(df))
      

In [None]:
# Display the table stored at index 2 of the parsed list.
df[2]

In [None]:
# Write the table at index 1 to CSV; this file is the input of the clean-up
# cell further down.
# NOTE(review): the preview cell displays df[2] while df[1] is saved here —
# confirm which index actually holds the power-station table.
df[1].to_csv("geodata/powerplants.csv")

In [None]:
def removenote(s, k=120):
    """Return ``s`` with bracketed note markers removed.

    For every number ``i`` from 1 to ``k`` (inclusive) the literal
    markers ``[note i]`` and ``[dp i]`` are deleted, in that order.
    Markers with a number above ``k`` are left in place.
    """
    for number in range(1, k + 1):
        for marker in (f"[note {number}]", f"[dp {number}]"):
            s = s.replace(marker, "")
    return s

In [None]:
def numbersonly(s):
    """Return ``s`` reduced to digits, spaces and dots.

    Every other character (letters, signs, degree symbols, ...) is
    dropped; the relative order of the kept characters is preserved.
    """
    allowed = "0123456789 ."
    kept = []
    for ch in s:
        if ch in allowed:
            kept.append(ch)
    return "".join(kept)

In [None]:
import re
import string
import csv

# Convert the raw table dump (geodata/powerplants.csv) into a clean CSV with
# one WKT point per power plant (geodata/powerplants2023.csv).

# Decimal-degree pair with hemisphere letters, e.g. "51.32500°N 4.25861°E".
# NOTE(review): assumes the coordinate column contains such a section —
# confirm against the scraped table. Rows without it fall back to the
# original digit-only parsing below.
hemisphere_pattern = re.compile(
    r"(\d+(?:\.\d+)?)°\s*([NS])\s+(\d+(?:\.\d+)?)°\s*([EW])"
)

# `with` guarantees both files are closed even when a row fails to parse;
# newline="" is what the csv module expects for its file objects.
with open("geodata/powerplants.csv", encoding="utf-8", newline="") as file, \
        open("geodata/powerplants2023.csv", "w", encoding="utf-8", newline="") as outfile:
    reader = csv.reader(file)
    next(reader)  # skip the header row written by to_csv

    # csv.writer quotes fields containing commas or quotes, so such names
    # no longer corrupt the output; plain fields are written unchanged.
    writer = csv.writer(outfile, lineterminator="\n")
    writer.writerow(["id", "name", "units", "capacity", "country", "geometry"])

    for line in reader:
        uid = line[0]
        name = line[1]
        units = removenote(line[2])
        capacity = removenote(line[3])
        # Kept from the original so downstream values stay the same.
        # NOTE(review): this turns "1,200" into "1.200" — confirm intended.
        capacity = capacity.replace(",", ".")
        country = line[4]

        pos = line[5]
        match = hemisphere_pattern.search(pos)
        if match:
            # numbersonly() discards hemisphere letters and minus signs, so
            # southern/western positions would lose their sign; apply it here.
            lat = float(match.group(1))
            lng = float(match.group(3))
            if match.group(2) == "S":
                lat = -lat
            if match.group(4) == "W":
                lng = -lng
        else:
            # Original behaviour: keep digits/spaces/dots, drop leading
            # separators, and take the 4th and 5th space-separated tokens.
            pos = numbersonly(pos).lstrip(". ")
            lnglat = pos.split(" ")
            lat = float(lnglat[3])
            lng = float(lnglat[4])

        # WKT uses x y order, i.e. longitude first.
        geometry = f"POINT ({lng} {lat})"

        writer.writerow([uid, name, units, capacity, country, geometry])

In [None]:
import geopandas as gpd
import pandas as pd
from shapely import wkt

# Load the cleaned CSV produced by the previous cell.
df = pd.read_csv("geodata/powerplants2023.csv", encoding="utf-8")

# Turn the WKT text in the geometry column ("POINT (lng lat)") into
# shapely geometry objects.
df['geometry'] = df['geometry'].apply(wkt.loads)

# Wrap the frame as a GeoDataFrame using that column as its geometry.
# NOTE(review): no CRS is assigned here; the coordinates are presumably
# WGS84 longitude/latitude — confirm before reprojecting or mixing layers.
gdf = gpd.GeoDataFrame(df, geometry="geometry")

In [None]:
# Display only the rows whose country column equals "Czech Republic".
gdf.query('country == "Czech Republic"')

Export to GeoJSON:

In [None]:
# Serialize the GeoDataFrame to a GeoJSON string and write it to disk.
geojson = gdf.to_json()
# `with` closes the file even if the write fails.
with open("geodata/powerplants2023.json", "w", encoding="utf-8") as file:
    file.write(geojson)

## Example 2: Get Highest Mountain Peaks

In [None]:
# Download the Wikipedia page and parse its HTML tables; `df` is indexed
# like a list below (df[2]). This rebinds the `df` name used in Example 1.
df = pd.read_html("https://en.wikipedia.org/wiki/List_of_highest_mountains_on_Earth")
# Print how many tables were found, to help pick the right index.
print(len(df))

In [None]:
# Write the table at index 2 to CSV; this file is the input of the
# clean-up cell below. Re-check the index if the page layout changes.
df[2].to_csv("geodata/mountainpeaks.csv")  # index may change...

In [None]:
import re
import string
import csv

# Convert the raw table dump (geodata/mountainpeaks.csv) into a clean CSV
# with one WKT point per peak (geodata/mountainpeaks2023.csv).

# Decimal-degree pair with hemisphere letters, e.g. "27.98806°N 86.92528°E".
# NOTE(review): assumes the coordinate column contains such a section —
# confirm against the scraped table. Rows without it fall back to the
# original digit-only parsing below.
hemisphere_pattern = re.compile(
    r"(\d+(?:\.\d+)?)°\s*([NS])\s+(\d+(?:\.\d+)?)°\s*([EW])"
)

# `with` guarantees both files are closed even when a row fails to parse;
# newline="" is what the csv module expects for its file objects.
with open("geodata/mountainpeaks.csv", encoding="utf-8", newline="") as file, \
        open("geodata/mountainpeaks2023.csv", "w", encoding="utf-8", newline="") as outfile:
    reader = csv.reader(file)
    # Skip the first three rows of the dump (presumably a multi-row table
    # header — verify if the page layout changes).
    next(reader)
    next(reader)
    next(reader)

    # csv.writer quotes fields containing commas or quotes, so such names
    # no longer corrupt the output; plain fields are written unchanged.
    writer = csv.writer(outfile, lineterminator="\n")
    writer.writerow(["id", "name", "height", "country", "geometry"])

    for line in reader:
        uid = line[0]
        name = line[2]
        height = removenote(line[3])
        country = removenote(line[13])
        # Kept from the original: multiple countries are separated by "/".
        country = country.replace(",", "/")

        pos = line[8]
        match = hemisphere_pattern.search(pos)
        if match:
            # numbersonly() discards hemisphere letters and minus signs, so
            # southern/western positions would lose their sign; apply it here.
            lat = float(match.group(1))
            lng = float(match.group(3))
            if match.group(2) == "S":
                lat = -lat
            if match.group(4) == "W":
                lng = -lng
        else:
            # Original behaviour: keep digits/spaces/dots, drop leading
            # separators, and take the 4th and 5th space-separated tokens.
            pos = numbersonly(pos).lstrip(". ")
            lnglat = pos.split(" ")
            lat = float(lnglat[3])
            lng = float(lnglat[4])

        # WKT uses x y order, i.e. longitude first.
        geometry = f"POINT ({lng} {lat})"

        writer.writerow([uid, name, height, country, geometry])