In [14]:
import pyproj
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# EPSG identifiers of the two coordinate reference systems used below.
LV95 = "epsg:2056"   # CRS the raw `_x` / `_y` columns are transformed from
WGS84 = "epsg:4326"  # CRS the coordinates are converted to

# Single shared LV95 -> WGS84 transformer, reused by every later cell.
# `always_xy` is left at its default, so pyproj follows each CRS's own
# axis order; the cells below unpack the result as (lat, lon).
transformer = pyproj.Transformer.from_crs(crs_from=LV95, crs_to=WGS84)

# WGS84 bounding box of Switzerland, used to sanity-check transformed points.
# Source: https://de.wikipedia.org/wiki/Geographische_Extrempunkte_der_Schweiz
LAT_MIN_WGS84, LAT_MAX_WGS84 = 45.81796, 47.80845  # southern / northern limit
LON_MIN_WGS84, LON_MAX_WGS84 = 5.95590, 10.49219   # western / eastern limit


In [15]:
%matplotlib widget

pv_data = pd.read_csv("../data/pv_data_merged.csv")


In [16]:
def _outside_switzerland(lat, lon):
    """Flag WGS84 coordinates that lie outside the Swiss bounding box.

    Works elementwise: ``lat`` and ``lon`` may be scalars or equally shaped
    NumPy arrays, and the result is a bool or a boolean array accordingly.

    NaN compares as False against every bound, so NaN coordinates are NOT
    flagged. Infinite values (which pyproj returns for points it cannot
    transform when ``errcheck`` is left at its default) ARE flagged.
    """
    return (
        (lat < LAT_MIN_WGS84)
        | (lat > LAT_MAX_WGS84)
        | (lon < LON_MIN_WGS84)
        | (lon > LON_MAX_WGS84)
    )


def swapped(row) -> bool:
    """Return True if a row's coordinates do not land inside Switzerland.

    The row's ``_x`` / ``_y`` values are transformed with the module-level
    ``transformer`` and the result is tested against the
    ``LAT_*_WGS84`` / ``LON_*_WGS84`` bounds.

    Note that this is only a heuristic for "x and y were swapped": any
    point outside the bounding box is flagged, whatever the actual cause.

    Parameters
    ----------
    row : mapping-like (e.g. ``pandas.Series``)
        Must provide the keys ``"_x"`` and ``"_y"``.

    Returns
    -------
    bool
        True when the transformed point is outside the bounding box.
    """
    lat, lon = transformer.transform(row["_x"], row["_y"])
    return bool(_outside_switzerland(lat, lon))


# Transform both columns in a single pyproj call instead of once per row
# via DataFrame.apply(axis=1); the per-element result is the same, but the
# Python-level overhead of one call per row is avoided.
_lat_raw, _lon_raw = transformer.transform(
    pv_data["_x"].to_numpy(), pv_data["_y"].to_numpy()
)
pv_data["swapped"] = _outside_switzerland(_lat_raw, _lon_raw)


In [19]:
# Exchange `_x` and `_y` on every row flagged in the "swapped" column.
# The right-hand side is converted to a bare array so that pandas assigns
# by position instead of re-aligning on column labels (which would undo
# the exchange).
# NOTE: this cell is not idempotent -- running it a second time without
# recomputing "swapped" swaps the same rows back again.
needs_swap = pv_data["swapped"]
pv_data.loc[needs_swap, ['_x', '_y']] = pv_data.loc[needs_swap, ['_y', '_x']].to_numpy()

In [24]:
# Convert the `_x` / `_y` columns with the shared transformer and store
# the two returned coordinate arrays as the "lat" and "lon" columns.
lat_wgs84, lon_wgs84 = transformer.transform(pv_data["_x"], pv_data["_y"])
pv_data["lat"] = lat_wgs84
pv_data["lon"] = lon_wgs84

In [30]:
# Columns of the cleaned output schema, in the order they are added:
# three are copies of existing input columns, while "power_kw" is filled
# with NaN for every row.
derived_columns = {
    "zipcode": pv_data["PostCode"],
    "plant_id": pv_data["xtf_id"],
    "power_kw": float("nan"),
    "id": pv_data["Unnamed: 0"],
}
for column_name, column_values in derived_columns.items():
    pv_data[column_name] = column_values

In [31]:
# Keep only the columns of the cleaned schema and persist them as a pickle.
output_columns = ["id", "plant_id", "zipcode", "lat", "lon", "power_kw"]
data_clean = pv_data[output_columns]
data_clean.to_pickle("../data/pv_data_clean.pkl")