In [2]:
import os
import pandas as pd

## Read the Windkraftanlagen (wind turbine) data

In [3]:
# Path of the raw CSV (relative to the notebook's working directory).
Windanlagen = "../data/Windkraftanlagen/_Onshore_Windkraftanlagen_in_Deutschland.csv"

# Load the table, then print its column overview (dtypes and non-null counts).
df = pd.read_csv(filepath_or_buffer=Windanlagen)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5208 entries, 0 to 5207
Data columns (total 15 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   X                         5194 non-null   float64
 1   Y                         5194 non-null   float64
 2   FID                       5208 non-null   int64  
 3   Bundesland                5208 non-null   object 
 4   Name                      5208 non-null   object 
 5   Baujahr                   4668 non-null   object 
 6   Gesamtleistung__MW_       2634 non-null   object 
 7   Anzahl                    2637 non-null   object 
 8   Typ__WKA_                 4569 non-null   object 
 9   Ort                       3248 non-null   object 
 10  Landkreis                 2683 non-null   object 
 11  Breitengrad               5194 non-null   float64
 12  Längengrad                5194 non-null   float64
 13  Projektierer___Betreiber  2210 non-null   object 
 14  Bemerkun

## Prepare Data

In [4]:
# Discard every row in which the X or the Y coordinate is missing.
df.dropna(subset=["X", "Y"], how="any", axis="index", inplace=True)

# Columns that are removed from the frame in this step.
unusefulCols = [
    "Typ__WKA_",
    "Projektierer___Betreiber",
    "Bemerkungen",
    "Breitengrad",
    "Längengrad",
    "Landkreis",
    "Ort",
]
df.drop(labels=unusefulCols, axis="columns", inplace=True)

In [5]:
# Print the column overview (non-null counts and dtypes) of the current frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5194 entries, 0 to 5193
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   X                    5194 non-null   float64
 1   Y                    5194 non-null   float64
 2   FID                  5194 non-null   int64  
 3   Bundesland           5194 non-null   object 
 4   Name                 5194 non-null   object 
 5   Baujahr              4654 non-null   object 
 6   Gesamtleistung__MW_  2621 non-null   object 
 7   Anzahl               2624 non-null   object 
dtypes: float64(2), int64(1), object(5)
memory usage: 365.2+ KB


In [6]:
#convert Anzahl, Baujahr and Gesamtleistung to floats

# Anzahl entries that are not plain numbers are treated as missing.
# Each column is reassigned on the frame: calling replace(..., inplace=True)
# on df.<column> acts on an intermediate Series and does not reliably write
# back to df (it is a no-op under pandas Copy-on-Write).
anzahl_unparsable = ["unklar", "9[36]", "5[51]"]
df["Anzahl"] = df["Anzahl"].mask(df["Anzahl"].isin(anzahl_unparsable))

# Year ranges and annotated years are collapsed to the latest year they list.
baujahr_fixes = {
    "2013–2014[33]": 2014,
    "2015–2017": 2017,
    "1990–1991": 1991,
    "2017–2018": 2018,
    "1994–1995": 1995,
    "2015/2016": 2016,
    "1997-2001": 2001,
    "2013/2017–2018": 2018,
    "2002 2005": 2005,
    "2005-2006": 2006,
    "2005-2007": 2007,
    "2004-2005": 2005,
    "2016–2017": 2017,
    "2002–2003": 2003,
    "2008-2010": 2010,
    "2015–2016": 2016,
    "2012–2013": 2013,
    "2004–2005": 2005,
    "2010–2011": 2011,
    "2007–2008": 2008,
    "2017-2018": 2018,
    "1999–2001": 2001,
    "2003–2004": 2004,
    "1997–1998": 1998,
    "2001–2002": 2002,
    "1996–1997": 1997,
    "2011–2012": 2012,
    "1999–2000": 2000,
    "1998–1999": 1999,
    "2010–2016": 2016,
    "2008–2009": 2009,
    "2014–2015": 2015,
    "2005–2006": 2006,
    "2009–2010": 2010,
    "2006–2007": 2007,
    "2000–2001": 2001,
    "2013–2014": 2014,
}
# A single replace call with the whole mapping instead of one call per key.
df["Baujahr"] = df["Baujahr"].replace(baujahr_fixes)

# Decimal commas are turned into dots first, so any value matched afterwards
# has to be spelled with a dot (a key containing a comma can never match).
leistung_text = df["Gesamtleistung__MW_"].str.replace(",", ".", regex=False)
# "unklar" carries no number -> missing value.
leistung_text = leistung_text.mask(leistung_text == "unklar")
# Strip the footnote markers from the one annotated value.
df["Gesamtleistung__MW_"] = leistung_text.replace({"27.0[34][35]": 27})

df = df.astype({"Anzahl": float, "Baujahr": float, "Gesamtleistung__MW_": float})

In [8]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,X,Y,FID,Bundesland,Name,Baujahr,Gesamtleistung__MW_,Anzahl
0,8.961806,54.895278,1,Schleswig-Holstein,Windpark Ellhöft-Westre,2000.0,35.45,13.0
1,8.961806,54.895278,2,Schleswig-Holstein,Windpark Ellhöft-Westre,2007.0,,
2,8.961806,54.895278,3,Schleswig-Holstein,Windpark Ellhöft-Westre,2009.0,,
3,8.641111,54.858611,4,Schleswig-Holstein,Windpark Friedrich-Wilhelm-Lübke-Koog[10],1990.0,71.5,34.0
4,8.641111,54.858611,5,Schleswig-Holstein,Windpark Friedrich-Wilhelm-Lübke-Koog[10],1998.0,,


## Remove rows with duplicate location values

In [9]:
# Build a "Y, X" text key per row; rows with equal coordinates get equal keys.
y_text = df["Y"].astype(str)
x_text = df["X"].astype(str)
df["loc"] = y_text.add(", ").add(x_text)

In [10]:
#create new data frame with the combined values for each location
location_rows = []
# sort=False keeps the locations in order of first appearance in df.
for _, df1 in df.groupby("loc", sort=False):
    if len(df1) > 1:
        # Keep the first row (and its index label) for the descriptive columns
        # and overwrite the numeric ones with values combined over all rows
        # that share this location.
        combined = df1.iloc[[0]].copy()
        # Series.max skips missing years; the builtin max() on a list returns
        # NaN or a year depending on where the NaN happens to sit.
        combined["Baujahr"] = df1["Baujahr"].max()
        # min_count=1: when no row holds a value the result stays missing
        # instead of becoming 0, consistent with single-row locations.
        combined["Gesamtleistung__MW_"] = df1["Gesamtleistung__MW_"].sum(min_count=1)
        combined["Anzahl"] = df1["Anzahl"].sum(min_count=1)
        df1 = combined
    location_rows.append(df1)

# Concatenate once at the end: DataFrame.append is not available in
# pandas >= 2.0, and appending inside the loop re-copied the growing frame
# on every iteration.
ndf = pd.concat(location_rows)

In [11]:
# The intermediate frames are not needed any more.
del df, df1
# reset_index() keeps the previous row labels as an "index" column, so that
# column is written to the CSV as well.
ndf = ndf.reset_index()
# Make sure the output folder exists; to_csv does not create missing folders.
os.makedirs("../prep_data", exist_ok=True)
ndf.to_csv("../prep_data/Windkraftanlagen.csv", index=False)

In [12]:
# Split every "Y, X" key of ndf["loc"] back into its two text parts.
a = [key.split(", ") for key in ndf["loc"].to_list()]
# Convert both parts of each key to float.
b = [[float(first), float(second)] for first, second in a]
# Final result: one (Y, X) tuple of floats per row of ndf.
WinkraftPoints = list(map(tuple, b))

In [13]:
# Display the list of coordinate tuples built in the previous cell.
WinkraftPoints

[(54.895277778, 8.96180555600006),
 (54.8586111110001, 8.64111111100004),
 (54.823611111, 8.88194444400006),
 (54.809888889, 9.53294444400007),
 (54.8019722220001, 9.34977777800003),
 (54.7718055560001, 9.45633333300003),
 (54.765833333, 8.75944444400005),
 (54.763333333, 9.68094444400003),
 (54.7616944440001, 9.81),
 (54.7461111110001, 8.56861111100005),
 (54.736222222, 9.87347222200003),
 (54.720833333, 9.29305555600007),
 (54.6699166670001, 9.87630555600003),
 (54.667, 9.84130555600007),
 (54.6630555560001, 13.336388889),
 (54.6525, 13.2855555560001),
 (54.648611111, 9.17638888900007),
 (54.6315277780001, 9.99944444400006),
 (54.6116666670001, 9.14166666700004),
 (54.6111111110001, 8.90361111100003),
 (54.594805556, 13.286111111),
 (54.52475, 11.131),
 (54.511111111, 8.89611111100004),
 (54.5106388890001, 11.0731111110001),
 (54.510277778, 11.0567222220001),
 (54.499638889, 11.178222222),
 (54.497944444, 11.125138889),
 (54.4958333330001, 9.17277777800007),
 (54.4841666670001, 11.22