In [30]:
import cartopy
from collections import defaultdict
import glob
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
import matplotlib as mpl
import numpy as np
import pandas as pd
import os
import sys

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

sys.path.append("../src")
from installations.plot import plot_gantt, plot_durations
from marinetraffic.vesseltracks import read_vesseltracks_file
from marinetraffic.plot import plot_vesseltracks_cartopy
from metocean.era5 import weather_df_from_era5

# Apply the 'seaborn-v0_8-paper' matplotlib style sheet to all figures created below.
plt.style.use('seaborn-v0_8-paper')
# Print the font family configured by the active rcParams.
print(mpl.rcParams['font.family'])
from matplotlib.font_manager import findfont, FontProperties
# Resolve the font file matplotlib selects for the generic sans-serif family.
# NOTE(review): `font` is not referenced by any cell visible here — confirm it is still needed.
font = findfont(FontProperties(family=['sans-serif']))

%matplotlib notebook

# Directory that is globbed for the per-installation "*cluster-*.csv" files.
# NOTE(review): the trailing "//" looks unintentional; os.path.join still produces a usable path.
installation_data_dir = "../data/windfarms/matching_windfarms//"

['sans-serif']


In [2]:
# Metadata table for the matched installations; later cells read its
# `vessel_name` and `known_windfarms_index` columns by row label.
installations_metadata = pd.read_csv(
    "../data/windfarms/matching_windfarms/matching_windfarms.csv"
)

# Reference table of wind farms, keyed by its "index" column and sorted by that key.
windfarm_database = (
    pd.read_excel("../data/windfarms/windfarms-complete_turbines.ods", engine="odf")
    .set_index("index")
    .sort_index()
)

In [3]:
# Display the column labels of the wind farm reference table.
windfarm_database.columns

Index(['name', 'latitude', 'longitude', 'n_turbines', 'turbine_power',
       'windfarm_capacity', 'turbine_installation_start',
       'turbine_installation_end', 'turbine_installation_vessel',
       'days_per_foundation_lit', 'days_per_turbine_lit', 'days_set_lit',
       'Source', 'Unnamed: 14', 'Unnamed: 15'],
      dtype='object')

In [4]:
# Read every clustered installation CSV into a dict keyed by the matching row
# label of `windfarm_database`. A later file that maps to the same
# `known_windfarms_index` replaces the earlier entry.
installations = {}
cluster_files = sorted(glob.glob(os.path.join(installation_data_dir, "*cluster-*.csv")))
for infile in cluster_files:
    # The file name is "<metadata row label>_<windfarm>_...": the first token
    # selects the metadata row, the second is used as the wind farm label.
    name_tokens = os.path.basename(infile).split("_")
    installation_index = int(name_tokens[0])
    metadata_row = installations_metadata.loc[installation_index]
    vessel_name = metadata_row.vessel_name
    windfarm_database_index = metadata_row.known_windfarms_index
    windfarm = name_tokens[1]

    frame = pd.read_csv(infile)
    frame["begin"] = pd.to_datetime(frame["begin"])
    frame["end"] = pd.to_datetime(frame["end"])
    # Prepend constant label columns; resulting order is windfarm, vessel, <csv columns>.
    frame.insert(loc=0, column='vessel', value=[vessel_name] * len(frame))
    frame.insert(loc=0, column='windfarm', value=[windfarm] * len(frame))
    # Order chronologically, then discard the CSV's own "index" column by moving
    # it into the index and replacing it with a fresh 0..n-1 range.
    installations[windfarm_database_index] = (
        frame.sort_values('begin').set_index('index').reset_index(drop=True)
    )

In [5]:
# Sanity check: report wind farms for which more installations were extracted
# than the reference table lists turbines.
for key, windfarm in installations.items():
    farm_record = windfarm_database.loc[key]
    nd = farm_record.n_turbines
    if len(windfarm) > nd:
        print(f'oh oh: {len(windfarm)} > {nd} {farm_record["name"]}')

oh oh: 83 > 73 Humber Gateway
oh oh: 73 > 72 Sandbank
oh oh: 41 > 23 Northwester 2
oh oh: 42 > 39 EnBW Baltic II (MP)


In [6]:
def extract_farm_stats(installations, windfarm_database):
    """Yield one summary row per wind farm that has observed installations.

    Parameters
    ----------
    installations : mapping
        Maps a row label of ``windfarm_database`` to a DataFrame of observed
        installations. Each frame must be non-empty and provide a ``vessel``
        column (only the first row's value is read).
    windfarm_database : pandas.DataFrame
        Reference table providing ``name``, ``n_turbines``, ``turbine_power``
        and ``windfarm_capacity`` for every key of ``installations``.

    Yields
    ------
    list
        ``[name, int(n_turbines), turbine_power, windfarm_capacity,
        number of rows in the installation frame, vessel label]``. The vessel
        label is the first row's ``vessel`` string split on ``-``, with the
        first token dropped and the rest joined by single spaces.
    """
    for key, windfarm in installations.items():
        farm = windfarm_database.loc[key]
        # e.g. "123-brave-tern" -> ["brave", "tern"]
        _, *vessel_words = windfarm.iloc[0].vessel.split('-')
        yield [
            farm['name'],
            int(farm['n_turbines']),
            farm['turbine_power'],
            farm['windfarm_capacity'],
            len(windfarm),
            ' '.join(vessel_words),
        ]
        

In [7]:
# Write the per-farm summary as a LaTeX table for the manuscript
# (no index column, floats formatted with two decimals).
windfarm_table_columns = [
    "Wind Farm",
    "Turbines",
    "Turbine Power (MW)",
    "Wind Farm Capacity (MW)",
    "Observed Installations",
    "Vessel"
]
windfarm_table = pd.DataFrame(
    data=extract_farm_stats(installations, windfarm_database),
    columns=windfarm_table_columns,
)
windfarm_table.to_latex("../manuscript/windfarms.txt", index=False, float_format="%.2f")

  pd.DataFrame(


In [8]:
# Stack the per-farm tables into a single table ordered by installation start,
# with a fresh 0..n-1 index.
all_installations = (
    pd.concat(list(installations.values()))
    .sort_values("begin")
    .reset_index(drop=True)
)
#all_installations.insert(loc = len(all_installations.columns), column = 'cumsum_duration', value = all_installations.duration.cumsum())
print(f"available turbine installations: {len(all_installations)}")

available turbine installations: 1365


In [47]:
# Weather tables per installation: all_weather[windfarm][location_key] -> DataFrame.
all_weather = defaultdict(dict)

for location in all_installations.itertuples():
    era5_pattern = os.path.join(
        "../data/metocean/", f'*_{location.windfarm}_{location.location_key}_*.nc'
    )
    possible_matches = glob.glob(era5_pattern)
    if not possible_matches:
        # no ERA5 file for this installation: it gets no entry in all_weather
        continue
    # Only the first path returned by glob is used.
    weather = weather_df_from_era5(possible_matches[0])
    all_weather[location.windfarm][location.location_key] = weather
    # Magnitude of the (u100, v100) vector, stored as an extra column.
    wind_magnitude = np.sqrt(np.power(weather.u100, 2) + np.power(weather.v100, 2))
    # NOTE(review): column position 17 is hard-coded; insert() raises if the
    # table has fewer than 17 columns — confirm the ERA5 table layout.
    weather.insert(loc=17, column='abs_wind_100', value=wind_magnitude)

In [51]:
# Column labels of one specific weather table; the per-parameter plotting loops iterate over them.
# NOTE(review): the wind farm and location key are hard-coded — presumably all weather tables share these columns; confirm.
metocean_parameters = all_weather['albatros']['235090598-blue-tern_cluster-11_location-0'].columns

# Materials and Methods

## Wind Farm Clustering to Extract Single Turbines

In [31]:
# exemplary data set: brave tern, cluster 10 (horns rev 3)
# Read one vessel-track file per clustered location and draw all of them on a
# Mercator map, one colour per location.
location_pattern = "../data/marinetraffic/clustered/229044000-brave-tern/cluster-10/*location-*.csv"
location_files = sorted(glob.glob(location_pattern))
if not location_files:
    # Fail with a clear message instead of an opaque "min() arg is an empty sequence".
    raise FileNotFoundError(f"no location files match {location_pattern}")

locations = dict()
for location in location_files:
    # "..._location-3.csv" -> "location-3"
    loc_key = location.split("_")[-1].split(".")[0]
    locations[loc_key] = read_vesseltracks_file(location)

n_locations = len(locations)
# Padding added around the data extent, in the units of the latitude/longitude columns.
margin = 0.1

min_lat = min(x.latitude.min() for x in locations.values()) - margin
max_lat = max(x.latitude.max() for x in locations.values()) + margin
min_lon = min(x.longitude.min() for x in locations.values()) - margin
max_lon = max(x.longitude.max() for x in locations.values()) + margin

figure = plt.figure(figsize=(16,9))
ax = figure.add_subplot(1,1,1, projection=cartopy.crs.Mercator())
ax.set_extent([min_lon, max_lon, min_lat, max_lat])
ax.add_feature(cartopy.feature.BORDERS)
ax.gridlines(draw_labels=True, dms=True, x_inline=False, y_inline=False)
ax.coastlines(resolution='10m')
# One evenly spaced rainbow colour per location.
color = cm.rainbow(np.linspace(0, 1, n_locations))
for c, (_, loc) in zip(color, locations.items()):
    # Pass the colour explicitly: previously `c` was computed but never used,
    # so the points fell back to matplotlib's default colour cycle.
    plt.scatter(loc.longitude, loc.latitude, color=tuple(c), transform=cartopy.crs.PlateCarree())
plt.tight_layout()
plt.savefig("../manuscript/figures/horns-rev-3_turbines.png",dpi=300)
    

<IPython.core.display.Javascript object>

In [32]:
# Mean, median and standard deviation of the `duration` column over all installations.
installation_durations = all_installations["duration"]
average_installation = installation_durations.mean()
median_installation = installation_durations.median()
std_installation = installation_durations.std()
print(
    f"average installation time: {average_installation} "
    f"(median: {median_installation}) +- {std_installation}"
)

average installation time: 47.33259747659748 (median: 27.049722222222226) +- 58.36211200923583


In [33]:
# 90th percentile of the installation durations (displayed as the cell output).
np.percentile(all_installations.duration, 90)

99.32405555555589

In [34]:
# Histogram of installation durations with unit-width bins on the edges
# 12, 13, ..., 719; values outside that range are not counted. The view is
# limited to 10-200 on the x axis.
# Compute the marker statistics once instead of once for the line and once for its label.
duration_median = all_installations.duration.median()
duration_mean = all_installations.duration.mean()
duration_p90 = np.percentile(all_installations.duration, 90)

plt.figure(figsize=(9,5))
n, bins, patches = plt.hist(all_installations.duration, bins=list(range(12, 720)))
plt.xlim([10, 200])
plt.xlabel("duration (h)")
plt.ylabel("number of installations")
plt.axvline(x=duration_median, label=f'median = {duration_median:1.2f} h', color='k', linestyle='-')
plt.axvline(x=duration_mean, label=f'mean = {duration_mean:1.2f} h', color='k', linestyle='--')
plt.axvline(x=duration_p90, label=f'90th percentile = {duration_p90:1.2f} h', color='k', linestyle='dotted')
# The former bare `plt.axvline()` was removed: without arguments it draws an
# unlabelled line at x=0, which lies outside the displayed x range.
plt.legend()
plt.tight_layout()
plt.savefig("../manuscript/figures/duration-distribution.png", dpi=300)

<IPython.core.display.Javascript object>

In [35]:
# Cumulative histogram of installation durations with unit-width bins on the
# edges 12, 13, ..., 719; values outside that range are not counted. The view
# is limited to 10-200 on the x axis.
# Compute the marker statistics once instead of once for the line and once for its label.
duration_median = all_installations.duration.median()
duration_mean = all_installations.duration.mean()
duration_p90 = np.percentile(all_installations.duration, 90)

plt.figure(figsize=(9,5))
n, bins, patches = plt.hist(
    all_installations.duration,
    bins=list(range(12, 720)),
    label='installation duration',
    cumulative=True,
)
plt.xlim([10, 200])
plt.xlabel("duration (h)")
plt.ylabel("number of installations")
plt.axvline(x=duration_median, label=f'median = {duration_median:1.2f} h', color='k', linestyle='-')
plt.axvline(x=duration_mean, label=f'mean = {duration_mean:1.2f} h', color='k', linestyle='--')
plt.axvline(x=duration_p90, label=f'90th percentile = {duration_p90:1.2f} h', color='k', linestyle='dotted')
# The former bare `plt.axvline()` was removed: without arguments it draws an
# unlabelled line at x=0, which lies outside the displayed x range.
plt.legend()
plt.tight_layout()
plt.savefig("../manuscript/figures/duration-cumulative-distribution.png", dpi=300)

<IPython.core.display.Javascript object>

In [36]:
# Running total of installation time, with installations ordered from the
# shortest to the longest duration; the 90th percentile is marked.
plt.figure(figsize=(9, 5))
durations_ascending = all_installations.sort_values('duration').duration
p90_duration = np.percentile(all_installations.duration, 90)
plt.plot(
    durations_ascending,
    durations_ascending.cumsum(),
    label='cumulative installation time',
)
plt.axvline(
    x=p90_duration,
    label=f'90th percentile = {p90_duration:1.2f} h',
    color='k',
    linestyle='dotted',
)
plt.xlabel("duration (h)")
plt.ylabel("cumulative duration (h)")
plt.legend()
plt.tight_layout()
plt.savefig("../manuscript/figures/cumulative-duration.png", dpi=300)

<IPython.core.display.Javascript object>

In [37]:
# One box per wind farm showing its installation durations; the mean is
# marked and outliers are hidden.
plt.figure(figsize=(9, 5))
durations_per_farm = [farm_installations.duration for farm_installations in installations.values()]
plt.boxplot(durations_per_farm, showmeans=True, showfliers=False)

# Boxes sit at positions 1..N; label each with the farm name and its number of observed installations.
tick_positions = list(range(1, len(installations) + 1))
tick_labels = [
    f"{windfarm_database.loc[i]['name']} ({len(y)} WTG)" for i, y in installations.items()
]
plt.xticks(tick_positions, tick_labels, rotation=90)
ax = plt.gca()
# NOTE(review): the label says "median" although each box shows the whole
# duration distribution of a farm — confirm the intended wording.
plt.ylabel("median installation duration (h)")
plt.tight_layout()
plt.savefig("../manuscript/figures/installations-overview.png", dpi=300)

<IPython.core.display.Javascript object>

In [38]:
# One marker per wind farm: turbine_power from the reference table against the
# farm's median installation duration.
plt.figure(figsize=(9,5))
power_and_median = (
    (windfarm_database.loc[farm_key].turbine_power, farm_installations.duration.median())
    for farm_key, farm_installations in installations.items()
)
for rated_power, median_duration in power_and_median:
    plt.scatter(rated_power, median_duration, color='#30a2da', s=75)
plt.xlabel("turbine rated power (MW)")
plt.ylabel("median installation duration (h)")
plt.ylim([0, 60])
plt.tight_layout()
plt.savefig("../manuscript/figures/durations-rated-power.png", dpi=300)

<IPython.core.display.Javascript object>

## waves
### max(Hmax)

In [40]:
# Installation duration against the maximum of the `hmax` column of the
# installation's weather table; installations without a weather table are skipped.
plt.figure(figsize=(9, 5))
for location in all_installations.itertuples():
    if location.windfarm not in all_weather:
        continue
    farm_weather = all_weather[location.windfarm]
    if location.location_key not in farm_weather:
        continue
    peak_hmax = farm_weather[location.location_key].hmax.max()
    plt.scatter(peak_hmax, location.duration, color='#30a2da', s=75, alpha=0.5)
plt.xlabel('max. wave height (m)')
plt.ylabel('duration (h)')
plt.tight_layout()
plt.savefig("../manuscript/figures/duration-max-wave-height.png", dpi=300)

<IPython.core.display.Javascript object>

### max(wind speed)

In [45]:
# Installation duration against the maximum of the `abs_wind_100` column of the
# installation's weather table; installations without a weather table are skipped.
plt.figure(figsize=(9,5))
for location in all_installations.itertuples():
    if location.windfarm not in all_weather:
        continue
    farm_weather = all_weather[location.windfarm]
    if location.location_key not in farm_weather:
        continue
    peak_wind = farm_weather[location.location_key].abs_wind_100.max()
    plt.scatter(peak_wind, location.duration, color='#30a2da', s=75, alpha=0.5)
plt.xlabel('max. wind speed @ 100 m (m/s)')
plt.ylabel('duration (h)')
plt.tight_layout()
plt.savefig("../manuscript/figures/duration-max-wind-speed.png", dpi=300)

<IPython.core.display.Javascript object>

## all metocean parameters, mean

In [55]:
# One figure per metocean parameter: installation duration against the mean of
# that parameter over the installation's weather table.
# The installation -> weather-table matching does not depend on the parameter,
# so it is done once here instead of once per parameter.
matched_weather = []
matched_durations = []
for location in all_installations.itertuples():
    # .get() avoids creating empty entries in the defaultdict for unknown farms.
    weather = all_weather.get(location.windfarm, {}).get(location.location_key)
    if weather is None:
        continue  # no weather table loaded for this installation
    matched_weather.append(weather)
    matched_durations.append(location.duration)

for param in metocean_parameters:
    plt.figure(figsize=(9,5))
    # One scatter call per figure instead of one artist per installation.
    plt.scatter(
        [weather[param].mean() for weather in matched_weather],
        matched_durations,
        color='#30a2da',
        s=75,
        alpha=0.5,
    )
    plt.title(f"mean({param})")
    plt.xlabel(f"mean({param})")
    plt.ylabel('duration (h)')
    plt.tight_layout()
    plt.savefig(f"../manuscript/figures/duration-{param}-mean.png", dpi=300)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [54]:
# One figure per metocean parameter: installation duration against the maximum
# of that parameter over the installation's weather table.
# The installation -> weather-table matching does not depend on the parameter,
# so it is done once here instead of once per parameter.
matched_weather = []
matched_durations = []
for location in all_installations.itertuples():
    # .get() avoids creating empty entries in the defaultdict for unknown farms.
    weather = all_weather.get(location.windfarm, {}).get(location.location_key)
    if weather is None:
        continue  # no weather table loaded for this installation
    matched_weather.append(weather)
    matched_durations.append(location.duration)

for param in metocean_parameters:
    plt.figure(figsize=(9,5))
    # One scatter call per figure instead of one artist per installation.
    plt.scatter(
        [weather[param].max() for weather in matched_weather],
        matched_durations,
        color='#30a2da',
        s=75,
        alpha=0.5,
    )
    # Name the aggregate in the labels, matching the "mean(...)" figures;
    # previously only the bare parameter name was shown.
    plt.title(f"max({param})")
    plt.xlabel(f"max({param})")
    plt.ylabel('duration (h)')
    plt.tight_layout()
    plt.savefig(f"../manuscript/figures/duration-{param}-max.png", dpi=300)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [46]:
# Draw and save one gantt chart per wind farm; "/" in a farm name is replaced
# by "-" so the name can be used as a file name.
for key, windfarm in installations.items():
    farm_name = windfarm_database.loc[key]['name']
    gantt_path = f"../manuscript/figures/gantt/{farm_name.replace('/', '-')}.png"
    plot_gantt(windfarm, title=farm_name, save_fig=gantt_path, figsize=(9, 5))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>