Reproduce figures and tables from paper:


J. Exenberger, M. Di Salvo, T. Hirsch, F. Wotawa, G. Schweiger. *Generalizable Temperature Nowcasting with Physics-Constrained RNNs for Predictive Maintenance of Wind Turbine Components*, ICLR Tackling Climate Change with Machine Learning Workshop 2024.

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Root folder of the stored results; later cells read Path(rdir, <plant>).
rdir = Path("paper_results")

# Plot colour per model name (passed to seaborn as `palette`).
colors = dict(PCRNN='coral', RNN='skyblue', Linear='plum')

# Plant identifier -> display name used for panel titles.
plant_names = dict(PlantA='Plant A', PlantB='Plant B', PlantC='Plant C')

Test set results

In [None]:
# Plot test set RMSE (average RMSE over iterations)
#
# One panel per plant: RMSE from that plant's results_test_set.csv files
# against the number of training turbines, one line per model. seaborn
# aggregates the rows into a mean line with a standard-error band
# (errorbar='se'); the scatter overlay marks the per-group means.

models = ['PCRNN', 'RNN', 'Linear']
plants = ["PlantA", "PlantB", "PlantC"]
n_turbines = [1, 3, 6, 9]

ticklabel_fs = 9
axlabel_fs = 9
title_fs = 9
legend_fs = 8

# squeeze=False + ravel keeps `ax` indexable even when only one plant is listed.
fig, ax = plt.subplots(1, len(plants), figsize=(8, 2.5), sharey=True, squeeze=False)
ax = ax.ravel()

for i, plant in enumerate(plants):
    frames = []
    for run_dir in Path(rdir, plant).iterdir():
        if not run_dir.is_dir():
            continue  # ignore stray files next to the run folders
        # The second '_'-separated token of the folder name is the training-set size.
        n_train = int(run_dir.stem.split('_')[1])
        if n_train not in n_turbines:
            continue  # filter before reading so unused runs are never loaded
        run_df = pd.read_csv(Path(run_dir, "results_test_set.csv"))
        run_df['n_train_turbines'] = n_train
        frames.append(run_df)
    df_iter = pd.concat(frames)
    df_iter['rmse'] = np.sqrt(df_iter['mse'])
    df_plot = df_iter[df_iter['model_name'].isin(models)]

    # hue_order pins the model order, so legend handles always match `models`
    # instead of following the row order of the CSV files.
    sns.lineplot(data=df_plot, x="n_train_turbines", y='rmse', hue='model_name', hue_order=models,
                 ax=ax[i], palette=colors, errorbar='se', linewidth=.8)
    df_mean = df_plot.groupby(['model_name', 'n_train_turbines'])['rmse'].mean().reset_index()
    sns.scatterplot(data=df_mean, x="n_train_turbines", y='rmse', hue='model_name', hue_order=models,
                    ax=ax[i], legend=False, palette=colors, s=10, edgecolor=None)

    ax[i].set_title(plant_names[plant], fontsize=title_fs)
    ax[i].set_xticks(n_turbines)
    ax[i].set_xticklabels(n_turbines)
    ax[i].set_xlabel('N train turbines', fontsize=axlabel_fs)
    ax[i].tick_params(axis='both', width=.5, labelsize=ticklabel_fs)
    ax[i].set_ylim(0.5, 1.2)

    for spine in ax[i].spines.values():
        spine.set_linewidth(.5)

# Rebuild each legend compactly, using the labels that belong to the handles.
for a in ax:
    handles, labels = a.get_legend_handles_labels()
    a.legend(handles=handles, labels=labels, ncols=3, fontsize=legend_fs, columnspacing=.9, handlelength=1, loc='lower right', framealpha=0.4)

ax[0].set_ylabel("RMSE", fontsize=axlabel_fs)
fig.tight_layout(pad=0.1)
plt.show()

In [None]:
# Generalization RMSE (average RMSE over iterations)
#
# For each training-set size in `n_turbines`, draw one figure with one panel
# per training plant. Each panel shows box plots of the RMSE from
# results_generalization.csv, grouped by test plant (x) and model (hue).
# The grey band marks the column of the plant the models were trained on.

ticklabel_fs = 9
axlabel_fs = 9
title_fs = 9
legend_fs = 8

models = ['PCRNN', 'RNN', 'Linear']
plants = ["PlantA", "PlantB", "PlantC"]
test_plants = ["PlantA", "PlantB", "PlantC"]
n_turbines = [6]

flierprops = dict(marker='o', markerfacecolor='None', markersize=3,  markeredgecolor='black', markeredgewidth=0.5)

for nturb in n_turbines:
    # squeeze=False + ravel keeps `ax` indexable even when only one plant is listed.
    fig, ax = plt.subplots(1, len(plants), figsize=(8, 3), sharey=True, sharex=True, squeeze=False)
    ax = ax.ravel()

    for i, plant in enumerate(plants):
        frames = []
        for run_dir in Path(rdir, plant).iterdir():
            if not run_dir.is_dir():
                continue  # ignore stray files next to the run folders
            # The second '_'-separated token of the folder name is the training-set size.
            n_train = int(run_dir.stem.split('_')[1])
            # Compare against the size of *this* figure; testing membership in
            # n_turbines would pool every size into each figure once the list
            # holds more than one entry.
            if n_train != nturb:
                continue
            run_df = pd.read_csv(Path(run_dir, "results_generalization.csv"))
            run_df['n_train_turbines'] = n_train
            frames.append(run_df)
        df = pd.concat(frames)
        df['rmse'] = np.sqrt(df['mse'])

        # Fixed category orders keep x positions and legend entries identical in
        # every panel; the shaded band below depends on the x order.
        sns.boxplot(data=df[df['model_name'].isin(models)], x='test_plant', y='rmse', hue='model_name',
                    order=test_plants, hue_order=models, ax=ax[i],
                    palette=colors, flierprops=flierprops, width=0.6, linewidth=0.5, gap=.05)
        ax[i].set_xlabel(None)
        ax[i].set_title(plant_names[plant], fontsize=title_fs)
        if i == 0:
            ax[i].set_ylabel("RMSE", fontsize=axlabel_fs)
        else:
            ax[i].set_ylabel(None)
        ax[i].tick_params(axis='both', width=.5, labelsize=ticklabel_fs)
        for spine in ax[i].spines.values():
            spine.set_linewidth(.5)

        # Shade the column of the training plant (categories sit at integer x positions).
        if plant in test_plants:
            pos = test_plants.index(plant)
            ax[i].axvspan(pos - 0.5, pos + 0.5, color='lightgrey', alpha=0.3, edgecolor=None, zorder=0)

    for a in ax:
        handles, labels = a.get_legend_handles_labels()
        a.legend(handles=handles, labels=labels, ncols=1, fontsize=legend_fs, columnspacing=.9, loc='upper right', framealpha=0.4)

    fig.tight_layout(pad=0.2)
    plt.show()

In [None]:
# Generalization RMSE (average over iterations)
#
# One figure per training plant (its name is printed above the figure), one
# panel per test plant: RMSE from results_generalization.csv against the
# number of training turbines, one line per model with a standard-error band.

models = ['PCRNN', 'RNN', 'Linear']
train_plants = ["PlantA", "PlantB", "PlantC"]
test_plants = ["PlantA", "PlantB", "PlantC"]
n_train_turbines = [1, 3, 6, 9]

ticklabel_fs = 9
axlabel_fs = 9
title_fs = 9
legend_fs = 8

for plant in train_plants:
    print(plant)
    frames = []
    for run_dir in Path(rdir, plant).iterdir():
        if not run_dir.is_dir():
            continue  # ignore stray files next to the run folders
        # The second '_'-separated token of the folder name is the training-set size.
        n_train = int(run_dir.stem.split('_')[1])
        if n_train not in n_train_turbines:
            continue  # filter before reading so unused runs are never loaded
        run_df = pd.read_csv(Path(run_dir, "results_generalization.csv"))
        run_df['n_train_turbines'] = n_train
        frames.append(run_df)
    df = pd.concat(frames)
    df['rmse'] = np.sqrt(df['mse'])
    df_models = df[df['model_name'].isin(models)]

    # Panels are indexed by test plant, so size the grid by test_plants.
    # squeeze=False + ravel keeps `ax` indexable for a single panel.
    fig, ax = plt.subplots(1, len(test_plants), figsize=(8, 2.5), sharey=True, squeeze=False)
    ax = ax.ravel()

    for i, tp in enumerate(test_plants):
        df_iter = df_models[df_models['test_plant'] == tp]
        # hue_order keeps line and legend order identical across panels.
        sns.lineplot(data=df_iter, x="n_train_turbines", y='rmse', hue='model_name', hue_order=models,
                     ax=ax[i], palette=colors, errorbar='se', linewidth=.8)
        df_mean = df_iter.groupby(['model_name', 'n_train_turbines'])['rmse'].mean().reset_index()
        sns.scatterplot(data=df_mean, x="n_train_turbines", y='rmse', hue='model_name', hue_order=models,
                        ax=ax[i], legend=False, palette=colors, s=10, edgecolor=None)

        ax[i].set_title(plant_names[tp], fontsize=title_fs)
        ax[i].set_xticks(n_train_turbines)
        ax[i].set_xticklabels(n_train_turbines)
        ax[i].set_xlabel('N train turbines', fontsize=axlabel_fs)
        ax[i].tick_params(axis='both', width=.5, labelsize=ticklabel_fs)

        for spine in ax[i].spines.values():
            spine.set_linewidth(.5)

    for a in ax:
        handles, labels = a.get_legend_handles_labels()
        a.legend(handles=handles, labels=labels, ncols=3, fontsize=legend_fs, columnspacing=.9, handlelength=1, framealpha=0.4)

    ax[0].set_ylabel("RMSE", fontsize=axlabel_fs)
    fig.tight_layout(pad=0.1)
    plt.show()

LaTeX Tables

In [None]:
# Test set results: one table per training-set size, with the RMSE as
# "mean ($\pm$ sem)" per model (rows) and plant (columns). The column-wise
# minimum is shown in bold.

n_turbines = [1, 3, 6, 9]
table_models = ['PCRNN', 'RNN', 'Linear']
sortkeys = {'Linear': 0, 'RNN': 1, 'PCRNN': 2}  # row order of the models in each table

# Read every run once, up front; the per-size tables below only filter this frame.
frames = []
for plant_dir in Path(rdir).iterdir():
    if not plant_dir.is_dir():
        continue  # ignore stray files in the results root
    for run_dir in plant_dir.iterdir():
        if not run_dir.is_dir():
            continue  # ignore stray files next to the run folders
        # The second '_'-separated token of the folder name is the training-set size.
        n_train = int(run_dir.stem.split('_')[1])
        run_df = pd.read_csv(Path(run_dir, "results_test_set.csv"))
        run_df['n_train_turbines'] = n_train
        run_df['test_plant'] = plant_dir.stem
        frames.append(run_df)
results = pd.concat(frames, ignore_index=True)
results['rmse'] = np.sqrt(results['mse'])
results = results[results['model_name'].isin(table_models)]

for n in n_turbines:
    x = results[results['n_train_turbines'] == n]
    x = x.groupby(['test_plant', 'model_name', 'n_train_turbines'])['rmse'].agg(['mean', 'sem']).reset_index()
    x = x.pivot(index=['n_train_turbines', 'model_name'], columns=['test_plant'], values=['mean', 'sem'])
    x.index.names = ['N train', 'Model']
    x.columns.names = [None, None]
    x['mean'] = x['mean'].round(3)
    x['sem'] = x['sem'].round(3)

    # Build the display strings in a string-typed copy; `x` keeps the numbers.
    x_new = x.copy(deep=True).astype(str)
    for c in x['mean'].columns:
        x_new.loc[:, ('mean', c)] = x['mean'][c].astype(str) + r' ($\pm$ ' + x['sem'][c].astype(str) + ')'
    x_new = x_new.sort_values('Model', key=lambda level: level.map(sortkeys))

    # NOTE: highlight_min runs on the formatted strings, so the minimum is
    # chosen by string comparison rather than by numeric value.
    # LaTeX-flavoured styler; uncomment the export line to write the table to disk.
    s = x_new['mean'].style.highlight_min(axis=0, props='textbf:--rwrap')
    #s.to_latex(f'tables/tab_test_results_{n}.tex', multicol_align='c')
    display(x_new['mean'].style.highlight_min(axis=0, props='font-weight:bold;'))

In [None]:
# Generalization results
#
# For every training-set size and training plant, show one table with the
# RMSE as "mean ($\pm$ sem)" per model (rows) and test plant (columns). The
# column-wise minimum is shown in bold.

plants = ['PlantA', 'PlantB', 'PlantC']
n_turbines = [1, 3, 6, 9]
table_models = ['Linear', 'RNN', 'PCRNN']
sortkeys = {'Linear': 0, 'RNN': 1, 'PCRNN': 2}  # row order of the models in each table

# Read every run once, up front; the per-size tables below only filter this frame.
frames = []
for train_plant in plants:
    for run_dir in Path(rdir, train_plant).iterdir():
        if not run_dir.is_dir():
            continue  # ignore stray files next to the run folders
        # The second '_'-separated token of the folder name is the training-set size.
        n_train = int(run_dir.stem.split('_')[1])
        run_df = pd.read_csv(Path(run_dir, "results_generalization.csv"))
        run_df['n_train_turbines'] = n_train
        run_df['train_plant'] = train_plant
        frames.append(run_df)
results = pd.concat(frames, ignore_index=True)
results['rmse'] = np.sqrt(results['mse'])
results = results[results['model_name'].isin(table_models)]

for n in n_turbines:
    x = results[results['n_train_turbines'] == n]
    x = x.groupby(['train_plant', 'test_plant', 'model_name', 'n_train_turbines'])['rmse'].agg(['mean', 'sem']).reset_index()
    x = x.pivot(index=['n_train_turbines', 'train_plant', 'model_name'], columns=['test_plant'], values=['mean', 'sem'])
    x = x.sort_index(axis='columns', level=(0,1))
    x.index.names = ['N train', 'Train plant', 'Model']
    x.columns.names = [None, 'Test plant']
    x['mean'] = x['mean'].round(3)
    x['sem'] = x['sem'].round(3)

    # Build the display strings in a string-typed copy; `x` keeps the numbers.
    x_new = x.copy(deep=True).astype(str)
    for c in x['mean'].columns:
        x_new.loc[:, ('mean', c)] = x['mean'][c].astype(str) + r' ($\pm$ ' + x['sem'][c].astype(str) + ')'

    for tp in plants:
        xx = x_new['mean'].loc[(n, tp, pd.IndexSlice[:]), :]
        xx = xx.sort_values('Model', key=lambda level: level.map(sortkeys))
        # NOTE: highlight_min runs on the formatted strings, so the minimum is
        # chosen by string comparison rather than by numeric value.
        # LaTeX-flavoured styler; uncomment the export line to write the table to disk.
        s = xx.style.highlight_min(axis=0, props='textbf:--rwrap')
        # s.to_latex(f"tables/tab_gen_results_{plant_names[tp]}_{n}.tex", multicol_align='c')
        display(xx.style.highlight_min(axis=0, props='font-weight:bold;'))

In [None]:
# Generalization results
#
# Wide variant: one table per training-set size, with models as rows and
# every (train plant, test plant) pair as a column. Cells hold the RMSE as
# "mean ($\pm$ sem)"; the column-wise minimum is shown in bold.

plants = ['PlantA', 'PlantB', 'PlantC']
n_turbines = [1, 3, 6, 9]

frames = []
for plant in plants:
    for run_dir in Path(rdir, plant).iterdir():
        if not run_dir.is_dir():
            continue  # ignore stray files next to the run folders
        # The second '_'-separated token of the folder name is the training-set size.
        n_train = int(run_dir.stem.split('_')[1])
        run_df = pd.read_csv(Path(run_dir, "results_generalization.csv"))
        run_df['n_train_turbines'] = n_train
        run_df['train_plant'] = plant
        frames.append(run_df)
x = pd.concat(frames, ignore_index=True)
x['rmse'] = np.sqrt(x['mse'])
x = x.groupby(['train_plant', 'test_plant', 'model_name', 'n_train_turbines'])['rmse'].agg(['mean', 'sem']).reset_index()
x = x.pivot(index=['n_train_turbines', 'model_name'], columns=['train_plant', 'test_plant'], values=['mean', 'sem'])
x = x.sort_index(axis='columns', level=(0,1))
x.index.names = ['N train', 'Model']
x.columns.names = [None, 'Train plant', 'Test plant']
x['mean'] = x['mean'].round(3)
x['sem'] = x['sem'].round(3)

# Build the display strings in a string-typed copy; `x` keeps the numbers.
x_new = x.copy(deep=True).astype(str)
for c in x['mean'].columns:
    # `c` is a (train plant, test plant) tuple, hence the unpacking.
    x_new.loc[:, ('mean', *c)] = x['mean'][c].astype(str) + r' ($\pm$ ' + x['sem'][c].astype(str) + ')'

for n in n_turbines:
    # Show the formatted "mean (± sem)" strings, as the other table cells do.
    # Previously this sliced the numeric `x`, leaving `x_new` unused.
    # NOTE: highlight_min therefore compares strings, not numeric values.
    xx = x_new['mean'].loc[(n, pd.IndexSlice[:]), :]
    # LaTeX-flavoured styler for an optional export. NOTE(review): the original
    # export line referenced `tplant`, which is not defined in this cell; pick
    # a file name before enabling it.
    s = xx.style.highlight_min(axis=0, props='textbf:--rwrap')
    # s.to_latex(f"tables/tab_gen_results_{''.join([plant_names[p] for p in tplant])}_{n}.tex", multicol_align='c')
    display(xx.style.highlight_min(axis=0, props='font-weight:bold;'))