In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.metrics import mean_squared_error as mse
from sklearn.linear_model import LinearRegression as OLS
from sklearn.preprocessing import StandardScaler
import os
import multiprocessing
# Print the full path of every file found below the Kaggle input directory.
input_files = (os.path.join(folder, name)
               for folder, _subdirs, names in os.walk('/kaggle/input')
               for name in names)
for path in input_files:
    print(path)
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

In [None]:
# Load the measurement table and discard the 'torque' column right away.
df = (pd.read_csv('/kaggle/input/electric-motor-temperature/measures_v2.csv')
        .drop(columns='torque'))

# Columns the model has to predict, and the column identifying a session.
PROFILE_ID_COL = 'profile_id'
target_features = ['pm', 'stator_tooth', 'stator_yoke', 'stator_winding']

df.head(10)

In [None]:
def _current_norm(frame):
    """Current vector norm computed from the 'i_d' and 'i_q' columns."""
    return np.sqrt(frame['i_d']**2 + frame['i_q']**2)


def _voltage_norm(frame):
    """Voltage vector norm computed from the 'u_d' and 'u_q' columns."""
    return np.sqrt(frame['u_d']**2 + frame['u_q']**2)


# Engineered columns: new column name -> callable evaluated on the frame.
# Further candidates that are currently disabled:
#   'S_el':    i_s * u_s                      (apparent power)
#   'P_el':    i_d * u_d + i_q * u_q          (effective power)
#   'i_s_x_w': i_s * motor_speed
#   'S_x_w':   S_el * motor_speed
extra_feats = {
    'i_s': _current_norm,
    'u_s': _voltage_norm,
}
df = df.assign(**extra_feats)

# Every column that is neither a target nor the profile id is a model input.
_non_input_cols = set(target_features) | {PROFILE_ID_COL}
x_cols = [col for col in df.columns if col not in _non_input_cols]

In [None]:
# Window sizes handed to `DataFrame.ewm(span=...)`. According to the original
# author they correspond to cutoff frequencies of low-pass filters, or
# half-lives of EWMAs, respectively.
spans = [6360, 3360, 1320, 9480]


def dig_into_rolling_features(_df):
    """Compute exponentially weighted mean and std features for one session.

    `_df` corresponds to a unique measurement session and must carry a
    0-based index (row label 0 is read to initialise the padding).

    Before the exponentially weighted statistics are computed, `max(spans)`
    rows are prepended so that the first real observations already see a full
    lookback: the 'ambient' and 'coolant' columns (if present) are filled
    with their first observed value, every other column with zeros. The
    padding rows are removed again before returning.

    Returns a float32 DataFrame with as many rows as `_df`; columns are
    '<col>_ewma_<span>' for all spans, followed by '<col>_ewms_<span>'.
    """

    def _with_suffix(frame, tag):
        # append `tag` to every column name
        return frame.rename(columns=lambda name: name + tag)

    pad_len = max(spans)

    # build the padding block that precedes the actual measurements
    padding = pd.DataFrame(np.zeros((pad_len, len(_df.columns))),
                           columns=_df.columns)
    temp_cols = [name for name in ('ambient', 'coolant') if name in _df]
    padding.loc[:, temp_cols] = _df.loc[0, temp_cols].values
    padded = pd.concat([padding, _df], axis=0, ignore_index=True)

    mean_frames = []
    std_frames = []
    for span in spans:
        window = padded.ewm(span=span)
        mean_frames.append(_with_suffix(window.mean(), '_ewma_' + str(span)))
        # the very first std value is undefined (NaN) and is replaced by 0
        std_frames.append(
            _with_suffix(window.std().fillna(0).astype(np.float32),
                         '_ewms_' + str(span)))

    means = pd.concat(mean_frames, axis=1).astype(np.float32)
    stds = pd.concat(std_frames, axis=1)
    features = pd.concat([means, stds], axis=1)

    # cut off the padding rows again
    return features.iloc[pad_len:, :].reset_index(drop=True)

In [None]:
# Smooth the input temperatures (mitigates artifacts).
# A centered rolling mean is computed separately for every profile, so a
# window never spans two measurement sessions. The rows at the edges of each
# profile, where the window is incomplete and the mean is NaN, keep their
# original values.
cols_to_smooth = ['ambient', 'coolant']
smoothing_window = 100
orig_x = df.loc[:, cols_to_smooth]
x_smoothed = [meas[cols_to_smooth].rolling(smoothing_window,
                                           center=True).mean()
              for _, meas in
              df[cols_to_smooth + [PROFILE_ID_COL]]
                  .groupby(PROFILE_ID_COL, sort=False)]
df.loc[:, cols_to_smooth] = pd.concat(x_smoothed).fillna(orig_x)

# One frame of input columns per profile, each with a fresh 0-based index.
# Grouping by the scalar key (not a one-element list) matches the smoothing
# step above and avoids tuple keys / warnings in recent pandas versions.
p_df_list = [meas.drop(PROFILE_ID_COL, axis=1).reset_index(drop=True)
             for _, meas in df[x_cols + [PROFILE_ID_COL]]
                 .groupby(PROFILE_ID_COL, sort=False)]

# Add EWMA and EWMS features.
# NOTE(review): the features are stacked in group order and attached to `df`
# by row position, which presumably relies on each profile occupying one
# contiguous run of rows and on `df` having a default 0..n-1 index - confirm.
# Re-running this cell would compute features of the already added features.
rolling_feats = pd.concat([dig_into_rolling_features(p) for p in p_df_list],
                          ignore_index=True)
df = pd.concat([df, rolling_feats], axis=1).dropna().reset_index(drop=True)

# Inputs now include the rolling features; targets stay unchanged.
x_cols = [x for x in df.columns.tolist() if x not in target_features + [PROFILE_ID_COL]]
y_cols = target_features

### Helper functions and classes
A small collection of formatting and visualization helpers.
Feel free to reuse them so that results are easier to compare.

In [None]:
from sklearn.metrics import mean_squared_error as mse, mean_squared_log_error\
    as msle, mean_absolute_error as mae, r2_score
from matplotlib.colors import rgb2hex


def print_scores(y_true, y_pred):
    """Print MSE, MAE, maximum absolute deviation and R2 of a prediction.

    Both arguments may be array-likes or objects exposing a `.values`
    attribute (e.g. pandas objects), in which case `.values` is used.
    """
    truth = getattr(y_true, 'values', y_true)
    estimate = getattr(y_pred, 'values', y_pred)

    squared_err = mse(truth, estimate)
    print(f'MSE: {squared_err:.6} K²')

    abs_err = mae(truth, estimate)
    print(f'MAE: {abs_err:.6} K')

    worst_dev = np.max(np.abs(estimate - truth))
    print(f'MaxAbsDev: {worst_dev:.6} K')

    r2 = r2_score(truth, estimate)
    print(f'R2 : {r2:.6}')

class Report:
    """Summary of an experiment/trial.

    Stores the test-set predictions and the ground truth of one trial
    together with optional training metadata, and offers helpers to plot
    and print the results. Predictions and ground truth are accessed by
    column name and must share the same columns (e.g. two DataFrames).
    """

    # column name -> subscript used in the LaTeX labels / readable axis name
    param_map = {'pm': '{PM}',
                 'stator_tooth': '{ST}',
                 'stator_yoke': '{SY}',
                 'stator_winding': '{SW}',
                 'motor_speed': 'motor speed',
                 'ambient': 'ambient temperature',
                 'coolant': 'coolant temperature'}
    # target column name -> readable description
    output_param_map = {'pm': 'magnet temperature',
                        'stator_tooth': 'stator tooth temperature',
                        'stator_yoke': 'stator yoke temperature',
                        'stator_winding': 'stator winding temperature'}

    def __init__(self, uid, yhat=None, actual=None, history=None,
                 used_loss=None, model=None):
        """Create a report.

        Args:
            uid: Identifier of the trial; shown in plot titles and in the
                printed report.
            yhat: Test-set predictions, indexed by target column name.
            actual: Test-set ground truth with the same columns as `yhat`.
            history: Optional training history; must expose a `.history`
                mapping with a 'loss' entry and optionally 'val_loss'
                (presumably a Keras-style History object - confirm).
            used_loss: Name of the loss, used in the history plot's y-label.
            model: The fitted model; only stored, not used by this class.
        """
        self.yhat_te = yhat
        self.actual = actual
        self.history = history
        self.uid = uid
        self.yhat_tr = None
        self.start_time = datetime.now().strftime("%Y-%m-%d %H:%M")
        self.used_loss = used_loss
        self.model = model

        # twelve named colors taken from matplotlib's Set3 colormap
        clr_names = (['turquoise', 'yellow', 'violet',
                      'red', 'blue', 'orange', 'green'] +
                     ['other_{}'.format(i) for i in range(5)])
        clr_values = [rgb2hex(c[:3])
                      for c in plt.cm.Set3(np.linspace(0, 1, 12))]
        clr_sets = {'dark_background': dict(zip(clr_names, clr_values))}
        self.clrs = clr_sets['dark_background']

    def _theta(self, col):
        """Return the LaTeX temperature symbol for the given column."""
        return r'$\vartheta_{}$'.format(self.param_map[col])

    def plot(self, show=True, with_input=False):
        """Draw all figures of the report.

        Switches matplotlib globally to the 'dark_background' style.

        Args:
            show: If true, `plt.show()` is called after drawing.
            with_input: Forwarded to `plot_compact_testset_error`.
        """
        plt.style.use('dark_background')

        self.plot_history()
        self.plot_compact_testset_error(with_input)
        self.plot_residual_over_y_range()

        if show:
            plt.show()

    def plot_history(self):
        """Plot training (and, if recorded, validation) loss over epochs.

        Does nothing when no history was given.
        """
        if self.history is None:
            return
        history = self.history.history
        plt.figure(figsize=(6, 4))
        plt.plot(history['loss'], label='train loss')
        # a history recorded without validation data has no 'val_loss'
        if 'val_loss' in history:
            plt.plot(history['val_loss'], label='validation loss')
        plt.xlabel('epoch')
        plt.ylabel(f'{self.used_loss} in K²')
        plt.title(f'Training/Validation Score over Epochs of Experiment '
                  f'{self.uid}')
        plt.semilogy()
        plt.legend()

    def plot_compact_testset_error(self, with_input=True):
        """Plot ground truth vs. prediction and the error for every target.

        One column of axes per target: the upper row shows both signals and
        the MSE, the lower row the estimation error and its maximum absolute
        value.

        Args:
            with_input: When false, the lower row gets the x-axis label
                'Time in hours'; when true, no x-axis label is set.
        """
        n_targets = len(self.actual.columns)
        rows = 2
        plot_length = 2 * rows
        # squeeze=False keeps `axes` two-dimensional even with one target
        fig, axes = plt.subplots(rows, n_targets, sharex=True, sharey='row',
                                 figsize=(12, plot_length), squeeze=False)

        # one tick every 7200 samples, labelled with the running tick number
        # (presented as hours by the x-axis label)
        n_rounded = np.around(len(self.actual), -3)
        tcks = np.arange(0, n_rounded, 7200)
        tcks_lbls = tcks // 7200

        for i, c in enumerate(self.actual):
            diff = self.yhat_te[c] - self.actual[c]

            # upper row: measured and estimated signal
            ax = axes[0, i]
            ax.set_title(self._theta(c), fontdict=dict(fontsize=12))
            ax.plot(self.actual[c], color=self.clrs['turquoise'],
                    label='ground truth',
                    linestyle='-')
            ax.plot(self.yhat_te[c], color=self.clrs['orange'],
                    label='prediction',
                    linestyle='-')
            ax.set_xlim(-1000, n_rounded + 300)
            if i == 0:
                ax.set_ylabel('Measured and\nestimated\ntemp. °C')
                ax.legend(ncol=1, loc='lower left')
            ax.set_xticks(tcks)
            ax.set_xticklabels(tcks_lbls)
            ax.text(0.5, 0.95,
                    s=f'MSE: {(diff ** 2).mean():.2f} (°C)²',
                    transform=ax.transAxes,
                    verticalalignment='top', horizontalalignment='center')
            ax.grid(alpha=0.5)

            # lower row: estimation error
            ax = axes[1, i]
            ax.plot(diff, color=self.clrs['red'],
                    label='Temperature Estimation error ' + self._theta(c))
            if i == 0:
                ax.set_ylabel('Temperature\nestimation\nerror °C')
            ax.text(0.5, 0.95,
                    transform=ax.transAxes,
                    s=r'$||e||_\infty$: ' + f'{diff.abs().max():.2f} °C',
                    verticalalignment='top', horizontalalignment='center')
            ax.grid(alpha=0.5)
            if not with_input:
                ax.set_xlabel('Time in hours')

        fig.tight_layout()

    def plot_residual_over_y_range(self):
        """Scatter the prediction error over the ground truth per target."""
        n_targets = len(self.actual.columns)
        rows = 1
        plot_length = 3 * rows
        # squeeze=False guarantees an array of axes even with one target
        fig, axes = plt.subplots(rows, n_targets, sharex=True, sharey='row',
                                 figsize=(n_targets*12/4, plot_length),
                                 squeeze=False)
        for i, (c, ax) in enumerate(zip(self.actual, axes.flatten())):
            residuals = (pd.DataFrame({c + '_true': self.actual[c],
                                       c + '_pred': self.yhat_te[c]})
                         .sort_values(c + '_true'))
            ax.scatter(residuals[f'{c}_true'],
                       residuals[f'{c}_pred'] - residuals[f'{c}_true'],
                       s=1, label=c, color=self.clrs['red'])
            ax.axhline(color='white', ls='--')
            ax.set_xlabel(self._theta(c) + ' ground truth °C')
            ax.set_title(self._theta(c) + ' prediction')
            if i == 0:
                ax.set_ylabel('prediction error °C')
            ax.grid(alpha=0.5)

        fig.tight_layout()

    def print(self):
        """Print the trial id followed by the test-set scores."""
        print('')
        print('#' * 5 + ' Trial Report ' + '#'*5)
        print(f"Trial ID: {self.uid}")
        print_scores(self.actual, self.yhat_te)
        print('#' * 20)

### Model training

We present the most basic statistical model, the ordinary least squares (OLS) approach.

In [None]:
# Hold out complete measurement profiles as the test set.
test_set_profiles = [65, 72]
is_test = df[PROFILE_ID_COL].isin(test_set_profiles)
trainset = df[~is_test].reset_index(drop=True)
testset = df[is_test].reset_index(drop=True)

x_train, y_train = trainset[x_cols], trainset[target_features]
x_test, y_test = testset[x_cols], testset[target_features]

# Standardize inputs and targets; both scalers are fitted on the training
# set only. The test targets stay unscaled.
scaler = StandardScaler()
y_scaler = StandardScaler()
x_train = pd.DataFrame(scaler.fit_transform(x_train), columns=x_cols)
x_test = pd.DataFrame(scaler.transform(x_test), columns=x_cols)
y_train = pd.DataFrame(y_scaler.fit_transform(y_train), columns=y_cols)

# Fit the linear model without an intercept on the standardized data.
ols = OLS(fit_intercept=False)
print('Start fitting OLS...')
ols.fit(x_train, y_train)
print('Predict with OLS...')
scaled_pred = ols.predict(x_test)
# bring the predictions back to the original target scale
pred = pd.DataFrame(y_scaler.inverse_transform(scaled_pred),
                    columns=y_test.columns)

In [None]:
# Quick performance overview of the OLS predictions via the Report class:
# draw the figures, then print the scores.
report = Report(uid='OLS', yhat=pred, actual=y_test)
report.plot()
report.print()