In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
from sklearn import preprocessing

sns.set()

In [3]:
# load datasets

train_df = pd.read_csv('/content/drive/MyDrive/Thesis/TEP_FaultFree_Training.csv').iloc[:,1:]
test_df = pd.read_csv('/content/drive/MyDrive/Thesis/TEP_Faulty_Training.csv').iloc[:,1:]

In [11]:
# rename stuff according to this paper: https://www.sciencedirect.com/science/article/pii/S0098135414000969?via%3Dihub

X_dict = {
'XMEAS_1':'A_feed_stream',
'XMEAS_2':'D_feed_stream',
'XMEAS_3':'E_feed_stream',
'XMEAS_4':'Total_fresh_feed_stripper',
'XMEAS_5':'Recycle_flow_into_rxtr',
'XMEAS_6':'Reactor_feed_rate',
'XMEAS_7':'Reactor_pressure',
'XMEAS_8':'Reactor_level',
'XMEAS_9':'Reactor_temp',
'XMEAS_10':'Purge_rate',
'XMEAS_11':'Separator_temp',
'XMEAS_12':'Separator_level',
'XMEAS_13':'Separator_pressure',
'XMEAS_14':'Separator_underflow',
'XMEAS_15':'Stripper_level',
'XMEAS_16':'Stripper_pressure',
'XMEAS_17':'Stripper_underflow',
'XMEAS_18':'Stripper_temperature',
'XMEAS_19':'Stripper_steam_flow',
'XMEAS_20':'Compressor_work',
'XMEAS_21':'Reactor_cooling_water_outlet_temp',
'XMEAS_22':'Condenser_cooling_water_outlet_temp',
'XMEAS_23':'Composition_of_A_rxtr_feed',
'XMEAS_24':'Composition_of_B_rxtr_feed',
'XMEAS_25':'Composition_of_C_rxtr_feed',
'XMEAS_26':'Composition_of_D_rxtr_feed',
'XMEAS_27':'Composition_of_E_rxtr_feed',
'XMEAS_28':'Composition_of_F_rxtr_feed',
'XMEAS_29':'Composition_of_A_purge',
'XMEAS_30':'Composition_of_B_purge',
'XMEAS_31':'Composition_of_C_purge',
'XMEAS_32':'Composition_of_D_purge',
'XMEAS_33':'Composition_of_E_purge',
'XMEAS_34':'Composition_of_F_purge',
'XMEAS_35':'Composition_of_G_purge',
'XMEAS_36':'Composition_of_H_purge',
'XMEAS_37':'Composition_of_D_product',
'XMEAS_38':'Composition_of_E_product',
'XMEAS_39':'Composition_of_F_product',
'XMEAS_40':'Composition_of_G_product',
'XMEAS_41':'Composition_of_H_product',
'XMV_1':'D_feed_flow_valve',
'XMV_2':'E_feed_flow_valve',
'XMV_3':'A_feed_flow_valve',
'XMV_4':'Total_feed_flow_stripper_valve',
'XMV_5':'Compressor_recycle_valve',
'XMV_6':'Purge_valve',
'XMV_7':'Separator_pot_liquid_flow_valve',
'XMV_8':'Stripper_liquid_product_flow_valve',
'XMV_9':'Stripper_steam_valve',
'XMV_10':'Reactor_cooling_water_flow_valve',
'XMV_11':'Condenser_cooling_water_flow_valve',
'XMV_12':'Agitator_speed'
   }

train_df = train_df.rename(columns = lambda x:X_dict[x.upper()] if x.upper() in X_dict.keys()  else x)
test_df = test_df.rename(columns = lambda x:X_dict[x.upper()] if x.upper() in X_dict.keys()  else x)

In [16]:
train_df_sim1 = train_df[train_df.simulationRun == 1].iloc[:,3:]

In [27]:
fig, ax = plt.subplots(13,4, figsize=(30,90))

alpha = 0.2 # smoothing factor; 0 < alpha < 1

for i in range(train_df_sim1.shape[1]):

    x = train_df_sim1.iloc[:, i]
    mean = x.mean()
    std = x.std(ddof=1)

    ewma_values = [mean] # initialize
    for j in range(1,train_df_sim1.shape[1]):
        ewma = alpha * x[i] + (1 - alpha) * ewma_values[-1]
        ewma_values.append(ewma)

    lowerlim = mean - 3*std
    upperlim = mean + 3*std

    x.plot(ax=ax.ravel()[i])

    ax.ravel()[i].plot(ewma_values)
    ax.ravel()[i].axhline(mean, c='k')
    ax.ravel()[i].axhline(lowerlim, ls='--', c='r')
    ax.ravel()[i].axhline(upperlim, ls='--', c='r')
    ax.ravel()[i].set_title(train_df.columns[i])


Output hidden; open in https://colab.research.google.com to view.

In [26]:
train_df_sim1


Unnamed: 0,A_feed_stream,D_feed_stream,E_feed_stream,Total_fresh_feed_stripper,Recycle_flow_into_rxtr,Reactor_feed_rate,Reactor_pressure,Reactor_level,Reactor_temp,Purge_rate,...,E_feed_flow_valve,A_feed_flow_valve,Total_feed_flow_stripper_valve,Compressor_recycle_valve,Purge_valve,Separator_pot_liquid_flow_valve,Stripper_liquid_product_flow_valve,Stripper_steam_valve,Reactor_cooling_water_flow_valve,Condenser_cooling_water_flow_valve
0,0.25038,3674.0,4529.0,9.2320,26.889,42.402,2704.3,74.863,120.41,0.33818,...,53.744,24.657,62.544,22.137,39.935,42.323,47.757,47.510,41.258,18.447
1,0.25109,3659.4,4556.6,9.4264,26.721,42.576,2705.0,75.000,120.41,0.33620,...,53.414,24.588,59.259,22.084,40.176,38.554,43.692,47.427,41.359,17.194
2,0.25038,3660.3,4477.8,9.4426,26.875,42.070,2706.2,74.771,120.42,0.33563,...,54.357,24.666,61.275,22.380,40.244,38.990,46.699,47.468,41.199,20.530
3,0.24977,3661.3,4512.1,9.4776,26.758,42.063,2707.2,75.224,120.39,0.33553,...,53.946,24.725,59.856,22.277,40.257,38.072,47.541,47.658,41.643,18.089
4,0.29405,3679.0,4497.0,9.3381,26.889,42.650,2705.1,75.388,120.39,0.32632,...,53.658,28.797,60.717,21.947,39.144,41.955,47.645,47.346,41.507,18.461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.24602,3651.9,4504.4,9.4721,26.600,42.312,2707.2,74.623,120.40,0.32381,...,54.132,23.663,59.588,22.481,38.952,33.489,45.203,49.900,41.057,16.448
496,0.23457,3663.0,4529.5,9.3440,26.872,42.434,2704.1,75.123,120.40,0.35454,...,54.028,22.776,60.566,22.237,42.978,38.017,42.637,50.075,41.339,17.435
497,0.23068,3656.4,4493.0,9.2978,26.920,42.536,2700.3,74.942,120.39,0.35859,...,54.465,23.158,62.102,21.639,42.483,41.016,46.782,50.098,41.277,18.315
498,0.24263,3632.5,4521.5,9.3915,26.956,42.317,2701.9,74.900,120.37,0.34057,...,54.532,23.386,63.164,22.661,39.558,37.892,47.468,50.312,40.557,19.198


In [22]:
#import matplotlib.pyplot as plt

alpha = 0.2  # Smoothing factor; 0 < alpha < 1

# Create subplots with 13 rows and 4 columns
fig, ax = plt.subplots(13, 4, figsize=(30, 90))

for i in range(train_df_sim1.shape[1]):

    x = train_df_sim1.iloc[:, i]  # Use train_df instead of train_df_sim1
    mean = x.mean()
    std = x.std(ddof=1)

    ewma_values = [mean]  # Initialize with the mean value
    for j in range(1, len(x)):
        ewma = alpha * x[j] + (1 - alpha) * ewma_values[-1]
        ewma_values.append(ewma)

    lowerlim = mean - 3 * std
    upperlim = mean + 3 * std

    # Plot the original data
    ax[i // 4, i % 4].plot(x, label='Original Data', color='blue')

    # Plot the EWMA values
    ax[i // 4, i % 4].plot(ewma_values, label='EWMA', color='orange')

    # Add horizontal lines for mean and control limits
    ax[i // 4, i % 4].axhline(mean, c='k', label='Mean')
    ax[i // 4, i % 4].axhline(lowerlim, ls='--', c='r', label='Lower Control Limit')
    ax[i // 4, i % 4].axhline(upperlim, ls='--', c='r', label='Upper Control Limit')

    ax[i // 4, i % 4].set_title(train_df.columns[i])
    ax[i // 4, i % 4].legend()

plt.tight_layout()
plt.show()


IndexError: ignored

Error in callback <function flush_figures at 0x7f0a301cc550> (for post_execute):


KeyboardInterrupt: ignored