In [16]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [17]:
df = pd.read_csv("/content/Dataset_timeseries.csv")
data = df[['Time (s)', 'SOPAS', 'Number']]

# Standardize the features
scaler = StandardScaler()
data[['Time (s)', 'SOPAS']] = scaler.fit_transform(data[['Time (s)', 'SOPAS']])

In [18]:
def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data.iloc[i:i+seq_length, :-1].values)  # Exclude the target column
        y.append(data.iloc[i+seq_length, -1])  # Target is the 'Number' column
    return np.array(X), np.array(y)

# Set sequence length
SEQ_LENGTH = 5

# Create sequences
X, y = create_sequences(data, SEQ_LENGTH)

In [19]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)

In [22]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(SEQ_LENGTH, 2)))
model.add(LSTM(50))
model.add(Dense(4, activation='softmax'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 5, 100)            41200     
                                                                 
 lstm_9 (LSTM)               (None, 50)                30200     
                                                                 
 dense_4 (Dense)             (None, 4)                 204       
                                                                 
Total params: 71604 (279.70 KB)
Trainable params: 71604 (279.70 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [23]:
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

Test Loss: 0.4446
Test Accuracy: 0.7859


## NOISE ADDITION PROCESS
- Add noise at once to all features
- Add noise to just one feature
- Test Resilience of different features to noise
- See how different models are resilient to noise addition

In [25]:
def add_noise(data, noise_level=0.01): # GAUSSIAN NOISE FUNCTION
    noise = np.random.normal(loc=0, scale=noise_level, size=data.shape)
    return data + noise

# data_noisy = add_noise(data[['Time (s)', 'SOPAS']].values, noise_level=0.01)
# data[['Time (s)', 'SOPAS']] = data_noisy


In [26]:
# Both featues have noise
data1 = df[['Time (s)', 'SOPAS', 'Number']]
data_noisy1 = add_noise(data1[['Time (s)', 'SOPAS']].values, noise_level=0.01)
data1[['Time (s)', 'SOPAS']] = data_noisy1

scaler = StandardScaler()
data1[['Time (s)', 'SOPAS']] = scaler.fit_transform(data1[['Time (s)', 'SOPAS']])

def create_sequences(data1, seq_length):
    X = []
    y = []
    for i in range(len(data1) - seq_length):
        X.append(data1.iloc[i:i+seq_length, :-1].values)  # Exclude the target column
        y.append(data1.iloc[i+seq_length, -1])  # Target is the 'Number' column
    return np.array(X), np.array(y)

# Set sequence length
SEQ_LENGTH = 5

# Create sequences
X, y = create_sequences(data1, SEQ_LENGTH)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)
# Build the LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(SEQ_LENGTH, 2)))
model.add(LSTM(50))
model.add(Dense(4, activation='softmax'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
print("BOTH FEATURES HAVE NOISE")
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_10 (LSTM)              (None, 5, 100)            41200     
                                                                 
 lstm_11 (LSTM)              (None, 50)                30200     
                                                                 
 dense_5 (Dense)             (None, 4)                 204       
                                                                 
Total params: 71604 (279.70 KB)
Trainable params: 71604 (279.70 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
BOTH FEATURES HAVE NOISE
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.4447
Test Accuracy: 0.7902


In [27]:
# Time column has noise
data2 = df[['Time (s)', 'SOPAS', 'Number']]
data_noisy2 = add_noise(data2[['Time (s)']].values, noise_level=0.01)
data2[['Time (s)']] = data_noisy2

scaler = StandardScaler()
data2[['Time (s)', 'SOPAS']] = scaler.fit_transform(data2[['Time (s)', 'SOPAS']])

def create_sequences(data2, seq_length):
    X = []
    y = []
    for i in range(len(data2) - seq_length):
        X.append(data2.iloc[i:i+seq_length, :-1].values)  # Exclude the target column
        y.append(data2.iloc[i+seq_length, -1])  # Target is the 'Number' column
    return np.array(X), np.array(y)

# Set sequence length
SEQ_LENGTH = 5

# Create sequences
X, y = create_sequences(data2, SEQ_LENGTH)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)
# Build the LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(SEQ_LENGTH, 2)))
model.add(LSTM(50))
model.add(Dense(4, activation='softmax'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
print("TIME HAS NOISE")
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_12 (LSTM)              (None, 5, 100)            41200     
                                                                 
 lstm_13 (LSTM)              (None, 50)                30200     
                                                                 
 dense_6 (Dense)             (None, 4)                 204       
                                                                 
Total params: 71604 (279.70 KB)
Trainable params: 71604 (279.70 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
TIME HAS NOISE
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.4482
Test Accuracy: 0.7888


In [28]:
# SOPAS column has noise
data3 = df[['Time (s)', 'SOPAS', 'Number']]
data_noisy3 = add_noise(data3[['Time (s)']].values, noise_level=0.01)
data3[['Time (s)']] = data_noisy3

scaler = StandardScaler()
data3[['Time (s)', 'SOPAS']] = scaler.fit_transform(data3[['Time (s)', 'SOPAS']])

def create_sequences(data3, seq_length):
    X = []
    y = []
    for i in range(len(data3) - seq_length):
        X.append(data3.iloc[i:i+seq_length, :-1].values)  # Exclude the target column
        y.append(data3.iloc[i+seq_length, -1])  # Target is the 'Number' column
    return np.array(X), np.array(y)

# Set sequence length
SEQ_LENGTH = 5

# Create sequences
X, y = create_sequences(data3, SEQ_LENGTH)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes=4)
y_test = to_categorical(y_test, num_classes=4)
# Build the LSTM model
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(SEQ_LENGTH, 2)))
model.add(LSTM(50))
model.add(Dense(4, activation='softmax'))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()
print("SOPAS HAS NOISE")
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss:.4f}')
print(f'Test Accuracy: {accuracy:.4f}')

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_14 (LSTM)              (None, 5, 100)            41200     
                                                                 
 lstm_15 (LSTM)              (None, 50)                30200     
                                                                 
 dense_7 (Dense)             (None, 4)                 204       
                                                                 
Total params: 71604 (279.70 KB)
Trainable params: 71604 (279.70 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
SOPAS HAS NOISE
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Loss: 0.4407
Test Accuracy: 0.7927
