In [30]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import torch.nn as nn
import torch

In [3]:
df = pd.read_csv("../data/engine_knock_data_minute.csv")

In [4]:
df.head()

Unnamed: 0,Timestamp,Knock,RPM,IgnitionTiming,CylinderPressure,BurnRate,Vibration,EGOVoltage,TempSensor
0,2025-01-01 00:00:00,True,3049.671415,10.0,27.824252,5.80016,0.530093,0.3,90.012747
1,2025-01-01 00:01:00,False,2988.355225,10.0,27.376702,8.290575,0.101943,0.45,88.775253
2,2025-01-01 00:02:00,False,3069.132122,10.0,22.180672,9.703642,0.093855,0.45,99.303394
3,2025-01-01 00:03:00,False,3158.847783,10.0,18.106361,5.032846,-0.019272,0.45,80.344908
4,2025-01-01 00:04:00,False,2985.310866,10.0,12.967104,1.99719,0.128391,0.45,88.456489


In [5]:
df = df.drop(columns=['Knock', 'IgnitionTiming'])

In [6]:
df.head()

Unnamed: 0,Timestamp,RPM,CylinderPressure,BurnRate,Vibration,EGOVoltage,TempSensor
0,2025-01-01 00:00:00,3049.671415,27.824252,5.80016,0.530093,0.3,90.012747
1,2025-01-01 00:01:00,2988.355225,27.376702,8.290575,0.101943,0.45,88.775253
2,2025-01-01 00:02:00,3069.132122,22.180672,9.703642,0.093855,0.45,99.303394
3,2025-01-01 00:03:00,3158.847783,18.106361,5.032846,-0.019272,0.45,80.344908
4,2025-01-01 00:04:00,2985.310866,12.967104,1.99719,0.128391,0.45,88.456489


In [7]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

In [8]:
df.isna().sum()

Timestamp           0
RPM                 0
CylinderPressure    0
BurnRate            0
Vibration           0
EGOVoltage          0
TempSensor          0
dtype: int64

In [13]:
# Initialize the scaler
scaler = MinMaxScaler()

# Normalize each column
scaled_data = scaler.fit_transform(df.drop(columns=['Timestamp']))

# Convert back to a DataFrame with the same index and columns
df_scaled = pd.DataFrame(scaled_data, index=df.index, columns=df.columns[1:])

In [17]:
df_scaled['Timestamp'] = df['Timestamp']

# Reorder columns to put Timestamp first
df_scaled = df_scaled[['Timestamp'] + [col for col in df_scaled.columns if col != 'Timestamp']]

In [18]:
df_scaled.head()

Unnamed: 0,Timestamp,RPM,CylinderPressure,BurnRate,Vibration,EGOVoltage,TempSensor
0,2025-01-01 00:00:00,0.525701,0.725391,0.575539,0.868208,2.329492e-10,0.307286
1,2025-01-01 00:01:00,0.491755,0.71349,0.822658,0.347564,1.0,0.278614
2,2025-01-01 00:02:00,0.536475,0.575316,0.962874,0.337729,1.0,0.522542
3,2025-01-01 00:03:00,0.586144,0.466971,0.4994,0.200164,1.0,0.083291
4,2025-01-01 00:04:00,0.490069,0.330306,0.198177,0.379726,1.0,0.271229


In [20]:
df.shape

(172800, 7)

In [19]:
# Define the sequence length (e.g., use 60 timesteps to predict the next one)
sequence_length = 60

def create_sequences(data, seq_length):
    X, y = [], []
    for i in range(len(data) - seq_length):
        X.append(data[i:i + seq_length])  # Sequence of seq_length values
        y.append(data[i + seq_length])    # Next value to predict
    return np.array(X), np.array(y)

# Example: Create sequences for the 'RPM' column
rpm_data = df_scaled['RPM'].values
X_rpm, y_rpm = create_sequences(rpm_data, sequence_length)

# Print shapes to verify
print("X_rpm shape:", X_rpm.shape)
print("y_rpm shape:", y_rpm.shape)

X_rpm shape: (172740, 60)
y_rpm shape: (172740,)


In [27]:
class LSTMModel(nn.Module):
    def __init__(self, input_size=1, hidden_size=50, num_layers=1, output_size=1):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        # Initialize hidden state with zeros
        batch_size = x.size(0)  # Dynamically get batch size from input
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size).to(x.device)
        
        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        # Get the outputs for the last time step
        out = out[:, -1, :]
        out = self.fc(out)
        return out

# Example: Instantiate the model for a feature
model = LSTMModel(input_size=1, hidden_size=50, num_layers=1, output_size=1)
print(model)

LSTMModel(
  (lstm): LSTM(1, 50, batch_first=True)
  (fc): Linear(in_features=50, out_features=1, bias=True)
)


In [23]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [28]:
# Split the RPM data into train and test sets (98-20 split)
feature = "RPM"
X_rpm, y_rpm = create_sequences(rpm_data, sequence_length)  # Assume this function is defined
train_size = int(len(X_rpm) * 0.8)
X_train = X_rpm[:train_size]
y_train = y_rpm[:train_size]
X_test = X_rpm[train_size:]
y_test = y_rpm[train_size:]

# Convert to PyTorch tensors
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test)

# Reshape X_train and X_test to (batch_size, sequence_length, features)
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Number of epochs
num_epochs = 50
batch_size = 32

# Create DataLoader for training
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

In [31]:
# Training loop with detailed logging
for epoch in range(num_epochs):
    # Training phase
    model.train()
    total_train_loss = 0
    train_predictions = []
    train_targets = []

    for batch_X, batch_y in train_loader:
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs.squeeze(), batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()
        
        # Collect predictions and targets for R² score
        train_predictions.extend(outputs.squeeze().detach().cpu().numpy())
        train_targets.extend(batch_y.detach().cpu().numpy())

    # Compute average training loss
    avg_train_loss = total_train_loss / len(train_loader)

    # Compute training R² score
    train_r2 = r2_score(train_targets, train_predictions)

    # Testing phase
    model.eval()
    with torch.no_grad():
        # Forward pass on test set
        y_pred = model(X_test)
        test_loss = criterion(y_pred.squeeze(), y_test)
        
        # Compute test R² score
        test_predictions = y_pred.squeeze().detach().cpu().numpy()
        test_targets = y_test.detach().cpu().numpy()
        test_r2 = r2_score(test_targets, test_predictions)

    # Print detailed logs for each epoch
    print(f'Epoch [{epoch+1}/{num_epochs}]')
    print(f'  Training Loss: {avg_train_loss:.4f}')
    print(f'  Training R² Score: {train_r2:.4f}')
    print(f'  Test Loss: {test_loss.item():.4f}')
    print(f'  Test R² Score: {test_r2:.4f}')
    print('-' * 40)

# Final evaluation after training
model.eval()
with torch.no_grad():
    y_pred = model(X_test)
    final_test_loss = criterion(y_pred.squeeze(), y_test)
    final_test_predictions = y_pred.squeeze().detach().cpu().numpy()
    final_test_targets = y_test.detach().cpu().numpy()
    final_test_r2 = r2_score(final_test_targets, final_test_predictions)
    print('Final Results:')
    print(f'  Final Test Loss: {final_test_loss.item():.4f}')
    print(f'  Final Test R² Score: {final_test_r2:.4f}')

Epoch [1/50]
  Training Loss: 0.4183
  Training R² Score: -9.1033
  Test Loss: 0.4179
  Test R² Score: -9.1332
----------------------------------------
Epoch [2/50]
  Training Loss: 0.4183
  Training R² Score: -9.1033
  Test Loss: 0.4179
  Test R² Score: -9.1332
----------------------------------------
Epoch [3/50]
  Training Loss: 0.4183
  Training R² Score: -9.1033
  Test Loss: 0.4179
  Test R² Score: -9.1332
----------------------------------------


KeyboardInterrupt: 