In [1]:
# Notebook-wide hyperparameters, read by the cells below.
dropout = 0.5     # dropout probability handed to the model constructor
batch_size = 50   # mini-batch size used by the training and validation DataLoaders
seq_len = 20      # number of consecutive rows in each input window

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset
import math
import csv
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

# Raw CSV files, one per dataset, located relative to the notebook's working directory.
file_paths = [
    r"./../data/raw/wind_speed_11_n.csv",
    r"./../data/raw/wind_speed_13_n.csv",
    r"./../data/raw/wind_speed_15_n.csv",
    r"./../data/raw/wind_speed_17_n.csv",
    r"./../data/raw/wind_speed_19_n.csv",
]

# Read every file into its own DataFrame, keeping the order of `file_paths`.
datasets = list(map(pd.read_csv, file_paths))

In [None]:
# Optional: uncomment the next line to keep only the first 10% of each dataset (change 0.1 to use more or less data).
#datasets = [dataset.iloc[:int(0.1 * len(dataset))] for dataset in datasets]

In [None]:
# Define features and targets
features = ["Mx1", "Mx2", "Mx3", "My1", "My2", "My3", "Theta", "Vwx", "Vwy", "Vwz",
            "beta1", "beta2", "beta3", "dbeta1", "dbeta2", "dbeta3", "omega_r"]
targets = ["Mz1", "Mz2", "Mz3"]

# Containers that the per-dataset split later in this cell fills in.
train_data_x = []
train_data_y = []
val_data_x = []
val_data_y = []
test_data = {}

# Collect the training rows (the first 60% of each dataset, in file order) so
# that the scalers are fitted on training data only. The validation and test
# rows are deliberately not touched in this pass.
all_train_features = []
all_train_targets = []

for dataset in datasets:
    train_end_idx = int(0.6 * len(dataset))
    train_segment = dataset.iloc[:train_end_idx]

    all_train_features.append(train_segment[features].values)
    all_train_targets.append(train_segment[targets].values)

# Combine all training data into single arrays
all_train_features = np.concatenate(all_train_features, axis=0)
all_train_targets = np.concatenate(all_train_targets, axis=0)

# Fit one scaler for the inputs and one for the outputs on the pooled training rows.
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()

scaler_x.fit(all_train_features)
scaler_y.fit(all_train_targets)

def create_sequences(data, targets, seq_len):
    """Cut row-aligned features and targets into fixed-length sliding windows.

    Window ``k`` holds rows ``k .. k + seq_len - 1`` of ``data`` and is
    labelled with ``targets[k + seq_len - 1]``, i.e. the target that belongs
    to the last row of that window.

    Args:
        data: array-like whose first axis indexes rows (time steps).
        targets: array-like whose first axis is aligned row-by-row with ``data``.
        seq_len: number of consecutive rows per window; must be at least 1.

    Returns:
        Tuple ``(X_seq, y_seq)`` of NumPy arrays holding
        ``len(data) - seq_len`` windows. ``X_seq`` has shape
        ``(n_windows, seq_len, *data.shape[1:])`` and ``y_seq`` has shape
        ``(n_windows, *targets.shape[1:])``. When the input is too short to
        yield a window, both arrays are empty but keep those trailing
        dimensions, so they can still be concatenated with the windows of
        other datasets.

    Raises:
        ValueError: if ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    data = np.asarray(data)
    targets = np.asarray(targets)

    # NOTE(review): the window that ends on the very last row is not emitted
    # (there are len(data) - seq_len windows, not len(data) - seq_len + 1).
    # The count is kept as-is because other cells size their arrays from it.
    n_windows = len(data) - seq_len
    if n_windows <= 0:
        empty_x = np.empty((0, seq_len) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,) + targets.shape[1:], dtype=targets.dtype)
        return empty_x, empty_y

    X_seq = np.array([data[start:start + seq_len] for start in range(n_windows)])
    y_seq = np.array([targets[start + seq_len - 1] for start in range(n_windows)])
    return X_seq, y_seq

# Split datasets
# Each dataset is cut in file order: the first 60% of rows go to training,
# the next 20% to validation and the remainder to testing.
for idx, frame in enumerate(datasets):
    row_count = len(frame)
    train_stop = int(0.6 * row_count)
    val_stop = int(0.8 * row_count)

    # Pair each scaled-and-windowed segment with the lists that collect it.
    segments = (
        (frame.iloc[:train_stop], train_data_x, train_data_y),
        (frame.iloc[train_stop:val_stop], val_data_x, val_data_y),
    )
    for segment, x_bucket, y_bucket in segments:
        scaled_x = scaler_x.transform(segment[features].values)
        scaled_y = scaler_y.transform(segment[targets].values)
        windows_x, windows_y = create_sequences(scaled_x, scaled_y, seq_len)
        x_bucket.append(windows_x)
        y_bucket.append(windows_y)

    # Test rows are stored as an unscaled DataFrame, keyed by dataset position.
    test_data[idx] = frame.iloc[val_stop:]

# Stack the per-dataset windows into one array per split.
train_data_x = np.concatenate(train_data_x, axis=0)
train_data_y = np.concatenate(train_data_y, axis=0)
val_data_x = np.concatenate(val_data_x, axis=0)
val_data_y = np.concatenate(val_data_y, axis=0)

In [None]:
# Convert the NumPy window arrays to float32 PyTorch tensors.
X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor = (
    torch.tensor(array, dtype=torch.float32)
    for array in (train_data_x, train_data_y, val_data_x, val_data_y)
)

# Pair inputs with targets so that a DataLoader can serve them in batches.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Training batches are reshuffled every epoch; validation keeps its order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# Define Feedforward Neural Network
class FeedforwardNN(nn.Module):
    """Fully connected regressor over a flattened window of time steps.

    The input is flattened to ``(batch, window_len * input_dim)`` and passed
    through two hidden layers (512 and 256 units, each followed by ReLU and
    dropout) and a final linear output layer.

    Args:
        input_dim: number of features per time step.
        output_dim: number of values predicted per window.
        dropout: dropout probability applied after each hidden layer.
        window_len: number of time steps per window. When omitted, the
            notebook-level ``seq_len`` is used.
    """

    def __init__(self, input_dim, output_dim, dropout=0.1, window_len=None):
        super().__init__()
        if window_len is None:
            # Fall back to the notebook-level window length.
            window_len = seq_len
        # Attribute names are part of the saved state_dict keys; keep them stable.
        self.fc1 = nn.Linear(input_dim * window_len, 512)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)
        self.fc3 = nn.Linear(256, output_dim)

    def forward(self, x):
        """Return predictions of shape ``(batch, output_dim)`` for a batch of windows."""
        # reshape (unlike view) also accepts non-contiguous inputs, e.g. a
        # transposed or sliced batch; it copies only when it has to.
        x = x.reshape(x.size(0), -1)  # Flatten everything except the batch axis
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.dropout1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.dropout2(x)
        x = self.fc3(x)
        return x

input_dim = len(features)
output_dim = len(targets)

# Seed PyTorch before the model is built so that a restart-and-run-all
# reproduces the same weight initialisation, dropout masks and (because the
# training DataLoader is given no generator of its own) shuffling order.
# NOTE(review): some GPU kernels can still be non-deterministic; see the
# PyTorch reproducibility notes if bit-exact GPU runs are required.
seed = 42
torch.manual_seed(seed)

model = FeedforwardNN(input_dim=input_dim, output_dim=output_dim, dropout=dropout)

In [None]:
# Training setup: use the GPU when PyTorch can see one, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# Adam over all model parameters with a fixed learning rate, minimising the
# mean squared error between predictions and targets.
optimizer = optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()

In [None]:
# Per-epoch mean losses, kept for the learning-curve plot at the end of this cell.
train_losses = []
val_losses = []

epochs = 50
for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    train_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        # The criterion returns the mean over the batch. Weight it by the
        # batch size so that a smaller final batch does not skew the epoch mean.
        train_loss += loss.item() * X_batch.size(0)

    train_loss /= len(train_loader.dataset)
    train_losses.append(train_loss)  # Store the training loss

    # ---- Validation pass (no gradient tracking, dropout disabled) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            val_loss += loss.item() * X_batch.size(0)

    val_loss /= len(val_loader.dataset)
    val_losses.append(val_loss)  # Store the validation loss

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Plot the losses
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss')
ax.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and Validation Loss Over Epochs')
ax.legend()
ax.grid()
plt.show()


In [None]:
# Persist only the learned parameters (the state dict), not the whole module object.
trained_weights = model.state_dict()
torch.save(trained_weights, "ffn1.pth")
print("Model saved as ffn1.pth")

In [None]:
# Evaluate the trained model on the held-out test segment of every dataset.
mse_per_dataset = {}
total_mse = 0
num_datasets = len(test_data)

model.eval()  # Set model to evaluation mode (dropout off)

for i, test_df in enumerate(test_data.values()):
    # Pull the time axis, the inputs and the ground truth out of the test segment.
    # Inputs are selected by name so they arrive in exactly the column order the
    # input scaler was fitted on, regardless of what else the CSV contains.
    t_test = test_df["t"].values
    features_test = test_df[features].values
    targets_test = test_df[targets].values

    # Scale with the already-fitted scalers and cut into windows.
    features_test_scaled = scaler_x.transform(features_test)
    targets_test_scaled = scaler_y.transform(targets_test)
    X_test, y_test = create_sequences(features_test_scaled, targets_test_scaled, seq_len)

    # Generate predictions
    with torch.no_grad():
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
        y_test_pred = model(X_test_tensor).cpu().numpy()

    # Score in the original (unscaled) units.
    inversed_pred = scaler_y.inverse_transform(y_test_pred)
    inversed_true = scaler_y.inverse_transform(y_test)
    mse = mean_squared_error(inversed_true, inversed_pred)
    mse_per_dataset[f"Dataset {i+1}"] = mse
    total_mse += mse

    # Align `t` and the ground truth with the predictions. create_sequences
    # labels window k with row k + seq_len - 1, so prediction 0 belongs to
    # row seq_len - 1 (not row seq_len).
    first_row = seq_len - 1
    last_row = first_row + len(inversed_pred)
    t_test_aligned = t_test[first_row:last_row]
    target_test = targets_test[first_row:last_row]

    # Plot only the first 1600 examples for a more readable figure.
    max_examples = 1600
    t_test_aligned_plot = t_test_aligned[:max_examples]
    target_test_plot = target_test[:max_examples]
    inversed_pred_plot = inversed_pred[:max_examples]

    fig, ax = plt.subplots(figsize=(12, 6))
    for j, target_name in enumerate(targets):
        ax.plot(t_test_aligned_plot, target_test_plot[:, j], label=f"Ground Truth {target_name} (Dotted)", linestyle="dotted")
        ax.plot(t_test_aligned_plot, inversed_pred_plot[:, j], label=f"Prediction {target_name}")

    ax.set_xlabel("Time (t)")
    ax.set_ylabel("Torque")
    ax.set_title(f"Dataset {i+1}: Predictions vs Ground Truth for All Torques (Mz1, Mz2, Mz3)")
    ax.legend()
    ax.grid()
    plt.show()

print("Mean Squared Error (MSE) for each dataset:")
for dataset_name, dataset_mse in mse_per_dataset.items():
    print(f"{dataset_name}: {dataset_mse:.4f}")

average_mse = total_mse / num_datasets
print(f"\nAverage MSE across all datasets: {average_mse:.4f}")
