# Running Previous Code

In [None]:
if torch.cuda.is_available():
    device = torch.device('cuda')
    print("CUDA is available. Using GPU.")
else:
    device = torch.device('cpu')
    print("CUDA is not available. Using CPU.")

In [None]:
batch_size = config["model"]["batch_size"]
d_model = config["model"]["d_model"]
epochs = config["model"]["epochs"]
input_dim = config["model"]["input_dim"]
output_dim = config["model"]["output_dim"]
num_heads = config["model"]["num_heads"]
num_layers = config["model"]["num_layers"]
d_ff = config["model"]["d_ff"]
dropout = config["model"]["dropout"]

In [None]:
learning_rate = config["training"]["learning_rate"]
optimizer = config["training"]["optimizer"]
save_model_path = config["training"]["save_model_path"]

In [None]:
broadcast_clock_bias_scaler = MinMaxScaler()
correction_value_scaler = MinMaxScaler()

In [None]:
broadcast_clock_bias_scaler.fit(combined_broadcast_clock_bias)
correction_value_scaler.fit(combined_correction_value)


In [None]:
train_broadcast_clock_bias_scaled = broadcast_clock_bias_scaler.transform(train_broadcast_clock_bias.reshape(-1, 1))
val_broadcast_clock_bias_scaled = broadcast_clock_bias_scaler.transform(val_broadcast_clock_bias.reshape(-1, 1))
test_broadcast_clock_bias_scaled = broadcast_clock_bias_scaler.transform(test_broadcast_clock_bias.reshape(-1, 1))


In [None]:
train_correction_value_scaled = correction_value_scaler.transform(train_correction_value.reshape(-1, 1))
val_correction_value_scaled = correction_value_scaler.transform(val_correction_value.reshape(-1, 1))
test_correction_value_scaled = correction_value_scaler.transform(test_correction_value.reshape(-1, 1))


In [None]:
train_broadcast_clock_bias = train_broadcast_clock_bias_scaled.squeeze()
val_broadcast_clock_bias = val_broadcast_clock_bias_scaled.squeeze()
test_broadcast_clock_bias = test_broadcast_clock_bias_scaled.squeeze()


In [None]:
x_train, y_train, train_dataset = create_dataset(train_broadcast_clock_bias, train_correction_value, input_dim, output_dim)
x_val, y_val, val_dataset = create_dataset(val_broadcast_clock_bias, val_correction_value, input_dim, output_dim)
x_test, y_test, test_dataset = create_dataset(test_broadcast_clock_bias, test_correction_value, input_dim, output_dim)


In [None]:
print(f'Shape of X_train: {x_train.shape}')
print(f'Shape of X_val: {x_val.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'Shape of y_val: {y_val.shape}')
print(f'Shape of X_test: {x_test.shape}')
print(f'Shape of y_test: {y_test.shape}')

In [None]:
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)
val_dataloader = DataLoader(val_dataset, batch_size = batch_size, shuffle = False)
test_dataloader = DataLoader(test_dataset, batch_size = batch_size, shuffle = False)

In [None]:
for batch in train_dataloader:
    inputs, targets = batch
    print(f"Input shape: {inputs.shape}")
    print(f"Target shape: {targets.shape}")
    break  # To check the shape of just one batch

In [None]:
import torch.nn as nn


In [None]:
# model = nn.Transformer(d_model = 240 ).to(device)    # input_dim, output_dim, d_model, num_heads, num_layers, num_layers, d_ff, dropout
model = TransformerModel(input_dim, output_dim, d_model, num_heads, num_layers, d_ff, dropout).to(device)    

In [None]:
batch_size = config["model"]["batch_size"]
epochs = config["model"]["epochs"]
input_dim = config["model"]["input_dim"]
output_dim = config["model"]["output_dim"]
num_heads = config["model"]["num_heads"]
num_layers = config["model"]["num_layers"]
d_ff = config["model"]["d_ff"]
dropout = config["model"]["dropout"]

In [None]:
criterion = RMSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = epochs

In [None]:
train_losses, train_mae, val_losses, val_mae = train_model(model, train_dataloader, val_dataloader, criterion, optimizer, device, num_epochs)

In [None]:
csv_train_file_path = f'/Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias_journal/train_val_metrics'

#  mo{dataset}_his_{timesteps}_corr_{corr_timestep}_bs_{BS}_ep_{EPOCHS}_dmod_{D_MODEL}_nhead_{N_HEAD}_nlayer_{NUM_LAYERS}_ff_{DIM_FEEDFORWARD}.csv'

In [None]:
import csv

with open(csv_train_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write the header
    writer.writerow(['Epoch', 'Train Loss', 'Validation Loss', 'Train MAE', 'Validation MAE'])
    
    # Write the data
    for epoch in range(num_epochs):
        writer.writerow([epoch, train_losses[epoch], val_losses[epoch], train_mae[epoch], val_mae[epoch]])

In [None]:
    print(f"Training CSV file saved to {csv_train_file_path}")

In [None]:
from evaluate_model import evaluate_model

In [None]:
test_loss, mae, predictions, actuals = evaluate_model(model, test_dataloader, device, criterion)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error    

predictions_array = np.array(predictions).reshape(-1, 1)
actuals_array = np.array(actuals).reshape(-1, 1)

predictions_original = correction_value_scaler.inverse_transform(predictions_array)
actuals_original = correction_value_scaler.inverse_transform(actuals_array)

rmse_ns = mean_squared_error(actuals_original, predictions_original, squared=False)

r2 = r2_score(actuals_original, predictions_original)

print(f'RMSE (ns): {rmse_ns}')
print(f'R2: {r2}')

In [None]:
csv_test_file_path = f'/Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias_journal/test_metrics'

In [None]:
with open(csv_test_file_path, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Write test_loss, mae, and r2 in rows 1, 2, and 3
    writer.writerow(['Test Loss', test_loss])
    writer.writerow(['MAE', mae])
    writer.writerow(['R² Score', r2])
    
    # Skip row 4 (writing an empty row)
    writer.writerow([])
    
    # Write headers for predictions and actuals in row 5
    writer.writerow(['Predictions', 'Actuals'])
    
    # Write predictions and actuals starting from row 6
    for i in range(len(predictions_original)):
        writer.writerow([predictions_original[i], actuals_original[i]])

print(f"CSV file saved to {csv_test_file_path}")

In [None]:
import torch
import torch.nn as nn

In [None]:
transformer_model = nn.Transformer(nhead=16, num_encoder_layers=12, batch_first=False)
src = torch.rand((10, 32, 512))
tgt = torch.rand((20, 32, 512))
out = transformer_model(src, tgt)

# Testing Input Shape

In [1]:
import pandas as pd
import numpy as np
import scipy.io
import torch
from sklearn.preprocessing import MinMaxScaler
import yaml
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torch.utils.data import TensorDataset, DataLoader

In [2]:
from loss_functions import RMSELoss
from mat_utils import MatUtils
from transformer_model import TransformerModel

In [3]:
from dataset_utils import create_dataset
from evaluate_model import evaluate_model
from performance_metrics import r2, mae
from train import train_model

In [4]:
with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

In [5]:
train_filepath = config["data"]["file_paths"]["train"]
val_filepath = config["data"]["file_paths"]["validate"]
test_filepath = config["data"]["file_paths"]["test"]

In [6]:
print(train_filepath)
print(val_filepath)
print(test_filepath)

/Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/train/train_small.mat
/Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/validate/val_small.mat
/Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/test/test_small.mat


In [7]:
train_data = scipy.io.loadmat(train_filepath)
val_data = scipy.io.loadmat(val_filepath)
test_data = scipy.io.loadmat(test_filepath)

In [8]:
train_utils_instance = MatUtils(train_filepath)
validate_utils_instance = MatUtils(val_filepath)
test_utils_instance = MatUtils(test_filepath)

In [9]:
train_utils_instance.print_mat_file_content()
validate_utils_instance.print_mat_file_content()
test_utils_instance.print_mat_file_content()

Contents of /Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/train/train_small.mat:
Variable name: None, Shape: (1,)
Variable name: processed_final_clock_bias, Shape: (1, 304416)
Variable name: processed_broadcast_clock_bias, Shape: (1, 304416)
Variable name: processed_correction_value, Shape: (1, 304416)
Variable name: original_correction_value, Shape: (1, 304416)
Contents of /Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/validate/val_small.mat:
Variable name: None, Shape: (1,)
Variable name: processed_final_clock_bias, Shape: (1, 43488)
Variable name: processed_broadcast_clock_bias, Shape: (1, 43488)
Variable name: processed_correction_value, Shape: (1, 43488)
Variable name: original_correction_value, Shape: (1, 43488)
Contents of /Users/marilyn/Library/Mobile Documents/com~apple~CloudDocs/Documents/doctorate/ssdl/gps_bias/src/data/test/test_small.mat:
Variable name: original

In [10]:
train_broadcast_clock_bias = train_data.get('processed_broadcast_clock_bias')[0]
train_correction_value = train_data.get('processed_correction_value')[0]
val_broadcast_clock_bias = val_data.get('processed_broadcast_clock_bias')[0]
val_correction_value = val_data.get('processed_correction_value')[0]
test_broadcast_clock_bias = test_data.get('broadcast_clock_bias')[0]
test_correction_value = test_data.get('correction_value')[0]

In [11]:
combined_broadcast_clock_bias = np.concatenate((
    train_broadcast_clock_bias.reshape(-1, 1),
    val_broadcast_clock_bias.reshape(-1, 1),
    test_broadcast_clock_bias.reshape(-1, 1)
))

In [12]:
combined_correction_value = np.concatenate((
    train_correction_value.reshape(-1, 1),
    val_correction_value.reshape(-1, 1),
    test_correction_value.reshape(-1, 1)
))

In [13]:
print(combined_broadcast_clock_bias.shape)
print(combined_correction_value.shape)

(434880, 1)
(434880, 1)


In [14]:
# Create DataFrame with both features
df = pd.DataFrame({
    'broadcast_clock_bias': combined_broadcast_clock_bias.reshape(-1),
    'correction_value': combined_correction_value.reshape(-1)
})

In [15]:
sequence_length = 360  # 240 past + 120 future

# Create list to store windows
windows = []

# Iterate through DataFrame, sliding row by row
for i in range(len(df) - sequence_length):
    window = df.iloc[i : i + sequence_length].values  # Extract rows as NumPy array (shape: (360, 2))
    windows.append(window)

# Convert list to NumPy array
X = np.array(windows)  # Final shape: (num_samples, 360, 2)

## CHANGE LIST TO NP ARRAY HANDLING


In [29]:
test_model = nn.Transformer(d_model=256, batch_first=True)

In [17]:
import torch
from torch.utils.data import TensorDataset, DataLoader

batch_size = 64

# Convert NumPy array to PyTorch tensor
X_tensor = torch.tensor(X)  # Shape: (434520, 360, 2)

In [18]:
src = X_tensor[:, :240, 0].unsqueeze(-1)  # First 240 time steps as source

In [19]:
src.shape

torch.Size([434520, 240, 1])

In [20]:
tgt = X_tensor[:, 240:, 1].unsqueeze(-1)  # Last 120 time steps as target (only correction_value)

In [21]:
tgt.shape

torch.Size([434520, 120, 1])

In [22]:
src.float()
tgt.float()

tensor([[[1.4762e-09],
         [1.4762e-09],
         [1.4763e-09],
         ...,
         [1.4805e-09],
         [1.4806e-09],
         [1.4806e-09]],

        [[1.4762e-09],
         [1.4763e-09],
         [1.4763e-09],
         ...,
         [1.4806e-09],
         [1.4806e-09],
         [1.4806e-09]],

        [[1.4763e-09],
         [1.4763e-09],
         [1.4764e-09],
         ...,
         [1.4806e-09],
         [1.4806e-09],
         [1.4807e-09]],

        ...,

        [[1.5919e-09],
         [1.5919e-09],
         [1.5919e-09],
         ...,
         [1.5936e-09],
         [1.5936e-09],
         [1.5936e-09]],

        [[1.5919e-09],
         [1.5919e-09],
         [1.5919e-09],
         ...,
         [1.5936e-09],
         [1.5936e-09],
         [1.5936e-09]],

        [[1.5919e-09],
         [1.5919e-09],
         [1.5920e-09],
         ...,
         [1.5936e-09],
         [1.5936e-09],
         [1.5936e-09]]])

In [23]:
input_projection_dim = 256
input_projection = nn.Linear(1, input_projection_dim)  # Project input to higher dimension

In [24]:
dataset = TensorDataset(src, tgt)


In [25]:
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [26]:
loss_fn = RMSELoss()

In [30]:
import torch.optim as optim

# Define the optimizer (Adam in this case)
optimizer = optim.Adam(test_model.parameters(), lr=1e-4)  # Example learning rate


In [31]:
for batch_src, batch_tgt in dataloader:
    # Apply input projection to src
    batch_src = input_projection(batch_src.float())
    batch_tgt = input_projection(batch_tgt.float())

    output = test_model(batch_src, batch_tgt)

    # Calculate the loss (e.g., MSELoss or CrossEntropyLoss, depending on your task)
    loss = loss_fn(output, batch_tgt)

    # Backpropagation and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Optionally, print out loss or other metrics
    print(f"Loss: {loss.item()}")




Loss: 1.1486486196517944
Loss: 0.9030851721763611
Loss: 0.7603463530540466
Loss: 0.6812127232551575
Loss: 0.6281575560569763
Loss: 0.5834471583366394
Loss: 0.5516277551651001
Loss: 0.5290178656578064
Loss: 0.5134270191192627
Loss: 0.5022948384284973
Loss: 0.4941255450248718
Loss: 0.4884794354438782


KeyboardInterrupt: 