In [1]:
import torch
import os
import wandb
import torchvision
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,random_split
from torchvision.datasets import ImageFolder
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import f1_score, precision_score, recall_score,accuracy_score


# Pick the compute backend in priority order: CUDA, then Apple MPS, then CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Seed every random number generator used by the notebook so runs are repeatable.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    # Seed the current CUDA device, then every visible device (multi-GPU case).
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Restrict cuDNN to deterministic kernels and switch off its autotuner.
    # This trades speed for reproducibility; flip both flags when
    # reportable reproducibility is not required.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

Using device: cuda


In [2]:
# The raw text file has no header row, so column labels are supplied here.
column_names = ['id', 'longitude', 'latitude', 'value']
data = pd.read_csv(
    '3D_spatial_network.txt',
    names=column_names,
    header=None,
)

# Write a copy that includes the header row (the DataFrame index is omitted).
data.to_csv('3D_spatial_network.csv', index=False)

# Next step (RNN preparation): turn the rows into fixed-length sequences of
# coordinates, with 'value' as the quantity to predict.

In [3]:
def create_sequences(data, sequence_length):
    """Build sliding-window samples from a 2-D array of rows.

    Window ``i`` is made of rows ``i .. i + sequence_length - 1``. Its features
    are the first two columns of those rows, and its target is column 2 of the
    row that immediately follows the window (row ``i + sequence_length``).

    Args:
        data: 2-D array-like with at least three columns. It is converted with
            ``np.asarray``.
        sequence_length: Number of consecutive rows per window; must be >= 1.

    Returns:
        Tuple ``(sequences, targets)`` where ``sequences`` has shape
        ``(n_windows, sequence_length, 2)`` and ``targets`` has shape
        ``(n_windows,)``, with ``n_windows = max(len(data) - sequence_length, 0)``.
        When there are too few rows, both arrays are empty but keep these
        shapes, so downstream code can rely on the rank.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

    data = np.asarray(data)
    n_windows = max(len(data) - sequence_length, 0)

    # Row-index matrix of shape (n_windows, sequence_length): entry [i, j] is
    # i + j, i.e. the j-th row of window i. One fancy-indexing gather then
    # replaces the per-window Python loop.
    window_rows = np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    sequences = data[window_rows, :2]

    # Target of window i is column 2 of row i + sequence_length.
    targets = data[sequence_length:sequence_length + n_windows, 2].copy()
    return sequences, targets

# Scale longitude, latitude and value into [0, 1] (MinMaxScaler's default range).
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# NOTE(review): the scaler is fit on the full dataset before any train/val/test
# split, so min/max statistics of held-out rows influence the training inputs.
# The target column is scaled as well, so every loss/MAE reported downstream
# is in normalized units, not in the original units of 'value'.
scaled_data = scaler.fit_transform(data[['longitude', 'latitude', 'value']])

# Create sequences
sequence_length = 10  # Adjust based on your needs
X, y = create_sequences(scaled_data, sequence_length)

# train_test_split is imported here; the actual 70/15/15 train/val/test split
# is performed in a later cell. The 80/20 split that used to sit here was dead
# code: all four of its results were overwritten before being used.
from sklearn.model_selection import train_test_split

# For RNN input shape: [samples, timesteps, features]
# X has shape (n_samples, sequence_length, 2) because only the two coordinate
# columns are used as features.

In [4]:
# Authenticate with Weights & Biases. The API key is read from the environment
# and the cell fails fast when it is missing.
wandb_api_key = os.getenv("WANDB_API_KEY")
if not wandb_api_key:
    raise ValueError("WANDB_API_KEY environment variable not set!")
wandb.login(key=wandb_api_key)

# Start the run and register the hyperparameters used by the cells below.
wandb.init(
    project="Assignment6",
    entity="usf-magma",
    config={
        "learning_rate": 0.00085,
        "dropout_percentage": 0.45,
        "batch_size": 64,
        "epochs": 3,
        "momentum": (0.9, 0.92),  # passed to Adam as its `betas` pair
        "weight_decay": 1e-5,
        "optimizer": "Adam",
        "criterion": "MSELoss",  # regression objective
        "input_size": 1,
        "hidden_size": 4,
    },
)

# Short alias used by the rest of the notebook.
config = wandb.config

wandb: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
wandb: Appending key for api.wandb.ai to your netrc file: C:\Users\maurb\_netrc
wandb: Currently logged in as: bermudezm (usf-magma) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [10]:
class DualRNN(nn.Module):
    """Two independent single-layer RNNs, one per coordinate, feeding one linear head.

    Feature 0 of the input (longitude) goes through ``rnn_long`` and feature 1
    (latitude) through ``rnn_lat``. The last-timestep outputs of both RNNs are
    concatenated, passed through dropout, and mapped to a single value.

    Args:
        input_size: Input width of each RNN. ``forward`` feeds each RNN exactly
            one feature per timestep.
        hidden_size: Hidden width of each RNN.
        dropout: Dropout probability applied to the concatenated features.
    """

    def __init__(self, input_size=1, hidden_size=4, dropout=0.45):
        super().__init__()
        # Both branches share the same configuration but own separate weights.
        rnn_options = dict(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=1,
            batch_first=True,
        )
        self.rnn_long = nn.RNN(**rnn_options)  # longitude branch
        self.rnn_lat = nn.RNN(**rnn_options)   # latitude branch
        self.dropout = nn.Dropout(dropout)
        # The head sees both final states side by side, hence 2 * hidden_size.
        self.fc = nn.Linear(2 * hidden_size, 1)

    def forward(self, x):
        """Return a ``[batch_size, 1]`` prediction for ``x`` of shape ``[batch_size, sequence_length, 2]``."""
        # Peel off each coordinate and restore a trailing feature axis of size 1.
        lon_seq = x.select(2, 0).unsqueeze(-1)
        lat_seq = x.select(2, 1).unsqueeze(-1)

        # Only the per-timestep outputs are needed; the returned hidden state is ignored.
        lon_states, _ = self.rnn_long(lon_seq)
        lat_states, _ = self.rnn_lat(lat_seq)

        # Keep the last timestep of each branch and join them feature-wise.
        final_states = (lon_states[:, -1, :], lat_states[:, -1, :])
        features = self.dropout(torch.cat(final_states, dim=1))

        return self.fc(features)

In [11]:
# Stage 1: hold out 30% of the windows; the remaining 70% is the training set.
# NOTE(review): the windows were built with a stride of one row, so neighbouring
# windows share most of their rows. A shuffled random split can therefore put
# near-duplicate windows on both sides - confirm this is acceptable.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42,
)

# Stage 2: cut the held-out 30% in half, giving 15% validation and 15% test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp,
    test_size=0.5,
    random_state=42,
)

# Report the split sizes as a sanity check.
print(f"Training set size: {len(X_train)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

Training set size: 304404
Validation set size: 65230
Test set size: 65230


In [12]:
def prepare_dataloader(X, y, batch_size, shuffle=True):
    """Wrap NumPy feature/target arrays in a DataLoader of float32 tensors.

    Args:
        X: NumPy array of inputs; the first axis indexes samples.
        y: NumPy array of targets with the same number of samples as ``X``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples on each pass.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` that yields
        ``(inputs, targets)`` pairs.
    """
    # from_numpy requires real ndarrays; .float() casts to float32.
    features, labels = (torch.from_numpy(array).float() for array in (X, y))
    return DataLoader(
        torch.utils.data.TensorDataset(features, labels),
        batch_size=batch_size,
        shuffle=shuffle,
    )

In [13]:
# One loader per split. Only the training loader reshuffles its samples;
# validation and test keep a fixed order.
train_loader = prepare_dataloader(X_train, y_train, config.batch_size, shuffle=True)
val_loader = prepare_dataloader(X_val, y_val, config.batch_size, shuffle=False)
test_loader = prepare_dataloader(X_test, y_test, config.batch_size, shuffle=False)

# Pull a single batch from each loader and print its tensor shapes as a sanity check.
sample_train_batch = next(iter(train_loader))
print(f"Train batch - inputs: {sample_train_batch[0].shape}, targets: {sample_train_batch[1].shape}")

sample_val_batch = next(iter(val_loader))
print(f"Val batch - inputs: {sample_val_batch[0].shape}, targets: {sample_val_batch[1].shape}")

sample_test_batch = next(iter(test_loader))
print(f"Test batch - inputs: {sample_test_batch[0].shape}, targets: {sample_test_batch[1].shape}")

Train batch - inputs: torch.Size([64, 10, 2]), targets: torch.Size([64])
Val batch - inputs: torch.Size([64, 10, 2]), targets: torch.Size([64])
Test batch - inputs: torch.Size([64, 10, 2]), targets: torch.Size([64])


In [15]:
# Training loop with per-epoch validation and periodic test evaluation
def _evaluate_regression(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradients.

    Returns:
        Tuple ``(loss, mae, mse, r2)``. ``loss`` is the sample-weighted mean of
        ``criterion`` over all samples. ``r2`` is ``1 - mse / var(targets)``,
        or NaN when the targets have zero variance.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    total_loss = 0.0
    samples_seen = 0
    pred_batches, target_batches = [], []

    with torch.no_grad():
        for data, target in loader:
            data = data.to(device).float()
            target = target.to(device).float().unsqueeze(1)  # (batch,) -> (batch, 1)

            outputs = model(data)
            # criterion returns a batch mean; re-weight by batch size so a
            # short final batch does not distort the average.
            total_loss += criterion(outputs, target).item() * data.size(0)
            samples_seen += data.size(0)

            pred_batches.append(outputs.cpu().numpy())
            target_batches.append(target.cpu().numpy())

    if samples_seen == 0:
        raise ValueError("Cannot evaluate: the data loader yielded no samples")

    residuals = np.concatenate(pred_batches) - np.concatenate(target_batches)
    mse = np.mean(residuals ** 2)
    mae = np.mean(np.abs(residuals))
    target_variance = np.var(np.concatenate(target_batches))
    # R^2 is undefined for constant targets; report NaN instead of dividing by zero.
    r2 = 1 - (mse / target_variance) if target_variance > 0 else float('nan')
    return total_loss / samples_seen, mae, mse, r2


def train_model(model, train_loader, val_loader, test_loader, criterion, optimizer, num_epochs=config.epochs, save_dir=None):
    """Train ``model``, validating every epoch and checkpointing the best epoch.

    Each epoch runs one training pass, then a validation pass. The test set is
    evaluated on epochs 1, 6, 11, ... and on the final epoch. Metrics are
    printed and logged to the active W&B run. Whenever the validation loss
    improves, the model's ``state_dict`` is written to ``save_dir`` and
    uploaded with ``wandb.save``.

    Args:
        model: Module to train; it is used on the notebook-level ``device``.
        train_loader: Loader of ``(inputs, targets)`` training batches.
        val_loader: Loader of validation batches.
        test_loader: Loader of test batches.
        criterion: Loss function returning a batch-mean scalar.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of epochs to run.
        save_dir: Directory for checkpoints. When ``None``, a notebook-level
            ``save_dir`` variable is used if one exists, otherwise
            ``"saved_models"``. The directory is created if missing.

    Returns:
        The same ``model`` object, holding the weights of the last epoch
        (not necessarily the best one).

    Raises:
        ValueError: If a loader that gets evaluated or trained on is empty.
    """
    # Older callers relied on a global `save_dir`; honour it when present so
    # existing cells keep working, but no longer crash when it is absent.
    if save_dir is None:
        save_dir = globals().get("save_dir", "saved_models")
    os.makedirs(save_dir, exist_ok=True)

    # Track best performance
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # ---- Training phase ----
        model.train()
        running_loss = 0.0
        samples_seen = 0
        train_pred_batches = []
        train_target_batches = []

        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(device).float()  # shape: (batch, seq_len, 2)
            target = target.to(device).float().unsqueeze(1)  # shape: (batch, 1)

            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * data.size(0)
            samples_seen += data.size(0)

            # Keep one array per batch rather than one tiny array per sample.
            train_pred_batches.append(outputs.detach().cpu().numpy())
            train_target_batches.append(target.detach().cpu().numpy())

            # Log running statistics every 100 batches.
            if batch_idx % 100 == 99:
                # Running mean loss over the epoch so far, divided by the exact
                # number of samples processed (not batches * batch_size).
                batch_loss = running_loss / samples_seen

                # The criterion is expected to be MSE, so this equals batch_loss.
                batch_mse = batch_loss

                # MAE over the most recent 100 batches only.
                recent_preds = np.concatenate(train_pred_batches[-100:])
                recent_targets = np.concatenate(train_target_batches[-100:])
                batch_mae = np.mean(np.abs(recent_preds - recent_targets))

                print(f'Batch {batch_idx+1}, Loss (MSE): {batch_loss:.4f}, MAE: {batch_mae:.4f}')
                wandb.log({
                    "train_batch_loss": batch_loss,
                    "train_batch_mse": batch_mse,
                    "train_batch_mae": batch_mae,
                    "epoch": epoch + batch_idx/len(train_loader)
                })

        if samples_seen == 0:
            raise ValueError("Cannot train: train_loader yielded no samples")

        # Epoch-level training statistics. Predictions were collected in train
        # mode, i.e. with dropout active and while the weights were changing.
        train_residuals = np.concatenate(train_pred_batches) - np.concatenate(train_target_batches)
        train_loss = running_loss / samples_seen
        train_mae = np.mean(np.abs(train_residuals))
        train_mse = np.mean(train_residuals ** 2)

        print(f'Train Loss: {train_loss:.4f}, Train MAE: {train_mae:.4f}, Train MSE: {train_mse:.4f}')

        # ---- Validation phase ----
        val_loss, val_mae, val_mse, val_r2 = _evaluate_regression(model, val_loader, criterion)

        print(f'Validation Loss: {val_loss:.4f}, Val MAE: {val_mae:.4f}, Val MSE: {val_mse:.4f}, R²: {val_r2:.4f}')

        # ---- Test phase: first epoch, every 5th epoch after it, and the last epoch ----
        if epoch == num_epochs - 1 or epoch % 5 == 0:
            test_loss, test_mae, test_mse, test_r2 = _evaluate_regression(model, test_loader, criterion)

            print(f'Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}, Test MSE: {test_mse:.4f}, R²: {test_r2:.4f}')
        else:
            test_loss = None

        # Log epoch statistics to W&B
        log_dict = {
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_mae": train_mae,
            "train_mse": train_mse,
            "val_loss": val_loss,
            "val_mae": val_mae,
            "val_mse": val_mse,
            "val_r2": val_r2,
        }

        if test_loss is not None:
            log_dict.update({
                "test_loss": test_loss,
                "test_mae": test_mae,
                "test_mse": test_mse,
                "test_r2": test_r2,
            })

        wandb.log(log_dict)

        # Checkpoint whenever the validation loss improves.
        if val_loss < best_loss:
            best_loss = val_loss
            model_save_path = os.path.join(save_dir, f'dual_rnn_best_epoch_{epoch+1}.pth')
            torch.save(model.state_dict(), model_save_path)
            wandb.save(model_save_path)

            # Record the best epoch's metrics in the W&B run summary.
            wandb.run.summary["best_val_loss"] = best_loss
            wandb.run.summary["best_epoch"] = epoch + 1
            wandb.run.summary["best_val_mae"] = val_mae
            wandb.run.summary["best_val_r2"] = val_r2

    print(f'Best validation loss: {best_loss:.4f}')
    return model

# Main execution: build loaders, model and optimizer, train, then save the final weights.
if __name__ == "__main__":
    # Create a folder for saving models if it doesn't exist
    save_dir = "saved_models"
    os.makedirs(save_dir, exist_ok=True)

    # Prepare data loaders. Only the training loader is shuffled; validation
    # and test keep a fixed order, matching the loaders built in the earlier
    # cell. (prepare_dataloader defaults to shuffle=True, so the flag must be
    # passed explicitly for the evaluation splits.)
    train_loader = prepare_dataloader(X_train, y_train, config.batch_size, shuffle=True)
    val_loader = prepare_dataloader(X_val, y_val, config.batch_size, shuffle=False)
    test_loader = prepare_dataloader(X_test, y_test, config.batch_size, shuffle=False)

    # Initialize model
    model = DualRNN(
        input_size=config.input_size,
        hidden_size=config.hidden_size,
        dropout=config.dropout_percentage
    ).to(device)

    # Define loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        model.parameters(),
        lr=config.learning_rate,
        # The config stores the Adam beta pair under "momentum"; normalize it
        # to a tuple in case the config object hands it back as a list.
        betas=tuple(config.momentum),
        weight_decay=config.weight_decay
    )

    # Train the model
    model = train_model(model, train_loader, val_loader, test_loader, criterion, optimizer, config.epochs)

    # Save final model (weights of the last epoch, which may differ from the
    # best-validation checkpoint written during training).
    final_model_path = os.path.join(save_dir, "dual_rnn_final.pth")
    torch.save(model.state_dict(), final_model_path)
    wandb.save(final_model_path)

    wandb.finish()

Epoch 1/3
----------


Batch 100, Loss (MSE): 0.0572, MAE: 0.1891
Batch 200, Loss (MSE): 0.0448, MAE: 0.1404
Batch 300, Loss (MSE): 0.0383, MAE: 0.1236
Batch 400, Loss (MSE): 0.0341, MAE: 0.1135
Batch 500, Loss (MSE): 0.0314, MAE: 0.1105
Batch 600, Loss (MSE): 0.0294, MAE: 0.1076
Batch 700, Loss (MSE): 0.0278, MAE: 0.1043
Batch 800, Loss (MSE): 0.0265, MAE: 0.1043
Batch 900, Loss (MSE): 0.0256, MAE: 0.1053
Batch 1000, Loss (MSE): 0.0248, MAE: 0.1024
Batch 1100, Loss (MSE): 0.0241, MAE: 0.1032
Batch 1200, Loss (MSE): 0.0235, MAE: 0.1024
Batch 1300, Loss (MSE): 0.0229, MAE: 0.1010
Batch 1400, Loss (MSE): 0.0224, MAE: 0.1004
Batch 1500, Loss (MSE): 0.0221, MAE: 0.1029
Batch 1600, Loss (MSE): 0.0217, MAE: 0.1009
Batch 1700, Loss (MSE): 0.0214, MAE: 0.1006
Batch 1800, Loss (MSE): 0.0211, MAE: 0.1003
Batch 1900, Loss (MSE): 0.0209, MAE: 0.1023
Batch 2000, Loss (MSE): 0.0207, MAE: 0.1028
Batch 2100, Loss (MSE): 0.0205, MAE: 0.1014
Batch 2200, Loss (MSE): 0.0203, MAE: 0.0998
Batch 2300, Loss (MSE): 0.0202, MAE: 0.10

0,1
epoch,▁▁▁▁▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇████
test_loss,█▁
test_mae,█▁
test_mse,█▁
test_r2,▁█
train_batch_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_batch_mae,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_batch_mse,█▆▅▄▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_loss,█▂▁
train_mae,█▂▁

0,1
best_epoch,3.0
best_val_loss,0.0161
best_val_mae,0.10009
best_val_r2,0.04839
epoch,3.0
test_loss,0.01623
test_mae,0.10038
test_mse,0.01623
test_r2,0.04776
train_batch_loss,0.01638
