In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm  # Import tqdm for progress tracking


class TrajectoryDataset(Dataset):
    """Sliding-window dataset built from a table holding one series per row.

    Every row of ``dataframe`` is cut into all overlapping windows of
    ``window_length + 1`` consecutive values (stride 1). Within a sample the
    first ``window_length`` values are the model inputs and the last value is
    the target; the split itself is done by the consumer of the dataset.
    """

    def __init__(self, dataframe, window_length=100):
        """
        Args:
        - dataframe: pandas DataFrame whose rows are series and whose columns are time steps
        - window_length: Number of input values per sample (one extra value is kept as target)

        Raises:
        - ValueError: If the table is not 2-D or a row is shorter than ``window_length + 1``
        """
        windows = self.custom_transformation(dataframe.to_numpy(), window_length=window_length)
        self.data = torch.tensor(windows, dtype=torch.float32)

    def __len__(self):
        # Number of (inputs + target) samples across all series
        return self.data.shape[0]

    def __getitem__(self, idx):
        # One sample: a 1-D tensor of window_length + 1 values
        return self.data[idx]

    def custom_transformation(self, dataframe_array, window_length):
        """Slice every row into overlapping windows of ``window_length + 1`` values.

        Args:
        - dataframe_array: 2-D array of shape (num_series, num_timesteps)
        - window_length: Number of input values per sample

        Returns:
        - 2-D array of shape (num_series * (num_timesteps - window_length), window_length + 1)

        Raises:
        - ValueError: If the array is not 2-D or has fewer than ``window_length + 1`` columns
        """
        dataframe_array = np.asarray(dataframe_array)
        if dataframe_array.ndim != 2:
            raise ValueError(
                f"Expected a 2-D array of series, got {dataframe_array.ndim} dimension(s)"
            )

        sample_length = window_length + 1  # one extra column serves as the target
        num_cols = dataframe_array.shape[1]
        if sample_length < 1 or sample_length > num_cols:
            raise ValueError(
                f"window_length={window_length} needs between 1 and {num_cols} values per "
                f"sample (inputs + target), but each series has {num_cols} time steps"
            )

        # sliding_window_view returns a strided view (no data is copied here)
        # of shape (num_series, num_windows, sample_length)
        windows = np.lib.stride_tricks.sliding_window_view(
            dataframe_array, window_shape=(sample_length,), axis=1
        )

        # Collapse the series and window axes so each row is one training sample
        return windows.reshape(-1, sample_length)
    
# Implement your model
class MLP(nn.Module):
    """Feed-forward regressor with three shrinking hidden layers.

    The input is layer-normalised and lightly dropped out, then passed through
    three Linear -> BatchNorm1d -> SiLU -> Dropout stages of width
    ``hidden_size``, ``hidden_size // 2`` and ``hidden_size // 4`` before a
    final linear projection to ``output_size``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()

        # Widths of the three hidden stages
        self.h1_size, self.h2_size, self.h3_size = (
            hidden_size,
            hidden_size // 2,
            hidden_size // 4,
        )

        # Pre-processing applied to the raw input window
        self.input_norm = nn.LayerNorm(input_size)
        self.input_dropout = nn.Dropout(0.1)

        # Hidden stages: a linear map followed by batch normalisation
        self.fc1 = nn.Linear(input_size, self.h1_size)
        self.bn1 = nn.BatchNorm1d(self.h1_size)
        self.fc2 = nn.Linear(self.h1_size, self.h2_size)
        self.bn2 = nn.BatchNorm1d(self.h2_size)
        self.fc3 = nn.Linear(self.h2_size, self.h3_size)
        self.bn3 = nn.BatchNorm1d(self.h3_size)

        # Projection from the last hidden stage to the prediction
        self.fc_out = nn.Linear(self.h3_size, output_size)

        # Shared non-linearity and regularisation used by every hidden stage
        self.silu = nn.SiLU()
        self.dropout = nn.Dropout(0.2)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise every Linear layer: Kaiming-normal weights, zero biases."""
        linear_layers = (module for module in self.modules() if isinstance(module, nn.Linear))
        for layer in linear_layers:
            nn.init.kaiming_normal_(layer.weight, mode='fan_in', nonlinearity='relu')
            if layer.bias is not None:
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Map a batch of input windows to a batch of predictions."""
        x = self.input_dropout(self.input_norm(x))

        # The three hidden stages share the same structure, so run them in a loop
        hidden_stages = (
            (self.fc1, self.bn1),
            (self.fc2, self.bn2),
            (self.fc3, self.bn3),
        )
        for linear, norm in hidden_stages:
            x = self.dropout(self.silu(norm(linear(x))))

        return self.fc_out(x)

In [2]:
import os

# Directory that holds train.csv, val.csv and test.csv. It defaults to the
# original location; set the TRAFFIC_DATA_DIR environment variable to run the
# notebook against data stored somewhere else.
data_dir = os.environ.get('TRAFFIC_DATA_DIR', '/home/pkancha3/Desktop')

train_path = os.path.join(data_dir, 'train.csv')
val_path = os.path.join(data_dir, 'val.csv')
test_path = os.path.join(data_dir, 'test.csv')

# The first CSV row is the header; the 'ids' column is dropped so that only
# the remaining columns are passed on to the dataset and the model.
train_df = pd.read_csv(train_path, header=0).drop(columns='ids')
val_df = pd.read_csv(val_path, header=0).drop(columns='ids')
test_df = pd.read_csv(test_path, header=0).drop(columns='ids')


In [3]:
# Use a CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

window_length = 100  # Number of past values fed to the model per sample
dataset = TrajectoryDataset(dataframe=train_df, window_length=window_length)
batch_size = 128
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model hyperparameters
input_size = window_length  # Each sample has window_length inputs plus one trailing target value
hidden_size = 512
output_size = 1  # Single output for time series forecast (next value)
learning_rate = 0.001
num_epochs = 1

# Instantiate the model, loss function, and optimizer
model = MLP(input_size, hidden_size, output_size).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

# Training loop with tqdm for progress tracking
for epoch in tqdm(range(num_epochs), desc="Epochs", unit="epoch"):
    model.train()
    running_loss = 0.0
    num_batches = 0
    # Iterating the DataLoader directly lets tqdm read len(dataloader), so the
    # inner bar shows a percentage and an ETA instead of a bare counter.
    for data in tqdm(dataloader, desc=f"Epoch {epoch + 1}", unit="batch", leave=False):
        # BatchNorm1d cannot compute batch statistics from a single sample in
        # training mode and raises; skip a trailing one-sample batch instead.
        if data.shape[0] < 2:
            continue

        # Separate inputs and targets
        inputs = data[:, :-1].to(device)  # All except last column
        targets = data[:, -1].to(device)  # Last column is the target (next value)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Drop only the trailing size-1 feature axis so the prediction keeps
        # its batch axis and always lines up with `targets`.
        loss = criterion(outputs.squeeze(-1), targets)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Print the average loss over the batches that were actually trained on
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / max(num_batches, 1):.4f}')

Epochs:   0%|          | 0/1 [00:00<?, ?epoch/s]
Epoch 1: 0batch [00:00, ?batch/s][A
Epoch 1: 1batch [00:00,  2.27batch/s][A
Epoch 1: 51batch [00:00, 123.63batch/s][A
Epoch 1: 102batch [00:00, 222.45batch/s][A
Epoch 1: 153batch [00:00, 298.29batch/s][A
Epoch 1: 204batch [00:00, 355.33batch/s][A
Epoch 1: 255batch [00:00, 397.03batch/s][A
Epoch 1: 306batch [00:01, 427.19batch/s][A
Epoch 1: 357batch [00:01, 449.27batch/s][A
Epoch 1: 408batch [00:01, 465.13batch/s][A
Epoch 1: 459batch [00:01, 475.56batch/s][A
Epoch 1: 509batch [00:01, 482.23batch/s][A
Epoch 1: 559batch [00:01, 486.83batch/s][A
Epoch 1: 609batch [00:01, 490.29batch/s][A
Epoch 1: 659batch [00:01, 492.56batch/s][A
Epoch 1: 709batch [00:01, 494.34batch/s][A
Epoch 1: 759batch [00:01, 495.45batch/s][A
Epoch 1: 809batch [00:02, 496.20batch/s][A
Epoch 1: 859batch [00:02, 497.16batch/s][A
Epoch 1: 909batch [00:02, 497.58batch/s][A
Epoch 1: 959batch [00:02, 497.83batch/s][A
Epoch 1: 1009batch [00:02, 498.06batch

Epoch [1/1], Loss: 0.0016





In [11]:
# Turn the train, validation and test tables into float32 tensors on the compute device
train_set, val_set, test_set = (
    torch.tensor(frame.values.astype(np.float32), dtype=torch.float32).to(device)
    for frame in (train_df, val_df, test_df)
)

# Default forecast horizon: one prediction per column of the validation set
points_to_predict = val_set.size(1)

def autoregressive_predict(model, input_matrix, prediction_length=None):
    """
    Roll the model forward one step at a time, feeding each prediction back in.

    At every step the model is applied to the current window, its output is
    appended to the result, and the window is shifted left by one position
    with the new prediction placed at the end.

    Args:
    - model: The trained PyTorch model; it is switched to evaluation mode and
      left in that mode. It must return one column per call, i.e. a tensor of
      shape (batch_size, 1), so the window keeps its width.
    - input_matrix: Initial input matrix (shape: batch_size x window_length).
      It is not modified.
    - prediction_length: Number of future steps to predict. When None, the
      notebook-level ``points_to_predict`` is used.

    Returns:
    - Tensor of shape (batch_size, prediction_length) on the same device as
      the model output (an empty (batch_size, 0) tensor for zero steps).
    """
    if prediction_length is None:
        prediction_length = points_to_predict

    model.eval()  # Disable dropout and use BatchNorm running statistics
    current_input = input_matrix  # never modified in place, so no copy is needed
    step_predictions = []

    with torch.no_grad():
        for _ in tqdm(range(prediction_length), desc="Generating predictions"):
            predictions = model(current_input)
            step_predictions.append(predictions)

            # Slide the window: drop the oldest value, append the newest prediction
            current_input = torch.cat((current_input[:, 1:], predictions), dim=1)

    if not step_predictions:
        # torch.cat rejects an empty list; keep the (batch_size, 0) result shape
        return torch.empty(input_matrix.shape[0], 0, device=input_matrix.device)

    # Concatenate once at the end instead of re-copying the growing output every step
    return torch.cat(step_predictions, dim=1)

# Start evaluation process
print("Starting evaluation...")

# Seed the forecast with the last `window_length` training values of every series
initial_input = train_set[:, -window_length:]

# Generate one prediction per validation time step
print("Generating validation predictions...")
val_predictions = autoregressive_predict(model, initial_input, prediction_length=val_set.shape[1])

# Fail loudly here rather than letting the loss broadcast mismatched shapes
assert val_predictions.shape == val_set.shape, (
    f"Prediction shape {tuple(val_predictions.shape)} does not match "
    f"validation shape {tuple(val_set.shape)}"
)

# Calculate metrics
with torch.no_grad():
    # MSE Loss
    mse_loss = nn.MSELoss()
    validation_mse = mse_loss(val_predictions, val_set)

    # Additional metrics
    mae = torch.abs(val_predictions - val_set).mean()
    rmse = torch.sqrt(validation_mse)

print("\nEvaluation Metrics:")
print(f"MSE: {validation_mse.item():.6f}")
print(f"MAE: {mae.item():.6f}")
print(f"RMSE: {rmse.item():.6f}")

# NumPy copy of the predictions for plotting. It is defined here so the cell
# also runs on a fresh kernel instead of relying on a leftover variable.
val_predictions_np = val_predictions.cpu().numpy()

# Plot at most the first three predicted series, one figure per series
num_plots = min(3, val_predictions_np.shape[0])

# Shared upper y-limit so the figures are comparable; computed once, outside the loop
y_upper = max(val_predictions_np[:3, :100].max() * 1.1, 0.3) if num_plots else 0.3

for i in range(num_plots):
    plt.figure(figsize=(4,4))

    # Get trajectory for validation region (first 100 points)
    trajectory = val_predictions_np[i, :100]

    # Plot with specified formatting
    plt.plot(trajectory,
             color='black',
             linewidth=3,
             linestyle='-')

    plt.title(f'Trajectory {i}')
    plt.xlabel('Time Step')
    plt.ylabel('Value')
    plt.grid(True)

    # Ensure consistent axis limits across plots
    plt.ylim(0, y_upper)

    plt.tight_layout()
    plt.savefig(f'trajectory_img{i}.png', dpi=200, bbox_inches='tight')
    plt.close()

# Generate test predictions, continuing from the end of the validation forecast
# and using the test set's own length as the horizon
print("\nGenerating test predictions...")
initial_input = val_predictions[:, -window_length:]
test_predictions = autoregressive_predict(model, initial_input, prediction_length=test_set.shape[1])

# Save predictions if needed (best effort: a failed save is reported, not fatal)
try:
    torch.save({
        'validation_predictions': val_predictions.cpu(),
        'test_predictions': test_predictions.cpu(),
        'metrics': {
            'mse': validation_mse.item(),
            'mae': mae.item(),
            'rmse': rmse.item()
        }
    }, 'prediction_results.pt')
except Exception as e:
    print(f"Error saving results: {str(e)}")

Starting evaluation...
Generating validation predictions...


Generating predictions: 100%|██████████| 1500/1500 [00:00<00:00, 2566.04it/s]



Evaluation Metrics:
MSE: 0.002015
MAE: 0.029032
RMSE: 0.044890

Generating test predictions...


Generating predictions: 100%|██████████| 1500/1500 [00:00<00:00, 2560.66it/s]


In [5]:
def _flatten_predictions(original_df, predictions, split_name):
    """Return (ids, values) for one split, ordered column by column."""
    num_rows = len(original_df)
    ids = [
        f"{col}_traffic_{split_name}_{row_idx}"
        for col in original_df.columns[1:]  # Skip the 'ids' column
        for row_idx in range(num_rows)
    ]
    # Transposing before flattening walks the matrix column by column,
    # which is exactly the order in which the ids above are generated.
    values = predictions.T.reshape(-1).astype(np.float64)
    return ids, values


def generate_submissions_v4(pred_val_tensor, pred_test_tensor, original_val_path, original_test_path,
                            output_path='submissions.csv'):
    """
    Generate competition submission file from model predictions.

    Each prediction matrix must have one row per row of the matching CSV and
    one column per CSV column after the first ('ids') column. The output has
    one line per cell, with id ``<column>_traffic_<split>_<row index>``,
    validation cells first, then test cells.

    Args:
    - pred_val_tensor: Validation predictions (torch tensor or array-like)
    - pred_test_tensor: Test predictions (torch tensor or array-like)
    - original_val_path: Path to original validation CSV
    - original_test_path: Path to original test CSV
    - output_path: Where the submission CSV is written

    Raises:
    - AssertionError: If a prediction matrix does not have the expected shape
      or the assembled submission fails a sanity check
    """
    # Read the original validation and testing datasets
    original_val_df = pd.read_csv(original_val_path)
    original_test_df = pd.read_csv(original_test_path)

    # Move tensors to CPU and convert to numpy if needed
    if torch.is_tensor(pred_val_tensor):
        pred_val_tensor = pred_val_tensor.detach().cpu().numpy()
    if torch.is_tensor(pred_test_tensor):
        pred_test_tensor = pred_test_tensor.detach().cpu().numpy()
    pred_val = np.asarray(pred_val_tensor)
    pred_test = np.asarray(pred_test_tensor)

    # Compare full shapes, not just element counts: a transposed matrix has
    # the right number of elements but would be indexed incorrectly.
    expected_val_shape = (original_val_df.shape[0], original_val_df.shape[1] - 1)
    expected_test_shape = (original_test_df.shape[0], original_test_df.shape[1] - 1)
    assert pred_val.shape == expected_val_shape, \
        "Validation predictions shape mismatch"
    assert pred_test.shape == expected_test_shape, \
        "Test predictions shape mismatch"

    val_ids, val_values = _flatten_predictions(original_val_df, pred_val, "val")
    test_ids, test_values = _flatten_predictions(original_test_df, pred_test, "test")

    # Create the submissions dataframe
    submissions_df = pd.DataFrame({
        "ids": val_ids + test_ids,
        "value": np.concatenate([val_values, test_values])
    })

    # Verify data quality
    # Check for NaN values
    if submissions_df.isna().any().any():
        print("Warning: NaN values detected in predictions. Filling with 100...")
        submissions_df = submissions_df.fillna(100)

    # Check for negative values
    if (submissions_df['value'] < 0).any():
        print("Warning: Negative values detected. Clipping to 0...")
        submissions_df['value'] = submissions_df['value'].clip(lower=0)

    # Final assertions
    assert submissions_df.shape[1] == 2, "Submission should have exactly 2 columns"
    assert submissions_df.shape[0] == (original_val_df.shape[0] * (original_val_df.shape[1] - 1)) + \
           (original_test_df.shape[0] * (original_test_df.shape[1] - 1)), "Incorrect number of predictions"
    assert "ids" in submissions_df.columns, "Missing 'ids' column"
    assert "value" in submissions_df.columns, "Missing 'value' column"

    # Save to CSV without index
    submissions_df.to_csv(output_path, index=False)
    print(f"Submission file created successfully with {len(submissions_df)} predictions")

    # Print sample of predictions
    print("\nFirst few predictions:")
    print(submissions_df.head())
    print("\nLast few predictions:")
    print(submissions_df.tail())

# Generate submission file, reusing the CSV paths defined when the data was
# loaded so the file locations are specified in exactly one place
generate_submissions_v4(
    val_predictions,
    test_predictions,
    val_path,
    test_path
)

Submission file created successfully with 2889000 predictions

First few predictions:
                           ids     value
0  2008-11-20 01_traffic_val_0  0.019827
1  2008-11-20 01_traffic_val_1  0.021939
2  2008-11-20 01_traffic_val_2  0.021669
3  2008-11-20 01_traffic_val_3  0.016623
4  2008-11-20 01_traffic_val_4  0.018902

Last few predictions:
                                    ids     value
2888995  2009-03-31 00_traffic_test_958  0.098091
2888996  2009-03-31 00_traffic_test_959  0.081974
2888997  2009-03-31 00_traffic_test_960  0.087486
2888998  2009-03-31 00_traffic_test_961  0.090306
2888999  2009-03-31 00_traffic_test_962  0.056568
