In [2]:
import numpy as np
import pickle

# Set random seed for reproducibility
np.random.seed(1337)

# Define paths
image_path = '/home/maria/Documents/HarvardData/Images'
session_ims = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/session_images.p','rb'))

# Construct full image paths
image_paths = np.array([f"{image_path}/{im.split('/')[2]}" for im in session_ims])

# Total number of images
n_total = len(session_ims)
print(f"Total number of images: {n_total}")

# Define the number of training samples
n_train = 1000

# Ensure that n_train does not exceed n_total
if n_train > n_total:
    raise ValueError("Number of training samples exceeds the total number of available images.")

# Randomly select unique training indices without replacement
training_path_inds = np.random.choice(n_total, size=n_train, replace=False)
training_paths = image_paths[training_path_inds]

# Determine test indices as those not in training_path_inds
test_inds = np.setdiff1d(np.arange(n_total), training_path_inds)
test_paths = image_paths[test_inds]

# Print shapes to verify
print(f"Training indices shape: {training_path_inds.shape}")  # Should be (1000,)
print(f"Number of test samples: {len(test_paths)}")           # Should be n_total - 1000

# Optional: Verify no overlap between training and test sets
overlap = np.intersect1d(training_paths, test_paths)
print(f"Number of overlapping images between training and test sets: {len(overlap)}")  # Should be 0

Total number of images: 1250
Training indices shape: (1000,)
Number of test samples: 250
Number of overlapping images between training and test sets: 0


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import pickle
import numpy as np

# Load data
dat = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/session_responses.p', 'rb'))
vit_train=np.load("Bo220226_training_set.npy")
vit_test=np.load("Bo220226_test_set.npy")
# Define the Mixture of Experts model
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import pickle
import numpy as np

# Load data
dat = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/session_responses.p', 'rb'))
dino = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/dinov2_features.p', 'rb'))

# Define the Mixture of Experts model
class MixtureOfExperts(nn.Module):
    def __init__(self, input_size, num_experts, hidden_size, output_size):
        super(MixtureOfExperts, self).__init__()
        self.num_experts = num_experts
        # Define experts
        self.experts = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_size, hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size, output_size)
            )
            for _ in range(num_experts)
        ])
        # Define gating network
        self.gate = nn.Sequential(
            nn.Linear(input_size, num_experts),
            nn.Softmax(dim=1)  # Outputs weights for each expert
        )
    
    def forward(self, x):
        # Get expert predictions
        expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1)  # Shape: [batch_size, num_experts, output_size]
        # Get gating weights
        gating_weights = self.gate(x)  # Shape: [batch_size, num_experts]
        # Combine expert outputs using gating weights
        out = torch.sum(gating_weights.unsqueeze(2) * expert_outputs, dim=1)  # Weighted sum, Shape: [batch_size, output_size]
        return out

# Preprocess data
X_train = torch.tensor(vit_train, dtype=torch.float32)  # Features
y_train = torch.tensor(dat[training_path_inds], dtype=torch.float32)  # Target with shape [n_samples, n_neurons]

X_test = torch.tensor(vit_test, dtype=torch.float32)  # Features
y_test = torch.tensor(dat[test_inds], dtype=torch.float32)  # Target with shape [n_samples, n_neurons]

# Hyperparameters
input_size = X_train.shape[1]
num_experts = 5  # Number of experts
hidden_size = 64
output_size = y_train.shape[1]  # Number of neurons
learning_rate = 0.001
num_epochs = 500
batch_size = 64

# Initialize model, loss, and optimizer
model = MixtureOfExperts(input_size, num_experts, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=10e-4)

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    
    # Print average loss for the epoch
    #if (epoch+1) % 10 == 0:
        #print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / (n_train // batch_size):.4f}")

# Evaluate variance explained (R²)
model.eval()
with torch.no_grad():
    y_train_pred = model(X_train).numpy()  # Shape [n_train, n_neurons]
    y_test_pred = model(X_test).numpy()    # Shape [n_test, n_neurons]

variance_explained_train = r2_score(y_train.numpy(), y_train_pred, multioutput="raw_values")
variance_explained_test = r2_score(y_test.numpy(), y_test_pred, multioutput="raw_values")

print(f"Variance explained (R²) on the training set: {variance_explained_train}")
print(f"Variance explained (R²) on the test set: {variance_explained_test}")

Variance explained (R²) on the training set: [0.8349749  0.8138107  0.42494756 0.73305345 0.68525225 0.842323
 0.8772022  0.7027802  0.7135692  0.65651894 0.8669124  0.6618984
 0.5850935  0.7388348  0.7991997  0.8575024  0.76986647 0.7032612
 0.5775132  0.82820976 0.68129796 0.67123973 0.8602554  0.47174907
 0.9310956  0.6202959  0.82132924 0.80876493 0.9222805  0.79376
 0.83649087 0.7181926  0.6347059  0.7531076  0.80820084 0.7126083
 0.72787476 0.84767586 0.5548817  0.82718146 0.6883861  0.8357412
 0.7818674  0.8684794  0.6813617  0.90233374 0.65151197 0.76616913
 0.8647225  0.8129101  0.68562996 0.68224835 0.9086205  0.6739396
 0.957567   0.99481875 0.823471   0.8300817  0.9682656  0.5404942
 0.79748523 0.92728287 0.8116495  0.633765  ]
Variance explained (R²) on the test set: [-0.23375165 -0.1251105  -0.3243153  -0.8235593  -0.3815062  -0.10669672
 -0.31467128 -0.41491437 -0.48566186 -0.24415779 -0.1787399  -0.66482234
 -0.41370142 -0.6764833  -0.5380199  -0.21142936 -0.29658878 -0

In [9]:
augmented_set=np.load("/home/maria/MousePipeline/harvard_pipeline/Bo220226_augmentations.npy")

y_augmented = model(torch.tensor(augmented_set, dtype=torch.float32))

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import pickle
import numpy as np

# Load data
dat = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/session_responses.p', 'rb'))
vit_train=np.load("Bo220226_training_set.npy")
vit_test=np.load("Bo220226_test_set.npy")
# Define the Mixture of Experts model
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import pickle
import numpy as np

# Load data
dat = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/session_responses.p', 'rb'))
dino = pickle.load(open('/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226/dinov2_features.p', 'rb'))

# Define the Mixture of Experts model
class MixtureOfExperts(nn.Module):
    def __init__(self, input_size, num_experts, hidden_size, output_size):
        super(MixtureOfExperts, self).__init__()
        self.num_experts = num_experts
        # Define experts
        self.experts = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_size, hidden_size),
                nn.ReLU(),
                nn.Linear(hidden_size, output_size)
            )
            for _ in range(num_experts)
        ])
        # Define gating network
        self.gate = nn.Sequential(
            nn.Linear(input_size, num_experts),
            nn.Softmax(dim=1)  # Outputs weights for each expert
        )
    
    def forward(self, x):
        # Get expert predictions
        expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1)  # Shape: [batch_size, num_experts, output_size]
        # Get gating weights
        gating_weights = self.gate(x)  # Shape: [batch_size, num_experts]
        # Combine expert outputs using gating weights
        out = torch.sum(gating_weights.unsqueeze(2) * expert_outputs, dim=1)  # Weighted sum, Shape: [batch_size, output_size]
        return out

# Preprocess data
X_train = torch.tensor(vit_train, dtype=torch.float32)  # Features
y_train = torch.tensor(dat[training_path_inds], dtype=torch.float32)  # Target with shape [n_samples, n_neurons]
X_augmented=torch.tensor(augmented_set, dtype=torch.float32)
# Concatenate the original training features with augmented features
X_train_combined = torch.cat((X_train, X_augmented), dim=0)  # Shape: [n_train + n_augmented, feature_dim]

# Concatenate the original training targets with augmented targets
y_train_combined = torch.cat((y_train, y_augmented), dim=0)  #

X_test = torch.tensor(vit_test, dtype=torch.float32)  # Features
y_test = torch.tensor(dat[test_inds], dtype=torch.float32)  # Target with shape [n_samples, n_neurons]

# Hyperparameters
input_size = X_train.shape[1]
num_experts = 5  # Number of experts
hidden_size = 64
output_size = y_train.shape[1]  # Number of neurons
learning_rate = 0.001
num_epochs = 500
batch_size = 64

# Initialize model, loss, and optimizer
model = MixtureOfExperts(input_size, num_experts, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=10e-4)

# Training loop
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for i in range(0, X_train.shape[0], batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        
        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
    
    # Print average loss for the epoch
    #if (epoch+1) % 10 == 0:
        #print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / (n_train // batch_size):.4f}")

# Evaluate variance explained (R²)
model.eval()
with torch.no_grad():
    y_train_pred = model(X_train).numpy()  # Shape [n_train, n_neurons]
    y_test_pred = model(X_test).numpy()    # Shape [n_test, n_neurons]

variance_explained_train = r2_score(y_train.numpy(), y_train_pred, multioutput="raw_values")
variance_explained_test = r2_score(y_test.numpy(), y_test_pred, multioutput="raw_values")

print(f"Variance explained (R²) on the training set: {variance_explained_train}")
print(f"Variance explained (R²) on the test set: {variance_explained_test}")

Combined training features shape: torch.Size([2000, 768])
Combined training targets shape: torch.Size([2000, 64])
Training batches: 29
Validation batches: 4
MixtureOfExperts(
  (experts): ModuleList(
    (0-2): 3 x Sequential(
      (0): Linear(in_features=768, out_features=128, bias=True)
      (1): ReLU()
      (2): Dropout(p=0.3, inplace=False)
      (3): Linear(in_features=128, out_features=64, bias=True)
    )
  )
  (gate): Sequential(
    (0): Linear(in_features=768, out_features=3, bias=True)
    (1): Softmax(dim=1)
  )
)




RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.