In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
import pickle
import numpy as np

# Load data
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at files from a trusted source.
# NOTE(review): absolute, machine-specific path -- adjust SESSION_DIR when
# running elsewhere.
SESSION_DIR = '/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226'

# Use context managers so the file handles are closed deterministically
# instead of being left open until garbage collection.
with open(f'{SESSION_DIR}/session_responses.p', 'rb') as fh:
    dat = pickle.load(fh)
with open(f'{SESSION_DIR}/dinov2_features.p', 'rb') as fh:
    dino = pickle.load(fh)

# Define the Mixture of Experts model
class MixtureOfExperts(nn.Module):
    """Softly-gated mixture of linear experts.

    Every expert is a single ``nn.Linear(input_size, output_size)`` layer. A
    gating network (linear layer followed by a softmax over the experts)
    produces one weight per expert for each input, and the model output is the
    gate-weighted sum of the expert outputs.

    Args:
        input_size: Size of the last (feature) dimension of the input.
        num_experts: Number of linear experts to mix.
        hidden_size: Accepted for interface compatibility; it is not used by
            the current architecture (the experts have no hidden layer).
        output_size: Size of the last dimension of the output.
    """

    def __init__(self, input_size, num_experts, hidden_size, output_size):
        super().__init__()
        self.num_experts = num_experts
        # Experts stay wrapped in nn.Sequential so parameter names
        # (experts.<k>.0.weight, ...) remain compatible with saved state_dicts.
        self.experts = nn.ModuleList([
            nn.Sequential(
                nn.Linear(input_size, output_size),
            )
            for _ in range(num_experts)
        ])
        # Gating network. The softmax normalises over the LAST dimension (the
        # expert axis), so it is correct regardless of how many leading
        # batch dimensions the input has.
        self.gate = nn.Sequential(
            nn.Linear(input_size, num_experts),
            nn.Softmax(dim=-1)
        )

    def forward(self, x):
        """Mix the expert predictions for ``x``.

        Args:
            x: Tensor of shape ``[..., input_size]``; the usual case is
                ``[batch_size, input_size]``, but an unbatched vector or
                additional leading dimensions are also accepted.

        Returns:
            Tensor of shape ``[..., output_size]``.
        """
        # Expert axis is inserted just before the output axis:
        # [..., num_experts, output_size]
        expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=-2)
        # [..., num_experts]; entries along the last axis sum to 1
        gating_weights = self.gate(x)
        # Broadcast the weights over the output axis and sum the experts out.
        return torch.sum(gating_weights.unsqueeze(-1) * expert_outputs, dim=-2)

# Preprocess data
# Seed the RNG before the model is built so weight initialisation -- and
# therefore the reported R² values -- are reproducible across kernel restarts.
SEED = 0
torch.manual_seed(SEED)

X = torch.tensor(dino, dtype=torch.float32)  # Features
y = torch.tensor(dat, dtype=torch.float32)  # Target with shape [n_samples, n_neurons]
assert X.shape[0] == y.shape[0], "features and responses must have the same number of samples"

# Leave the last 20% of the data for testing (split by position, no shuffling)
n_samples = X.shape[0]
n_train = int(0.8 * n_samples)

X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

# Hyperparameters
input_size = X_train.shape[1]
num_experts = 5  # Number of experts
hidden_size = 64  # Passed to the model constructor
output_size = y.shape[1]  # Number of neurons
learning_rate = 0.001
num_epochs = 500
batch_size = 64
# Same value as the original literal `10e-4` (which is 1e-3, not 1e-4).
# NOTE(review): if 1e-4 was intended, change it here.
weight_decay = 1e-3

# Initialize model, loss, and optimizer
model = MixtureOfExperts(input_size, num_experts, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Training loop
# NOTE(review): mini-batches are visited in the same fixed order every epoch;
# consider shuffling the training indices per epoch.
train_loss_history = []  # Mean training loss per epoch, for later inspection/plotting
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0
    for i in range(0, n_train, batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Weight by the batch size: the final batch may be smaller than
        # batch_size, so a plain average over batches would be biased.
        epoch_loss += loss.item() * X_batch.shape[0]

    # Sample-weighted mean loss for this epoch
    train_loss_history.append(epoch_loss / max(n_train, 1))

# Evaluate variance explained (R²)
model.eval()
with torch.no_grad():
    y_train_pred = model(X_train).numpy()  # Shape [n_train, n_neurons]
    y_test_pred = model(X_test).numpy()    # Shape [n_test, n_neurons]

# multioutput="raw_values" yields one R² per output column (neuron)
variance_explained_train = r2_score(y_train.numpy(), y_train_pred, multioutput="raw_values")
variance_explained_test = r2_score(y_test.numpy(), y_test_pred, multioutput="raw_values")

print(f"Variance explained (R²) on the training set: {variance_explained_train}")
print(f"Variance explained (R²) on the test set: {variance_explained_test}")

# One-number summary across neurons, in the same format as the
# gradient-boosting cell so the two models can be compared directly.
print(f"Mean R² on the training set: {np.mean(variance_explained_train):.4f}")
print(f"Mean R² on the test set: {np.mean(variance_explained_test):.4f}")


Variance explained (R²) on the training set: [0.9527528  0.9381931  0.96555436 0.9501871  0.943406   0.96696633
 0.95197284 0.95345265 0.94243646 0.96255726 0.926697   0.9516869
 0.9525622  0.95349556 0.9475264  0.9433099  0.9621101  0.94202834
 0.956881   0.9372827  0.96248966 0.9478694  0.94123274 0.9663038
 0.93710375 0.9557915  0.9467427  0.9403641  0.93300885 0.9386936
 0.93176067 0.933885   0.9637079  0.94783753 0.94989383 0.9743374
 0.9493982  0.9384288  0.9716013  0.9534048  0.9482876  0.956078
 0.9471043  0.9520186  0.9496851  0.9530029  0.9418854  0.94002986
 0.95057607 0.95658857 0.9579884  0.96112096 0.9460712  0.9479249
 0.92547154 0.71191645 0.96214634 0.9478723  0.9291166  0.96552646
 0.9309148  0.93525404 0.93012834 0.9540305 ]
Variance explained (R²) on the test set: [-0.85414577 -0.78867185 -1.2811143  -1.3728731  -1.2536304  -0.35286176
 -0.4562074  -0.9741708  -1.4264975  -1.2995806  -0.6586654  -1.4967833
 -1.5886977  -1.3701291  -0.6749073  -0.5448824  -0.982445  

In [19]:
import pickle
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score

# Load data
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at files from a trusted source.
# NOTE(review): absolute, machine-specific path -- adjust SESSION_DIR when
# running elsewhere.
SESSION_DIR = '/home/maria/Documents/HarvardData/processed_sessions_v3/Bo220226'

# Use context managers so the file handles are closed deterministically
# instead of being left open until garbage collection.
with open(f'{SESSION_DIR}/session_responses.p', 'rb') as fh:
    dat = pickle.load(fh)
with open(f'{SESSION_DIR}/dinov2_features.p', 'rb') as fh:
    dino = pickle.load(fh)

# Preprocess data
# Coerce to ndarrays so the .shape access and 2-D slicing below work even if
# the pickles hold another array-like type (no-op for ndarrays).
X = np.asarray(dino)  # Features
y = np.asarray(dat)  # Targets (neural activity) with shape [n_samples, n_neurons]
assert X.shape[0] == y.shape[0], "features and responses must have the same number of samples"

# Leave the last 20% of the data for testing (split by position, no shuffling)
n_samples = X.shape[0]
n_train = int(0.8 * n_samples)

X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

# Initialize a GradientBoostingRegressor for each neuron
n_neurons = y.shape[1]
models = []
r2_scores_train = []
r2_scores_test = []

# Fixed seed so the fitted trees, and hence the reported R², are reproducible.
SEED = 0

# Train a separate model for each neuron
for neuron_idx in range(n_neurons):
    model = GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        random_state=SEED,
    )
    model.fit(X_train, y_train[:, neuron_idx])  # Train on this neuron's activity
    models.append(model)

    # Predict for this neuron
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Calculate R² scores
    r2_train = r2_score(y_train[:, neuron_idx], y_train_pred)
    r2_test = r2_score(y_test[:, neuron_idx], y_test_pred)

    r2_scores_train.append(r2_train)
    r2_scores_test.append(r2_test)

# Print variance explained (R²) for each neuron
print("Variance explained (R²) on the training set for each neuron:", r2_scores_train)
print("Variance explained (R²) on the test set for each neuron:", r2_scores_test)

# Mean R² across all neurons
print(f"Mean R² on the training set: {np.mean(r2_scores_train):.4f}")
print(f"Mean R² on the test set: {np.mean(r2_scores_test):.4f}")


Variance explained (R²) on the training set for each neuron: [0.7970113655192915, 0.8056333321882915, 0.7126843482731282, 0.7678243344359418, 0.7405105986964463, 0.8111625779034071, 0.799938590419078, 0.7354060569333043, 0.7469261273240286, 0.7361924925784435, 0.792606718347316, 0.7604521667122904, 0.713761461112131, 0.7754371433003728, 0.7512113936169246, 0.8059645743612212, 0.7701486538240004, 0.6947529816472339, 0.7214695850181305, 0.7641990240870085, 0.7487137200767173, 0.7243423020773492, 0.807718972549369, 0.7382277282053423, 0.8286858715052748, 0.7259220434148288, 0.7555905076094305, 0.7587126161129287, 0.810729815645135, 0.7311482582607586, 0.7475223441913976, 0.7454013832376967, 0.7231105634930435, 0.7465019782402849, 0.7639901980218509, 0.7890528488990551, 0.7615541104625876, 0.8178673375526321, 0.7375746551612632, 0.8085261521977188, 0.7499719965427833, 0.8084209077294616, 0.782997048309156, 0.8149794066550237, 0.7258519441784792, 0.8161828636395753, 0.6675897866469795, 0.76