In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import random_split
import random
import time
from torchvision import datasets, transforms

def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU, plus CUDA when it is available), then puts cuDNN into
    deterministic mode with benchmark mode switched off.

    Args:
        seed: Value handed unchanged to each seeding function.
    """
    # The CPU-side seeders all take the seed as their only argument.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)      # current CUDA device
        torch.cuda.manual_seed_all(seed)  # all CUDA devices

    # Trade cuDNN autotuning for repeatable kernels.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


# Fix the seed once, up front, so repeated runs are comparable.
set_seed(42)

In [None]:
class ThoughtLayer(nn.Module):
    """Sigmoid layer whose neurons additionally carry a learnable 1D position.

    The forward pass computes ``sigmoid(x @ weights.T + biases)`` and then adds
    a per-neuron offset that depends only on the neuron positions:

    * a *nearby* term driven by the mean distance to the other neurons closer
      than ``influence_radius``;
    * a *distant* term that summarises every neuron at least
      ``influence_radius * barnes_hut_theta`` away by its mean position
      (a centre-of-mass style approximation).

    While the module is in training mode, neurons that exceeded
    ``activation_threshold`` anywhere in the batch are also pulled slightly
    towards their common mean position (a no-grad, in-place update).

    Args:
        input_size: Number of input features per sample.
        num_neurons: Number of neurons (output features).
        activation_threshold: Activation above which a neuron counts as
            "high value" for the position update.
        learning_rate_position: Step size of the position update.
        influence_radius: Distance below which two neurons are "nearby"; also
            the length scale of both influence terms.
        influence_strength: Scale of both influence terms.
        barnes_hut_theta: Fraction of ``influence_radius`` from which a neuron
            is treated as "distant".
    """

    def __init__(self, input_size, num_neurons, activation_threshold=0.8,
                 learning_rate_position=0.01, influence_radius=0.5,
                 influence_strength=0.2, barnes_hut_theta=0.7):
        super().__init__()
        # Creation order (weights, biases, positions) is kept so that a given
        # RNG seed produces the same initial parameters as before.
        self.weights = nn.Parameter(torch.randn(num_neurons, input_size))
        self.biases = nn.Parameter(torch.randn(num_neurons))
        self.positions = nn.Parameter(torch.rand(num_neurons))  # 1D positions in [0, 1)

        self.activation_threshold = activation_threshold
        self.learning_rate_position = learning_rate_position
        self.influence_radius = influence_radius
        self.influence_strength = influence_strength
        self.barnes_hut_theta = barnes_hut_theta

    def forward(self, x):
        """Return the position-adjusted activations, clamped to [0, 1].

        Args:
            x: Input whose last dimension is ``input_size``. A 4D input
                (batch, channels, height, width) is flattened to
                (batch, channels * height * width) first.

        Returns:
            Tensor shaped like ``x`` with the last dimension replaced by
            ``num_neurons``.
        """
        if x.dim() == 4:
            # reshape (unlike view) also accepts non-contiguous image batches.
            x = x.reshape(x.size(0), -1)

        activations = torch.sigmoid(torch.matmul(x, self.weights.T) + self.biases)

        # Only adapt positions while training, so evaluation passes cannot
        # silently change the model's state.
        if self.training:
            self.update_positions(activations)

        return self.influence_neurons(activations)

    def update_positions(self, activations):
        """Pull the neurons that fired strongly towards their mean position.

        A neuron is selected when its activation exceeds
        ``activation_threshold`` for at least one sample. Nothing happens
        unless at least two distinct neurons are selected.

        Args:
            activations: Tensor whose last dimension indexes neurons.
        """
        with torch.no_grad():
            fired = activations > self.activation_threshold
            # Collapse every leading (batch) dimension: the result is indexed
            # by neuron, which is what ``positions`` is indexed by.
            active = fired.reshape(-1, fired.size(-1)).any(dim=0)
            if int(active.sum()) < 2:
                return

            cluster = self.positions[active]
            self.positions[active] = cluster - self.learning_rate_position * (cluster - cluster.mean())

    def influence_neurons(self, activations):
        """Add the position-based influence and clamp the result to [0, 1].

        Args:
            activations: Tensor whose last dimension indexes neurons.

        Returns:
            ``activations`` plus the per-neuron influence (broadcast over any
            leading dimensions), clamped to the range [0, 1].
        """
        return torch.clamp(activations + self._position_influence(), 0, 1)

    def _position_influence(self):
        """Compute the per-neuron influence vector (shape ``[num_neurons]``)."""
        pos = self.positions
        # Distances depend only on positions, so a single (N, N) matrix serves
        # the whole batch.
        dist = (pos.unsqueeze(1) - pos.unsqueeze(0)).abs()
        zero = torch.zeros_like(pos)

        # Nearby term: mean distance to the *other* neurons inside the radius.
        # Neurons outside the radius are excluded from the average instead of
        # being counted as distance zero.
        not_self = ~torch.eye(pos.numel(), dtype=torch.bool, device=pos.device)
        near = (dist < self.influence_radius) & not_self
        near_count = near.sum(dim=1)
        mean_near_dist = (dist * near).sum(dim=1) / near_count.clamp(min=1)
        near_influence = self.influence_strength * (1 - mean_near_dist / self.influence_radius)
        near_influence = torch.where(near_count > 0, near_influence, zero)

        # Distant term: each neuron sees all distant neurons as one point at
        # their mean position. Because theta < 1 by default, the band
        # [radius * theta, radius) contributes to both terms. The value turns
        # negative once that mean position is further away than the radius.
        far = dist >= self.influence_radius * self.barnes_hut_theta
        far_count = far.sum(dim=1)
        far_centre = (far.to(pos.dtype) @ pos) / far_count.clamp(min=1)
        far_influence = self.influence_strength * (1 - (pos - far_centre).abs() / self.influence_radius)
        far_influence = torch.where(far_count > 0, far_influence, zero)

        return near_influence + far_influence

In [None]:
class ThoughtModel(nn.Module):
    """Two stacked ``ThoughtLayer`` blocks followed by a linear output layer.

    Args:
        input_size: Number of features fed to the first thought layer.
        hidden_size: Number of neurons in the second thought layer, and the
            input width of the final linear layer.
        output_size: Number of values produced per sample.
        num_custom_neurons: Number of neurons in the first thought layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_custom_neurons):
        super().__init__()

        # Sub-modules are created in data-flow order.
        self.thought_layer = ThoughtLayer(input_size, num_custom_neurons)
        self.thought_layer2 = ThoughtLayer(num_custom_neurons, hidden_size)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through both thought layers, a ReLU and the linear head."""
        first_hidden = self.thought_layer(x)
        second_hidden = torch.relu(self.thought_layer2(first_hidden))
        return self.fc(second_hidden)

In [None]:
# Convert images to tensors and normalize them.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean / std - confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Load the official MNIST train / test splits (downloaded into ./data if missing).
full_train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Hold out 20% of the training images for validation.
train_size = int(0.8 * len(full_train_dataset))  # 80% for training
val_size = len(full_train_dataset) - train_size  # remaining 20% for validation

# A dedicated, explicitly seeded generator makes the split independent of how
# much of the global RNG stream was consumed earlier, so re-running this cell
# always yields the same train / validation membership.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_train_dataset, [train_size, val_size], generator=split_generator
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Use the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Network dimensions - they have to agree with the data fed to the model.
input_size = 28 * 28     # 784 values per flattened 28x28 image
hidden_size = 128        # neurons in the second thought layer (tunable)
output_size = 10         # number of classes
num_custom_neurons = 64  # neurons in the first thought layer (tunable)

# Build the network and place it on the selected device in one step.
model = ThoughtModel(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    num_custom_neurons=num_custom_neurons,
).to(device)

# Cross-entropy loss for classification, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Hyperparameters
num_epochs = 30

# Per-epoch training metrics
train_losses = []
train_accuracies = []

# Per-epoch snapshots of the neuron positions of both thought layers
all_positions_layer1 = []
all_positions_layer2 = []

# Training loop
for epoch in range(num_epochs):
    start_time = time.time()
    model.train()  # Set the model to training mode
    epoch_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in train_loader:
        # Move data to the device
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Clear previous gradients

        # Forward pass, loss, backward pass, parameter update
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size;
        # otherwise a smaller final batch would be over-represented.
        batch_count = labels.size(0)
        epoch_loss += loss.item() * batch_count
        total += batch_count
        correct += (outputs.detach().argmax(dim=1) == labels).sum().item()

    # Per-sample average loss and accuracy for this epoch
    avg_loss = epoch_loss / total
    accuracy = correct / total

    # Store the metrics
    train_losses.append(avg_loss)
    train_accuracies.append(accuracy)

    # Snapshot the neuron positions. The explicit copy matters: for a CPU
    # tensor, .cpu() is a no-op and .numpy() shares memory with the parameter,
    # so without it every stored epoch would end up showing the final positions.
    all_positions_layer1.append(model.thought_layer.positions.detach().cpu().numpy().copy())
    all_positions_layer2.append(model.thought_layer2.positions.detach().cpu().numpy().copy())

    epoch_time = time.time() - start_time  # Wall-clock duration of the epoch
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}, Time per epoch: {epoch_time:.2f} seconds")

# Persist the trained weights
torch.save(model.state_dict(), 'model.pth')

AttributeError: 'FullyConnectedModel' object has no attribute 'thought_layer'

In [None]:
# Containers for the validation metrics (filled once, after training)
val_losses = []
val_accuracies = []

# Evaluation phase
model.eval()  # Switch the model to evaluation mode
val_epoch_loss, val_correct, val_total = 0.0, 0, 0

with torch.no_grad():  # No gradients are needed while evaluating
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)

        # Accumulate the per-batch validation loss
        val_loss = criterion(outputs, labels)
        val_epoch_loss += val_loss.item()

        # Predicted class = index of the largest output per sample
        predicted = torch.max(outputs.data, 1)[1]
        val_correct += (predicted == labels).sum().item()
        val_total += labels.size(0)

# Mean of the batch losses, and the fraction of correctly classified samples
avg_val_loss = val_epoch_loss / len(val_loader)
val_accuracy = val_correct / val_total

# Store the validation metrics
val_losses.append(avg_val_loss)
val_accuracies.append(val_accuracy)

print(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

# Report the total number of parameters in the model
total_params = sum(param.numel() for param in model.parameters())
print(f"Total number of parameters: {total_params}")

In [None]:
def plot_neuron_positions_1d(all_positions_layer1, all_positions_layer2):
    """Draw one figure per epoch showing the 1D neuron positions of both layers.

    Layer 1 positions are drawn on the row y=0 and layer 2 positions on the
    row y=1. Every epoch gets its own explicitly sized figure, which is shown
    and then closed; this keeps the size identical for all epochs (reusing the
    current figure after ``plt.show()`` does not) and releases the figure's
    memory.

    Args:
        all_positions_layer1: Sequence with one array of positions per epoch
            for the first layer.
        all_positions_layer2: Same for the second layer; expected to have the
            same length as ``all_positions_layer1``.
    """
    num_epochs = len(all_positions_layer1)
    per_epoch = zip(all_positions_layer1, all_positions_layer2)

    for epoch, (positions_layer1, positions_layer2) in enumerate(per_epoch, start=1):
        fig, ax = plt.subplots(figsize=(10, 5))

        # One horizontal row of markers per layer
        ax.scatter(positions_layer1, [0] * len(positions_layer1), c='b', marker='o', label='Layer 1')
        ax.scatter(positions_layer2, [1] * len(positions_layer2), c='r', marker='x', label='Layer 2')

        ax.set_title(f'Epoch {epoch}/{num_epochs} - Neuron Positions for Both Layers')
        ax.set_xlabel('Neuron Position (1D)')
        ax.set_yticks([0, 1])
        ax.set_yticklabels(['Layer 1', 'Layer 2'])  # Name the two rows
        ax.grid(True)
        ax.legend()

        plt.show()
        plt.close(fig)  # Avoid accumulating one open figure per epoch

# Plot the 1D neuron positions of both thought layers, epoch by epoch
plot_neuron_positions_1d(all_positions_layer1, all_positions_layer2)


def _epoch_axis(series, n_epochs):
    """Return 1-based epoch numbers for ``series``, right-aligned to ``n_epochs``.

    A series shorter than the number of trained epochs (for example a single
    evaluation made after training) is placed at the last epochs instead of
    at the start of the axis.
    """
    count = len(series)
    if count > n_epochs:
        return list(range(1, count + 1))
    return list(range(n_epochs - count + 1, n_epochs + 1))


# Training and validation curves side by side: loss on the left, accuracy on the right
n_epochs_trained = len(train_losses)
fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 5))

panels = (
    (ax_loss, train_losses, val_losses, 'Loss'),
    (ax_acc, train_accuracies, val_accuracies, 'Accuracy'),
)
for ax, train_series, val_series, label in panels:
    ax.plot(_epoch_axis(train_series, n_epochs_trained), train_series,
            label=f'Training {label}', color='blue')
    # Markers keep the validation data visible even when it is a single point,
    # which a plain line would not render at all.
    ax.plot(_epoch_axis(val_series, n_epochs_trained), val_series,
            label=f'Validation {label}', color='orange', marker='o')
    ax.set_title(f'{label} per Epoch')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(label)
    ax.legend()
    ax.grid()

fig.tight_layout()
plt.show()