In [8]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load dataset 1
dataset_1 = pd.read_csv('../inputs/dataset_1.csv')

# Shuffle and split the dataset into 50 subsets
n_subsets = 50
subsets = []

for class_label in dataset_1.iloc[:, 2].unique():
    class_data = dataset_1[dataset_1.iloc[:, 2] == class_label]
    class_subsets = np.array_split(class_data.sample(frac=1).reset_index(drop=True), n_subsets)
    subsets.extend(class_subsets)

# Ensure each subset has an equal number of samples per class
shuffled_subsets = []
for i in range(n_subsets):
    subset = pd.concat([subsets[j] for j in range(i, len(subsets), n_subsets)])
    shuffled_subsets.append(subset)

# Define the MLP network (f)
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Define the GRU-based network (g)
class GRUOptimizer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUOptimizer, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        out = self.fc(out)
        return out, hidden

# Define the architecture parameters
input_size = dataset_1.shape[1] - 1
hidden_size = 128
output_size = len(dataset_1.iloc[:, 2].unique())
gru_input_size = 1  # Gradients will be provided as single values
gru_hidden_size = 128
gru_output_size = 1  # Single output for the update value

# Initialize the GRU-based optimizer
optimizer_g = GRUOptimizer(gru_input_size, gru_hidden_size, gru_output_size).to(device)

# Training process

# Function to train the MLP network using the GRU optimizer
def train_with_gru_optimizer(subsets, num_epochs=10):
    loss_fn = nn.CrossEntropyLoss()
    gru_optimizer = optim.Adam(optimizer_g.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        total_loss = 0
        for subset in subsets:
            # Split the subset into training and testing sets
            train_data, test_data = train_test_split(subset, test_size=0.8)

            # Initialize the MLP network
            model_f = MLP(input_size, hidden_size, output_size).to(device)
            model_f.train()

            # Prepare the data
            X_train = torch.tensor(train_data.iloc[:, :-1].values, dtype=torch.float32).to(device)
            y_train = torch.tensor(train_data.iloc[:, -1].values, dtype=torch.long).to(device)

            # Zero the parameter gradients
            model_f.zero_grad()
            
            # Forward pass
            outputs = model_f(X_train)
            loss = loss_fn(outputs, y_train)
            
            # Backward pass and get gradients
            loss.backward()
            
            # Update the parameters using the GRU optimizer
            with torch.no_grad():
                grads = [param.grad for param in model_f.parameters()]
                hidden = torch.zeros(1, len(grads), gru_hidden_size).to(device)
                for param, grad in zip(model_f.parameters(), grads):
                    grad_input = grad.view(1, 1, -1)  # Reshape to (batch_size, seq_len, input_size)
                    update, hidden = optimizer_g(grad_input, hidden)
                    param.data -= update.view(param.data.shape)
            
            total_loss += loss.item()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(subsets):.4f}')

# Train the GRU optimizer
train_with_gru_optimizer(shuffled_subsets)


  return bound(*args, **kwds)


RuntimeError: input.size(-1) must be equal to input_size. Expected 1, got 256

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Define the MLP network (f)
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Define the architecture parameters
input_size = dataset_1.shape[1] - 1
hidden_size = 128
output_size = len(dataset_1.iloc[:, 2].unique())

In [4]:
# Define the GRU-based network (g)
class GRUOptimizer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUOptimizer, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        out = self.fc(out)
        return out, hidden

# Define the architecture parameters for the GRU-based optimizer
gru_input_size = input_size
gru_hidden_size = 128
gru_output_size = output_size

# Initialize the GRU-based optimizer
optimizer_g = GRUOptimizer(gru_input_size, gru_hidden_size, gru_output_size)

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import pandas as pd


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load dataset 1
dataset_1 = pd.read_csv('../inputs/dataset_1.csv')

# Shuffle and split the dataset into 50 subsets
n_subsets = 50
subsets = []

for class_label in dataset_1.iloc[:, 2].unique():
    class_data = dataset_1[dataset_1.iloc[:, 2] == class_label]
    class_subsets = np.array_split(class_data.sample(frac=1).reset_index(drop=True), n_subsets)
    subsets.extend(class_subsets)

# Ensure each subset has an equal number of samples per class
shuffled_subsets = []
for i in range(n_subsets):
    subset = pd.concat([subsets[j] for j in range(i, len(subsets), n_subsets)])
    shuffled_subsets.append(subset)

# Define the MLP network (f)
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

# Define the GRU-based network (g)
class GRUOptimizer(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUOptimizer, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x, hidden):
        out, hidden = self.gru(x, hidden)
        out = self.fc(out)
        return out, hidden

# Define the architecture parameters
input_size = dataset_1.shape[1] - 1
hidden_size = 128
output_size = len(dataset_1.iloc[:, 2].unique())
gru_input_size = 1  # Gradients will be provided as single values
gru_hidden_size = 128
gru_output_size = 1  # Single output for the update value

# Initialize the GRU-based optimizer
optimizer_g = GRUOptimizer(gru_input_size, gru_hidden_size, gru_output_size).to(device)

# Training process

# Function to train the MLP network using the GRU optimizer
def train_with_gru_optimizer(subsets, num_epochs=10):
    loss_fn = nn.CrossEntropyLoss()
    gru_optimizer = optim.Adam(optimizer_g.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        total_loss = 0
        for subset in subsets:
            # Split the subset into training and testing sets
            train_data, test_data = train_test_split(subset, test_size=0.8)

            # Initialize the MLP network
            model_f = MLP(input_size, hidden_size, output_size).to(device)
            model_f.train()

            # Prepare the data
            X_train = torch.tensor(train_data.iloc[:, :-1].values, dtype=torch.float32).to(device)
            y_train = torch.tensor(train_data.iloc[:, -1].values, dtype=torch.long).to(device)

            # Zero the parameter gradients
            model_f.zero_grad()
            
            # Forward pass
            outputs = model_f(X_train)
            loss = loss_fn(outputs, y_train)
            
            # Backward pass and get gradients
            loss.backward()
            
            # Update the parameters using the GRU optimizer
            with torch.no_grad():
                grads = [param.grad for param in model_f.parameters()]
                hidden = torch.zeros(1, len(grads), gru_hidden_size).to(device)
                for param, grad in zip(model_f.parameters(), grads):
                    grad_input = grad.view(1, 1, -1)  # Reshape to (batch_size, seq_len, input_size)
                    update, hidden = optimizer_g(grad_input, hidden)
                    param.data -= update.view(param.data.shape)
            
            total_loss += loss.item()
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(subsets):.4f}')

# Train the GRU optimizer
train_with_gru_optimizer(shuffled_subsets)


RuntimeError: input.size(-1) must be equal to input_size. Expected 2, got 256