In [14]:
import torch as th
import numpy as np
from torch import nn, optim, functional
from torchvision import datasets, transforms

def train_mnist(model, epochs=5, test_every_ep=90, learning_rate=0.003):
    """Train ``model`` on MNIST and periodically report accuracy on one test batch.

    The training and test splits are downloaded (if needed) to
    ``~/.pytorch/MNIST_data/`` and served in shuffled batches of 64. The model
    is optimised with Adam against an MSE loss on one-hot encoded labels.

    Args:
        model: A module whose ``parameters()`` can be handed to ``optim.Adam``
            and which maps a batch of images to one score per digit class
            (10 columns).
        epochs: Number of full passes over the training set.
        test_every_ep: Evaluate on a single test batch every this many
            training steps. Despite the name it counts steps, not epochs.
            A value of 0 (or None) disables the periodic evaluation.
        learning_rate: Learning rate passed to Adam.

    Returns:
        None. Progress is reported through ``print``.
    """
    # ToTensor scales pixels to [0, 1]; Normalize((0.5,), (0.5,)) then maps them to [-1, 1].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Download/load the training split.
    trainset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
    trainloader = th.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # Same for the test split.
    testset = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)
    testloader = th.utils.data.DataLoader(testset, batch_size=64, shuffle=True)
    test_iter = iter(testloader)

    # MSE against one-hot targets is kept from the original implementation.
    # NOTE(review): CrossEntropyLoss on raw logits is the usual choice for
    # classification -- confirm what the model outputs before switching.
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Feed every batch on the device that holds the model's weights; otherwise a
    # model living on the GPU receives CPU tensors and the forward pass fails.
    device = next(model.parameters()).device

    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for step, (images, labels) in enumerate(trainloader, start=1):
            images = images.to(device)
            targets = th.nn.functional.one_hot(labels.to(device), num_classes=10).float()

            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()

            if step % 30 == 0:
                print('Loss:', loss.item())
            running_loss += loss.item()

            if test_every_ep and step % test_every_ep == 0:
                model.eval()
                with th.no_grad():
                    try:
                        test_images, test_labels = next(test_iter)
                    except StopIteration:
                        # The test loader is exhausted: start another pass
                        # instead of aborting the whole training run.
                        test_iter = iter(testloader)
                        test_images, test_labels = next(test_iter)
                    test_images = test_images.to(device)
                    test_labels = test_labels.to(device)
                    test_targets = th.nn.functional.one_hot(test_labels, num_classes=10).float()

                    test_output = model(test_images)
                    test_loss = criterion(test_output, test_targets)
                    predictions = th.argmax(test_output, dim=1)
                    correct = (predictions == test_labels).sum().item()
                    total = test_labels.size(0)
                print("Accuracy: {:.2f}%".format(correct/total*100))
                print('Testing Loss:', test_loss.item())
                # Back to training mode; without this the rest of the epoch
                # would keep running with eval-mode dropout/batch-norm.
                model.train()
        print(f"Epoch {epoch+1} - Training loss: {running_loss/len(trainloader)}")
    print("Training completed")


In [3]:
from CoderSchoolAI.Environment.CoderSchoolEnvironments.SnakeEnvironment import *
from CoderSchoolAI.Environment.Attributes import *
from CoderSchoolAI.Neural.Blocks import *
from CoderSchoolAI.Neural.Net import *
from CoderSchoolAI.Training.Datasets import train_on_dataset, MNISTDataset

import torch as th

# Prefer the GPU, but fall back to the CPU so the cell still runs without CUDA.
device = "cuda" if th.cuda.is_available() else "cpu"

# Observation fed to the network: a (1, 28, 28) box bounded by -1 and 1,
# i.e. one MNIST image after the usual (0.5, 0.5) normalisation.
image = ObsAttribute(name="img", space=BoxType(-1, 1, shape=(1, 28, 28)))
input_block = InputBlock(in_attribute=image, is_module_dict=False, device=device)

# Convolutional feature extractor over the input image, built with depth=4.
conv_block = ConvBlock(input_shape=input_block.in_attribute.space.shape, num_channels=1, depth=4, device=device)
# Floor division keeps the layer width an integer ("/" would produce a float).
# NOTE(review): lin_block is constructed but never added to the network below,
# and out_block is sized from conv_block -- confirm whether it should be wired in.
lin_block = LinearBlock(input_size=conv_block.output_size, output_size=conv_block.output_size // 2, hidden_size=conv_block.output_size, num_hidden_layers=3, dropout=0.2, device=device)
# Output head: one class per MNIST digit (0-9).
out_block = OutputBlock(input_size=conv_block.output_size, num_classes=10, device=device)

# Initialize the network on the same device as its blocks and add them in order.
net = Net(device=device)

net.add_block(input_block)
net.add_block(conv_block)
net.add_block(out_block)
net.compile()

print(out_block.device)
# Train the network on MNIST.
# NOTE(review): the recorded run of this cell failed with a cpu/cuda device
# mismatch inside a conv2d forward. Every block here is created with the same
# device, so the mismatch presumably originates inside the library (weights or
# batches not moved to `device`) -- TODO confirm.
mnist_dataset = MNISTDataset()
train_on_dataset(net, mnist_dataset, epochs=5)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper___slow_conv2d_forward)