In [1]:
import torch
import torch.nn as nn

import torch.optim as optim

from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import random_split, DataLoader

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [3]:
class ENNModel(nn.Module):
	def __init__(self, in_channels, num_classes=10, echoing_depth=5, echoing_limit=5):
		super(ENNModel, self).__init__()
		self.in_channels = in_channels
		self.num_classes = num_classes
		self.echoing_depth = echoing_depth
		self.echoing_limit = echoing_limit

		self.conv_layers = nn.Sequential(
			nn.Conv2d(self.in_channels, 64, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(64, 128, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(128, 256, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(256, 256, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(256, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2)
		)

		# fully connected linear layers
		self.linear1 = nn.Linear(in_features=512*7*7, out_features = 1024)
		self.linear_echoing_layers = nn.ModuleList([*[nn.Linear(in_features=1024, out_features=1024) for i in range(self.echoing_depth)], nn.Linear(in_features=1024, out_features=self.num_classes)])
		# self.linear1 = nn.Linear(in_features=512*7*7, out_features=1024)
		# self.linear2 = nn.Linear(in_features=1024, out_features=1024)
		# self.linear3 = nn.Linear(in_features=1024, out_features=1024)
		# self.linear4 = nn.Linear(in_features=1024, out_features=1024)
		# self.linear5 = nn.Linear(in_features=1024, out_features=self.num_classes)
	def run_echo_chamber(self, input, index, echoing_complete):
		output = nn.functional.dropout2d(nn.functional.relu(self.linear_echoing_layers[index](input)))
		if(index != self.echoing_depth - 1): branch_2 = self.run_echo_chamber(self, output, index + 1)
		elif(torch.max(nn.Softmax()(output)) > 0.5 and not echoing_complete[0]):
			echoing_complete[0] = True
			return output
		if(index != 0 and not echoing_complete[0]): branch_1 = self.run_echo_chamber(self, output, index - 1)
		return branch_1 or branch_2

	def forward(self, x):
		conv_output = self.conv_layers(x)
		conv_output_flat = conv_output.view(conv_output.size(0), -1)

		echoing_input = self.linear1(conv_output_flat)

		echoing_complete = [False]
		echoing_output = self.run_echo_chamber(self, echoing_input, 0, echoing_complete)

		return echoing_output

In [4]:
class VGG11(nn.Module):
	def __init__(self, in_channels, num_classes=10):
		super(VGG11, self).__init__()
		self.in_channels = in_channels
		self.num_classes = num_classes
		self.conv_layers = nn.Sequential(
			nn.Conv2d(self.in_channels, 64, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(64, 128, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(128, 256, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(256, 256, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(256, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.Conv2d(512, 512, kernel_size=3, padding=1),
			nn.ReLU(),
			nn.MaxPool2d(kernel_size=2, stride=2)
		)
		self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
		self.linear_layers = nn.Sequential(
			nn.Linear(in_features=512*7*7, out_features=4096),
			nn.ReLU(),
			nn.Dropout(0.5),
			nn.Linear(in_features=4096, out_features=4096),
			nn.ReLU(),
			nn.Dropout(0.5),
			nn.Linear(in_features=4096, out_features=self.num_classes)
		)
	def forward(self, x):
		x = self.conv_layers(x)
		print("finished conv layers")
		# flatten to prepare for the fully connected layers
		x = self.avgpool(x)
		print("avgpooled conv output")
		x = torch.flatten(x)
		print("flattened conv output", torch.Tensor.dim(x))
		x = self.linear_layers(x)
		print("finished linear layers")
		return x

In [5]:
transform = transforms.Compose(
	[transforms.ToTensor(),
	 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

batch_size = 64

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
training_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=2)

validationset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

validation_loader = torch.utils.data.DataLoader(validationset, batch_size=batch_size, shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
		   'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [6]:
cnn = VGG11(3)

In [7]:
criterion = nn.CrossEntropyLoss()
optimiser = optim.SGD(cnn.parameters(), lr=0.001, momentum=0.9)

In [10]:
def train_one_epoch(epoch_index, tb_writer):
	running_loss = 0.
	last_loss = 0.

	# Here, we use enumerate(training_loader) instead of
	# iter(training_loader) so that we can track the batch
	# index and do some intra-epoch reporting
	for i, data in enumerate(training_loader):
		# Every data instance is an input + label pair
		inputs, labels = data
		outputs = []

		# Zero your gradients for every batch!
		optimiser.zero_grad()

		for j in range(batch_size):
			# Make predictions for this batch
			outputs.append(cnn(inputs[j]))

			# Compute the loss and its gradients
		loss = criterion(torch.stack(outputs), labels)
		running_loss += loss
		loss.backward()

		# Adjust learning weights
		optimiser.step()

		# Gather data and report
		if i % 1000 == 999:
			last_loss = running_loss / 1000 # loss per batch
			print('  batch {} loss: {}'.format(i + 1, last_loss))
			tb_x = epoch_index * len(training_loader) + i + 1
			tb_writer.add_scalar('Loss/train', last_loss, tb_x)
			running_loss = 0.

	return last_loss

In [None]:
from datetime import datetime
from torch.utils.tensorboard import SummaryWriter

timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/cnn_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
	print('EPOCH {}:'.format(epoch_number + 1))

	# Make sure gradient tracking is on, and do a pass over the data
	cnn.train(True)
	avg_loss = train_one_epoch(epoch_number, writer)


	running_vloss = 0.0
	# Set the model to evaluation mode, disabling dropout and using population
	# statistics for batch normalization.
	cnn.eval()

	# Disable gradient computation and reduce memory consumption.
	with torch.no_grad():
		for i, vdata in enumerate(validation_loader):
			vinputs, vlabels = vdata
			voutputs = []
			for j in range(batch_size):
				voutputs.append(cnn(vinputs[j]))
			vloss = criterion(torch.stack(voutputs), vlabels)
			running_vloss += vloss

	avg_vloss = running_vloss / (i + 1)
	print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

	# Log the running loss averaged per batch
	# for both training and validation
	writer.add_scalars('Training vs. Validation Loss',
					{ 'Training' : avg_loss, 'Validation' : avg_vloss },
					epoch_number + 1)
	writer.flush()

	# Track best performance, and save the model's state
	if avg_vloss < best_vloss:
		best_vloss = avg_vloss
		model_path = 'model_{}_{}'.format(timestamp, epoch_number)
		torch.save(cnn.state_dict(), model_path)

	epoch_number += 1

EPOCH 1:
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
finished conv layers
avgpooled conv output
flattened conv output 1
finished linear layers
f

KeyboardInterrupt: 