# In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# Evaluation preprocessing: convert the image to a tensor, then normalize each
# of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# Training preprocessing: same as above, preceded by random augmentation
# (grayscale with probability 0.4, horizontal flip with probability 0.5).
transform1 = transforms.Compose(
    [
        transforms.RandomGrayscale(p=0.4),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)

# CIFAR-10 training split (augmented), served in shuffled batches of 1024.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform1,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=1024,
    shuffle=True,
    num_workers=2,
)

# CIFAR-10 test split (no augmentation), served in order.
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=1024,
    shuffle=False,
    num_workers=2,
)

# Human-readable class names, positioned by label index.
classes = (
    'plane',
    'car',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)

import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

class Net(nn.Module):
    """Three-stage convolutional classifier for 3x32x32 images.

    Each stage is ``Conv2d(kernel=3, no padding) -> ReLU -> BatchNorm2d ->
    MaxPool2d(2, 2)``. For a 32x32 input the spatial size shrinks
    32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2, so the flattened feature vector
    has ``512 * 2 * 2`` entries, which is what ``fc1`` expects.

    Args:
        num_classes: Number of output classes (size of the logit vector).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 128, 3)
        self.batchnorm1 = nn.BatchNorm2d(128)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(128, 256, 3)
        self.batchnorm2 = nn.BatchNorm2d(256)
        self.conv3 = nn.Conv2d(256, 512, 3)
        self.batchnorm3 = nn.BatchNorm2d(512)
        self.fc1 = nn.Linear(512 * 2 * 2, num_classes)
        # Element-wise dropout: it is applied to the flattened (N, features)
        # tensor, where channel-wise Dropout2d is deprecated for 2-D input.
        self.dropout = nn.Dropout(p=0.3)

    def forward(self, x):
        """Return raw class logits of shape ``(N, num_classes)``.

        No softmax is applied here: ``nn.CrossEntropyLoss`` expects
        unnormalized logits and applies log-softmax itself. The arg-max of
        the logits equals the arg-max of the softmax probabilities, so
        predictions taken with ``max``/``argmax`` are unaffected.
        """
        x = self.pool(self.batchnorm1(F.relu(self.conv1(x))))
        x = self.pool(self.batchnorm2(F.relu(self.conv2(x))))
        x = self.pool(self.batchnorm3(F.relu(self.conv3(x))))
        # Keep the batch dimension fixed so an unexpected input size fails
        # loudly in fc1 instead of silently changing the batch size.
        x = x.view(x.size(0), -1)
        x = self.dropout(x)
        return self.fc1(x)

net = Net()

# Run on the first CUDA GPU when one is available, otherwise fall back to the
# CPU. The CUDA default device is only set when CUDA exists, so the script
# also starts on CPU-only machines.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)
else:
    device = torch.device("cpu")
print(device)

# Multi-GPU option (disabled): wrap the model to split each batch along dim 0.
# if torch.cuda.device_count() > 1:
#   print("Let's use", torch.cuda.device_count(), "GPUs!")
#   net = nn.DataParallel(net)

net.to(device)

# Print a per-layer summary for a single 3x32x32 input.
summary(net, (3, 32, 32))

import torch.optim as optim

# Loss, optimizer and learning-rate schedule (LR x0.1 every 10 epochs).
criterion = nn.CrossEntropyLoss()
# Alternatives that were tried:
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
# optimizer = optim.Adam(net.parameters(), lr=0.001, betas=(0.9,0.999), eps=1e-08, weight_decay=5e-4)
optimizer = optim.RMSprop(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

max_epoch = 50

for epoch in range(max_epoch):
    # Make sure dropout / batch-norm are in training mode for this epoch.
    net.train()

    running_loss = 0.0
    epoch_correct = 0
    epoch_total = 0
    num_batches = 0
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Training accuracy bookkeeping; detach so no graph is kept alive.
        predicted = outputs.detach().argmax(dim=1)
        epoch_total += labels.size(0)
        epoch_correct += (predicted == labels).sum().item()

        running_loss += loss.item()
        num_batches += 1

    # Advance the LR schedule once per epoch, after the optimizer updates.
    scheduler.step()

    # Average over the number of batches actually seen (not the last index),
    # guarding against an empty loader.
    print('[epoch %d/%d] loss:%3f accuracy:%3f' % (
        epoch + 1,
        max_epoch,
        running_loss / max(num_batches, 1),
        100 * epoch_correct / max(epoch_total, 1),
    ))
print('Finished Training')

# Measure top-1 accuracy on the test loader.
correct = 0
total = 0
# Switch to inference mode: disables dropout and makes batch-norm use its
# running statistics instead of per-batch statistics.
net.eval()
with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)

        outputs = net(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy:%d %%' % (100 * correct / total))
