In [None]:
import torch
import torch.nn as nn
import torchvision.models

In [None]:
# ImageNet-pretrained reference models. Both are loaded through the installed
# torchvision `weights=` API, so no separate copy of the torchvision repository
# has to be downloaded from GitHub and executed via torch.hub.
prebuilt_alexnet = torchvision.models.alexnet(weights=torchvision.models.AlexNet_Weights.IMAGENET1K_V1)
prebuilt_resnet18 = torchvision.models.resnet18(weights=torchvision.models.ResNet18_Weights.IMAGENET1K_V1)

Downloading: "https://github.com/pytorch/vision/zipball/v0.10.0" to /root/.cache/torch/hub/v0.10.0.zip
Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to /root/.cache/torch/hub/checkpoints/alexnet-owt-7be5be79.pth
100%|██████████| 233M/233M [00:01<00:00, 166MB/s]
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 162MB/s]


In [None]:
# First set of activation classes compared in the experiments below.
chosen_activation_functions = [
    nn.ReLU,
    nn.LeakyReLU,
    nn.PReLU,
    nn.Tanh,
    nn.Mish,
]

In [None]:
def in_place_check(a):
    """Return True if the activation class ``a`` accepts an ``inplace`` argument.

    The constructor signature is inspected instead of comparing against a
    fixed list of classes, so activations such as ``nn.ReLU6`` or
    ``nn.Hardtanh`` are recognised as well.

    Args:
        a: An activation class (or any other object) such as ``nn.ReLU``.

    Returns:
        bool: True when the constructor of ``a`` has a parameter named
        ``inplace``; False otherwise, including when ``a`` has no
        introspectable signature.
    """
    import inspect  # local import keeps this definition self-contained

    try:
        parameters = inspect.signature(a).parameters
    except (TypeError, ValueError):
        # `a` is not callable, or its signature cannot be introspected.
        return False
    return "inplace" in parameters

class Block(nn.Module):
    """Residual block made of two 3x3 convolutions with batch normalisation.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels produced by the first convolution.
        activation: Activation class (not an instance). It is instantiated
            with ``inplace=True`` when ``in_place_check(activation)`` is true
            and with no arguments otherwise.
        stride: Stride of the first convolution.
        expansion: Multiplier applied to ``out_channels`` for the second
            convolution and its batch norm.
        downsample: Optional module applied to the block input before it is
            added to the convolutional branch.
    """

    def __init__(self, in_channels, out_channels, activation, stride=1, expansion=1, downsample=None):
        super().__init__()
        self.expansion = expansion
        expanded_channels = out_channels * self.expansion

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # A single activation instance is reused at both activation points of forward().
        if in_place_check(activation):
            self.activation = activation(inplace=True)
        else:
            self.activation = activation()
        self.conv2 = nn.Conv2d(out_channels, expanded_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(expanded_channels)
        self.downsample = downsample

    def forward(self, x):
        """Run conv-bn-activation-conv-bn, add the shortcut, then activate."""
        out = self.bn1(self.conv1(x))
        out = self.activation(out)
        out = self.bn2(self.conv2(out))

        # The shortcut is the raw input unless a downsample module was given.
        shortcut = x
        if self.downsample is not None:
            shortcut = self.downsample(shortcut)

        out += shortcut
        return self.activation(out)

In [None]:
class resnet18(nn.Module):
    """ResNet-18-style image classifier with a configurable activation function.

    The network is a 7x7 stem convolution with batch norm, activation and max
    pooling, followed by four stages of two ``Block`` modules each (64, 128,
    256 and 512 channels), adaptive average pooling and a linear classifier.

    Args:
        img_channels: Number of channels of the input images.
        activation: Activation class (not an instance). It is instantiated
            with ``inplace=True`` when ``in_place_check(activation)`` is true
            and with no arguments otherwise; the class itself is also passed
            on to every ``Block``.
        n_output: Number of output units of the final linear layer.
    """

    def __init__(self, img_channels, activation, n_output=1000):
        super(resnet18, self).__init__()
        self.expansion = 1
        self.in_channels = 64
        self.conv1 = nn.Conv2d(in_channels=img_channels,out_channels=self.in_channels,kernel_size=7, stride=2,padding=3,bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.activation = activation(inplace=True) if in_place_check(activation) else activation()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.block1 = self.block_construct(64, 64, 1, activation)
        self.block2 = self.block_construct(64, 128, 2, activation)
        self.block3 = self.block_construct(128, 256, 2, activation)
        self.block4 = self.block_construct(256, 512, 2, activation)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, n_output)

    def block_construct(self, in_channels, out_channels, stride, activation):
        """Build one stage consisting of two ``Block`` modules.

        The first block applies ``stride`` and maps ``in_channels`` to
        ``out_channels``; the second keeps the shape. A projection shortcut
        (3x3 convolution + batch norm) is attached to the first block whenever
        that block changes the spatial size or the channel count, so the
        residual addition always sees matching shapes.

        Args:
            in_channels: Channels entering the stage.
            out_channels: Channels leaving the stage.
            stride: Stride of the first block (and of its shortcut).
            activation: Activation class forwarded to each ``Block``.

        Returns:
            nn.Sequential: The two blocks of the stage.
        """
        if stride != 1 or in_channels != out_channels:
            # The shortcut uses the same kernel/padding/stride as the block's
            # first convolution, so both branches produce the same shape.
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1),
                nn.BatchNorm2d(out_channels))
        else:
            downsample = None
        return nn.Sequential(
            Block(in_channels, out_channels, activation, downsample=downsample, stride=stride),
            Block(out_channels, out_channels, activation))

    def forward(self, x):
        """Return the ``n_output`` class scores for a batch of images ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.activation(x)
        x = self.maxpool(x)
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.avgpool(x)
        # Collapse everything except the batch dimension before the classifier.
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [None]:
# Construct the network once with each activation class; `model` keeps the last one.
for a in (
    nn.ReLU, nn.LeakyReLU, nn.RReLU, nn.PReLU, nn.SELU,
    nn.ELU, nn.Sigmoid, nn.Tanh, nn.Mish,
):
    model = resnet18(img_channels=3, activation=a)

In [None]:
class alexnet(nn.Module):
    """AlexNet-style classifier with a configurable activation function.

    ``self.conv`` holds five convolutions interleaved with activations and
    three max-pooling layers, ending in a 6x6 adaptive average pool;
    ``self.fc`` holds three linear layers with dropout and activations.

    Args:
        activation: Activation class (not an instance). Each activation point
            gets its own instance, built with ``inplace=True`` when
            ``in_place_check(activation)`` is true and with no arguments
            otherwise.
        n_output: Number of output units of the last linear layer.
    """

    def __init__(self, activation, n_output=1000):
        super().__init__()

        def _act():
            # Fresh activation instance for every position in the network.
            if in_place_check(activation):
                return activation(inplace=True)
            return activation()

        def _pool():
            return nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)

        self.conv = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            _act(),
            _pool(),
            nn.Conv2d(64, 192, kernel_size=5, stride=1, padding=2),
            _act(),
            _pool(),
            nn.Conv2d(192, 384, kernel_size=3, stride=1, padding=1),
            _act(),
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            _act(),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            _act(),
            _pool(),
            nn.AdaptiveAvgPool2d(output_size=(6, 6)),
        )
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(9216, 4096),
            _act(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            _act(),
            nn.Linear(4096, n_output),
        )

    def forward(self, x):
        """Return the ``n_output`` class scores for a batch of images ``x``."""
        features = self.conv(x)
        # Keep the batch dimension and flatten the rest for the linear layers.
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)

In [None]:
# Construct the network once with each activation class; `model` keeps the last one.
for a in (
    nn.ReLU, nn.LeakyReLU, nn.RReLU, nn.PReLU, nn.SELU,
    nn.ELU, nn.Sigmoid, nn.Tanh, nn.Mish,
):
    model = alexnet(activation=a)

In [None]:
# Preprocessing that ships with the pretrained ResNet-18 ImageNet weights.
resnet18_transformations = torchvision.models.ResNet18_Weights.IMAGENET1K_V1.transforms()

# Preprocessing for the AlexNet runs:
# resize -> centre crop -> tensor -> per-channel normalisation.
alex_transformations = torchvision.transforms.Compose([
    torchvision.transforms.Resize(size=256),
    torchvision.transforms.CenterCrop(size=224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        [0.485, 0.456, 0.406],  # mean
        [0.229, 0.224, 0.225],  # std
    ),
])

In [None]:
# Fetch the CIFAR-10 archive into ./data/CIFAR/ (download=True).
cifar_train = torchvision.datasets.CIFAR10(
    download=True,
    root='./data/CIFAR/',
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/CIFAR/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 28670514.94it/s]


Extracting ./data/CIFAR/cifar-10-python.tar.gz to ./data/CIFAR/


In [None]:
# Registry of trained models: architecture -> dataset -> activation name -> model.
# Every slot starts as None and is filled in by the training cells.
models = {
    'Alex': {
        'CIFAR': dict.fromkeys(fn.__name__ for fn in chosen_activation_functions),
    },
}

In [None]:
# CIFAR-10 datasets and loaders for the AlexNet runs.
data = torchvision.datasets.CIFAR10
path = 'CIFAR'
transformations = alex_transformations

trainset = data(root=f'./data/{path}/', train=True, transform=transformations)
testset = data(root=f'./data/{path}/', train=False, transform=transformations)

trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2, pin_memory=True)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2, pin_memory=True)

In [None]:
## ALEX, CIFAR10
# Train one AlexNet (10 outputs) per activation in `chosen_activation_functions`
# and store each trained model in models['Alex']['CIFAR'].
# NOTE(review): in the recorded output the logged loss stays around 2.3 for all
# visible steps; the optimisation settings (e.g. the learning rate) may need
# tuning for this architecture -- TODO confirm.
epochs = 5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

for activation in chosen_activation_functions:
    model = alexnet(activation, 10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass and loss for the current mini-batch.
            loss = criterion(model(images), labels)

            # Clear old gradients, back-propagate, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the loss on every 100th step only.
            if (i+1)%100 != 0:
                continue
            print(f'epoch {epoch+1}/{epochs}, step {i+1}/{len(trainloader)}, loss {loss.item()}')

    models['Alex']['CIFAR'][activation.__name__] = model

cuda


  self.pid = os.fork()


epoch 1/5, step 100/12500, loss 2.1726877689361572
epoch 1/5, step 200/12500, loss 2.3672842979431152
epoch 1/5, step 300/12500, loss 2.295866012573242
epoch 1/5, step 400/12500, loss 2.3500595092773438
epoch 1/5, step 500/12500, loss 2.337333917617798
epoch 1/5, step 600/12500, loss 2.2627322673797607
epoch 1/5, step 700/12500, loss 2.2957253456115723
epoch 1/5, step 800/12500, loss 2.3241050243377686
epoch 1/5, step 900/12500, loss 2.250417947769165
epoch 1/5, step 1000/12500, loss 2.32346248626709
epoch 1/5, step 1100/12500, loss 2.3183836936950684
epoch 1/5, step 1200/12500, loss 2.270249605178833
epoch 1/5, step 1300/12500, loss 2.3190081119537354
epoch 1/5, step 1400/12500, loss 2.3172552585601807
epoch 1/5, step 1500/12500, loss 2.341615915298462
epoch 1/5, step 1600/12500, loss 2.2384018898010254
epoch 1/5, step 1700/12500, loss 2.3068385124206543
epoch 1/5, step 1800/12500, loss 2.360718011856079
epoch 1/5, step 1900/12500, loss 2.3238375186920166
epoch 1/5, step 2000/12500, l

In [None]:
# Reserve one registry slot per first-set activation for the resnet runs.
models['resnet'] = {
    'CIFAR': dict.fromkeys(fn.__name__ for fn in chosen_activation_functions),
}

# CIFAR-10 datasets and loaders using the resnet18 preprocessing.
data = torchvision.datasets.CIFAR10
path = 'CIFAR'
transformations = resnet18_transformations

trainset = data(root=f'./data/{path}/', train=True, transform=transformations)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=2, pin_memory=True)

testset = data(root=f'./data/{path}/', train=False, transform=transformations)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=4, shuffle=False, num_workers=2, pin_memory=True)

In [None]:
## RESNET, CIFAR10
# Train one resnet18 (3 input channels, 10 outputs) per activation in
# `chosen_activation_functions` and store it in models['resnet']['CIFAR'].
epochs = 5
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for activation in chosen_activation_functions:
    model = resnet18(3, activation, 10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass and loss for the current mini-batch.
            loss = criterion(model(images), labels)

            # Clear old gradients, back-propagate, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the loss on every 100th step only.
            if (i+1)%100 != 0:
                continue
            print(f'epoch {epoch+1}/{epochs}, step {i+1}/{len(trainloader)}, loss {loss.item()}')

    models['resnet']['CIFAR'][activation.__name__] = model

epoch 1/5, step 100/12500, loss 2.59659481048584
epoch 1/5, step 200/12500, loss 2.091352701187134
epoch 1/5, step 300/12500, loss 2.277880907058716
epoch 1/5, step 400/12500, loss 2.041156530380249
epoch 1/5, step 500/12500, loss 2.4404568672180176
epoch 1/5, step 600/12500, loss 2.3176517486572266
epoch 1/5, step 700/12500, loss 1.8590718507766724
epoch 1/5, step 800/12500, loss 2.4540860652923584
epoch 1/5, step 900/12500, loss 1.3234186172485352
epoch 1/5, step 1000/12500, loss 2.3824453353881836
epoch 1/5, step 1100/12500, loss 1.5649248361587524
epoch 1/5, step 1200/12500, loss 2.2650833129882812
epoch 1/5, step 1300/12500, loss 1.5066795349121094
epoch 1/5, step 1400/12500, loss 1.8222683668136597
epoch 1/5, step 1500/12500, loss 1.2506756782531738
epoch 1/5, step 1600/12500, loss 1.5512205362319946
epoch 1/5, step 1700/12500, loss 2.0493435859680176
epoch 1/5, step 1800/12500, loss 1.9201886653900146
epoch 1/5, step 1900/12500, loss 2.4366703033447266
epoch 1/5, step 2000/12500

In [None]:
# Write every trained CIFAR-10 model (the whole module object) to the working directory.
for a in chosen_activation_functions:
    alex_model = models['Alex']['CIFAR'][a.__name__]
    torch.save(alex_model, f'./ALEX_CIFAR_{a.__name__}')
    res_model = models['resnet']['CIFAR'][a.__name__]
    torch.save(res_model, f'./RES_CIFAR_{a.__name__}')

In [None]:
## ALEX TESTS
# AlexNet performs very poorly here (see the accuracies printed below), possibly
# because of an implementation or training problem. The second set of activation
# functions is therefore not tested with alexnet; only resnet18 is used from here on.

# The CIFAR-10 test split does not depend on the activation, so the dataset and
# loader are built once. The dataset class and root are spelled out so this cell
# does not depend on whatever `data` / `path` were last set to.
testset = torchvision.datasets.CIFAR10(root='./data/CIFAR/', train=False, transform=alex_transformations)
testloader = torch.utils.data.DataLoader(testset, pin_memory=True, batch_size=4, shuffle=False, num_workers=2)

for a in chosen_activation_functions:
    model = models['Alex']['CIFAR'][a.__name__]
    model.eval()  # evaluation mode (disables dropout)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest score in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Alexnet with {a.__name__}:\n\tAccuracy: {correct/total}')

  self.pid = os.fork()


Alexnet with ReLU:
	Accuracy: 0.1
	Metric2:100
Alexnet with LeakyReLU:
	Accuracy: 0.1002
	Metric2:100
Alexnet with PReLU:
	Accuracy: 0.0896
	Metric2:100
Alexnet with Tanh:
	Accuracy: 0.1575
	Metric2:100
Alexnet with Mish:
	Accuracy: 0.1
	Metric2:100


In [None]:
## RESNET18 TESTS
# Specifically the first 5 AFs.

# The CIFAR-10 test split does not depend on the activation, so the dataset and
# loader are built once. The dataset class and root are spelled out so this cell
# does not depend on whatever `data` / `path` were last set to.
testset = torchvision.datasets.CIFAR10(root='./data/CIFAR/', train=False, transform=resnet18_transformations)
testloader = torch.utils.data.DataLoader(testset, pin_memory=True, batch_size=4, shuffle=False, num_workers=2)

for a in chosen_activation_functions:
    model = models['resnet']['CIFAR'][a.__name__]
    model.eval()  # evaluation mode (batch norm uses its running statistics)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest score in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Resnet with {a.__name__}:\n\tAccuracy: {correct/total}')

  self.pid = os.fork()


Resnet with ReLU:
	Accuracy: 0.8148
Resnet with LeakyReLU:
	Accuracy: 0.8187
Resnet with PReLU:
	Accuracy: 0.8201
Resnet with Tanh:
	Accuracy: 0.7528
Resnet with Mish:
	Accuracy: 0.8023


In [None]:
# Second set of activation classes, used for the resnet18 experiments.
chosen_activation_functions_v2 = [
    nn.RReLU,
    nn.SELU,
    nn.ELU,
    nn.Sigmoid,
    nn.Softplus,
]

In [None]:
# RESNET training w/ 2nd set of AFs (CIFAR-10).
# The dataset class and root are spelled out so this cell does not depend on
# whatever `data` / `path` were last set to.
trainset = torchvision.datasets.CIFAR10(root='./data/CIFAR/', train=True, transform=resnet18_transformations)
trainloader = torch.utils.data.DataLoader(trainset, pin_memory=True, batch_size=4, shuffle=True)

chosen_activation_functions_v2 = [nn.RReLU, nn.SELU, nn.ELU, nn.Sigmoid, nn.Softplus]
# Add slots for the second activation set WITHOUT replacing models['resnet']:
# models already stored there (other activations or datasets) stay available.
models.setdefault('resnet', {}).setdefault('CIFAR', {}).update(
    {a.__name__: None for a in chosen_activation_functions_v2})

epochs = 5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
for activation in chosen_activation_functions_v2:
    model = resnet18(3, activation, 10).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=.001)

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainloader):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Report the loss on every 100th step only.
            if (i+1)%100==0:
                print(f'epoch {epoch+1}/{epochs}, step {i+1}/{len(trainloader)}, loss {loss.item()}')

    models['resnet']['CIFAR'][activation.__name__] = model

epoch 1/5, step 100/12500, loss 2.780536651611328
epoch 1/5, step 200/12500, loss 2.713531017303467
epoch 1/5, step 300/12500, loss 2.397186756134033
epoch 1/5, step 400/12500, loss 1.6629691123962402
epoch 1/5, step 500/12500, loss 2.555617094039917
epoch 1/5, step 600/12500, loss 3.334902763366699
epoch 1/5, step 700/12500, loss 2.39925479888916
epoch 1/5, step 800/12500, loss 2.202270984649658
epoch 1/5, step 900/12500, loss 2.3807077407836914
epoch 1/5, step 1000/12500, loss 1.7804147005081177
epoch 1/5, step 1100/12500, loss 1.5460861921310425
epoch 1/5, step 1200/12500, loss 1.940732717514038
epoch 1/5, step 1300/12500, loss 2.4355645179748535
epoch 1/5, step 1400/12500, loss 1.9827206134796143
epoch 1/5, step 1500/12500, loss 2.6732122898101807
epoch 1/5, step 1600/12500, loss 1.5836422443389893
epoch 1/5, step 1700/12500, loss 2.117722272872925
epoch 1/5, step 1800/12500, loss 1.788416862487793
epoch 1/5, step 1900/12500, loss 2.3213183879852295
epoch 1/5, step 2000/12500, loss

In [None]:
## RESNET18 CIFAR TESTS V2
data, path, transformations = torchvision.datasets.CIFAR10, 'CIFAR', resnet18_transformations

# The test split does not depend on the activation, so the dataset and loader
# are built once instead of once per model.
testset = data(root=f'./data/{path}/', train=False, transform=resnet18_transformations)
testloader = torch.utils.data.DataLoader(testset, pin_memory=True, batch_size=4, shuffle=False)

for a in chosen_activation_functions_v2:
    model = models['resnet']['CIFAR'][a.__name__]
    model.eval()  # evaluation mode (batch norm uses its running statistics)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest score in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Resnet with {a.__name__}:\n\tAccuracy: {correct/total}')

Resnet with RReLU:
	Accuracy: 0.7946
Resnet with SELU:
	Accuracy: 0.7825
Resnet with ELU:
	Accuracy: 0.7803
Resnet with Sigmoid:
	Accuracy: 0.6655
Resnet with Softplus:
	Accuracy: 0.7813


In [None]:
# Make sure a (non-empty) resnet registry exists, then reserve one CIFAR-100
# slot per activation from both sets.
if 'resnet' not in models or not models['resnet']:
    models['resnet'] = {}
models['resnet']['CIFAR100'] = dict.fromkeys(
    fn.__name__ for fn in chosen_activation_functions + chosen_activation_functions_v2)

# Fetch the CIFAR-100 archive into ./data/CIFAR100/ (download=True).
CIFAR100 = torchvision.datasets.CIFAR100(root='./data/CIFAR100/', download=True)

# Training split and loader using the resnet18 preprocessing.
data = torchvision.datasets.CIFAR100
path = 'CIFAR100'
transformations = resnet18_transformations
trainset = data(root=f'./data/{path}/', train=True, transform=transformations)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=4, shuffle=True, num_workers=1, pin_memory=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/CIFAR100/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:06<00:00, 27029277.73it/s]


Extracting ./data/CIFAR100/cifar-100-python.tar.gz to ./data/CIFAR100/


In [None]:
#for a in chosen_activation_functions_v2:
#  torch.save(models['resnet']['CIFAR'][a.__name__], f'./RES_CIFAR_{a.__name__}')

In [None]:
# Display the CIFAR-100 registry of the resnet models (activation name -> model or None).
models['resnet']['CIFAR100']

{'ReLU': None,
 'LeakyReLU': None,
 'PReLU': None,
 'Tanh': None,
 'Mish': None,
 'RReLU': None,
 'SELU': None,
 'ELU': None,
 'Sigmoid': None,
 'Softplus': None}

In [None]:
## RESNET, both sets of AFs, CIFAR100
# Train one resnet18 (3 input channels, 100 outputs) per activation from both
# sets and store it in models['resnet']['CIFAR100'].
epochs = 5
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

for activation in chosen_activation_functions + chosen_activation_functions_v2:
    model = resnet18(3, activation, 100).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    for epoch in range(epochs):
        for i, (images, labels) in enumerate(trainloader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass and loss for the current mini-batch.
            loss = criterion(model(images), labels)

            # Clear old gradients, back-propagate, update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the loss on every 100th step only.
            if (i+1)%100 != 0:
                continue
            print(f'epoch {epoch+1}/{epochs}, step {i+1}/{len(trainloader)}, loss {loss.item()}')

    models['resnet']['CIFAR100'][activation.__name__] = model

cuda


  self.pid = os.fork()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
epoch 1/5, step 100/12500, loss 4.544724464416504
epoch 1/5, step 200/12500, loss 4.220577716827393
epoch 1/5, step 300/12500, loss 5.218964576721191
epoch 1/5, step 400/12500, loss 4.205005645751953
epoch 1/5, step 500/12500, loss 4.823550701141357
epoch 1/5, step 600/12500, loss 4.525684356689453
epoch 1/5, step 700/12500, loss 4.5378804206848145
epoch 1/5, step 800/12500, loss 4.510509490966797
epoch 1/5, step 900/12500, loss 4.418302059173584
epoch 1/5, step 1000/12500, loss 4.307265758514404
epoch 1/5, step 1100/12500, loss 4.828759670257568
epoch 1/5, step 1200/12500, loss 4.7750349044799805
epoch 1/5, step 1300/12500, loss 4.184352874755859
epoch 1/5, step 1400/12500, loss 4.174256324768066
epoch 1/5, step 1500/12500, loss 4.8116865158081055
epoch 1/5, step 1600/12500, loss 3.8852057456970215
epoch 1/5, step 1700/12500, loss 4.145949363708496
epoch 1/5, step 1800/12500, loss 4.711699485778809
epoch 1/5, step 1900/1

In [None]:
# Write every trained CIFAR-100 resnet (the whole module object) to the working directory.
for a in chosen_activation_functions + chosen_activation_functions_v2:
    trained_model = models['resnet']['CIFAR100'][a.__name__]
    torch.save(trained_model, f'./RES_C100_{a.__name__}')

In [None]:
## RESNET18 CIFAR100 TESTS
data, path, transformations = torchvision.datasets.CIFAR100, 'CIFAR100', resnet18_transformations

# The test split does not depend on the activation, so the dataset and loader
# are built once instead of once per model.
testset = data(root=f'./data/{path}/', train=False, transform=resnet18_transformations)
testloader = torch.utils.data.DataLoader(testset, pin_memory=True, batch_size=4, shuffle=False)

for a in chosen_activation_functions + chosen_activation_functions_v2:
    model = models['resnet']['CIFAR100'][a.__name__]
    model.eval()  # evaluation mode (batch norm uses its running statistics)
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in testloader:
            images = images.to(device)
            labels = labels.to(device)
            # Predicted class = index of the largest score in each row.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Resnet with {a.__name__}:\n\tAccuracy: {correct/total}')

Resnet with ReLU:
	Accuracy: 0.4426
Resnet with LeakyReLU:
	Accuracy: 0.4811
Resnet with PReLU:
	Accuracy: 0.5239
Resnet with Tanh:
	Accuracy: 0.4128
Resnet with Mish:
	Accuracy: 0.5105
Resnet with RReLU:
	Accuracy: 0.4623
Resnet with SELU:
	Accuracy: 0.4149
Resnet with ELU:
	Accuracy: 0.4325
Resnet with Sigmoid:
	Accuracy: 0.2495
Resnet with Softplus:
	Accuracy: 0.43
