# Introduction

Instead of learning the filters of a CNN in a supervised way (from labeled data), we can also learn them in an unsupervised way, without any labels.

One paper that already explored this idea is:

["Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction"](https://people.idsia.ch/~ciresan/data/icann2011.pdf) by Jonathan Masci, Ueli Meier, Dan Ciresan, and Jurgen Schmidhuber. ICANN 2011.


# GPU check TensorFlow

In [6]:
import tensorflow as tf
# List the GPUs TensorFlow can see; an empty list means no GPU is visible.
# Uses the stable `tf.config` API rather than its `experimental` alias.
tf.config.list_physical_devices('GPU')

[]

# GPU check PyTorch

In [10]:
import torch
import torch.nn as nn

# Check if a GPU is available and if not, fall back on CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"available device: {device}")

# Define a simple model
model = nn.Sequential(
    nn.Linear(100, 50),
    nn.ReLU(),
    nn.Linear(50, 10)
)

# Move the model's parameters to the selected device
model.to(device)

# Tensors are NOT moved automatically: every input has to be placed on the
# same device as the model explicitly before it is passed in.
# (Named `inputs` so that the builtin `input` is not shadowed.)
inputs = torch.randn(1, 100).to(device)
output = model(inputs)

available device: cuda


# Convolutional autoencoder with Keras

Here is an example of how to learn filters with an autoencoder in TensorFlow / Keras and reuse them in a CNN.

In [None]:
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, UpSampling2D, Input

# 1. load MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add an explicit channel axis (N, 28, 28) -> (N, 28, 28, 1) and divide by
# 255 so that 8-bit pixel values end up in [0, 1].
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255

# Turn the integer digit labels into one-hot vectors of length 10.
y_train = keras.utils.to_categorical(y_train, num_classes=10)
y_test = keras.utils.to_categorical(y_test, num_classes=10)


# 2. define and train the autoencoder
# Encoder: one 3x3 convolution with 32 filters, then 2x2 max-pooling.
input_img = Input(shape=(28, 28, 1))
x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(input_img)
encoded = MaxPooling2D(pool_size=(2, 2), padding='same')(x)

# Decoder: convolve, upsample by 2 and map back to a single channel. The
# sigmoid keeps the reconstruction in the same [0, 1] range as the input.
x = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(encoded)
x = UpSampling2D(size=(2, 2))(x)
decoded = Conv2D(filters=1, kernel_size=(3, 3), padding='same', activation='sigmoid')(x)

autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')
autoencoder.summary()

# The images serve as both input and target (x -> x); no labels are used.
autoencoder.fit(
    x_train,
    x_train,
    validation_data=(x_test, x_test),
    batch_size=128,
    epochs=5,
)


# 3. define the CNN using the trained autoencoder weights
cnn = Sequential()

# The first convolution starts from the kernel and bias learned by the
# encoder part of the autoencoder above: autoencoder.layers[0] is the input
# layer, so autoencoder.layers[1] is the encoder's Conv2D.
# The weights are copied with set_weights() once the layer has been added to
# the model (and is therefore built), instead of through the constructor's
# `weights=` argument, which newer Keras releases (Keras 3) no longer accept.
# This layer uses the default 'valid' padding while the autoencoder used
# 'same'; the kernel shape (3, 3, 1, 32) is identical, so the weights fit.
pretrained_conv = Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1))
cnn.add(pretrained_conv)
pretrained_conv.set_weights(autoencoder.layers[1].get_weights())

cnn.add(Conv2D(64, (3, 3), activation='relu'))
cnn.add(MaxPooling2D(pool_size=(2, 2)))
cnn.add(Dropout(0.25))
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))
cnn.add(Dense(10, activation='softmax'))

cnn.compile(loss=keras.losses.categorical_crossentropy, optimizer='adam', metrics=['accuracy'])
cnn.summary()

# 4. train the CNN
# Supervised training on the one-hot labels; the pretrained first layer is
# not frozen, so it is fine-tuned together with the rest of the network.
cnn.fit(x_train, y_train, batch_size=128, epochs=5, validation_data=(x_test, y_test))


# Convolutional autoencoders with PyTorch

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

# 1. pick the compute device: a CUDA GPU when available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# 2. load MNIST dataset
# The training split is downloaded into the current directory if missing.
# ToTensor converts each image to a tensor (per the torchvision docs, floats
# scaled to [0, 1]).
transform = transforms.ToTensor()
train_data = datasets.MNIST(
    root='.',
    train=True,
    transform=transform,
    download=True,
)
# Shuffled mini-batches of 128 images.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=128,
    shuffle=True,
)

# 3. define the autoencoder
class Autoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel images.

    The encoder is a 3x3 convolution (1 -> 32 channels) with ReLU followed by
    2x2 max-pooling, which halves height and width. The decoder applies a
    second 3x3 convolution with ReLU, upsamples by a factor of 2 and maps
    back to one channel through a sigmoid, so every output value lies
    between 0 and 1. For inputs with even height and width the output has
    the same shape as the input.
    """

    def __init__(self):
        super().__init__()

        # conv -> ReLU -> 2x2 pooling; exposed as `encoder` so that the
        # learned filters can be reused by another network
        self.encoder = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # conv -> ReLU -> x2 upsampling -> conv -> sigmoid
        self.decoder = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(in_channels=32, out_channels=1, kernel_size=3, padding=1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        return self.decoder(self.encoder(x))

autoencoder = Autoencoder().to(device)
# Binary cross-entropy between the reconstruction and the input image.
criterion = nn.BCELoss()
optimizer = optim.Adam(autoencoder.parameters())

# 4. train the autoencoder?
# Set this to False to skip the unsupervised pre-training; the CNN defined
# further down then starts from a randomly initialised encoder.
TRAIN_AUTOENCODER = True

if TRAIN_AUTOENCODER:
    print("\nTraining autoencoder...")
    for epoch in range(5):
        # The labels are ignored: each image is its own target.
        for data, _ in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = autoencoder(data)
            loss = criterion(output, data)
            loss.backward()
            optimizer.step()
        # NOTE: this reports the loss of the last mini-batch of the epoch,
        # not an average over the whole epoch.
        print('Epoch: {}, Loss: {:.4f}'.format(epoch, float(loss)))

    
# 5. define the CNN using the trained autoencoder weights
class CNN(nn.Module):
    """Classifier whose first stage is a pretrained encoder.

    Args:
        pretrained: object exposing an ``encoder`` module that outputs 32
            channels. The module is reused as-is (not copied), so training
            this network also updates ``pretrained.encoder``.

    ``forward`` returns one raw score (logit) per class for 10 classes; no
    softmax is applied.
    """

    def __init__(self, pretrained):
        super().__init__()

        # First stage: the encoder, shared by reference with `pretrained`.
        self.conv1 = pretrained.encoder

        # Second stage: one more conv/pool step followed by the dense head.
        head = [
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Dropout(0.25),
            nn.Flatten(),
            # 64 channels x 7 x 7: assumes 28x28 inputs halved twice, once
            # by the encoder's pooling and once by the pooling above
            nn.Linear(64*7*7, 128),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 10),
        ]
        self.conv2 = nn.Sequential(*head)

    def forward(self, x):
        """Run ``x`` through the encoder and then the classification head."""
        features = self.conv1(x)
        return self.conv2(features)

# The CNN reuses the autoencoder's encoder as its first stage.
cnn = CNN(autoencoder).to(device)
# CrossEntropyLoss is fed the CNN's raw class scores and the integer labels.
criterion = nn.CrossEntropyLoss()
# cnn.parameters() includes the shared encoder, so it is fine-tuned as well.
optimizer = optim.Adam(cnn.parameters())

# 6. train the CNN
print("\nTraining the CNN...")
for epoch in range(5):
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = cnn(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    # NOTE: this reports the loss of the last mini-batch of the epoch,
    # not an average over the whole epoch.
    print('Epoch: {}, Loss: {:.4f}'.format(epoch, float(loss)))



Training the CNN...
Epoch: 0, Loss: 0.3354
Epoch: 1, Loss: 0.1316
Epoch: 2, Loss: 0.0517
Epoch: 3, Loss: 0.1358
Epoch: 4, Loss: 0.0799


In [14]:
cnn

CNN(
  (conv1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout(p=0.25, inplace=False)
    (4): Flatten(start_dim=1, end_dim=-1)
    (5): Linear(in_features=3136, out_features=128, bias=True)
    (6): ReLU()
    (7): Dropout(p=0.5, inplace=False)
    (8): Linear(in_features=128, out_features=10, bias=True)
  )
)