In [1]:
import cv2
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
import os
import numpy as np
import random
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import preprocessing

In [2]:
class CnnAutoencoder(nn.Module):
    """Convolutional autoencoder built from a list of channel counts.

    The encoder is a stack of ``Conv2d -> ReLU -> max-pool`` stages, one per
    consecutive pair in ``channel_maps``. The decoder mirrors it with
    ``ConvTranspose2d`` stages over the reversed channel list, using ReLU
    between stages and a sigmoid on the final stage.

    Args:
        scale: Kernel size passed to ``F.max_pool2d`` after every encoder stage.
        channel_maps: Channel counts, e.g. ``[3, 16, 8, 4]``. Defaults to an
            empty list, which yields a model with no layers.
        padding: Padding of every encoder convolution.
        kernel_size: Kernel size of every encoder convolution.
        num_channels: Stored on the instance; not read by the methods below.
        img_width: Stored on the instance; not read by the methods below.
        img_height: Stored on the instance; not read by the methods below.
        device: Stored on the instance; the module is NOT moved to it here.
    """

    def __init__(self, scale=2, channel_maps=None, padding=1, kernel_size=3, num_channels=3, img_width=100, img_height=100, device=torch.device("cpu")):
        super().__init__()

        self.device = device

        self.img_width      = img_width
        self.img_height     = img_height
        self.num_channels   = num_channels
        self.kernel_size    = kernel_size
        self.padding        = padding
        # Copy so that neither a shared default nor the caller's list can be
        # mutated behind the model's back after construction.
        self.channel_maps   = list(channel_maps) if channel_maps is not None else []
        self.scale          = scale

        self.reversed_channel_maps = list(reversed(self.channel_maps))

        # Build convolutional layers (attribute name is part of the
        # state_dict keys, so it must stay stable for saved checkpoints).
        self.convolutional_layers = nn.ModuleList(
            nn.Conv2d(in_ch, out_ch, kernel_size=self.kernel_size, padding=self.padding)
            for in_ch, out_ch in zip(self.channel_maps[:-1], self.channel_maps[1:])
        )

        # Build deconvolutional layers. Kernel 2 / stride 2 doubles the spatial
        # size per stage, so the decoder only undoes the encoder's pooling
        # exactly when ``scale == 2``.
        self.deconvolutional_layers = nn.ModuleList(
            nn.ConvTranspose2d(in_ch, out_ch, 2, stride=2)
            for in_ch, out_ch in zip(self.reversed_channel_maps[:-1], self.reversed_channel_maps[1:])
        )

    def conv(self, x):
        """Run the encoder: conv, ReLU, then max-pool by ``scale`` per stage."""
        for conv_layer in self.convolutional_layers:
            x = F.max_pool2d(F.relu(conv_layer(x)), self.scale)

        return x

    def compress(self, x):
        """Encode ``x`` and flatten each sample's feature maps into one vector.

        Returns a 2-D tensor with one row per batch element.
        """
        x = self.conv(x)

        # flatten() gives the same (batch, C*H*W) result as the former
        # view(-1, C*H*W) but also works on non-contiguous feature maps.
        return torch.flatten(x, start_dim=1)

    def deconv(self, x):
        """Run the decoder: ReLU after every stage except the last (sigmoid)."""
        last = len(self.deconvolutional_layers) - 1

        for i, deconv_layer in enumerate(self.deconvolutional_layers):
            x = deconv_layer(x)
            x = torch.sigmoid(x) if i == last else F.relu(x)

        return x

    def forward(self, x):
        """Encode then decode ``x`` and return the reconstruction."""
        x = self.conv(x)
        x = self.deconv(x)

        return x


In [3]:
class CnnAutoencoderDataset(Dataset):
    """Dataset of images from one directory, for autoencoder use.

    Every item is an ``(input, target)`` pair holding the same image, resized
    to ``(img_width, img_height)``, scaled to ``[0, 1]`` and laid out
    channels-first.

    Args:
        img_dir: Directory containing the image files.
        img_width: Width every image is resized to.
        img_height: Height every image is resized to.
    """

    def __init__(self, img_dir, img_width, img_height):
        self.img_dir    = img_dir
        self.img_width  = img_width
        self.img_height = img_height

        # os.listdir() returns entries in arbitrary order; sort so that index
        # -> file is reproducible between runs, and skip sub-directories,
        # which can never be decoded as images.
        self.images     = sorted(
            name for name in os.listdir(img_dir)
            if os.path.isfile(os.path.join(img_dir, name))
        )

        # cv2.resize expects the target size as (width, height).
        self.dim = (img_width, img_height)

    def __len__(self):
        """Number of image files found in ``img_dir``."""
        return len(self.images)

    def __getitem__(self, index):
        """Load, resize and normalise the image at ``index``.

        Returns:
            Two independent float tensors with identical content, each of
            shape ``(channels, img_height, img_width)``.

        Raises:
            ValueError: If the file cannot be decoded as an image.
        """
        img_path = os.path.join(self.img_dir, self.images[index])

        img = cv2.imread(img_path)

        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise ValueError("Could not read image file: {}".format(img_path))

        # Scale pixel values by 1/255 and move channels first (HWC -> CHW).
        img = (cv2.resize(img, self.dim) / 255).transpose((2, 0, 1))

        # Input is the Output
        tensor = torch.Tensor(img)

        return tensor, tensor.clone()

In [4]:
# --- Runtime -----------------------------------------------------------------
device = "cuda"      # requested backend; resolved to a concrete device later
gpu_index = 0        # which CUDA device to use when device == "cuda"

# --- Data loading ------------------------------------------------------------
img_dir = "/home/ralampay/Desktop/training/Male"   # directory of input images
img_width = 128
img_height = 128
num_channels = 3
batch_size = 1
chunk_size = 1       # not referenced by the cells visible in this notebook

# --- Model architecture ------------------------------------------------------
channel_maps = [3, 16, 8, 4]   # channel counts from input to latent feature maps
kernel_size = 3
padding = 1
scale = 2                      # max-pool factor applied after each encoder stage

# --- Files -------------------------------------------------------------------
model_file = "cnn-model.pth"   # checkpoint to load
output_file = "data.csv"       # destination of the latent vectors
cont = False                   # not referenced by the cells visible in this notebook

In [5]:
# Resolve the requested backend to a concrete device string. CUDA is only
# used when it is actually available; otherwise fall back to the CPU instead
# of crashing inside torch.cuda.get_device_name().
if device == 'cuda':
    if torch.cuda.is_available():
        print("CUDA Device: {}".format(torch.cuda.get_device_name(gpu_index)))
        device = "cuda:{}".format(gpu_index)
    else:
        print("CUDA requested but not available; falling back to CPU")
        device = "cpu"

# Build the autoencoder from the configuration cell and move its parameters
# to the resolved device. The device is also handed to the constructor so the
# value stored on the model matches where the parameters live.
model = CnnAutoencoder(
    scale=scale,
    channel_maps=channel_maps,
    padding=padding,
    kernel_size=kernel_size,
    num_channels=num_channels,
    img_width=img_width,
    img_height=img_height,
    device=torch.device(device)
).to(device)

CUDA Device: NVIDIA GeForce RTX 3050 Laptop GPU


In [6]:
# Restore the trained weights. The checkpoint is expected to be a dict with
# 'state_dict' (model weights) and 'optimizer' entries.
print("Loading model from {}".format(model_file))

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint saved on a GPU can still be loaded on a CPU-only or different-GPU
# host.
# NOTE(security): torch.load unpickles the file; only load checkpoints from a
# trusted source.
state = torch.load(model_file, map_location=device)
model.load_state_dict(state['state_dict'])

# Keep the saved optimizer entry on the model, as stored in the checkpoint.
model.optimizer = state['optimizer']

Loading model from cnn-model.pth


In [9]:
# Image directory wrapped as a Dataset; every image is resized to the
# configured width and height.
dataset = CnnAutoencoderDataset(img_dir=img_dir, img_width=img_width, img_height=img_height)

# Sequential, complete pass over the dataset: no shuffling, and the final
# (possibly smaller) batch is kept.
loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=False)

In [11]:
# Encode every image into its latent vector, min-max normalise each latent
# dimension over the whole dataset, and write the result to `output_file`.
raw_data = []

# Inference only: switch to eval mode and disable autograd so no computation
# graph is built and kept alive for each batch.
model.eval()

with torch.no_grad():
    for data, _targets in loader:
        data = data.float().to(device=device)
        compressed_data = model.compress(data)

        # One float32 row per image in the batch.
        raw_data.extend(compressed_data.cpu().numpy().astype(np.float32))

if not raw_data:
    raise ValueError("No images found in {}; nothing to export".format(img_dir))

# Column names x0 .. x{latent_size - 1}.
columns = ["x{}".format(i) for i in range(len(raw_data[0]))]

df_raw = pd.DataFrame(raw_data, columns=columns)

print("Latent Size: {}".format(len(columns)))

print("Normalizing...")

# Rescale every column independently to the [0, 1] range.
min_max_scaler = preprocessing.MinMaxScaler()

x_scaled = min_max_scaler.fit_transform(df_raw.values)

df_x = pd.DataFrame(x_scaled, columns=columns)

print("Writing to file {}".format(output_file))
df_x.to_csv(output_file, index=False, header=columns)

Latent Size: 1024
Normalizing...
Writing to file data.csv
