In [None]:
import os, sys
# Resolve the project root (the notebook's working directory) and the two
# vendored module directories that live underneath it.
project_dir = os.path.join(os.getcwd(),'./')
medmnist_dir = os.path.join(project_dir, 'modules/MedMNIST')
ipdl_dir = os.path.join(project_dir, 'modules/IPDL')

# Make all three importable; the membership guard keeps re-runs of this cell
# from appending duplicate entries to sys.path.
for extra_path in (project_dir, medmnist_dir, ipdl_dir):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import torch

In [None]:
from experiment.autoencoder import PCAE

# Experiment object shared by the rest of the notebook: later cells call its
# train() and load_model() methods and read its .model, .model_name and .ip
# attributes.
pcae_exp = PCAE() 

# Pretrain (FMNIST)

In [None]:
from torchvision import datasets
from torchvision.transforms import Compose, Resize, ToTensor
from torch.utils.data import DataLoader

# Preprocessing applied to every image: resize to 64x64, then convert to a tensor.
transforms = Compose([
    Resize((64, 64)),
    ToTensor(),
])

# Both splits share the same root folder, transform and download flag.
fmnist_kwargs = dict(root="dataset", transform=transforms, download=True)
train_set = datasets.FashionMNIST(train=True, **fmnist_kwargs)
test_set = datasets.FashionMNIST(train=False, **fmnist_kwargs)

# Only the training loader is shuffled; the held-out split is read in a fixed
# order and used as the validation loader during pretraining.
train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(test_set, batch_size=128, shuffle=False)

In [None]:
from torch.utils.tensorboard import SummaryWriter

# TensorBoard events for the pretraining run are grouped under the model name.
fmnist_log_dir = 'logs/{}/AE_FMNIST'.format(pcae_exp.model_name)
tb_writer = SummaryWriter(fmnist_log_dir)

# Pretrain on Fashion-MNIST for 250 epochs, validating on `val_loader`.
pcae_exp.train(train_loader, val_loader, tb_writer, n_epoch=250)

## Save results

### MI Estimation

In [None]:
# Export what `pcae_exp.ip` recorded (the MI estimates, per the section header)
# as a pickled DataFrame.
mi_path = 'data/PCAE/MI/AE_FMNIST.pkl'

# `to_pickle` does not create missing parent directories; make sure the target
# folder exists so the save cannot fail after the long training run.
os.makedirs(os.path.dirname(mi_path), exist_ok=True)

df = pcae_exp.ip.to_df()
df.to_pickle(mi_path)

### Model state dict

In [None]:

# Persist the pretrained weights; the fine-tuning stage reloads this file.
fmnist_weights_path = 'data/PCAE/weights/AE/FMNIST.pt'

# `torch.save` fails if the parent directory is missing, so create it first.
os.makedirs(os.path.dirname(fmnist_weights_path), exist_ok=True)
torch.save(pcae_exp.model.state_dict(), fmnist_weights_path)

# Fitting phase

Using the MedMNIST dataset.

In [None]:
import medmnist
from medmnist import INFO

# MedMNIST subset used for the fitting phase (alternative: 'breastmnist').
data_flag = 'tissuemnist'
download = True

# Look up the subset's metadata and resolve its dataset class by name.
info = INFO[data_flag]
DataClass = getattr(medmnist, info['python_class'])

# Task type, channel count and number of label classes from the metadata.
task, n_channels = info['task'], info['n_channels']
n_classes = len(info['label'])

In [None]:
from torchvision.transforms import Compose, Resize, ToTensor

# Same preprocessing as the Fashion-MNIST stage: resize to 64x64, convert to tensor.
data_transform = Compose([
    Resize((64, 64)),
    ToTensor(),
])

train_dataset = DataClass(split='train', transform=data_transform, download=download)
# Evaluate during training on the validation split so the test split stays
# held out. This previously loaded split='test', which made it an exact
# duplicate of `test_dataset`.
eval_dataset = DataClass(split='val', transform=data_transform, download=download)
test_dataset = DataClass(split='test', transform=data_transform, download=download)

In [None]:
# Scratch arithmetic left in the notebook; evaluates to 16.0.
# NOTE(review): presumably 128 eval samples / 8 classes = 16 per class, which
# matches the eval sampling strategy in the "Reduce dataset" cell — confirm or remove.
128/8

## Reduce dataset

In [None]:
from imblearn.under_sampling import RandomUnderSampler

# Number of samples to keep per class, one dict per entry of `datasets_to_reduce`
# (first the training set, then the evaluation set).
# sampling_strategies = [{0: 128, 1: 128}, {0: 16, 1: 16}] # Breast dataset
sampling_strategies = [ # Tissue Dataset
        {0: 1024, 1: 1024, 2: 1024, 3: 1024, 4: 1024, 5: 1024, 6: 1024, 7: 1024},
        {0: 16, 1: 16, 2: 16, 3: 16, 4: 16, 5: 16, 6: 16, 7: 16}
    ] 

# Deliberately not called `datasets`: that name is the `torchvision.datasets`
# module imported by the Fashion-MNIST cell and must not be shadowed.
datasets_to_reduce = [train_dataset, eval_dataset]

# NOTE: this mutates the dataset objects in place (imgs/labels are replaced by
# the class-balanced subset).
for idx, dataset in enumerate(datasets_to_reduce):
    x = dataset.imgs
    y = dataset.labels

    sampling_strategy = sampling_strategies[idx]
    # Fixed random_state so the same subset is drawn on every run.
    undersampler = RandomUnderSampler(sampling_strategy=sampling_strategy, random_state=123)
    # The sampler expects a 2-D feature matrix and 1-D labels: flatten each
    # image to a row and the labels to a vector.
    X_resampled, y_resampled = undersampler.fit_resample(x.reshape((x.shape[0], -1)), y.flatten())

    # Restore the original per-sample shape. Using x.shape[1:] (instead of only
    # the first two image axes) also handles images that carry a channel axis.
    dataset.imgs = X_resampled.reshape((-1, *x.shape[1:]))
    dataset.labels = y_resampled.reshape((-1, y.shape[1]))

In [None]:
# Report how many samples remain in each split after undersampling.
for template, split in (
    ('Train Dataset: {} samples', train_dataset),
    ('Eval Dataset: {} samples', eval_dataset),
):
    print(template.format(len(split)))

## Training

In [None]:
# Start the fitting phase from the weights saved after Fashion-MNIST pretraining.
pretrained_weights_path = os.path.join(project_dir, 'data/PCAE/weights/AE/FMNIST.pt')
pcae_exp.load_model(pretrained_weights_path)

In [None]:
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader

# Loaders over the undersampled MedMNIST splits; only training is shuffled.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=128, shuffle=False)

# Separate TensorBoard run directory for the fitting phase.
tissue_log_dir = 'logs/{}/AE_TISSUE'.format(pcae_exp.model_name)
tb_writer = SummaryWriter(tissue_log_dir)

# Continue training the loaded model on the MedMNIST data for 500 epochs.
pcae_exp.train(train_loader, eval_loader, tb_writer, n_epoch=500)

## Save results

### MI Estimation

In [None]:
# Export what `pcae_exp.ip` recorded (the MI estimates, per the section header)
# as a pickled DataFrame.
mi_path = 'data/PCAE/MI/AE_TISSUE.pkl'

# `to_pickle` does not create missing parent directories; make sure the target
# folder exists so the save cannot fail after the long training run.
os.makedirs(os.path.dirname(mi_path), exist_ok=True)

df = pcae_exp.ip.to_df()
df.to_pickle(mi_path)

### Model state dict

In [None]:
# Persist the weights obtained after the MedMNIST fitting phase.
tissue_weights_path = 'data/PCAE/weights/AE/TISSUE.pt'

# `torch.save` fails if the parent directory is missing, so create it first.
os.makedirs(os.path.dirname(tissue_weights_path), exist_ok=True)
torch.save(pcae_exp.model.state_dict(), tissue_weights_path)