# Imports and data loading

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.decomposition import PCA

In [2]:
# Number of simulation steps; it selects which sample file is loaded and is also
# read by generate_data to size the train/validation/test split.
iterations = 101000
# Switch between the cluster copy of the data and the local copy.
on_remote = False

if on_remote:
    data_dir = Path('/nobackup/smhid20/users/sm_maran/dpr_data/simulations')
else:
    data_dir = Path('C:/Users/svart/Desktop/MEX/data')

# The file name is derived from `iterations` for both locations, so changing
# `iterations` can no longer leave the local path pointing at a different run.
SUBS_data_filename = data_dir / f'QG_samples_SUBS_{iterations}.npy'

X_subs = np.load(SUBS_data_filename).astype('float32')

In [3]:
def generate_data(p_train, k, spinup, spacing):
    """Split ``X_subs`` chronologically into lagged (input, target) pairs.

    Reads the notebook-level ``iterations`` and ``X_subs``. The first ``spinup``
    samples are skipped. Of the remaining samples, a ``p_train`` fraction forms
    the training range, a ``(1 - p_train) / 2`` fraction forms the validation
    range, and everything after that is the test range. Inside each range the
    target is the sample ``k`` steps after its input, and only every
    ``spacing``-th pair is kept.

    Returns:
        Tuple ``(X_train, Y_train, X_val, Y_val, X_test, Y_test)``.
    """
    n_total = iterations + 1
    n_usable = n_total - spinup
    n_train = int(np.round(p_train * n_usable))
    n_val = int(np.round((1 - p_train)/2 * n_usable))

    # Consecutive, non-overlapping [lo, hi) index ranges: train, validation, test
    train_end = spinup + n_train
    val_end = train_end + n_val
    ranges = ((spinup, train_end), (train_end, val_end), (val_end, n_total))

    pieces = []
    for lo, hi in ranges:
        # Inputs stop k samples early so every input has a target k steps ahead
        pieces.append(X_subs[lo:hi - k][::spacing])
        pieces.append(X_subs[lo + k:hi][::spacing])
    return tuple(pieces)

In [13]:
# Lag, in samples of X_subs, between each input and its target (see generate_data)
k = 150
# Number of leading samples skipped before the training range starts
spinup = 1001
# Keep every `spacing`-th input/target pair within each split
spacing = 10
# Fraction of the post-spinup samples assigned to the training range
p_train = 0.8

X_train, Y_train, X_val, Y_val, X_test, Y_test = generate_data(p_train, k, spinup, spacing)

In [14]:
# Normalization statistics: one scalar mean and one scalar standard deviation,
# taken over every entry of the training inputs only.
mean_data, std_data = X_train.mean(), X_train.std()

def normalize_X(X):
    """Shift and scale ``X`` with the training-set scalar mean and std."""
    centered = X - mean_data
    return centered / std_data

X_train_normalized, Y_train_normalized = normalize_X(X_train), normalize_X(Y_train)

X_val_normalized, X_test_normalized = normalize_X(X_val), normalize_X(X_test)

# Methods

## PCA

In [None]:
# Fit a PCA (n_components left at its default) on the normalized training
# inputs and keep the projected training data.
# PCA is imported by name from sklearn.decomposition: a bare `import sklearn`
# does not guarantee that the `decomposition` submodule has been loaded.
pca = PCA()
X_transformed_PCA = pca.fit_transform(X_train_normalized)

In [None]:
def reconstruct_data(transformed_data, pca, l):
    """Map PCA scores back to data space using only the first ``l`` components.

    Args:
        transformed_data: 2-D array of PCA scores, one row per sample.
        pca: Object exposing ``components_`` and ``mean_`` (e.g. a fitted PCA).
        l: Number of leading components to use.

    Returns:
        ``transformed_data[:, :l] . components_[:l, :] + mean_``.
    """
    leading_scores = transformed_data[:, :l]
    leading_axes = pca.components_[:l, :]
    return np.dot(leading_scores, leading_axes) + pca.mean_

def reconstruction_error(original_data, reconstructed_data):
    """Return the mean squared difference between the two arrays (over all entries)."""
    residual = original_data - reconstructed_data
    return np.square(residual).mean()

def plot_image(x, image_shape=(65, 65)):
    """Display a flattened sample as a 2-D image with ``plt.imshow``.

    Args:
        x: Array whose entries can be reshaped to ``image_shape``.
        image_shape: Target (rows, columns) of the image. Defaults to the
            65 x 65 grid used throughout this notebook.
    """
    x = x.reshape(image_shape)
    plt.imshow(x)

def get_encoded(x, l):
    """Return the leading ``l`` entries of ``x``."""
    leading = slice(l)
    return x[leading]


In [None]:
# Numbers of retained components: ten log-spaced values starting at 10 and
# ending at (approximately, after integer truncation) 4225
l_values = list(map(int, np.logspace(1, np.log10(4225), 10)))

# Training reconstruction error for each number of components
errors = []
for l in l_values:
    reconstructed_data = reconstruct_data(X_transformed_PCA, pca, l)
    errors.append(reconstruction_error(X_train_normalized, reconstructed_data))

# Error versus number of components on log-log axes
fig, ax = plt.subplots(figsize=(6, 6))
ax.loglog(l_values, errors, marker='o')
ax.grid(True, which='both')
ax.set_xlabel('Number of components (l)')
ax.set_ylabel('Reconstruction error')
ax.set_title('Reconstruction error as a function of l')
fig.tight_layout()
plt.show()

In [None]:
# Reconstruct the training data from the first l principal components and
# print the mean squared reconstruction error
l = 100
X_PCA = reconstruct_data(X_transformed_PCA, pca, l)
error = reconstruction_error(X_train_normalized, X_PCA)
print(error)
# Show one reconstructed training sample as an image
plot_image(X_PCA[1000])

## Autoencoder

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [7]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [15]:
# Copy the normalized training inputs onto `device` as a float32 tensor
X_train_tensor = torch.tensor(X_train_normalized, dtype=torch.float32).to(device)

# Each dataset item pairs a sample with the one that follows it in X_train_tensor
train_dataset = TensorDataset(X_train_tensor[:-1], X_train_tensor[1:])
batch_size = 32  # mini-batch size of the shuffled training loader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

### Linear

In [12]:
class Autoencoder(nn.Module):
    """Fully connected autoencoder with one linear layer on each side.

    The encoder maps ``input_size`` features to ``hidden_size[0]`` features and
    the decoder maps them back; no activation function is applied, so the whole
    model is an affine map through a bottleneck.

    Args:
        input_size: Number of features of each flattened input sample.
        hidden_size: Sequence whose first entry is the bottleneck width.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        latent_size = hidden_size[0]
        self.encoder = nn.Sequential(nn.Linear(input_size, latent_size))
        self.decoder = nn.Sequential(nn.Linear(latent_size, input_size))
        # Re-initialize every linear layer's weights (see init_weights)
        self.apply(self.init_weights)

    @staticmethod
    def init_weights(m):
        """Apply Xavier-uniform initialization to the weight of linear layers."""
        if not isinstance(m, nn.Linear):
            return
        torch.nn.init.xavier_uniform_(m.weight)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        return self.decoder(self.encoder(x))


In [None]:
# With saved_model = False a new model, optimizer and loss history are created;
# otherwise the loop below keeps using the objects already in the kernel.
saved_model = False
if not saved_model:
    linear_model = Autoencoder(input_size=4225, hidden_size=(100,))  # 4225 inputs, 100-wide bottleneck
    linear_model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(linear_model.parameters(), lr=1e-3)

    loss_values_linear = []

num_epochs = 50
for epoch in range(num_epochs):
    total_loss = 0
    # Each batch is a (sample, following sample) pair; only the first is used here
    for img, _ in train_loader:
        optimizer.zero_grad()

        # Plain reconstruction objective: the target is the input itself
        loss = criterion(linear_model(img), img)
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch
    total_loss = total_loss / len(train_loader)
    loss_values_linear.append(total_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {total_loss:.4f}')


In [None]:
# Per-epoch average training loss of the linear autoencoder, log-log axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.loglog(loss_values_linear, color='blue')
ax.set_title('Loss as a Function of Epochs\nLinear model')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Loss')
ax.grid(True)
plt.show()

### Convolutional

In [18]:
class ConvAutoencoder(nn.Module):
    """Convolutional autoencoder operating on flattened square images.

    Encoder: unflatten -> 3x3 conv (1 -> ``channels``) -> ReLU -> flatten ->
    linear to ``latent_dim``. Decoder: linear from ``latent_dim`` -> ReLU ->
    unflatten -> 3x3 conv (``channels`` -> 1) -> flatten. Inputs and outputs
    have shape ``(batch, image_size * image_size)``.

    Args:
        image_size: Side length of the square image. Defaults to 65.
        latent_dim: Width of the bottleneck. Defaults to 100.
        channels: Number of feature maps of the convolutions. Defaults to 16.

    All layer sizes are derived from these arguments, so the defaults
    reproduce the original fixed architecture (16 * 4225 features, 100 latent
    units) with the same module layout and state-dict keys.
    """

    def __init__(self, image_size=65, latent_dim=100, channels=16):
        super().__init__()
        self.image_size = image_size
        self.latent_dim = latent_dim
        self.channels = channels

        # Size of the flattened conv feature maps (padding=1 keeps H and W)
        n_features = channels * image_size * image_size

        self.encoder = nn.Sequential(
            nn.Unflatten(1, (1, image_size, image_size)),
            nn.Conv2d(in_channels=1, out_channels=channels, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.Flatten(start_dim=1),
            nn.Linear(in_features=n_features, out_features=latent_dim),
            )

        self.decoder = nn.Sequential(
            nn.Linear(in_features=latent_dim, out_features=n_features),
            nn.ReLU(True),
            nn.Unflatten(1, (channels, image_size, image_size)),
            nn.Conv2d(in_channels=channels, out_channels=1, kernel_size=3, padding=1),
            nn.Flatten(start_dim=1),
            )

        self.apply(self.init_weights)

    @staticmethod
    def init_weights(m):
        """Kaiming-uniform weights and zero biases for conv/linear layers.

        BatchNorm2d layers, if any are added, get weight 1 and bias 0; the
        current architecture contains none.
        """
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            torch.nn.init.kaiming_uniform_(m.weight, mode='fan_in', nonlinearity='relu')
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            torch.nn.init.constant_(m.weight, 1)
            torch.nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return the reconstruction of the flattened batch ``x``."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [20]:
# Regular MSE loss

# saved_model = True means "keep training the conv_model already in this kernel".
saved_model = True
# On a fresh kernel there is no conv_model to resume from, so fall back to a new
# model instead of failing with a NameError inside the training loop.
resume_training = saved_model and 'conv_model' in globals()
if saved_model and not resume_training:
    print('saved_model is True but no conv_model exists yet; initializing a new model.')

if not resume_training:
    conv_model = ConvAutoencoder()
    conv_model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(conv_model.parameters(), lr=1e-3, weight_decay=0.1)

    loss_values_conv = []

# The evaluation cells call .eval() on this model; switch back before training.
conv_model.train()

num_epochs = 200

for epoch in range(num_epochs):
    total_loss = 0
    # Each batch is a (sample, following sample) pair; only the first is used here
    for img, _ in train_loader:
        output = conv_model(img)
        loss = criterion(output, img)  # reconstruction target is the input itself
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses for this epoch
    total_loss = total_loss / len(train_loader)
    loss_values_conv.append(total_loss)
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {total_loss:.4f}')

Epoch [1/200], Average Loss: 4.7506
Epoch [2/200], Average Loss: 0.7446
Epoch [3/200], Average Loss: 0.5409
Epoch [4/200], Average Loss: 0.4358
Epoch [5/200], Average Loss: 0.3756
Epoch [6/200], Average Loss: 0.3456
Epoch [7/200], Average Loss: 0.3240
Epoch [8/200], Average Loss: 0.3045
Epoch [9/200], Average Loss: 0.2928
Epoch [10/200], Average Loss: 0.2899
Epoch [11/200], Average Loss: 0.2855


KeyboardInterrupt: 

In [None]:
# Count the trainable parameters of the convolutional autoencoder
model_parameters = [p for p in conv_model.parameters() if p.requires_grad]
params = sum(np.prod(p.size()) for p in model_parameters)
print(params)

# Per-epoch average training loss of the convolutional model, log-log axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.loglog(loss_values_conv, color='blue')
ax.set_title('Loss as a Function of Epochs\nConv model')
ax.set_xlabel('Epoch')
ax.set_ylabel('Average Loss')
ax.grid(True)
plt.show()

## Regularized Loss CAE

In [24]:
# Reconstruction loss plus a penalty on the latent distance between a sample
# and the sample that follows it

criterion = nn.MSELoss()
alpha = 1  # weight of the temporal (latent-distance) term
saved_model = False
if not saved_model:
    conv_model = ConvAutoencoder()
    conv_model.to(device)

    optimizer = optim.Adam(conv_model.parameters(), lr=1e-3)

    loss_values_conv = []

num_epochs = 10

for epoch in range(num_epochs):
    # Running sums of the per-batch losses for this epoch
    total_loss = 0
    total_loss_reconstruction = 0
    total_loss_temporal = 0

    for current_state, next_state in train_loader:
        optimizer.zero_grad()

        # Latent codes of both members of the pair
        current_latent = conv_model.encoder(current_state)
        next_latent = conv_model.encoder(next_state)

        # Temporal term: MSE between the two latent codes
        loss_temporal = criterion(current_latent, next_latent)

        # Reconstruction term: sum of the MSEs of both reconstructions
        current_reconstruction = conv_model.decoder(current_latent)
        next_reconstruction = conv_model.decoder(next_latent)
        loss_reconstruction = (
            criterion(current_reconstruction, current_state)
            + criterion(next_reconstruction, next_state)
        )

        loss = loss_reconstruction + alpha * loss_temporal

        total_loss += loss.item()
        total_loss_reconstruction += loss_reconstruction.item()
        total_loss_temporal += loss_temporal.item()

        loss.backward()
        optimizer.step()

    n_batches = len(train_loader)
    # The reconstruction sum holds two MSE terms per batch, hence the factor 2
    total_loss_reconstruction /= 2 * n_batches
    total_loss_temporal /= n_batches
    total_loss /= n_batches

    loss_values_conv.append([total_loss, total_loss_reconstruction, total_loss_temporal])
    print(f'Epoch [{epoch+1}/{num_epochs}], Average Loss: {total_loss:.4f}, Reconstruction: {total_loss_reconstruction:.4f}, Temporal: {total_loss_temporal:.4f}')

Epoch [1/10], Average Loss: 5.8560, Reconstruction: 2.8750, Temporal: 0.1060
Epoch [2/10], Average Loss: 1.7105, Reconstruction: 0.8400, Temporal: 0.0305


KeyboardInterrupt: 

## Evaluation

In [None]:
# Normalize the validation inputs with the training statistics and copy them
# onto `device` as a float32 tensor
X_val_normalized = normalize_X(X_val)
X_val_tensor = torch.tensor(X_val_normalized, dtype=torch.float32).to(device)

In [None]:
# Model under evaluation; put it in inference mode before using it
model = conv_model
model.eval()

# PCA baseline: project the validation data and rebuild it from l components
l = 100
X_PCA_test = reconstruct_data(pca.transform(X_val_normalized), pca, l)

# Autoencoder reconstruction of the whole validation set. No gradients are
# needed here, so skip building the autograd graph for this large batch.
with torch.no_grad():
    X_AE_test = model(X_val_tensor).cpu().numpy()

# Mean squared reconstruction error of both methods on the validation set
error_pca = reconstruction_error(X_val_normalized, X_PCA_test)
error_ae = reconstruction_error(X_val_normalized, X_AE_test)
print(error_pca, error_ae)

In [None]:
model.eval()

cmap = 'viridis'
image_shape = (65, 65)  # (rows, columns) of one sample

# Every array indexed below holds validation data, so draw the index from the
# validation set rather than from the test set.
random_index = np.random.randint(len(X_val_normalized))

img_train = X_val_normalized[random_index].reshape(image_shape)
img_pca = X_PCA_test[random_index].reshape(image_shape)

# Autoencoder reconstruction and latent code of the selected sample
with torch.no_grad():
    sample = X_val_tensor[random_index].unsqueeze(0)
    X_AE = model(sample).cpu().numpy()
    X_enc = model.encoder(sample).cpu().numpy()
img_ae = X_AE.reshape(image_shape)
# Keep the latent code as a flat vector so no latent shape has to be assumed
img_enc = X_enc.ravel()

# Shared colour scale for the three images
vmin = min(img_pca.min(), img_train.min(), img_ae.min())
vmax = max(img_pca.max(), img_train.max(), img_ae.max())

fig, axes = plt.subplots(2, 3, figsize=(10, 7))

# Top row: original, PCA reconstruction, autoencoder reconstruction
top_row = [
    (img_train, 'Original Image'),
    (img_pca, 'PCA Image'),
    (img_ae, 'Autoencoder Image'),
]
for ax, (img, title) in zip(axes[0], top_row):
    ax.imshow(img, cmap=cmap, vmin=vmin, vmax=vmax)
    ax.set_title(title)
    ax.axis('off')

# Bottom left: the autoencoder's latent code as a line plot
axes[1,0].plot(img_enc)
axes[1,0].set_title('Encoded Image')
axes[1,0].axis('off')

# Bottom middle/right: reconstruction residuals on a shared colour scale,
# titled with the mean squared error of that reconstruction
diff_pca = img_pca - img_train
diff_ae = img_ae - img_train
vmin_diff = min(diff_pca.min(), diff_ae.min())
vmax_diff = max(diff_pca.max(), diff_ae.max())

er_PCA = np.round(reconstruction_error(img_train, img_pca), 4)
er_AE = np.round(reconstruction_error(img_train, img_ae), 4)

for ax, diff, err in zip(axes[1, 1:], (diff_pca, diff_ae), (er_PCA, er_AE)):
    ax.imshow(diff, cmap=cmap, vmin=vmin_diff, vmax=vmax_diff)
    ax.set_title(str(err))
    ax.axis('off')

plt.show()

In [None]:
model.eval()

cmap = 'viridis'
l = 100
image_shape = (65, 65)  # (rows, columns) of one sample

# Index into the validation arrays used below; the upper bound leaves room for
# the "+ 1" neighbour so the last sample can never be selected.
random_index = np.random.randint(len(X_val_normalized) - 1)

img_train = X_val_normalized[random_index].reshape(image_shape)
img_train_next = X_val_normalized[random_index+1].reshape(image_shape)

img_pca = X_PCA_test[random_index].reshape(image_shape)
img_pca_next = X_PCA_test[random_index+1].reshape(image_shape)

with torch.no_grad():
    img_ae = model(X_val_tensor[random_index].unsqueeze(0)).cpu().numpy().reshape(image_shape)
    img_ae_next = model(X_val_tensor[random_index+1].unsqueeze(0)).cpu().numpy().reshape(image_shape)

# Colour scale shared by all panels (taken from the original and PCA images)
vmin = min(img_pca.min(), img_train.min(), img_train_next.min(), img_pca_next.min())
vmax = max(img_pca.max(), img_train.max(), img_train_next.max(), img_pca_next.max())

# Pixel coordinates for the arrow plots: X varies along columns, Y along rows
X, Y = np.meshgrid(np.arange(image_shape[1]), np.arange(image_shape[0]))

fig, axes = plt.subplots(3,3, figsize=(12, 12))

# One column per method: sample, following sample, and their difference
columns = [
    ('Original Image', img_train, img_train_next),
    ('PCA Image', img_pca, img_pca_next),
    ('AE Image', img_ae, img_ae_next),
]
for col, (title, img_now, img_next) in enumerate(columns):
    axes[0,col].imshow(img_now, cmap=cmap, vmin=vmin, vmax=vmax)
    axes[0,col].set_title(title)

    axes[1,col].imshow(img_next, cmap=cmap, vmin=vmin, vmax=vmax)

    difference = img_next - img_now
    # np.gradient returns one array per axis in axis order: rows (y) first,
    # then columns (x).
    gradient_y, gradient_x = np.gradient(difference)
    axes[2,col].imshow(difference, cmap=cmap, vmin=vmin, vmax=vmax)
    # angles='xy' draws arrows in data coordinates, so they follow the
    # image's inverted row axis instead of screen directions.
    axes[2,col].quiver(X, Y, gradient_x, gradient_y, angles='xy')

    for row in range(3):
        axes[row,col].axis('off')

plt.show()

In [None]:
model.eval()

l = 100

# Index into the validation arrays used below; the upper bound leaves room for
# the following sample.
random_index = np.random.randint(len(X_val_normalized) - 1)
another_random_index = random_index + 1

# PCA latent codes: project the two samples onto the principal axes and keep
# the first l scores. (X_PCA_test holds reconstructed images, so slicing it
# would return pixels rather than the encoding.)
pca_scores = pca.transform(X_val_normalized[[random_index, another_random_index]])
img_enc_pca = get_encoded(pca_scores[0], l)
img_enc_pca_next = get_encoded(pca_scores[1], l)

vmin_pca = min(img_enc_pca.min(), img_enc_pca_next.min())
vmax_pca = max(img_enc_pca.max(), img_enc_pca_next.max())

# Mean squared change of the code, scaled by the value range of the two codes
err_pca = str(np.round(np.mean(((img_enc_pca_next - img_enc_pca)/(vmax_pca - vmin_pca))**2),4))

# Autoencoder latent codes of the same two samples
with torch.no_grad():
    img_enc_ae = model.encoder(X_val_tensor[random_index].unsqueeze(0)).cpu().numpy().flatten()
    img_enc_ae_next = model.encoder(X_val_tensor[another_random_index].unsqueeze(0)).cpu().numpy().flatten()

vmin_ae = min(img_enc_ae.min(), img_enc_ae_next.min())
vmax_ae = max(img_enc_ae.max(), img_enc_ae_next.max())

err_ae = str(np.round(np.mean(((img_enc_ae_next - img_enc_ae)/(vmax_ae - vmin_ae))**2),4))

# Rows: code of the sample, code of the following sample, their difference.
# Columns: PCA (left) and autoencoder (right).
fig, axes = plt.subplots(3,2, figsize=(12, 12))

axes[0,0].plot(img_enc_pca)
axes[0,0].set_title(f'PCA\n{err_pca}')
axes[0,0].set_ylim([vmin_pca, vmax_pca])
axes[1,0].plot(img_enc_pca_next)
axes[1,0].set_ylim([vmin_pca, vmax_pca])
axes[2,0].plot(img_enc_pca_next - img_enc_pca)

axes[0,1].plot(img_enc_ae)
axes[0,1].set_title(f'AE\n{err_ae}')
axes[0,1].set_ylim([vmin_ae, vmax_ae])
axes[1,1].plot(img_enc_ae_next)
axes[1,1].set_ylim([vmin_ae, vmax_ae])
axes[2,1].plot(img_enc_ae_next - img_enc_ae)

plt.show()
