In [1]:
import pyarrow.parquet as pa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import seaborn as sns
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from category_encoders import BinaryEncoder
from sklearn.preprocessing import MinMaxScaler
import torch
#from tensorboardX import SummaryWriter
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from kneed import KneeLocator
import matplotlib.cm as cm

In [38]:
class PCAAutoencoder(nn.Module):
    def __init__(self, encoder, decoder, last_hidden_shape):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.last_hidden_shape = last_hidden_shape
        self.bottleneck = nn.Linear(in_features=self.last_hidden_shape, out_features=1)
        self.bottleneck_2 = None


    def increase_latentdim(self):
        # Combining bottleneck and bottleneck expansion layer and turning off gradients
        if self.bottleneck_2:
            new_bottleneck = nn.Linear(in_features=self.last_hidden_shape, out_features=self.bottleneck.out_features + 1)
            new_bottleneck.weight = nn.Parameter(torch.cat((self.bottleneck.weight, self.bottleneck_2.weight)), requires_grad=False)
            new_bottleneck.bias = nn.Parameter(torch.cat((self.bottleneck.bias, self.bottleneck_2.bias)), requires_grad=False)
            self.bottleneck = new_bottleneck
        # Creating new bottleneck expansion layer
        self.bottleneck_2 = nn.Linear(self.last_hidden_shape, 1)
        self.bottleneck_2.requires_grad_(requires_grad=True)
        # Turning off gradients for all layers in encoder (just in case)
        for layer in self.encoder:
            layer.requires_grad_(requires_grad=False)
        self._recreate_decoder()


    def _recreate_decoder(self):
        # Copying old decoder to new
        new_decoder = nn.ModuleList()
        # Increasing first dimension in first layer
        for i, layer in enumerate(self.decoder):
            if i == 0 and isinstance(layer, nn.Linear):
                new_decoder.append(nn.Linear(in_features=layer.in_features + 1, out_features=layer.out_features))
            else:
                new_decoder.append(layer)
        # Reinitializing weights
        for new_layer in new_decoder:
            self.init_weights(new_layer)
        
        self.decoder = new_decoder


    # Reinitialize weights
    def init_weights(self, m):
        if isinstance(m, nn.Linear):  # Check layer type
            nn.init.xavier_uniform_(m.weight)  # Xavier initialization
        if m.bias is not None:
            nn.init.zeros_(m.bias)  # Bias initialized to 0


    def encode(self, x):
        for layer in self.encoder:
            x = layer(x)
        if not self.bottleneck_2:
            x = self.bottleneck(x)
            return x
        else:
            print('yes')
            self.current_bn_weight = torch.cat((self.bottleneck.weight, self.bottleneck_2.weight))
            self.current_bn_bias = torch.cat((self.bottleneck.bias, self.bottleneck_2.bias))
            x = x@self.current_bn_weight.T + self.current_bn_bias
            return x


    def decode(self, x):
        for layer in self.decoder:
            x = layer(x)


    def forward(self, x):
        x = self.encode(x)
        x = self.decode(x)
        return x


In [42]:
# Define sizes for the layers
layer_sizes = [8, 4]  # Example decreasing sizes for the encoder

# Create the encoder ModuleList
encoder = nn.ModuleList()
for i in range(len(layer_sizes) - 1):
    encoder.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
    encoder.append(nn.ReLU())

# Create the decoder ModuleList (mirror of the encoder)
decoder = nn.ModuleList()
for i in range(len(layer_sizes) - 1, 0, -1):
    decoder.append(nn.Linear(layer_sizes[i], layer_sizes[i - 1]))
    decoder.append(nn.ReLU())

# Remove the last ReLU from the decoder (optional, depending on use case)
decoder = decoder[:-1]

In [None]:
#TODO look into why requires grads are turned on/off weirdly

In [39]:
model = PCAAutoencoder(encoder, decoder, 4)

In [43]:
model = PCAAutoencoder(encoder, decoder, 4)
for i in range(3):
    print("\nencoder\n")
    for layer in model.encoder.parameters():
        print(layer.requires_grad)
    print("\nbottleneck\n")
    for layer in model.bottleneck.parameters():
        print(layer.requires_grad)
    if model.bottleneck_2:
        for layer in model.bottleneck_2.parameters():
            print(layer.requires_grad)
    print("\ndecoder\n")
    for layer in model.decoder.parameters():
        print(layer.requires_grad)
    model.increase_latentdim()


encoder

True
True

bottleneck

True
True

decoder

True
True

encoder

False
False

bottleneck

True
True
True
True

decoder

True
True

encoder

False
False

bottleneck

False
False
True
True

decoder

True
True


In [13]:
for i in model.bottleneck.parameters():
    print(i)
    print(i.requires_grad)

Parameter containing:
tensor([[ 0.0605,  0.0390, -0.0118,  0.2159],
        [-0.3686, -0.2754,  0.0876, -0.2395],
        [-0.4354,  0.1305,  0.2570,  0.1123]])
False
Parameter containing:
tensor([-0.3499, -0.0935,  0.3131])
False


In [6]:
for i in model.encoder.parameters():
    print(type(i))

<class 'torch.nn.parameter.Parameter'>
<class 'torch.nn.parameter.Parameter'>


In [50]:
model.encode(torch.ones(4, 8))

yes


tensor([[-0.4127,  0.2841, -0.4800, -0.1114],
        [-0.4127,  0.2841, -0.4800, -0.1114],
        [-0.4127,  0.2841, -0.4800, -0.1114],
        [-0.4127,  0.2841, -0.4800, -0.1114]], grad_fn=<AddBackward0>)

In [46]:
model.current_bn_weight.requires_grad

True

In [48]:
for i in range(model.current_bn_weight.shape[1]):
    print(model.current_bn_weight[:, i].requires_grad)

True
True
True
True


In [51]:
import torch

# Create two separate tensors
tensor_requires_grad = torch.randn(3, requires_grad=True)  # This will track gradients
tensor_no_grad = torch.randn(2).detach()  # This will not track gradients

# Concatenate them into a single tensor
combined_tensor = torch.cat([tensor_requires_grad, tensor_no_grad])

# Check requires_grad property
print(combined_tensor.requires_grad)  # Output: True, but only parts of it contribute to gradient updates

True


In [53]:
for i in combined_tensor:
    print(i.requires_grad)

True
True
True
True
True


In [57]:
import torch

# Step 1: Create tensors where some elements require gradients and some don't
x_requires_grad = torch.randn(3, requires_grad=True)  # Will track gradients
x_no_grad = torch.randn(2).detach()  # This part should NOT track gradients

# Concatenate into a single tensor
x = torch.cat([x_requires_grad, x_no_grad])

# **Fix: Explicitly detach the non-gradients part**
x[3:].detach()  # Now the last two elements should not receive gradients

# Retain grad for debugging
x.retain_grad()

print("x.requires_grad:", x.requires_grad)  # True, but only some elements should contribute

# Step 2: Define a simple loss function (sum of all elements)
loss = x.sum()

# Step 3: Compute gradients
loss.backward()

# Step 4: Display gradients for each element
print("Gradients of x:")
print(x.grad)  # Now only first 3 elements should have gradients

# Step 5: Manually check gradients for each element
print("\nManually checking gradients:")
for i, g in enumerate(x.grad):
    print(f"x[{i}] gradient: {g.item()} (requires_grad={x[i].requires_grad})")

x.requires_grad: True
Gradients of x:
tensor([1., 1., 1., 1., 1.])

Manually checking gradients:
x[0] gradient: 1.0 (requires_grad=True)
x[1] gradient: 1.0 (requires_grad=True)
x[2] gradient: 1.0 (requires_grad=True)
x[3] gradient: 1.0 (requires_grad=True)
x[4] gradient: 1.0 (requires_grad=True)
