In [16]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.utils as vutils
from PIL import Image

#Autoencoder
class Autoencoder(nn.Module):
    """Fully connected autoencoder that renders a feature vector as an image.

    The encoder maps ``input_size`` features to a ``latent_dim`` vector; the
    decoder maps that latent vector to a flattened image of
    ``output_channels * img_size * img_size`` values squashed into [0, 1]
    by a sigmoid.

    Args:
        input_size: Number of features in each input row.
        latent_dim: Width of the latent representation.
        output_channels: Number of channels in the produced image.
        img_size: Height and width (in pixels) of the produced square image.
    """

    def __init__(self, input_size, latent_dim, output_channels=1, img_size=28):
        super().__init__()

        # Remembered so forward() can reshape to the configured image shape
        # instead of assuming a 1x28x28 output.
        self.output_channels = output_channels
        self.img_size = img_size

        # Encoder: fully connected layers down to the latent space
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 128),  # Compress input to 128 units
            nn.ReLU(),
            nn.Linear(128, latent_dim)  # Latent space
        )

        # Decoder: Fully connected layers to reshape into image
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),  # Latent to 128 units
            nn.ReLU(),
            nn.Linear(128, output_channels * img_size * img_size),  # Output flattened image size
            nn.Sigmoid()  # Output in range [0, 1]
        )

    def forward(self, x):
        """Encode ``x`` and decode it into a batch of images.

        Args:
            x: Tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor of shape ``(batch, output_channels, img_size, img_size)``
            with values in [0, 1].
        """
        latent = self.encoder(x)
        flat_image = self.decoder(latent)
        # Reshape using the configured dimensions; the previous hard-coded
        # (1, 28, 28) broke any non-default output_channels / img_size.
        return flat_image.view(-1, self.output_channels, self.img_size, self.img_size)

#Globals
# Input CSV folder and the folder that receives the generated images
source_dir = "./../dataset/reduced_only_full/"
output_dir = "./../dataset/visuals/"

# Rows taken from a CSV per generated image, and the feature count that is
# passed to the autoencoder as its input size
chunk_size = 100
components = 19

# Run on the GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Model that turns a row of `components` features into a 1x28x28 image
auto_encoder = Autoencoder(
    input_size=components,
    latent_dim=64,
    output_channels=1,
    img_size=28,
)
auto_encoder = auto_encoder.to(DEVICE)

# Loss and optimizer objects for the model
criterion = nn.MSELoss()
optimizer = optim.Adam(auto_encoder.parameters(), lr=1e-3)

In [None]:
# Render every CSV in source_dir as PNG grids: each block of chunk_size rows is
# passed through auto_encoder (one image per row) and the images are tiled into
# a single square picture saved under "<output_dir><csv name>_AE/".
# NOTE(review): no training step is visible in this notebook section, so the
# images reflect whatever weights auto_encoder currently holds.

# The per-row images are tiled into a square grid, so chunk_size has to be a
# perfect square (100 -> 10 x 10). Fail early with a clear message instead of
# crashing inside np.concatenate or silently dropping rows.
grid_side = int(round(chunk_size ** 0.5))
if grid_side * grid_side != chunk_size:
    raise ValueError(f"chunk_size must be a perfect square to tile a grid, got {chunk_size}")

for file in os.listdir(source_dir):
    print(f"Found: {file}")
    if file.endswith(".csv"):
        #Load the dataset
        file_path = os.path.join(source_dir, file)
        df = pd.read_csv(file_path)

        #Base target for writing for results from this file
        # splitext strips only the trailing extension (str.replace would also
        # remove any ".csv" occurring in the middle of the name)
        folder_base = os.path.splitext(file)[0]
        # base_hm_path = f"{output_dir}{folder_base}_HM"
        # base_sp_matrix_path = f"{output_dir}{folder_base}_SP_Matrix"
        # base_sp_path = f"{output_dir}{folder_base}_SP"
        base_ae_path = f"{output_dir}{folder_base}_AE"

        #Make the directories
        # os.makedirs(base_hm_path, exist_ok=True)
        # os.makedirs(base_sp_matrix_path, exist_ok=True)
        # os.makedirs(base_sp_path, exist_ok=True)
        os.makedirs(base_ae_path, exist_ok=True)

        #For each chunk (a trailing partial chunk is skipped by the floor division)
        for i in range(len(df) // chunk_size):
            start_idx = i * chunk_size
            end_idx = start_idx + chunk_size
            df_chunk = df.iloc[start_idx:end_idx]


            #Autoencoder approach
            #Convert to tensor
            data = torch.tensor(df_chunk.values, dtype=torch.float32).to(DEVICE)
            # Inference only: no_grad avoids building an autograd graph, so the
            # result can be converted to numpy without a detach()
            with torch.no_grad():
                output_image = auto_encoder(data)
            output_image = output_image.cpu().numpy()

            # Drop the channel axis: (chunk, 1, H, W) -> (chunk, H, W)
            output_image = output_image.squeeze(1)

            # Scale [0, 1] outputs to 8-bit grayscale
            if output_image.max() <= 1:
                output_image = (output_image * 255).astype(np.uint8)

            # Tile the images: each grid row joins grid_side images side by
            # side, then the rows are stacked vertically
            grid_image = np.concatenate(
                [
                    np.concatenate(output_image[row * grid_side:(row + 1) * grid_side], axis=1)
                    for row in range(grid_side)
                ],
                axis=0,
            )

            grid_image_pil = Image.fromarray(grid_image.astype(np.uint8))
            grid_image_pil.save(f"{base_ae_path}/AE_PIL_{i+1}.png")

            # #Heatmap image - No way to get back to data points
            # plt.figure(figsize=(10, 8))
            # sns.heatmap(df_chunk.corr(), annot=True, cmap="coolwarm", fmt=".2f")
            # plt.title(f"Correlation Heatmap (Chunk {i+1})")
            # hm_path = os.path.join(base_hm_path, f"Heatmap_{i+1}.png")
            # plt.savefig(hm_path)
            # plt.close()

            # #Pairplot - Takes a long time, 
            # plt.figure(figsize=(15, 15))
            # pair_plot = sns.pairplot(df_chunk, diag_kind='hist')
            # plt.title(f"Scatter Matrix (Chunk {i+1})")
            # sp_matrix_path = os.path.join(base_sp_matrix_path, f"SP_Matrix_{i+1}.png")
            # pair_plot.savefig(sp_matrix_path)
            # plt.close()

            # #3D plot - 8 Features
            # dist = df_chunk['dist'].values
            # h_dist = df_chunk['h_dist'].values
            # v_dist = df_chunk['v_dist'].values
            # avgPower = df_chunk['avgPower'].values 
            # avgSnr = df_chunk['avgSnr'].values
            # avg_pl = df_chunk['avg_pl'].values

            # #3D plotting
            # fig = plt.figure(figsize=(10, 8))
            # ax = fig.add_subplot(111, projection='3d')
            # sc = ax.scatter(dist, h_dist, v_dist, c=avgPower, cmap='viridis', s=avg_pl, alpha=0.7)
            # fig.colorbar(sc, label='Average Power')
            # sp_path = os.path.join(base_sp_path, f"SP_{i+1}.png")
            # plt.savefig(sp_path)
            # plt.close()
            

            print(f"Chunk {i+1} visualization saved for {file}!")

print("All visualizations saved!")

In [None]:
# Load a previously saved autoencoder image and push it through part of the
# decoder, then show the resulting activations as a one-row DataFrame.
# NOTE(review): the export loop in this notebook saves files named
# "AE_PIL_<n>.png", while this path points at "AE_<n>.png" — confirm the file exists.
ae_image_path = "../dataset/visuals/2023-12-15_15_41-results_AE/AE_4.png"
image_as_array = plt.imread(ae_image_path) #it's a PNG, so presumably already scaled to [0, 1] — TODO confirm
# NOTE(review): the result of flatten() is not assigned, so this line leaves
# image_as_array (and the tensor built from it below) unflattened.
image_as_array.flatten()
# Convert to a float tensor, add a leading batch dimension, move to the model's device
tensor = torch.tensor(image_as_array, dtype=torch.float32).unsqueeze(0).to(DEVICE)

# NOTE(review): decoder[0:2] selects the decoder's first two modules,
# Linear(latent_dim, 128) followed by ReLU. With latent_dim=64 that layer expects
# inputs whose last dimension is 64 (a latent vector), not image pixels, so this
# call looks likely to fail with a shape mismatch. The model as defined has no
# image -> data path; confirm what was intended here.
with torch.no_grad():
        reconstructed_data = auto_encoder.decoder[0:2](tensor)  # first two decoder modules only (Linear + ReLU)
        reconstructed_data = reconstructed_data.cpu().numpy().flatten()

# Wrap the flattened activations in a single-row DataFrame for display
reconstructed_df = pd.DataFrame(reconstructed_data.reshape(1, -1))
print(reconstructed_df)
# Alternative kept for reference: run the encoder on the tensor and print the latent vector
# latent_vetor = auto_encoder.encoder(tensor)
# latent_vetor = latent_vetor.cpu().detach().numpy()
# print(latent_vetor)