# Clean file with modules

In [None]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from torch import nn
import torch.nn.functional as F
from SproutDataset import SproutDataset
from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
import torch
import numpy as np
import matplotlib.pyplot as plt
import hdbscan

import matplotlib.pyplot as plt
from lightly.loss import NTXentLoss

import inverse_mapping_functions as imf
import Siamese_Architecture as SA

import umap.umap_ as umap
import tqdm
plt.rcParams['image.origin'] = 'lower'

BATCH_SIZE = 64

In [None]:
# Training data: the spectra file is wrapped by SproutDataset with the
# 'single' augmentation, log-space and intensity-normalisation options on.
dataset_path = "C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc"
dataset = SproutDataset(
    dataset_path=dataset_path,
    augmentation_type='single',
    log_space=True,
    normalize_intensity=True,
)

# Shuffled mini-batches of BATCH_SIZE items for the training loop.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Start from a freshly initialised SimSiam model.
# To continue training from a saved checkpoint instead, use:
# checkpoint = "C:\\Users\\tania\\Documents\\CU Boulder\\CU Fall 2024\\ASEN 6337\\Individual project\\SPICE_DeepLearning\\CUrc\\64-single-fulldata_gain01-3\\epoch=10-step=91500.ckpt"
# model = SA.SimSiam.load_from_checkpoint(checkpoint)
model = SA.SimSiam(
    output_dim=32,
    backbone_output_dim=128,
    hidden_layer_dim=128,
)

# Optional Weights & Biases logging (currently disabled):
# wandb_logger = WandbLogger(project="FullDataset_64_singleAug")  # , log_model=True)

# Train on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    accelerator = "gpu"
else:
    accelerator = "cpu"

In [None]:
# Fit the model for 5 epochs on one device of the selected accelerator type.
# Add `logger=wandb_logger` to the Trainer arguments to re-enable W&B logging.
trainer = pl.Trainer(
    max_epochs=5,
    devices=1,
    accelerator=accelerator,
)
trainer.fit(model=model, train_dataloaders=dataloader)

In [None]:
# Run the in-memory model over every item of the un-augmented dataset and
# collect one NumPy array per item in `outputs`.
# NOTE(review): the training dataset is built with log_space=True and
# normalize_intensity=True, whereas this one relies on SproutDataset's
# defaults -- confirm the preprocessing matches what the model was trained on.
dataset_none = SproutDataset(dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc", augmentation_type=None)
# outputs = SA.run_model(checkpoint, dataset_none)

# Device selection and the model transfer do not depend on the loop variable,
# so they are done once up front instead of once per item.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

outputs = []
with torch.no_grad():  # Disable gradient computation for inference
    for i in tqdm.tqdm(range(len(dataset_none))):
        # Add a leading batch axis and move the item to the model's device.
        spec = dataset_none[i].unsqueeze(0).to(device)
        # Keep element 0 of the model's return value, as a CPU NumPy array.
        outputs.append(model(spec)[0].cpu().numpy())

In [None]:
# Alternative to in-notebook inference: compute the outputs from a saved
# checkpoint through the SA.run_model helper.
# Every backslash in the path is escaped. The previous literal mixed "\\" with
# bare "\D", "\C", "\m" and "\s", which are invalid escape sequences (warned
# about by current Python and slated to become errors). The resulting path
# string is unchanged.
checkpoint = "C:\\Users\\tania\\Documents\\CU Boulder\\CU Alpine\\models_ckpts\\single_epoch=4-step=45750.ckpt"
dataset_none = SproutDataset(dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc", augmentation_type=None)
outputs = SA.run_model(checkpoint, dataset_none)

In [None]:
# Combine the per-item outputs into a single array and drop singleton axes.
stacked_outputs = np.squeeze(np.stack(outputs))
# Cell output: shape of the stacked array.
stacked_outputs.shape

In [None]:
# Persist the stacked outputs so later sessions can skip inference.
np.save(
    file='saved_outputs//stacked_outputs_single64_feb23_norm_log.npy',
    arr=stacked_outputs,
)

In [None]:
# Reload previously saved stacked outputs from disk.
# NOTE(review): this reads '..._norm.npy', while the save cell in this notebook
# writes '..._norm_log.npy' -- confirm which file is intended.
stacked_outputs = np.load(
    file='saved_outputs//stacked_outputs_single64_feb23_norm.npy',
)

In [None]:
# HDBSCAN parameter sweep: cluster the stacked outputs once per
# (min_cluster_size, min_samples) pair and write each label array to disk.
for min_cluster_size in [10, 20, 30]:
    # One progress bar per min_cluster_size value, over the min_samples values.
    for min_samples in tqdm.tqdm([2, 5, 10]):
        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=min_cluster_size,
            min_samples=min_samples,
            metric='euclidean',  # author's open question: equivalent to cosine here?
        )
        clusterer.fit(stacked_outputs)
        labels = clusterer.labels_
        labels_file = f'saved_outputs//norm_out32_Feb23_minclus{min_cluster_size}_minsamp{min_samples}.npy'
        np.save(labels_file, labels)

In [None]:
# Draw one cluster map per (min_cluster_size, min_samples) combination, using
# the label files written by the HDBSCAN sweep.
min_cluster_sizes = [10, 20, 30]
min_samples_values = [2, 5, 10]

# The grid is sized from the parameter lists (rows follow min_cluster_size,
# columns follow min_samples). The earlier hard-coded 4x3 grid left an empty
# row for this 3x3 sweep.
n_rows, n_cols = len(min_cluster_sizes), len(min_samples_values)

plt.figure(figsize=(12, 15))
for row, x in enumerate(min_cluster_sizes):
    for col, y in enumerate(min_samples_values):
        # Subplot indices are 1-based and run row by row.
        plt.subplot(n_rows, n_cols, row * n_cols + col + 1)
        labels = np.load(f'saved_outputs//norm_out32_Feb23_minclus{x}_minsamp{y}.npy')
        # labels = np.load(f"C:\\Users\\tania\Documents\\CU Boulder\CU Fall 2024\\ASEN 6337\\Individual project\\SPICE_DeepLearning\saved_outputs_1Apr25\labels-cos_single64_Feb23_minclus{x}_minsamp{y}.npy")
        # Presumably map_clusters draws into the current subplot -- verify in
        # inverse_mapping_functions.
        imf.map_clusters(labels, dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc", selected_clusters=None)
        plt.title(f"min_cluster = {x}\nmin_samples = {y}")
plt.suptitle('norm_out32_Feb23_minclus_single')
        

In [None]:
# Project the stacked outputs to 2-D with UMAP; random_state is pinned to 42.
umap_settings = {
    'n_neighbors': 50,
    'min_dist': 0.1,
    'n_components': 2,
    'random_state': 42,
}
reducer = umap.UMAP(**umap_settings)
projected_data = reducer.fit_transform(stacked_outputs)

In [None]:
#np.save('saved_outputs//umap_data_64sin_fulldata_nei15_dist01_50_5.npy', projected_data)

In [None]:
# Reload a previously saved UMAP projection; this replaces any projected_data
# computed earlier in the session.
# NOTE(review): the file name says 'nei15', while the UMAP cell in this
# notebook uses n_neighbors=50 -- confirm this is the intended projection.
projected_data = np.load(
    file='saved_outputs//umap_data_64sin_fulldata_nei15_dist01_50_5.npy',
)
# Other saved arrays that can be reloaded the same way:
# stacked_outputs = np.load('saved_outputs//stacked_outputs_64_fulldata.npy')
# labels = np.load('saved_outputs//labels_single64_fulldata_minclus50_minsamp5_unnormalized.npy')

In [None]:
from matplotlib.colors import Normalize

In [None]:
# Scatter plot of the UMAP projection restricted to a chosen set of clusters.
# `labels` must already be defined by one of the label-loading or clustering
# cells.
target_clusters = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15]  # List of cluster labels to plot

# Boolean mask of the points whose label is one of the target clusters.
in_target = np.isin(labels, target_clusters)
selected_points = projected_data[in_target]
selected_labels = labels[in_target]

# Colour scale spans the full label range, not only the selected clusters.
label_min = labels.min()
label_max = labels.max()
norm = Normalize(vmin=label_min, vmax=label_max)

plt.figure(figsize=(10, 6))
plt.scatter(
    selected_points[:, 0],
    selected_points[:, 1],
    c=selected_labels,
    cmap='tab20',
    norm=norm,
    s=3,
)
# NOTE(review): the clustering parameters in the title are hard-coded --
# confirm they match the label file currently loaded.
plt.title(f"Scatter Plot for Clusters {target_clusters} out of [{label_min} - {label_max}]\nmin_cluster_size=30, min_samples=5")
plt.colorbar()
plt.show()

In [None]:
# Two stacked panels: the labelled UMAP scatter on top and a 2-D density
# histogram of the same projection below.
umap_x = projected_data[:, 0]
umap_y = projected_data[:, 1]

plt.figure(figsize=(10, 14))

# Top panel: every projected point, coloured by its cluster label.
plt.subplot(2, 1, 1)
scatter = plt.scatter(umap_x, umap_y, c=labels, cmap='tab20', s=1)
plt.colorbar()
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.title('UMAP Projection to 2D')

# Bottom panel: point density on a 200 x 200 bin grid.
plt.subplot(2, 1, 2)
plt.hist2d(umap_x, umap_y, bins=200)
plt.colorbar()
plt.grid(True)
plt.title('Density histogram')

plt.show()

In [None]:
# Load a saved cluster-label array (file name: min_cluster_size 50, min_samples 5).
labels = np.load(
    file='saved_outputs//clustered_outputs_single64_fulldata_minclus50_minsamp5.npy',
)

In [None]:
# Map every cluster (no cluster filter) using the current labels.
# NOTE(review): this cell points at 'spectra_train.nc', whereas most other
# cells use 'spectra_11Feb2023.nc' -- confirm the labels belong to this file.
imf.map_clusters(
    labels,
    dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_train.nc",
    selected_clusters=None,
)

In [None]:
# Look up item 9899 of the spectra file with plotting turned off.
# The call's return value is the cell output.
imf.map_item_map(
    item_nbr=9899,
    dataset="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc",
    plot=False,
    title=' ',
)

In [None]:
# Plot 3 random spectra for the third positional argument (1, presumably the
# cluster id -- verify against inverse_mapping_functions) and mark them on the map.
# NOTE(review): `dataset` is the augmented training dataset, and the path
# below is 'spectra_train_mini.nc' rather than the file used to build it --
# confirm both are intended.
imf.plot_n_random_spectra_cluster(
    labels,
    stacked_outputs,
    1,
    dataset,
    nbr_items=3,
    plot_on_map=True,
    dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_train_mini.nc",
)