# Clean file with modules

In [None]:
import os
import sys
from datetime import datetime
parent_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(parent_dir)
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from torch import nn
import torch.nn.functional as F
from simspice.data import SproutDataset
from torch.utils.data import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint
import torch
import numpy as np
import matplotlib.pyplot as plt
import hdbscan

import matplotlib.pyplot as plt
from lightly.loss import NTXentLoss

from simspice.utils import inverse_mapping_functions as imf
from simspice.models import Siamese_Architecture_Transformer as SA

import umap.umap_ as umap
import tqdm
# --- Notebook-wide settings -------------------------------------------------

# Batch size handed to the training DataLoader.
BATCH_SIZE = 64

# Root of the local SimSPICE checkout. Later cells build file names by plain
# string concatenation onto this value, so the trailing separator matters.
simspice = 'C:\\Users\\tania\\Documents\\CU Boulder\\CU Fall 2024\\ASEN 6337\\Individual project\\SimSPICE\\'

# Set matplotlib's default image origin to 'lower' for every figure below.
plt.rcParams.update({'image.origin': 'lower'})

In [None]:
# NetCDF file that the training dataset (and, later, the inference dataset and
# the cluster maps) are built from.
dataset_path = "C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\datasets_deepL\\spectra_Feb2023.nc"

# Training dataset: 'single' augmentation setting, log space enabled,
# intensity normalisation disabled.
dataset_log = SproutDataset.SproutDataset(
    dataset_path=dataset_path,
    augmentation_type='single',
    log_space=True,
    normalize_intensity=False,
)

# Shuffled mini-batches for training.
dataloader = DataLoader(dataset_log, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# Build the SimSiam model that is trained below.
model = SA.SimSiam(
    output_dim=64,
    backbone_output_dim=128,
    hidden_layer_dim=128,
)

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    accelerator = "gpu"
else:
    accelerator = "cpu"

# Weights & Biases logger; the run name carries today's date.
run_name = f"miniTrained_outdim64_log_transformer_15epochs {datetime.today().strftime('%Y-%m-%d')}"
wandb_logger = WandbLogger(
    project="runs_single_augmentation",
    name=run_name,
    log_model=True,
)

# Single-device training run of 15 epochs over the shuffled dataloader.
trainer = pl.Trainer(
    max_epochs=15,
    devices=1,
    accelerator=accelerator,
    logger=wandb_logger,
)
trainer.fit(model=model, train_dataloaders=dataloader)

## Optional: resume training from a previous checkpoint

In [None]:
# checkpoint = simspice+"\\notebooks\\FullDataset_64_doubleAug_normalized_spec\\k81c85sl\\checkpoints\\epoch=4-step=9075.ckpt"
# model = SA.SimSiam.load_from_checkpoint(checkpoint)  # Continue epochs

# accelerator = "gpu" if torch.cuda.is_available() else "cpu"
# wandb_logger = WandbLogger(project="FullDataset_64_doubleAug_normalized_spec", log_model=True)
# trainer = pl.Trainer(max_epochs=10, devices=1, accelerator=accelerator, logger=wandb_logger)
# trainer.fit(model=model, train_dataloaders=dataloader)

## Run the trained model on the non-augmented dataset

In [None]:
# Same file as used for training, but built with augmentation_type=None; the
# trained model is run on these samples.
dataset_none = SproutDataset.SproutDataset(dataset_path=dataset_path, augmentation_type=None)
# outputs = SA.run_model(checkpoint, dataset_none)

# Device selection and model placement do not depend on the sample, so they
# are done once here instead of on every loop iteration.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model.eval()

# One model output per dataset item, collected as NumPy arrays on the CPU.
outputs = []
with torch.no_grad():  # Disable gradient computation for inference
    for i in tqdm.tqdm(range(len(dataset_none))):
        # Add a leading batch dimension of 1 and move the sample to the
        # model's device.
        spec = dataset_none[i].unsqueeze(0).to(device)

        # Keep only the first element of the model's return value.
        outputs.append(model(spec)[0].cpu().numpy())

In [None]:
# Stack the per-sample outputs into one array and drop singleton dimensions.
stacked_outputs = np.squeeze(np.stack(outputs))
stacked_outputs.shape

# Persist the array so the clustering cells can be re-run without inference.
outputs_file = simspice+'jobs\\model_outputs\\stacked_outputs_single64_feb23_log_transformer.npy'
np.save(outputs_file, stacked_outputs)

In [None]:
# Reload the saved model outputs from disk (lets the notebook start from here
# without re-running training and inference).
outputs_file = simspice+'jobs\\model_outputs\\stacked_outputs_single64_feb23_log_transformer.npy'
stacked_outputs = np.load(outputs_file)

## Clustering with hdbscan

In [None]:
# Sweep HDBSCAN over a 3x3 grid of (min_cluster_size, min_samples) values and
# save the label array of every run to its own .npy file.
for x in (10, 20, 30):  # x: min_cluster_size
    for y in tqdm.tqdm([5, 10, 15]):  # y: min_samples (progress bar per x)
        clusterer = hdbscan.HDBSCAN(
            min_cluster_size=x,
            min_samples=y,
            metric='euclidean',
        )
        clusterer.fit(stacked_outputs)

        # `labels` is kept as a notebook-level name; later cells read it.
        labels = clusterer.labels_
        labels_file = simspice+f'jobs\\clustering\\Feb23trained_single_transformer_minclus{x}_minsamp{y}.npy'
        np.save(labels_file, labels)

In [None]:
# Plot the cluster map of every saved HDBSCAN run in one figure: one row per
# min_cluster_size value, one column per min_samples value.
min_cluster_sizes = [10, 20, 30]
min_samples_values = [5, 10, 15]

# Row-major list of parameter pairs; its order matches subplot numbering.
param_grid = [(x, y) for x in min_cluster_sizes for y in min_samples_values]

plt.figure(figsize=(15,12), tight_layout=True)
for c, (x, y) in enumerate(param_grid, start=1):
    # Grid size is derived from the sweep itself; the previous hard-coded 4x3
    # layout left an empty bottom row for this 3x3 sweep.
    plt.subplot(len(min_cluster_sizes), len(min_samples_values), c)
    labels = np.load(simspice+f'jobs\\clustering\\Feb23trained_single_transformer_minclus{x}_minsamp{y}.npy')
    # NOTE(review): presumably map_clusters draws into the current axes —
    # confirm against simspice.utils.inverse_mapping_functions.
    imf.map_clusters(labels, dataset_path=dataset_path, selected_clusters=None)
    plt.title(f"min_cluster = {x}\nmin_samples = {y}")
plt.suptitle('Feb23trained_single_transformer 15 epochs')
        

In [None]:
from matplotlib.colors import Normalize

In [None]:
# Look up item 9899 in the 11 Feb 2023 spectra file, with plotting disabled
# and a blank title.
imf.map_item_map(
    item_nbr=9899,
    dataset="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_11Feb2023.nc",
    plot=False,
    title=' ',
)

In [None]:
# Plot 3 items for cluster 1 (third positional argument), using the most
# recently loaded `labels`, with plot_on_map enabled.
# The original call passed an undefined name `dataset` (NameError). It now
# passes `dataset_none`, the non-augmented dataset that `stacked_outputs`
# (and therefore `labels`) was computed from.
# NOTE(review): `dataset_path` below points at spectra_train_mini.nc, whereas
# `dataset_none` was built from spectra_Feb2023.nc — confirm which file the
# map overlay should use.
imf.plot_n_random_spectra_cluster(labels, stacked_outputs, 1, dataset_none, nbr_items=3, plot_on_map=True,
                                  dataset_path="C:\\Users\\tania\\Documents\\SPICE\\SPROUTS\\spectra_train_mini.nc")