### Imports

In [1]:
import os
import yaml
import polars as pl

### Experiment description

1. Predict whether each localisation is clustered or unclustered (ClusterID binarised to 0/1) for clustering datasets
2. Use PointTransformer
3. Features = ones
4. Homogeneous graph

### Variables

In [2]:
# Root folder of the raw dataset, the experiment-specific input folder beneath
# it, and the folder that receives this experiment's outputs.
input_folder = '../../../data/nieves'
input_expt_folder = f'{input_folder}/expt3'
output_folder = '../../../output/nieves/expt3'

### Preprocess data to suitable output location

In [3]:
# Directories the later cells write into: the experiment output folder, the
# folder holding this experiment's YAML configs, and the folder that receives
# the converted input files.
folders = [output_folder, os.path.join(input_folder, "config/expt3"), input_expt_folder]

for folder in folders:
    # exist_ok=True keeps the cell safe to re-run and removes the
    # check-then-create race of a separate os.path.exists() test
    os.makedirs(folder, exist_ok=True)

In [4]:
# Dataset-specific conversion: read every file in the "ml_data" folder,
# harmonise its column names, binarise the ground-truth label and save the
# result as parquet in the experiment input folder.
root_dir = os.path.join(input_folder, "ml_data")
for file in os.listdir(root_dir):
    df = pl.read_csv(os.path.join(root_dir, file))

    # The two file families name their columns differently; bring both to
    # a common 'gt_label' column (and 'x'/'y' for the gt_ files)
    if file.startswith('gt_'):
        df = df.rename({'':'x', '_duplicated_0':'y', 'moleculeClusterIndex': 'gt_label'})
    elif file.startswith('multiple_blinking'):
        df = df.drop(['error', 'moleculeIndex']).rename({'clusterIndex': 'gt_label'})

    # Any non-zero cluster id becomes 1, so the task is
    # "not clustered" (0) versus "clustered" (1)
    binary_label = pl.when(pl.col('gt_label') != 0).then(1).otherwise(0)
    df = df.with_columns(binary_label.alias('gt_label'))

    # Save under the same base name with a .parquet extension
    file_name = os.path.join(input_expt_folder, os.path.splitext(file)[0] + '.parquet')
    df.write_parquet(file_name)

In [5]:
# Export the preprocess settings to a YAML file

config = {
    # Names of the x column, y column, z column (if present), channel and
    # frame columns in the files being processed
    'x_col': 'x',
    'y_col': 'y',
    'z_col': None,
    'channel_col': None,  # 'channel'
    'frame_col': None,  # 'frame'

    # The number of dimensions to consider
    # If 2 only deals with x and y
    # If 3 will read in and deal with z as well (currently not fully supported)
    'dim': 2,

    # Choice of which channels the user wants to consider
    # If null considers all
    'channel_choice': None,

    # String label for each channel, e.g.
    #   0: egfr
    #   1: ereg
    'channel_label': None,

    # Flag for dropping the pixel column (not dropped here).
    # NOTE(review): the original comment was ambiguous about the polarity
    # of this flag; confirm against the preprocess script
    'drop_pixel_col': False,

    # Files to include: 'all' includes every file
    'include_files': 'all',

    # Choice of input features
    'features': [],

    # Ground-truth label per localisation ('loc') or per FOV
    'gt_label_scope': 'loc',

    # If a gt label is specified, which column it is in (for a label per FOV
    # only one row has to have a value), or leave as null
    'gt_label': "gt_label",

    # Mapping from gt label value to its meaning
    'gt_label_map': {0: 'unclustered', 1: 'clustered'},
}

yaml_save_loc = os.path.join(input_folder, 'config/expt3/preprocess.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, outfile)

In [6]:
# Run the preprocess script on the converted parquet files in the experiment
# input folder (-i), using the preprocess.yaml config (-c), writing results to
# the experiment output folder (-o).
# NOTE(review): the meaning of the -p flag is not visible in this notebook; confirm against preprocess.py

!python ../src/locpix_points/scripts/preprocess.py -i ../../../data/nieves/expt3 -c ../../../data/nieves/config/expt3/preprocess.yaml -o ../../../output/nieves/expt3 -p

List of files which will be processed


['../../../data/nieves/expt3/gt_scenario_8_diffden_MoleculeList_10.parquet', '../../../data/nieves/expt3/gt_scenario_2_normal_50single_MoleculeList_43.parquet', '../../../data/nieves/expt3/gt_scenario_1_CSR_MoleculeList_23.parquet', '../../../data/nieves/expt3/gt_scenario_8_diffden_MoleculeList_22.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_7_diffsig_DetectionList1.parquet', '../../../data/nieves/expt3/gt_scenario_2_normal_50single_MoleculeList_7.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_9_dsigsden_new_DetectionList_38.parquet', '../../../data/nieves/expt3/gt_scenario_7_diffsig_MoleculeList_6.parquet', '../../../data/nieves/expt3/gt_scenario_4_lowden_MoleculeList_6.parquet', '../../../data/nieves/expt3/gt_scenario_0_mongrad_MoleculeList_3.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_4_lowden_DetectionList_34.parquet', '../../../data/nieves/expt3/gt_scenario_1_CSR_MoleculeList_19.parquet', '../../../data/nieves/expt3/gt_sce

diffsig_DetectionList26.parquet', '../../../data/nieves/expt3/gt_scenario_2_normal_50single_MoleculeList_35.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_6_elliptical_w_noise_DetectionList_41.parquet', '../../../data/nieves/expt3/gt_scenario_0_mongrad_MoleculeList_43.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_8_diffden_DetectionList49.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_0_mongrad_DetectionList_17.parquet', '../../../data/nieves/expt3/gt_scenario_7_diffsig_MoleculeList_11.parquet', '../../../data/nieves/expt3/gt_scenario_4_lowden_MoleculeList_50.parquet', '../../../data/nieves/expt3/gt_scenario_0_mongrad_MoleculeList_30.parquet', '../../../data/nieves/expt3/gt_scenario_6_elliptical_w_noise_MoleculeList_23.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_6_elliptical_w_noise_DetectionList_46.parquet', '../../../data/nieves/expt3/multiple_blinking_scenario_8_diffden_DetectionList4.parquet', '../../../dat





### Process data

In [35]:
# Export the process settings to a YAML file

config = {
    # Whether to process the data as a heterogeneous or homogeneous graph
    'hetero': False,

    # Train/val/test split ratios
    'train_ratio': 0.7,
    'val_ratio': 0.1,
    'test_ratio': 0.2,

    # What to load into position
    # Options: xy, xyz
    'pos': 'xy',

    # What to load into features
    'feat': 'uniform',

    # Label level
    # graph or node
    'label_level': 'node',
}

yaml_save_loc = os.path.join(input_folder, 'config/expt3/process.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, outfile)


In [36]:
# Run the process script on the experiment output folder (-i) using the
# process.yaml config (-c).

# Copy the train/test split from expt1: -r is given the expt1 output folder.
# NOTE(review): presumably this keeps the splits identical across experiments; confirm against process.py

!python ../src/locpix_points/scripts/process.py -i ../../../output/nieves/expt3 -c ../../../data/nieves/config/expt3/process.yaml -r ../../../output/nieves/expt1

Overwriting metadata...
Train set...
Processing...
Done!
Val set...
Processing...
Done!
Test set...
Processing...
Done!


### Train

In [39]:
# Export the training settings to a YAML file

config = {
    # Device to train on (gpu or cpu)
    'gpu': True,

    # Model parameters
    'model': "pointtransformerseg",

    # Optimiser parameters
    'optimiser': "adam",
    'lr': 0.001,
    'weight_decay': 0.0001,

    # Training parameters
    'epochs': 5,
    'batch_size': 1,
    'num_workers': 1,  # generally higher -> faster
    'loss_fn': "nll",

    # Settings for the model selected above
    'pointtransformerseg': {
        "k": 16,
        "in_channels": 1,
        "out_channels": 2,
        "dim_model": [32, 64, 128, 256, 512],
        "k_up": 3,  # trilinear interpolation
        "output_mlp_layers": 64,
        # ratio of points to sample when transition down
        "ratio": 0.25,
        "pos_nn_layers": 64,
        "attn_nn_layers": 64,
    },

    # What we are trying to predict
    'label_level': "node",  # graph

    # Train/val transforms
    # options: ['normalisescale', 'jitter', 'flip', 'randscale', 'rotate', 'shear'] # null
    # NOTE(review): the keys used below ('x_flip', 'y_flip', 'z_rotate') do not
    # match the option names listed above; confirm which names the train script expects
    'transforms': {
        #'jitter': 15,
        'x_flip': None,
        'y_flip': None,
        #'randscale': [0.95, 1.05],
        'z_rotate': None,
        #'shear': 0.05,
        'normalisescale': None,
    },

    # wandb parameters
    'wandb_project': "nieves_expt3",
    'wandb_dataset': "nieves",
}

yaml_save_loc = os.path.join(input_folder, 'config/expt3/train.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, outfile)

# 1. Choice of augmentations

# 2. Normalise features


In [40]:
# Run the train script on the experiment output folder (-i) using the
# train.yaml config (-c).

!python ../src/locpix_points/scripts/train.py -i ../../../output/nieves/expt3 -c ../../../data/nieves/config/expt3/train.yaml



---- Params -----


Input features:  1
Num classes:  2
Batch size:  1
Epochs:  5
Number train graphs 665
Number val graphs 95
[34m[1mwandb[0m: Currently logged in as: [33moliver-umney[0m ([33mteststest[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.15.5 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.15.4
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/oliver/smlm_cloud/locpix-points/experiments/wandb/run-20230714_160526-qtknk9kl[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mtoasty-blaze-1[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/teststest/nieves_expt3[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/teststest/nieves_expt3/runs/qtknk9kl[0m


---- Model summary (estimate) ----


Layer (type:depth-idx)               

Epoch:  0
Epoch:  1
^C
Traceback (most recent call last):
  File "/home/oliver/smlm_cloud/locpix-points/experiments/../src/locpix_points/scripts/train.py", line 262, in <module>
    main()
  File "/home/oliver/smlm_cloud/locpix-points/experiments/../src/locpix_points/scripts/train.py", line 220, in main
    train.train_loop(
  File "/home/oliver/smlm_cloud/locpix-points/src/locpix_points/training/train.py", line 80, in train_loop
    scaler.scale(loss).backward()
  File "/home/oliver/mambaforge/envs/locpix_points/lib/python3.11/site-packages/torch/_tensor.py", line 487, in backward
    torch.autograd.backward(
  File "/home/oliver/mambaforge/envs/locpix_points/lib/python3.11/site-packages/torch/autograd/__init__.py", line 200, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
KeyboardInterrupt
[34m[1mwandb[0m: Waiting for W&B process to finish... [31m(failed 255).[0m Press Control-C to abort syncing.


### Evaluate

In [None]:
# Export config variables

# Run the evaluate script

python src/locpix_points/scripts/evaluate.py -i ../../output/nieves -c src/locpix_points/templates/evaluate.yaml

### Interpret the results

In [None]:
# TODO: interpret the trained model with PyTorch Geometric explainability tools