### Imports

In [1]:
import os
import yaml
import polars as pl

### Experiment description

1. Predict Channel for Cell Pellet dataset
2. Use PointTransformer
3. Features = Everything except channels
4. Homogeneous graph

## Data

The data comes from the `dc_tg` folder, i.e. it has been drift-corrected and temporally grouped, with no further processing applied.

### Variables

In [2]:
# Experiment identifier; it names the per-experiment sub-folders below so the
# experiment can be switched by editing a single value.
expt_name = 'expt5'

# Root folder of the c15 data (the config/ sub-folder is created under it later)
input_folder = '../../../data/c15'
# Output folder for this experiment
output_folder = f'../../../output/c15/{expt_name}'
# Experiment-specific input data folder
input_expt_folder = f'{input_folder}/{expt_name}'

### Preprocess data to suitable output location

In [6]:
# Folders that must exist before anything is written: the experiment output
# folder, the config folder for this experiment and the experiment input folder.
folders = [output_folder, os.path.join(input_folder, "config/expt5"), input_expt_folder]

for folder in folders:
    # exist_ok=True makes the cell safe to re-run and avoids the
    # check-then-create race; a path that exists as a regular file now raises
    # instead of being silently skipped.
    os.makedirs(folder, exist_ok=True)

In [7]:
# Assemble the preprocessing configuration and export it to a YAML file

config = {
    # Names of the x, y, z (if present), channel and frame columns
    # in the input files being processed
    'x_col': '# x[nm]',
    'y_col': 'y[nm]',
    'z_col': None,
    'channel_col': None,  # 'channel'
    'frame_col': None,  # 'frame'

    # Number of dimensions to consider:
    # 2 deals with x and y only,
    # 3 also reads in z (currently not fully supported)
    'dim': 2,

    # Which channels to consider; None considers all of them
    'channel_choice': None,

    # String label for each channel
    'channel_label': None,

    # Flag for dropping the column containing pixel
    'drop_pixel_col': False,

    # Files to include: 'all' includes every file
    'include_files': 'all',

    # Choice of input features
    'features': [
        "duration[]",
        "variance_x[nm^2]",
        "variance_y[nm^2]",
        "variance_intensity[photons^2]",
        "variance_background[photons^2/nm^4]",
        "variance_sigma_x[nm^2]",
        "variance_sigma_y[nm^2]",
        "background_mean[photons/nm^2]",
        "sigmaX_mean[nm]",
        "sigmaY_mean[nm]",
        "intensity_mean[photons]",
    ],

    # Ground-truth label scope: per localisation or per FOV
    'gt_label_scope': 'loc',

    # Column holding the ground-truth label (for a per-FOV label only one
    # row has to have a value); None if there is no label
    'gt_label': "gt_label",

    # Map from ground-truth label value to its name
    'gt_label_map': {0: 'EGFR', 1: 'EREG'},
}

# Write the configuration next to the other expt5 config files
yaml_save_loc = os.path.join(input_folder, 'config/expt5/preprocess.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, stream=outfile)

In [8]:
# Run the preprocess script using this file

!python ../src/locpix_points/scripts/preprocess.py -i ../../../data/c15/expt5 -c ../../../data/c15/config/expt5/preprocess.yaml -o ../../../output/c15/expt5 -p

List of files which will be processed
['../../../data/c15/expt5/C15_EGFR647_EREG568_FOV5.parquet', '../../../data/c15/expt5/C15_EREG647_EGFR568_FOV8.parquet', '../../../data/c15/expt5/C15_EGFR568_EREG647_FOV4.parquet', '../../../data/c15/expt5/C15_EREG647_EGFR568_FOV5.parquet', '../../../data/c15/expt5/C15_EGFR647_EREG568_FOV2.parquet', '../../../data/c15/expt5/C15_EGFR568_EREG647_FOV5.parquet', '../../../data/c15/expt5/C15_EGFR568_EREG647_FOV3.parquet', '../../../data/c15/expt5/C15_EREG568_EGFR647_FOV5.parquet', '../../../data/c15/expt5/C15_EGFR647_EREG568_FOV1.parquet', '../../../data/c15/expt5/C15_EREG647_EGFR568_FOV6.parquet', '../../../data/c15/expt5/C15_EREG647_EGFR568_FOV7.parquet', '../../../data/c15/expt5/C15_EGFR647_EREG568_FOV6.parquet', '../../../data/c15/expt5/C15_EGFR647_EREG568_FOV4.parquet', '../../../data/c15/expt5/C15_EGFR647_EREG568_FOV3.parquet', '../../../data/c15/expt5/C15_EREG568_EGFR647_FOV3.parquet', '../../../data/c15/expt5/C15_EREG647_EGFR568_FOV9.parquet', 

### Process data

In [9]:
# Assemble the processing configuration and export it to a YAML file

config = {
    # Process the data as a heterogeneous (True) or homogeneous (False) graph
    'hetero': False,

    # Train/val/test split ratios
    'train_ratio': None,
    'val_ratio': None,
    'test_ratio': None,

    # What to load into position
    # Options: xy, xyz
    'pos': 'xy',

    # What to load into features
    "feat": [
        "duration[]",
        "variance_x[nm^2]",
        "variance_y[nm^2]",
        "variance_intensity[photons^2]",
        "variance_background[photons^2/nm^4]",
        "variance_sigma_x[nm^2]",
        "variance_sigma_y[nm^2]",
        "background_mean[photons/nm^2]",
        "sigmaX_mean[nm]",
        "sigmaY_mean[nm]",
        "intensity_mean[photons]",
    ],

    # Label level: graph or node
    "label_level": 'node',
}

# Write the configuration next to the other expt5 config files
yaml_save_loc = os.path.join(input_folder, 'config/expt5/process.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, stream=outfile)


In [10]:
# Run the process script 

# train test split from fold 0 of locpix

!python ../src/locpix_points/scripts/process.py -i ../../../output/c15/expt5 -c ../../../data/c15/config/expt5/process.yaml -m 'C15_EGFR647_EREG568_FOV1' 'C15_EGFR568_EREG647_FOV5' 'C15_EGFR647_EREG568_FOV2' 'C15_EREG647_EGFR568_FOV9' 'C15_EGFR568_EREG647_FOV4' 'C15_EGFR568_EREG647_FOV3' 'C15_EGFR647_EREG568_FOV7' 'C15_EGFR647_EREG568_FOV6' 'C15_EGFR647_EREG568_FOV3' 'C15_EREG647_EGFR568_FOV6' 'C15_EREG568_EGFR647_FOV2' 'C15_EREG647_EGFR568_FOV2' 'C15_EREG568_EGFR647_FOV4' 'C15_EREG647_EGFR568_FOV3' 'C15_EREG647_EGFR568_FOV1' -m 'C15_EGFR568_EREG647_FOV1' 'C15_EGFR568_EREG647_FOV2' 'C15_EGFR647_EREG568_FOV5' 'C15_EGFR647_EREG568_FOV4' -m 'C15_EREG647_EGFR568_FOV5' 'C15_EREG647_EGFR568_FOV10' 'C15_EREG568_EGFR647_FOV5' 'C15_EREG647_EGFR568_FOV7' 'C15_EREG568_EGFR647_FOV3' 'C15_EREG568_EGFR647_FOV1' 'C15_EREG647_EGFR568_FOV8'

[['C15_EGFR647_EREG568_FOV1', 'C15_EGFR568_EREG647_FOV5', 'C15_EGFR647_EREG568_FOV2', 'C15_EREG647_EGFR568_FOV9', 'C15_EGFR568_EREG647_FOV4', 'C15_EGFR568_EREG647_FOV3', 'C15_EGFR647_EREG568_FOV7', 'C15_EGFR647_EREG568_FOV6', 'C15_EGFR647_EREG568_FOV3', 'C15_EREG647_EGFR568_FOV6', 'C15_EREG568_EGFR647_FOV2', 'C15_EREG647_EGFR568_FOV2', 'C15_EREG568_EGFR647_FOV4', 'C15_EREG647_EGFR568_FOV3', 'C15_EREG647_EGFR568_FOV1'], ['C15_EGFR568_EREG647_FOV1', 'C15_EGFR568_EREG647_FOV2', 'C15_EGFR647_EREG568_FOV5', 'C15_EGFR647_EREG568_FOV4'], ['C15_EREG647_EGFR568_FOV5', 'C15_EREG647_EGFR568_FOV10', 'C15_EREG568_EGFR647_FOV5', 'C15_EREG647_EGFR568_FOV7', 'C15_EREG568_EGFR647_FOV3', 'C15_EREG568_EGFR647_FOV1', 'C15_EREG647_EGFR568_FOV8']]
Train set...
Processing...
Done!
Val set...
Processing...
Done!
Test set...
Processing...
Done!


### Train

In [46]:
# Assemble the training configuration and export it to a YAML file

config = {
    # Device to train on (gpu or cpu)
    'gpu': True,

    # Model parameters
    'model': "pointtransformerseg",

    # Optimiser parameters
    'optimiser': "adam",
    'lr': 0.001,
    'weight_decay': 0.0001,

    # Training parameters
    'epochs': 5,
    'batch_size': 1,
    'num_workers': 1,  # generally, higher -> faster
    'loss_fn': "nll",

    # Settings for the model selected above
    'pointtransformerseg': {
        "k": 16,
        "in_channels": 11,
        "out_channels": 2,
        "dim_model": [32, 64, 128, 256, 512],
        "k_up": 3,  # trilinear interpolation
        "output_mlp_layers": 64,
        # ratio of points to sample when transition down
        "ratio": 0.25,
        "pos_nn_layers": 64,
        "attn_nn_layers": 64,
    },

    # What we are trying to predict: node (alternative: graph)
    "label_level": "node",

    # Train/val transforms
    # options: ['normalisescale', 'jitter', 'flip', 'randscale', 'rotate', 'shear'] # null
    # Entries left commented out are currently disabled
    "transforms": {
        # 'jitter': 15,
        'x_flip': None,
        'y_flip': None,
        # 'randscale': [0.95, 1.05],
        'z_rotate': None,
        # 'shear': 0.05,
        'normalisescale': None,
    },

    # wandb parameters
    "wandb_project": "c15_expt5",
    "wandb_dataset": "c15",
}

# Write the configuration next to the other expt5 config files
yaml_save_loc = os.path.join(input_folder, 'config/expt5/train.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, stream=outfile)

# Notes kept from the original cell:
# 1. Choice of augmentations
# 2. Normalise features


In [47]:
# Run the train script 

!python ../src/locpix_points/scripts/train.py -i ../../../output/c15/expt5 -c ../../../data/c15/config/expt5/train.yaml



---- Params -----


Input features:  11
Num classes:  2
Batch size:  1
Epochs:  5
Number train graphs 15
Number val graphs 4
[34m[1mwandb[0m: Currently logged in as: [33moliver-umney[0m ([33mteststest[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: wandb version 0.15.8 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
[34m[1mwandb[0m: Tracking run with wandb version 0.15.4
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/home/oliver/smlm_cloud/locpix-points/experiments/wandb/run-20230808_140123-s1yd7gz5[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33msoft-wind-2[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/teststest/c15_expt5[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/teststest/c15_expt5/runs/s1yd7gz5[0m


---- Model summary (estimate) ----


Layer (type:depth-idx)                        P

Epoch:  0
^C


### Evaluate

In [None]:
# Export config variables

# Run the evaluate script

python src/locpix_points/scripts/evaluate.py -i ../../output/c15 -c src/locpix_points/templates/evaluate.yaml

### Interpret the results

In [None]:
# Pytorch geometric explainability tools