### Imports

In [None]:
import os
import yaml
import polars as pl

### Experiment description

1. Predict Channel for Cell Pellet dataset
2. Use PointTransformer
3. Features = Everything except channels
4. Homogeneous graph

### Variables

In [None]:
# Root of the c15 dataset; the experiment config folder is created beneath it.
input_folder = '../../../data/c15'
# Experiment 5 inputs sit in a subfolder of the dataset root.
input_expt_folder = f'{input_folder}/expt5'
# Destination folder for experiment 5 outputs.
output_folder = '../../../output/c15/expt5'

### Preprocess data to suitable output location

In [None]:
# Directories that must exist before later cells write into them:
# the output folder, the expt5 config folder and the expt5 input folder.
folders = [output_folder, os.path.join(input_folder, "config/expt5"), input_expt_folder]

for folder in folders:
    # exist_ok=True avoids the check-then-create race and keeps the cell
    # safe to re-run when the folders are already present.
    os.makedirs(folder, exist_ok=True)

In [None]:
# Build the preprocessing configuration in one literal and write it to YAML

config = {
    # Names of the x column, y column, z column (if present), channel and
    # frame columns in the csvs being processed
    'x_col': 'x',
    'y_col': 'y',
    'z_col': None,
    'channel_col': None,  # 'channel'
    'frame_col': None,  # 'frame'

    # Number of dimensions to consider:
    # 2 -> only x and y are handled
    # 3 -> z is read in and handled as well (currently not fully supported)
    'dim': 2,

    # Which channels the user wants to consider; null considers all
    'channel_choice': None,

    # String label for each channel
    'channel_label': None,

    # Whether to drop the column containing pixel
    'drop_pixel_col': False,

    # Files to include: 'all' for every file
    'include_files': 'all',

    # Choice of input features
    'features': [
        'X precision (nm)',
        'Y precision (nm)',
        "Photons",
        "Background",
        "PSF Sigma X (pix)",
        "PSF Sigma Y (pix)",
        "Sigma X var",
        "Sigma Y var",
        "p-value",
    ],

    # Ground-truth label is given per localisation ('loc') or per FOV
    'gt_label_scope': 'loc',

    # Column holding the gt label, if specified (for a per-FOV label only one
    # row has to have a value); otherwise leave as null
    'gt_label': "gt_label",

    # Map from gt label value to its name
    'gt_label_map': {0: 'EGFR', 1: 'EREG'},
}

yaml_save_loc = os.path.join(input_folder, 'config/expt5/preprocess.yaml')
with open(yaml_save_loc, "w") as handle:
    yaml.dump(config, handle)

In [None]:
# Run the preprocess script using this file

!python ../src/locpix_points/scripts/preprocess.py -i ../../../data/nieves/expt3 -c ../../../data/nieves/config/expt3/preprocess.yaml -o ../../../output/nieves/expt3 -p

### Process data

In [None]:
# Export config variables for the process stage

config = {}

# whether to process the data as a heterogeneous or homogeneous graph
config['hetero'] = False

# train/val/test split ratios
config['train_ratio'] = 0.7
config['val_ratio'] = 0.1
config['test_ratio'] = 0.2

# what to load into position
# Options: xy, xyz
config['pos'] = 'xy'

# columns to load into features
# NOTE(review): 'Z (nm)' is listed although the preprocess config sets
# dim = 2 and z_col = None — confirm this column is actually available.
config["feat"] = ['X (nm)',
                  'Y (nm)',
                  'Z (nm)',
                  'X precision (nm)',
                  'Y precision (nm)',
                  'Photons',
                  'Background',
                  'PSF Sigma X (pix)',
                  'PSF Sigma Y (pix)',
                  'Sigma X var',
                  'Sigma Y var',
                  'p-value']

# label level
# graph or node
config["label_level"] = 'node'

# Save under config/expt5: it is the only config directory this notebook
# creates, so writing into any other config folder fails on a fresh run.
yaml_save_loc = os.path.join(input_folder, 'config/expt5/process.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, outfile)


In [None]:
# Run the process script 

# copy train test split from expt1

!python ../src/locpix_points/scripts/process.py -i ../../../output/nieves/expt3 -c ../../../data/nieves/config/expt3/process.yaml -r ../../../output/nieves/expt1 -- split

### Train

In [None]:
# Export config variables for the train stage

config = {}

# device to train on (gpu or cpu)
config['gpu'] = True

# model parameters
config['model'] = "pointtransformerseg"

# optimiser parameters
config['optimiser'] = "adam"
config['lr'] = 0.001
config['weight_decay'] = 0.0001

# training parameters
config['epochs'] = 5
config['batch_size'] = 1
config['num_workers'] = 1 # generally higher -> faster
config['loss_fn'] = "nll"

# NOTE(review): in_channels is 1 while the process config lists 12 feature
# columns — confirm this matches what the model expects.
config['pointtransformerseg'] = {
  "k": 16,
  "in_channels": 1,
  "out_channels": 2,
  "dim_model": [32, 64, 128, 256, 512],
  "k_up": 3, # trilinear interpolation
  "output_mlp_layers": 64,
  # ratio of points to sample when transition down
  "ratio": 0.25,
  "pos_nn_layers": 64,
  "attn_nn_layers": 64,
}

# what trying to predict
config["label_level"] = "node" # graph

# train/val transforms
# options: ['normalisescale', 'jitter', 'flip', 'randscale', 'rotate', 'shear'] # null
config["transforms"] = {
    #'jitter': 15, 
    'x_flip': None, 
    'y_flip': None, 
    #'randscale': [0.95, 1.05], 
    'z_rotate': None, 
    #'shear': 0.05, 
    'normalisescale': None
}

# wandb parameters, named after the dataset (c15) and experiment (expt5)
# configured in this notebook
config["wandb_project"] = "c15_expt5"
config["wandb_dataset"] = "c15"

# Save under config/expt5: it is the only config directory this notebook
# creates, so writing into any other config folder fails on a fresh run.
yaml_save_loc = os.path.join(input_folder, 'config/expt5/train.yaml')
with open(yaml_save_loc, "w") as outfile:
    yaml.dump(config, outfile)

# 1. Choice of augmentations

# 2. Normalise features


In [None]:
# Run the train script 

!python ../src/locpix_points/scripts/train.py -i ../../../output/nieves/expt3 -c ../../../data/nieves/config/expt3/train.yaml

### Evaluate

In [None]:
# Export config variables

# Run the evaluate script

python src/locpix_points/scripts/evaluate.py -i ../../output/nieves -c src/locpix_points/templates/evaluate.yaml

### Interpret the results

In [None]:
# PyTorch Geometric explainability tools