### Imports

In [11]:
import os
import yaml

### Experiment description

1. Predict ClusterID for clustering datasets
2. Use PointNet
3. No features only x & y data
4. Homogeneous graph

### Variables

In [7]:
input_folder = '../../../data/nieves'
output_folder = '../../../output/nieves'

### Preprocess data to suitable output location

In [8]:
os.makedirs(output_folder)
os.makedirs(os.path.join(output_folder, "preprocessed"))
os.makedirs(os.path.join(input_folder, "config"))

In [None]:
# Code specific to dataset

# change all cluster id's which are != 0 to 1 so we can predict not clustered or clustered

In [12]:
# Export config variables to file

config = {}

# The following are the names of the 
# x column, y column, z column if present, channel, frame,
# in the csvs being processed
config['x_col'] = 'x'
config['y_col'] = 'y'
config['z_col'] = None
config['channel_col'] = None #'channel'
config['frame_col'] = None # 'frame'

# The number of dimensions to consider
# If 2 only deals with x and y 
# If 3 will read in and deal with z as well (currently not fully supported)
config['dim'] = 2

# choice of which channels user wants to consider
# if null considers all
config['channel_choice'] = None

# specify string for each channel
config['channel_label'] = None
  #0: egfr
  #1: ereg

# whether to not drop the column containing
# pixel
config['drop_pixel_col'] = False

# files to include: all for everyone 
config['include_files'] = ["mongrad_MoleculeList_12", "mongrad_MoleculeList_11", "mongrad_MoleculeList_10", "mongrad_MoleculeList_1"]

## Choice of input features
config['features'] = []

## GT label per loc or per FOV
config['gt_label_scope'] = 'loc'

## if gt label specified, whih channel is it in (if its label per fov then only one row has to have value), or leave as null
config['gt_label'] = 'index' # gt_label

## specify the gt label map
config['gt_label_map'] = {0: 'unclustered', 1: 'clustered'}

yaml_save_loc = os.path.join(input_folder, 'config/preprocess.yaml')
with open(yaml_save_loc, "w") as outfile:
        yaml.dump(config, outfile)

In [5]:
# Run the preprocess script using this file

#python src/locpix_points/scripts/preprocess.py -i tests/nieves_test_data -c tests/nieves_test_data/config/preprocess.yaml -o ../../output/nieves

FileExistsError: [Errno 17] File exists: '../../../output/nieves'

### Process data

In [None]:
# Export config variables 

config = {}

# whether to process the data as heterogeneous or homogenous
# graph
config['hetero'] = False

# train/test/val splits
config['train_ratio'] = 0.5
config['val_ratio'] = 0.25
config['test_ratio'] = 0.25

# what to load into position
# Options: xy, xyz
pos: 'xy'

# what to load into features
# Options: uniform (ones), xy, ...
feat: 'xy'
    
# label level
# graph or node
label_level: 'node'

yaml_save_loc = os.path.join(input_folder, 'config/process.yaml')
with open(yaml_save_loc, "w") as outfile:
        yaml.dump(config, outfile)


In [None]:
# Run the process script 

#python src/locpix_points/scripts/process.py -i ../../output/nieves -c tests/nieves_test_data/config/process.yaml

### Train

In [None]:
# Export config variables 

config = {}

# device to train on (gpu or cpu)
config['gpu'] =True

# model parameters
config['model'] = "pointnet"

# optimiser parameters
config['optimiser'] ="adam"
config['lr'] = 0.001
config['weight_decay'] = 0.0001

# training parameters
config['epochs'] = 2
config['batch_size'] = 1
config['num_workers'] = 1 # generall higher -> faster
config['loss_fn'] = "nll"

config['pointnetseg'] = {
  "ratio" : [0.2,0.25],
  "radius" : [0.2, 0.4],
  # 2 = dim (2) + dim of feature (0)
  # 130 = 128 + dim (2) + dim of feature (0)
  # 258 = 256 + dim (2) + dim of feature (0)
  "sa_channels" : [[2, 64, 64, 128], [130, 128, 128, 256], [258, 256, 512, 1024]],
  "k" : [1,3,3],
  # 1080 = 1024 + 256 
  # 384 = 256 + 128
  # 130 = 128 + dim (2) + dim of feature (0) 
  "fp_channels" : [[1080, 256, 256], [384, 256, 128], [130, 128, 128, 128]],
  # num classes = 2
  "output_channels": [128, 128, 128, 2],
  "dropout" : 0.5,
  "norm" : None,
}

yaml_save_loc = os.path.join(input_folder, 'config/train.yaml')
with open(yaml_save_loc, "w") as outfile:
        yaml.dump(config, outfile)

# 1. Choice of augmentations

# 2. Normalise features


In [None]:
# Run the train script 

# python src/locpix_points/scripts/train.py -i ../../output/nieves -c tests/nieves_test_data/config/train.yaml

### Evaluate

In [None]:
# Export config variables

# Run the evaluate script

python src/locpix_points/scripts/evaluate.py -i ../../output/nieves -c src/locpix_points/templates/evaluate.yaml

### Interpret the results

In [13]:
# Pytorch geometric explainability tools