# Setup

In [None]:
import os
import chemprop
import pandas as pd

# Train regression model

In [None]:
# Command-line style flags for a short (5-epoch) regression training run.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints',  # reused as --checkpoint_dir by the prediction cells
    '--epochs', '5',
]

# Parse the flags into a TrainArgs object, then hand them to cross_validate
# with run_training as the per-fold training function.
args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

# Predict from file

In [None]:
# Prediction flags: score the example regression CSV with the checkpoints
# stored under test_checkpoints, using test_preds.csv as the predictions path.
arguments = [
    '--test_path', '../data/examples/regression.csv',
    '--preds_path', 'test_preds.csv',
    '--checkpoint_dir', 'test_checkpoints',
]

# Parse into a PredictArgs object and run prediction; the result is kept in
# `preds` for the next cell.
args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(
    args=args,
)

In [None]:
# Reload the input CSV and add the first element of each prediction row as a
# new 'preds' column.
df = pd.read_csv('../data/examples/regression.csv')
df['preds'] = [row[0] for row in preds]
# Bare expression on the last line of the cell so the notebook renders the table.
df

# Predict from SMILES list

In [None]:
# Each inner list holds the SMILES string(s) for one datapoint.
smiles = [
    ['CCC'],
    ['CCCC'],
    ['OCC'],
]

# The file paths are set to /dev/null; presumably they are only placeholders
# here because the SMILES are passed to make_predictions directly -- confirm.
arguments = [
    '--test_path', '/dev/null',
    '--preds_path', '/dev/null',
    '--checkpoint_dir', 'test_checkpoints',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(
    args=args,
    smiles=smiles,
)

# Load model once, predict multiple times

In [None]:
# Same /dev/null placeholder paths as the SMILES-list cell; only the
# checkpoint directory points at real files.
arguments = [
    '--test_path', '/dev/null',
    '--preds_path', '/dev/null',
    '--checkpoint_dir', 'test_checkpoints',
]

args = chemprop.args.PredictArgs().parse_args(arguments)

# Call load_model a single time and reuse its result for both prediction
# calls below via the model_objects keyword.
model_objects = chemprop.train.load_model(args=args)

# First batch of SMILES.
smiles = [['CCC'], ['CCCC'], ['OCC']]
preds = chemprop.train.make_predictions(
    args=args,
    smiles=smiles,
    model_objects=model_objects,
)

# Second batch of SMILES, predicted with the same model_objects.
smiles = [['CCCC'], ['CCCCC'], ['COCC']]
preds = chemprop.train.make_predictions(
    args=args,
    smiles=smiles,
    model_objects=model_objects,
)

# Reaction Training and Prediction

In [None]:
# Load the example reaction-regression CSV so its layout can be inspected
# before it is used for training in the next cell.
reaction_reg_df = pd.read_csv('../data/examples/reaction_regression.csv')
# Bare expression on the last line of the cell so the notebook renders the table.
reaction_reg_df

In [None]:
# Regression training on the reaction dataset; the value-less --reaction flag
# is what distinguishes this run from the plain regression training cell.
arguments = [
    '--data_path', '../data/examples/reaction_regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints',
    '--epochs', '5',
    '--reaction',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Predict on the reaction CSV with the checkpoints saved under
# test_checkpoints by the reaction training cell.
arguments = [
    '--test_path', '../data/examples/reaction_regression.csv',
    '--preds_path', 'test_preds.csv',
    '--checkpoint_dir', 'test_checkpoints',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(
    args=args,
)

# Multiple-Molecule Inputs

In [None]:
# Load the example multi-molecule classification CSV so its layout can be
# inspected before it is used for training in the next cell.
multimolecule_df = pd.read_csv('../data/examples/classification_multimolecule.csv')
# Bare expression on the last line of the cell so the notebook renders the table.
multimolecule_df

In [None]:
# Classification training where each datapoint consists of two molecules.
arguments = [
    '--data_path', '../data/examples/classification_multimolecule.csv',
    '--dataset_type', 'classification',
    '--save_dir', 'test_checkpoints',
    '--epochs', '5',
    '--number_of_molecules', '2',
    '--split_key_molecule', '1',  # defaults to 0 (1st column) if not specified
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Predict on the multi-molecule CSV; --number_of_molecules is given the same
# value ('2') that was used in the training cell.
arguments = [
    '--test_path', '../data/examples/classification_multimolecule.csv',
    '--preds_path', 'test_preds.csv',
    '--checkpoint_dir', 'test_checkpoints',
    '--number_of_molecules', '2',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(
    args=args,
)

# Split Type

In [None]:
# Same regression run as the first training cell, but with the split type
# set explicitly to 'scaffold_balanced'.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints',
    '--epochs', '5',
    '--split_type', 'scaffold_balanced',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

# Ensembling and Uncertainty

In [None]:
# Regression training with --ensemble_size set to '3'.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints',
    '--epochs', '5',
    '--ensemble_size', '3',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Predict with the ensemble trained in the preceding cell and request the
# ensemble variance alongside the predictions.
#
# The test file is the single-molecule regression CSV that the ensemble was
# trained on. It previously pointed at the two-molecule classification CSV,
# which matches neither the dataset type nor the molecule count of the
# checkpoints in test_checkpoints.
arguments = [
    '--test_path', '../data/examples/regression.csv',
    '--preds_path', 'test_preds.csv',
    '--checkpoint_dir', 'test_checkpoints',
    '--ensemble_variance',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(args=args)

# Fingerprint

In [None]:
# Plain 5-epoch regression training whose checkpoints feed the fingerprint
# cell that follows.
# NOTE(review): test_checkpoints is also the save_dir of the earlier
# 3-model ensemble run; confirm that leftover model files from that run are
# not picked up when the directory is later passed as --checkpoint_dir.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints',
    '--epochs', '5',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Fingerprint flags: same path flags as a prediction run, plus the
# fingerprint type ('MPN').
arguments = [
    '--test_path', '../data/examples/regression.csv',
    '--preds_path', 'test_preds.csv',
    '--checkpoint_dir', 'test_checkpoints',
    '--fingerprint_type', 'MPN',
]

# FingerprintArgs (not PredictArgs) is the parser used for this entry point.
args = chemprop.args.FingerprintArgs().parse_args(arguments)
preds = chemprop.train.molecule_fingerprint.molecule_fingerprint(
    args=args,
)

# Spectra

In [None]:
# Spectra training: uses its own checkpoint directory, an extra features
# file, and the 'random_with_repeated_smiles' split type.
arguments = [
    '--data_path', '../data/examples/spectra.csv',
    '--dataset_type', 'spectra',
    '--save_dir', 'test_checkpoints_spectra',
    '--epochs', '5',
    '--features_path', '../data/examples/spectra_features.csv',
    '--split_type', 'random_with_repeated_smiles',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Predict spectra with the checkpoints from the spectra training cell; the
# same features file used for training is supplied again here.
arguments = [
    '--test_path', '../data/examples/spectra.csv',
    '--preds_path', 'test_preds_spectra.csv',
    '--checkpoint_dir', 'test_checkpoints_spectra',
    '--features_path', '../data/examples/spectra_features.csv',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(
    args=args,
)

# Pretraining / Transfer Learning

In [None]:
# First stage of the transfer-learning example: an ordinary regression run
# saved under test_checkpoints_tl. The next cell reads a checkpoint from
# that directory via --checkpoint_frzn.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints_tl',
    '--epochs', '5',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Second stage of the transfer-learning example: train again, passing the
# model file written by the previous cell as --checkpoint_frzn.
# NOTE(review): --save_dir is the same directory the frozen checkpoint is
# read from, so this run presumably overwrites that file -- confirm this is
# intended.
arguments = [
    '--data_path', '../data/examples/regression.csv',
    '--dataset_type', 'regression',
    '--save_dir', 'test_checkpoints_tl',
    '--epochs', '5',
    '--checkpoint_frzn', 'test_checkpoints_tl/fold_0/model_0/model.pt',
]

args = chemprop.args.TrainArgs().parse_args(arguments)
mean_score, std_score = chemprop.train.cross_validate(
    args=args,
    train_func=chemprop.train.run_training,
)

In [None]:
# Predict with the transfer-learning model saved under test_checkpoints_tl
# by the training cells of this section.
#
# This cell was previously a verbatim copy of the spectra prediction cell
# (spectra data, spectra checkpoints, spectra features, spectra output file).
# It therefore never used the model trained in this section and overwrote
# test_preds_spectra.csv. It now targets the regression data that the
# test_checkpoints_tl model was trained on and writes to its own output file.
# No --features_path is passed because the training cells of this section
# did not use one.
arguments = [
    '--test_path', '../data/examples/regression.csv',
    '--preds_path', 'test_preds_tl.csv',
    '--checkpoint_dir', 'test_checkpoints_tl',
]

args = chemprop.args.PredictArgs().parse_args(arguments)
preds = chemprop.train.make_predictions(args=args)