# Snippet #3

In [1]:
from __future__ import print_function

In [2]:
import numpy as np
from oddt import random_seed
from oddt.toolkits import rdk, ob
from oddt.scoring.models.classifiers import randomforest
from oddt.scoring import cross_validate

### Define fingerprints to use

In [3]:
# Fingerprint names to evaluate. Each one is computed with the `ob` toolkit
# when it is listed in `ob.fps`, and with `rdk` otherwise.
fps = [
    'fp2',
    'fp4',
    'maccs',
    'morgan',
    'layered',
    'rdkit',
]

# Compound set names; each is substituted into the 'cdk2_%s.ism' file name
# when the molecules are read.
types = [
    'actives',
    'inactives',
    'marginal',
    'decoys',
]

### Compute selected fingerprints

In [4]:
%%time
mols = {}
for fp in fps:
    toolkit = ob if fp in ob.fps else rdk
    mols[fp] = {}
    for m in types:
        mol_file = toolkit.readfile('smi', 'cdk2_%s.ism' % m)
        mols[fp][m] = np.array([mol.calcfp(fp).raw for mol in mol_file if mol])

CPU times: user 10min 7s, sys: 1.21 s, total: 10min 8s
Wall time: 10min 8s


### Train and validate a random forest model

In [5]:
# Seed once, then reuse a single 500-tree model object for every fingerprint.
random_seed(1)
rf = randomforest(500)
tag = {}
for fp in fps:
    # Labels for this fingerprint: 1 for actives, 0 for every other set.
    # They are rebuilt per fingerprint because set sizes are taken from mols[fp].
    for m in types:
        n_rows = len(mols[fp][m])
        tag[m] = np.ones(n_rows) if m == 'actives' else np.zeros(n_rows)

    # Training set: actives, inactives and marginal compounds.
    train_sets = ('actives', 'inactives', 'marginal')
    train_descs = np.vstack([mols[fp][name] for name in train_sets])
    train_tag = np.hstack([tag[name] for name in train_sets])

    # Test set: actives and decoys.
    # NOTE(review): the actives are also part of the training set, so the
    # test score is partly measured on samples the model was fitted on.
    test_sets = ('actives', 'decoys')
    test_descs = np.vstack([mols[fp][name] for name in test_sets])
    test_tag = np.hstack([tag[name] for name in test_sets])

    # Cross-validation pool: training rows followed by test rows.
    # NOTE(review): the actives are therefore present twice in this pool, so
    # copies of the same molecule can land in different folds -- confirm this
    # is intended.
    cv_desc = np.vstack((train_descs, test_descs))
    cv_tag = np.hstack((train_tag, test_tag))

    # Keep the order fit -> cross_validate -> score: the calls are stochastic
    # and the recorded numbers depend on it.
    rf.fit(train_descs, train_tag)
    cv = cross_validate(rf, cv_desc, cv_tag, shuffle=True, n=10, n_jobs=-1)

    # NOTE(review): `randomforest` comes from `oddt.scoring.models.classifiers`;
    # if it is a classifier, `score` is presumably mean accuracy rather than
    # R^2 -- confirm before relying on the printed label.
    r2 = rf.score(test_descs, test_tag)
    print('%s\t R^2: %.4f CV: %.4f CV_std: %.4f' % (fp, r2, cv.mean(), cv.std()))

fp2	 R^2: 0.9508 CV: 0.9946 CV_std: 0.0016
fp4	 R^2: 0.8564 CV: 0.9938 CV_std: 0.0016
maccs	 R^2: 0.8788 CV: 0.9947 CV_std: 0.0014
morgan	 R^2: 0.9903 CV: 0.9957 CV_std: 0.0011
layered	 R^2: 0.9286 CV: 0.9947 CV_std: 0.0019
rdkit	 R^2: 0.9750 CV: 0.9950 CV_std: 0.0013
