In [6]:
import pandas as pd
import numpy as np
from scipy import stats

import rdkit
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem

import pickle
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

In [30]:
target_list = ['erbB4', 'egfr', 'met', 'alk', 'erbB2', 'ret', 'ros1']

def count_taskDataset(df, target_list):
    tk_df = pd.DataFrame()
    result = pd.DataFrame(index=[0], columns=target_list)
    result['total'] = df.shape[0]
    #print('total data: '+str(df.shape[0]))
    for tar in target_list:
        tar_df = df.dropna(subset=['pIC50_'+tar])
        tar_df = tar_df[['SMILES_NS','pIC50_'+tar]]
        tar_df.rename(columns={'pIC50_'+tar:'pIC50'},inplace=True)
        tar_df.loc[:,'target'] = tar
        tk_df = pd.concat([tk_df, tar_df],axis=0)
        result[tar] = tar_df.shape[0]
        #print(tar+' data: '+str(tar_df.shape[0]))
    tk_df.sort_index(inplace=True)
    tk_df.reset_index(drop=True, inplace=True)
    return result, tk_df

def count_taskDataset_ana(df, target_list):
    tk_df = pd.DataFrame()
    result = pd.DataFrame(index=[0], columns=target_list)
    result['total'] = df.shape[0]
    #print('total data: '+str(df.shape[0]))
    for tar in target_list:
        tar_df = df.dropna(subset=['pIC50_'+tar])
        tar_df = tar_df[['SMILES_NS','pIC50_'+tar,'predicted_pIC50_'+tar,'nearest_neighbor', 'fing_zscore']]
        tar_df.rename(columns={'pIC50_'+tar:'pIC50','predicted_pIC50_'+tar:'predicted_pIC50'},inplace=True)
        tar_df.loc[:,'target'] = tar
        tk_df = pd.concat([tk_df, tar_df],axis=0)
        result[tar] = tar_df.shape[0]
        #print(tar+' data: '+str(tar_df.shape[0]))
    tk_df.sort_index(inplace=True)
    tk_df.reset_index(drop=True, inplace=True)
    return result, tk_df

def validationParams(df):
    eva = {'MAE':np.nan,'MSE':np.nan,'RMSE':np.nan,'R2':np.nan}
    if df.shape[0] == 0:
        return eva
    MAE = mean_absolute_error(df.pIC50, df.predicted_pIC50)
    eva['MAE'] = float('%.4f'%MAE)
    MSE = mean_squared_error(df.pIC50, df.predicted_pIC50)
    eva['MSE'] = float('%.4f'%MSE)
    RMSE = mean_squared_error(df.pIC50, df.predicted_pIC50, squared=False)
    eva['RMSE'] = float('%.4f'%RMSE)
    R2 = r2_score(df.pIC50, df.predicted_pIC50)
    eva['R2'] = float('%.4f'%R2)
    return eva

In [8]:
#similarity-based functions

def getECFP(smiles_list, rad):
    mols = [Chem.MolFromSmiles(smi) for smi in smiles_list]
    fp = [AllChem.GetMorganFingerprint(mol,rad) for mol in mols]
    return fp

def getTanimotoSim(train_fp, test_fp):
    index_name = []
    for ind in range(len(test_fp)):
        index = 'valid_'+str(ind)
        index_name.append(index)
    col_name = []
    for c in range(len(train_fp)):
        col = 'train_'+str(c)
        col_name.append(col)
    test_sim_df = pd.DataFrame(index=index_name, columns=col_name)
    for i in range(len(test_fp)):
        print('index-',i)
        ref_fp = test_fp[i]
        for j in range(len(train_fp)):
            com_fp = train_fp[j]
            sim = DataStructs.TanimotoSimilarity(ref_fp, com_fp)
            test_sim_df.iloc[i,j] = float('%.4f'%sim)
    return test_sim_df

Similarity Measurement of Cross-validation data

In [4]:
Dir = './'
cv_fing_df = {}
for fold in range(10):
    print('Fold-',fold)
    train_df = pd.read_csv(Dir+'train_kfold-'+str(fold)+'.csv', index_col = 0)
    print('Training set',train_df.shape)
    valid_df = pd.read_csv(Dir+'valid_kfold-'+str(fold)+'.csv', index_col = 0)
    print('Validation set',valid_df.shape)
    print('Fingerprint-based similarity measuring')
    
    #fingerprint-based similarity measurement
    train_fp = getECFP(train_df.SMILES_NS, 4)
    valid_fp = getECFP(valid_df.SMILES_NS, 4)
    valid_fp_sim = getTanimotoSim(train_fp, valid_fp)
    cv_fing_df[fold] = valid_fp_sim

Fold- 0
Training set (9004, 9)
Validation set (1001, 9)
Fingerprint-based similarity measuring
index- 0
index- 1
index- 2
index- 3
index- 4
index- 5
index- 6
index- 7
index- 8
index- 9
index- 10
index- 11
index- 12
index- 13
index- 14
index- 15
index- 16
index- 17
index- 18
index- 19
index- 20
index- 21
index- 22
index- 23
index- 24
index- 25
index- 26
index- 27
index- 28
index- 29
index- 30
index- 31
index- 32
index- 33
index- 34
index- 35
index- 36
index- 37
index- 38
index- 39
index- 40
index- 41
index- 42
index- 43
index- 44
index- 45
index- 46
index- 47
index- 48
index- 49
index- 50
index- 51
index- 52
index- 53
index- 54
index- 55
index- 56
index- 57
index- 58
index- 59
index- 60
index- 61
index- 62
index- 63
index- 64
index- 65
index- 66
index- 67
index- 68
index- 69
index- 70
index- 71
index- 72
index- 73
index- 74
index- 75
index- 76
index- 77
index- 78
index- 79
index- 80
index- 81
index- 82
index- 83
index- 84
index- 85
index- 86
index- 87
index- 88
index- 89
index- 90
index

index- 747
index- 748
index- 749
index- 750
index- 751
index- 752
index- 753
index- 754
index- 755
index- 756
index- 757
index- 758
index- 759
index- 760
index- 761
index- 762
index- 763
index- 764
index- 765
index- 766
index- 767
index- 768
index- 769
index- 770
index- 771
index- 772
index- 773
index- 774
index- 775
index- 776
index- 777
index- 778
index- 779
index- 780
index- 781
index- 782
index- 783
index- 784
index- 785
index- 786
index- 787
index- 788
index- 789
index- 790
index- 791
index- 792
index- 793
index- 794
index- 795
index- 796
index- 797
index- 798
index- 799
index- 800
index- 801
index- 802
index- 803
index- 804
index- 805
index- 806
index- 807
index- 808
index- 809
index- 810
index- 811
index- 812
index- 813
index- 814
index- 815
index- 816
index- 817
index- 818
index- 819
index- 820
index- 821
index- 822
index- 823
index- 824
index- 825
index- 826
index- 827
index- 828
index- 829
index- 830
index- 831
index- 832
index- 833
index- 834
index- 835
index- 836
index- 837

index- 493
index- 494
index- 495
index- 496
index- 497
index- 498
index- 499
index- 500
index- 501
index- 502
index- 503
index- 504
index- 505
index- 506
index- 507
index- 508
index- 509
index- 510
index- 511
index- 512
index- 513
index- 514
index- 515
index- 516
index- 517
index- 518
index- 519
index- 520
index- 521
index- 522
index- 523
index- 524
index- 525
index- 526
index- 527
index- 528
index- 529
index- 530
index- 531
index- 532
index- 533
index- 534
index- 535
index- 536
index- 537
index- 538
index- 539
index- 540
index- 541
index- 542
index- 543
index- 544
index- 545
index- 546
index- 547
index- 548
index- 549
index- 550
index- 551
index- 552
index- 553
index- 554
index- 555
index- 556
index- 557
index- 558
index- 559
index- 560
index- 561
index- 562
index- 563
index- 564
index- 565
index- 566
index- 567
index- 568
index- 569
index- 570
index- 571
index- 572
index- 573
index- 574
index- 575
index- 576
index- 577
index- 578
index- 579
index- 580
index- 581
index- 582
index- 583

index- 239
index- 240
index- 241
index- 242
index- 243
index- 244
index- 245
index- 246
index- 247
index- 248
index- 249
index- 250
index- 251
index- 252
index- 253
index- 254
index- 255
index- 256
index- 257
index- 258
index- 259
index- 260
index- 261
index- 262
index- 263
index- 264
index- 265
index- 266
index- 267
index- 268
index- 269
index- 270
index- 271
index- 272
index- 273
index- 274
index- 275
index- 276
index- 277
index- 278
index- 279
index- 280
index- 281
index- 282
index- 283
index- 284
index- 285
index- 286
index- 287
index- 288
index- 289
index- 290
index- 291
index- 292
index- 293
index- 294
index- 295
index- 296
index- 297
index- 298
index- 299
index- 300
index- 301
index- 302
index- 303
index- 304
index- 305
index- 306
index- 307
index- 308
index- 309
index- 310
index- 311
index- 312
index- 313
index- 314
index- 315
index- 316
index- 317
index- 318
index- 319
index- 320
index- 321
index- 322
index- 323
index- 324
index- 325
index- 326
index- 327
index- 328
index- 329

index- 984
index- 985
index- 986
index- 987
index- 988
index- 989
index- 990
index- 991
index- 992
index- 993
index- 994
index- 995
index- 996
index- 997
index- 998
index- 999
index- 1000
Fold- 3
Training set (9004, 9)
Validation set (1001, 9)
Fingerprint-based similarity measuring
index- 0
index- 1
index- 2
index- 3
index- 4
index- 5
index- 6
index- 7
index- 8
index- 9
index- 10
index- 11
index- 12
index- 13
index- 14
index- 15
index- 16
index- 17
index- 18
index- 19
index- 20
index- 21
index- 22
index- 23
index- 24
index- 25
index- 26
index- 27
index- 28
index- 29
index- 30
index- 31
index- 32
index- 33
index- 34
index- 35
index- 36
index- 37
index- 38
index- 39
index- 40
index- 41
index- 42
index- 43
index- 44
index- 45
index- 46
index- 47
index- 48
index- 49
index- 50
index- 51
index- 52
index- 53
index- 54
index- 55
index- 56
index- 57
index- 58
index- 59
index- 60
index- 61
index- 62
index- 63
index- 64
index- 65
index- 66
index- 67
index- 68
index- 69
index- 70
index- 71
index- 

index- 730
index- 731
index- 732
index- 733
index- 734
index- 735
index- 736
index- 737
index- 738
index- 739
index- 740
index- 741
index- 742
index- 743
index- 744
index- 745
index- 746
index- 747
index- 748
index- 749
index- 750
index- 751
index- 752
index- 753
index- 754
index- 755
index- 756
index- 757
index- 758
index- 759
index- 760
index- 761
index- 762
index- 763
index- 764
index- 765
index- 766
index- 767
index- 768
index- 769
index- 770
index- 771
index- 772
index- 773
index- 774
index- 775
index- 776
index- 777
index- 778
index- 779
index- 780
index- 781
index- 782
index- 783
index- 784
index- 785
index- 786
index- 787
index- 788
index- 789
index- 790
index- 791
index- 792
index- 793
index- 794
index- 795
index- 796
index- 797
index- 798
index- 799
index- 800
index- 801
index- 802
index- 803
index- 804
index- 805
index- 806
index- 807
index- 808
index- 809
index- 810
index- 811
index- 812
index- 813
index- 814
index- 815
index- 816
index- 817
index- 818
index- 819
index- 820

index- 476
index- 477
index- 478
index- 479
index- 480
index- 481
index- 482
index- 483
index- 484
index- 485
index- 486
index- 487
index- 488
index- 489
index- 490
index- 491
index- 492
index- 493
index- 494
index- 495
index- 496
index- 497
index- 498
index- 499
index- 500
index- 501
index- 502
index- 503
index- 504
index- 505
index- 506
index- 507
index- 508
index- 509
index- 510
index- 511
index- 512
index- 513
index- 514
index- 515
index- 516
index- 517
index- 518
index- 519
index- 520
index- 521
index- 522
index- 523
index- 524
index- 525
index- 526
index- 527
index- 528
index- 529
index- 530
index- 531
index- 532
index- 533
index- 534
index- 535
index- 536
index- 537
index- 538
index- 539
index- 540
index- 541
index- 542
index- 543
index- 544
index- 545
index- 546
index- 547
index- 548
index- 549
index- 550
index- 551
index- 552
index- 553
index- 554
index- 555
index- 556
index- 557
index- 558
index- 559
index- 560
index- 561
index- 562
index- 563
index- 564
index- 565
index- 566

index- 222
index- 223
index- 224
index- 225
index- 226
index- 227
index- 228
index- 229
index- 230
index- 231
index- 232
index- 233
index- 234
index- 235
index- 236
index- 237
index- 238
index- 239
index- 240
index- 241
index- 242
index- 243
index- 244
index- 245
index- 246
index- 247
index- 248
index- 249
index- 250
index- 251
index- 252
index- 253
index- 254
index- 255
index- 256
index- 257
index- 258
index- 259
index- 260
index- 261
index- 262
index- 263
index- 264
index- 265
index- 266
index- 267
index- 268
index- 269
index- 270
index- 271
index- 272
index- 273
index- 274
index- 275
index- 276
index- 277
index- 278
index- 279
index- 280
index- 281
index- 282
index- 283
index- 284
index- 285
index- 286
index- 287
index- 288
index- 289
index- 290
index- 291
index- 292
index- 293
index- 294
index- 295
index- 296
index- 297
index- 298
index- 299
index- 300
index- 301
index- 302
index- 303
index- 304
index- 305
index- 306
index- 307
index- 308
index- 309
index- 310
index- 311
index- 312

index- 967
index- 968
index- 969
index- 970
index- 971
index- 972
index- 973
index- 974
index- 975
index- 976
index- 977
index- 978
index- 979
index- 980
index- 981
index- 982
index- 983
index- 984
index- 985
index- 986
index- 987
index- 988
index- 989
index- 990
index- 991
index- 992
index- 993
index- 994
index- 995
index- 996
index- 997
index- 998
index- 999
Fold- 6
Training set (9005, 9)
Validation set (1000, 9)
Fingerprint-based similarity measuring
index- 0
index- 1
index- 2
index- 3
index- 4
index- 5
index- 6
index- 7
index- 8
index- 9
index- 10
index- 11
index- 12
index- 13
index- 14
index- 15
index- 16
index- 17
index- 18
index- 19
index- 20
index- 21
index- 22
index- 23
index- 24
index- 25
index- 26
index- 27
index- 28
index- 29
index- 30
index- 31
index- 32
index- 33
index- 34
index- 35
index- 36
index- 37
index- 38
index- 39
index- 40
index- 41
index- 42
index- 43
index- 44
index- 45
index- 46
index- 47
index- 48
index- 49
index- 50
index- 51
index- 52
index- 53
index- 54
in

index- 714
index- 715
index- 716
index- 717
index- 718
index- 719
index- 720
index- 721
index- 722
index- 723
index- 724
index- 725
index- 726
index- 727
index- 728
index- 729
index- 730
index- 731
index- 732
index- 733
index- 734
index- 735
index- 736
index- 737
index- 738
index- 739
index- 740
index- 741
index- 742
index- 743
index- 744
index- 745
index- 746
index- 747
index- 748
index- 749
index- 750
index- 751
index- 752
index- 753
index- 754
index- 755
index- 756
index- 757
index- 758
index- 759
index- 760
index- 761
index- 762
index- 763
index- 764
index- 765
index- 766
index- 767
index- 768
index- 769
index- 770
index- 771
index- 772
index- 773
index- 774
index- 775
index- 776
index- 777
index- 778
index- 779
index- 780
index- 781
index- 782
index- 783
index- 784
index- 785
index- 786
index- 787
index- 788
index- 789
index- 790
index- 791
index- 792
index- 793
index- 794
index- 795
index- 796
index- 797
index- 798
index- 799
index- 800
index- 801
index- 802
index- 803
index- 804

index- 461
index- 462
index- 463
index- 464
index- 465
index- 466
index- 467
index- 468
index- 469
index- 470
index- 471
index- 472
index- 473
index- 474
index- 475
index- 476
index- 477
index- 478
index- 479
index- 480
index- 481
index- 482
index- 483
index- 484
index- 485
index- 486
index- 487
index- 488
index- 489
index- 490
index- 491
index- 492
index- 493
index- 494
index- 495
index- 496
index- 497
index- 498
index- 499
index- 500
index- 501
index- 502
index- 503
index- 504
index- 505
index- 506
index- 507
index- 508
index- 509
index- 510
index- 511
index- 512
index- 513
index- 514
index- 515
index- 516
index- 517
index- 518
index- 519
index- 520
index- 521
index- 522
index- 523
index- 524
index- 525
index- 526
index- 527
index- 528
index- 529
index- 530
index- 531
index- 532
index- 533
index- 534
index- 535
index- 536
index- 537
index- 538
index- 539
index- 540
index- 541
index- 542
index- 543
index- 544
index- 545
index- 546
index- 547
index- 548
index- 549
index- 550
index- 551

index- 208
index- 209
index- 210
index- 211
index- 212
index- 213
index- 214
index- 215
index- 216
index- 217
index- 218
index- 219
index- 220
index- 221
index- 222
index- 223
index- 224
index- 225
index- 226
index- 227
index- 228
index- 229
index- 230
index- 231
index- 232
index- 233
index- 234
index- 235
index- 236
index- 237
index- 238
index- 239
index- 240
index- 241
index- 242
index- 243
index- 244
index- 245
index- 246
index- 247
index- 248
index- 249
index- 250
index- 251
index- 252
index- 253
index- 254
index- 255
index- 256
index- 257
index- 258
index- 259
index- 260
index- 261
index- 262
index- 263
index- 264
index- 265
index- 266
index- 267
index- 268
index- 269
index- 270
index- 271
index- 272
index- 273
index- 274
index- 275
index- 276
index- 277
index- 278
index- 279
index- 280
index- 281
index- 282
index- 283
index- 284
index- 285
index- 286
index- 287
index- 288
index- 289
index- 290
index- 291
index- 292
index- 293
index- 294
index- 295
index- 296
index- 297
index- 298

index- 953
index- 954
index- 955
index- 956
index- 957
index- 958
index- 959
index- 960
index- 961
index- 962
index- 963
index- 964
index- 965
index- 966
index- 967
index- 968
index- 969
index- 970
index- 971
index- 972
index- 973
index- 974
index- 975
index- 976
index- 977
index- 978
index- 979
index- 980
index- 981
index- 982
index- 983
index- 984
index- 985
index- 986
index- 987
index- 988
index- 989
index- 990
index- 991
index- 992
index- 993
index- 994
index- 995
index- 996
index- 997
index- 998
index- 999
Fold- 9
Training set (9005, 9)
Validation set (1000, 9)
Fingerprint-based similarity measuring
index- 0
index- 1
index- 2
index- 3
index- 4
index- 5
index- 6
index- 7
index- 8
index- 9
index- 10
index- 11
index- 12
index- 13
index- 14
index- 15
index- 16
index- 17
index- 18
index- 19
index- 20
index- 21
index- 22
index- 23
index- 24
index- 25
index- 26
index- 27
index- 28
index- 29
index- 30
index- 31
index- 32
index- 33
index- 34
index- 35
index- 36
index- 37
index- 38
index- 3

index- 700
index- 701
index- 702
index- 703
index- 704
index- 705
index- 706
index- 707
index- 708
index- 709
index- 710
index- 711
index- 712
index- 713
index- 714
index- 715
index- 716
index- 717
index- 718
index- 719
index- 720
index- 721
index- 722
index- 723
index- 724
index- 725
index- 726
index- 727
index- 728
index- 729
index- 730
index- 731
index- 732
index- 733
index- 734
index- 735
index- 736
index- 737
index- 738
index- 739
index- 740
index- 741
index- 742
index- 743
index- 744
index- 745
index- 746
index- 747
index- 748
index- 749
index- 750
index- 751
index- 752
index- 753
index- 754
index- 755
index- 756
index- 757
index- 758
index- 759
index- 760
index- 761
index- 762
index- 763
index- 764
index- 765
index- 766
index- 767
index- 768
index- 769
index- 770
index- 771
index- 772
index- 773
index- 774
index- 775
index- 776
index- 777
index- 778
index- 779
index- 780
index- 781
index- 782
index- 783
index- 784
index- 785
index- 786
index- 787
index- 788
index- 789
index- 790

Optimization of Similarity Threshold

In [32]:
perform_df = pd.DataFrame(columns=['fold','cutoff_error','num_data','coverage','cutoff_fing_similarity'], index=range(10))
z_crit = -(stats.norm.ppf(.95))
for fold in range(10):
    print('Fold-',fold)
    perform_df.loc[fold,'fold'] = fold
    
    pred_dir = './'
    valid_pred = pd.read_csv(pred_dir+'prediction_kfold-'+str(fold)+'.csv',index_col=0) 
    sim_df = cv_fing_df[fold]
    valid_pred.loc[:,'nearest_neighbor'] = np.nan
    for ind in valid_pred.index:
        max_sim = max(sim_df.loc['valid_'+str(ind)].values)
        valid_pred.loc[ind,'nearest_neighbor'] = max_sim
    fing_zscore = stats.zscore(valid_pred.nearest_neighbor)
    valid_pred.loc[:,'fing_zscore'] = fing_zscore
    
    _,valid_pred_sim = count_taskDataset_ana(valid_pred, target_list)
    valid_pred_sim.loc[:,'error'] = (valid_pred_sim.pIC50.values-valid_pred_sim.predicted_pIC50.values)**2
    
    error_q3 = valid_pred_sim['error'].quantile(0.75, interpolation='linear')
    print('Q3',error_q3)
    t_pred = valid_pred_sim[valid_pred_sim.error < error_q3]
    t_perform = validationParams(t_pred)
    perform_df.loc[fold,'cutoff_error'] = error_q3
    perform_df.loc[fold,'num_data'] = t_pred.shape[0]
    perform_df.loc[fold,'coverage'] = round((t_pred.shape[0]/valid_pred.shape[0])*100,2)
    
    fing_zscore = stats.zscore(t_pred.nearest_neighbor)
    t_pred.loc[:,'fing_zscore'] = fing_zscore    
    cutoff_val = min(t_pred.nearest_neighbor[t_pred.fing_zscore > z_crit])
    t_cutoff = cutoff_val
    print('cutoff fingerprint-based similarity' , t_cutoff)
    perform_df.loc[fold,'cutoff_fing_similarity'] = t_cutoff
perform_df

Fold- 0
Q3 0.28090000000000026
cutoff fingerprint-based similarity 0.5575
Fold- 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.29160000000000025
cutoff fingerprint-based similarity 0.5649
Fold- 2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.2916
cutoff fingerprint-based similarity 0.5376
Fold- 3


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.26010000000000066
cutoff fingerprint-based similarity 0.5556
Fold- 4


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.2916
cutoff fingerprint-based similarity 0.5489
Fold- 5


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.2916000000000005
cutoff fingerprint-based similarity 0.5394
Fold- 6


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.3248999999999993
cutoff fingerprint-based similarity 0.5545
Fold- 7


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.2756499999999999
cutoff fingerprint-based similarity 0.5478
Fold- 8


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.2916
cutoff fingerprint-based similarity 0.5255
Fold- 9


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Q3 0.3363999999999993
cutoff fingerprint-based similarity 0.5569


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Unnamed: 0,fold,cutoff_error,num_data,coverage,cutoff_fing_similarity
0,0,0.2809,833,83.22,0.5575
1,1,0.2916,849,84.82,0.5649
2,2,0.2916,841,84.02,0.5376
3,3,0.2601,841,84.02,0.5556
4,4,0.2916,847,84.62,0.5489
5,5,0.2916,848,84.8,0.5394
6,6,0.3249,837,83.7,0.5545
7,7,0.27565,845,84.5,0.5478
8,8,0.2916,844,84.4,0.5255
9,9,0.3364,848,84.8,0.5569


In [34]:
cutoff_fing = round(np.mean(perform_df.cutoff_fing_similarity),4)
print('similarity threshold',cutoff_fing)

similarity threshold 0.5489


Calculation of 10% of the training endpoint range for each target

In [35]:
Dir = './'
train_df = pd.read_csv(Dir+'train.csv', index_col = 0)
_,train_tk = count_taskDataset(train_df, target_list)

In [37]:
train_cutoff = {}
for tar in target_list:
    tar_train = train_tk[train_tk.target == tar]
    range_train = round((max(tar_train.pIC50)-min(tar_train.pIC50))*0.1,3)
    train_cutoff[tar] = range_train
print('10% training range')
print(train_cutoff)

10% training range
{'erbB4': 0.54, 'egfr': 0.84, 'met': 0.654, 'alk': 0.594, 'erbB2': 0.6, 'ret': 0.64, 'ros1': 0.513}
