In [1]:
import pickle
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_validate
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm


rng = np.random.RandomState(20210414)

In [2]:
SWCAll =  pickle.load( open( "../FeatureExtraction/DataSets/SWCFeatures/SWCFeat.p", "rb" ) )
SWCAll['class'].value_counts()

0.0    31456
1.0     7827
Name: class, dtype: int64

In [3]:
SWCAll =  pickle.load( open( "../FeatureExtraction/DataSets/SWCFeatures/SWCFeat.p", "rb" ) )

train = SWCAll.sample(frac=0.80, random_state=rng)
subTrain = train.sample(frac=0.80, random_state=rng)
tuneMask = pd.Series(True, index=train.index)
tuneMask[subTrain.index] = False
tune = train[tuneMask].copy()

In [4]:
len(tune)

6285

In [5]:
subTrain.columns.tolist()

['class',
 'cc',
 'cd',
 'dt',
 'ex',
 'fw',
 'in',
 'jj',
 'jjr',
 'jjs',
 'ls',
 'md',
 'nn',
 'nnp',
 'nnps',
 'nns',
 'pdt',
 'pos',
 'prp',
 'rb',
 'rbr',
 'rbs',
 'rp',
 'sym',
 'to',
 'uh',
 'vb',
 'vbd',
 'vbg',
 'vbn',
 'vbp',
 'vbz',
 'wdt',
 'wp',
 'wrb',
 'nn nn',
 'jj nn',
 'nn nns',
 'to vb',
 'jj nns',
 'jj to',
 'nn in',
 'nns in',
 'in nn',
 'dt nn',
 'jj nn nn',
 'nn nn nn',
 'jj to vb',
 'nn nn nns',
 'to vb nn',
 ' Level0',
 ' Level1',
 ' Level2',
 ' Level3',
 ' Level4',
 ' Level5',
 ' Level6',
 ' Level7',
 ' MeanLevel',
 'totalSyl',
 'avgSyl',
 'simWords',
 'comWords',
 'greatestSyl',
 'leastSyl',
 'numChars',
 'numWords',
 'avgLenWord',
 'ld',
 'ls1',
 'ls2',
 'vs1',
 'vs2',
 'cvs1',
 'ndw',
 'ttr',
 'cttr',
 'rttr',
 'logttr',
 'lv',
 'vv1',
 'svv1',
 'cvv1',
 'vv2',
 'nv',
 'adjv',
 'numSpellingErrors',
 'offByOne',
 'kidsError',
 'punct',
 'casing',
 'coreVocab',
 'nonCoreVocab',
 'vocabMin',
 'vocabMax',
 'queryComplexity',
 'addVocab',
 'SVEN',
 'top250All',


In [6]:
feat = subTrain.columns.tolist()
feat.remove('class')
#feat.remove(' Sentences')
# feat.remove('tfidfNA')
# feat.remove('tfidf')
# feat.remove('tfidfAll')
featCols = feat
outCol = 'class'

subTrainX = subTrain[featCols]
subTrainY = subTrain[outCol]
tuneX = tune[featCols]
tuneY = tune[outCol]

In [7]:
tune['class'].value_counts()

0.0    5077
1.0    1208
Name: class, dtype: int64

In [8]:
##Average tp over each model numIts times

numIts = 5
numEstimatorParam = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
bootStrapParam = [True, False]
criterionParam = ['gini', 'entropy']
classWeightParam = ['None', 'balanced']
scalers = [StandardScaler(), '']
bestTP = 0
bestTN = 0
bestParam =	{
  "numEstimators": 0,
  "bootStrap": False,
  "criterion": "",
  "classWeight" : "",
  "scaler" : ""
}
length = (len(numEstimatorParam)*len(bootStrapParam)*len(criterionParam)
          *len(classWeightParam)*len(scalers))
with tqdm(total = length) as pbar:
    for numE in numEstimatorParam:
        for bS in bootStrapParam:
            for c in criterionParam:
                for scale in scalers:
                    tpAvg = 0
                    tnAvg = 0
                    for x in range(numIts):
                        if scale:
                            tune_pipe = Pipeline([
                                ('standardize', scale),
                                ('classify', RandomForestClassifier(criterion = c, n_estimators=numE, bootstrap = bS, class_weight = 'balanced', n_jobs=-1, random_state = rng ))
                            ])
                        else:
                            tune_pipe = Pipeline([
                                ('classify', RandomForestClassifier(criterion = c, n_estimators=numE, bootstrap = bS, class_weight = 'balanced', n_jobs=-1, random_state = rng )) 
                            ])
                        tune_pipe.fit(subTrainX, subTrainY)

                        tuneAcc = accuracy_score(tuneY, tune_pipe.predict(tuneX))
                        tn, fp, fn, tp = confusion_matrix(tuneY, tune_pipe.predict(tuneX)).ravel()
                        tpAvg += tp
                        tnAvg += tn
                    tpAvg = tpAvg/numIts
                    tnAvg = tnAvg/numIts
                    print(numE, bS, c, scale, tuneAcc, tpAvg)
                    if tpAvg > bestTP:
                        bestTP = tpAvg
                        bestTN = tnAvg
                        bestParam["numEstimators"] = numE
                        bestParam["bootStrap"] = bS
                        bestParam["criterion"] = c
                        bestParam["classWeight"] = 'balanced'
                        bestParam["scaler"] = scale
                        print("Tune TP: " + str(tpAvg))
                    if tpAvg == bestTP:
                        if tnAvg >bestTN:
                            bestTP = tpAvg
                            bestTN = tnAvg
                            bestParam["numEstimators"] = numE
                            bestParam["bootStrap"] = bS
                            bestParam["criterion"] = c
                            bestParam["classWeight"] = 'balanced'
                            bestParam["scaler"] = scale
                            print("Tune TP: " + str(tpAvg))
                            print("Tune TN: " + str(tnAvg))
                    pbar.update()
    for numE in numEstimatorParam:
        for bS in bootStrapParam:
            for c in criterionParam:
                for scale in scalers:
                    tpAvg = 0
                    tnAvg = 0
                    for x in range(numIts):
                        if scale:
                            tune_pipe = Pipeline([
                                ('standardize', scale),
                                ('classify', RandomForestClassifier(criterion = c, n_estimators=numE, bootstrap = bS, n_jobs=-1, random_state = rng))
                            ])
                        else:
                            tune_pipe = Pipeline([
                                 ('classify', RandomForestClassifier(criterion = c, n_estimators=numE, bootstrap = bS, n_jobs=-1, random_state = rng))
                            ])
                        tune_pipe.fit(subTrainX, subTrainY)

                        tuneAcc = accuracy_score(tuneY, tune_pipe.predict(tuneX))
                        tn, fp, fn, tp = confusion_matrix(tuneY, tune_pipe.predict(tuneX)).ravel()
                        tpAvg += tp
                        tnAvg += tn
                    tpAvg = tpAvg/numIts
                    tnAvg = tnAvg/numIts
                    print(numE, bS, c, scale, tuneAcc, tpAvg)
                    if tpAvg > bestTP:
                        bestTP = tpAvg
                        bestTN = tnAvg
                        bestParam["numEstimators"] = numE
                        bestParam["bootStrap"] = bS
                        bestParam["criterion"] = c
                        bestParam["classWeight"] = ''
                        bestParam["scaler"] = scale
                        print("Tune TP: " + str(tpAvg))
                    if tpAvg == bestTP:
                        if tnAvg >bestTN:
                            bestTP = tpAvg
                            bestTN = tnAvg
                            bestParam["numEstimators"] = numE
                            bestParam["bootStrap"] = bS
                            bestParam["criterion"] = c
                            bestParam["classWeight"] = ''
                            bestParam["scaler"] = scale
                            print("Tune TP: " + str(tpAvg))
                            print("Tune TN: " + str(tnAvg))
                    pbar.update()

  1%|          | 1/160 [00:05<15:00,  5.66s/it]

50 True gini StandardScaler() 0.9406631762652705 968.0
Tune TP: 968.0


  1%|▏         | 2/160 [00:11<14:47,  5.61s/it]

50 True gini  0.939235284785023 962.8


  2%|▏         | 3/160 [00:17<15:29,  5.92s/it]

50 True entropy StandardScaler() 0.9409804854831033 984.6
Tune TP: 984.6


  2%|▎         | 4/160 [00:24<15:59,  6.15s/it]

50 True entropy  0.9409804854831033 980.2


  3%|▎         | 5/160 [00:33<18:05,  7.00s/it]

50 False gini StandardScaler() 0.9414564493098525 984.4


  4%|▍         | 6/160 [00:41<18:59,  7.40s/it]

50 False gini  0.9435189592257656 985.4
Tune TP: 985.4


  4%|▍         | 7/160 [00:52<21:05,  8.27s/it]

50 False entropy StandardScaler() 0.9436776138346818 998.6
Tune TP: 998.6


  5%|▌         | 8/160 [01:01<22:09,  8.75s/it]

50 False entropy  0.9438362684435982 1002.0
Tune TP: 1002.0


  6%|▌         | 9/160 [01:13<24:13,  9.62s/it]

100 True gini StandardScaler() 0.9403458670474377 973.0


  6%|▋         | 10/160 [01:24<25:20, 10.14s/it]

100 True gini  0.9390766301761067 972.0


  7%|▋         | 11/160 [01:38<27:21, 11.02s/it]

100 True entropy StandardScaler() 0.9428843407901 986.6


  8%|▊         | 12/160 [01:50<28:02, 11.37s/it]

100 True entropy  0.9432016500079328 984.8


  8%|▊         | 13/160 [02:08<33:05, 13.51s/it]

100 False gini StandardScaler() 0.9435189592257656 989.8


  9%|▉         | 14/160 [02:23<34:02, 13.99s/it]

100 False gini  0.9454228145327622 993.4


  9%|▉         | 15/160 [02:46<39:44, 16.45s/it]

100 False entropy StandardScaler() 0.9466920514040933 1005.2
Tune TP: 1005.2


 10%|█         | 16/160 [03:07<43:04, 17.95s/it]

100 False entropy  0.9454228145327622 1005.6
Tune TP: 1005.6


 11%|█         | 17/160 [03:24<41:52, 17.57s/it]

150 True gini StandardScaler() 0.9406631762652705 974.0


 11%|█▏        | 18/160 [03:40<40:53, 17.28s/it]

150 True gini  0.9409804854831033 976.0


 12%|█▏        | 19/160 [04:04<45:12, 19.24s/it]

150 True entropy StandardScaler() 0.9414564493098525 980.0


 12%|█▎        | 20/160 [04:30<49:54, 21.39s/it]

150 True entropy  0.9416151039187688 983.8


 13%|█▎        | 21/160 [05:13<1:04:13, 27.72s/it]

150 False gini StandardScaler() 0.9424083769633508 993.0


 14%|█▍        | 22/160 [05:48<1:08:39, 29.85s/it]

150 False gini  0.944153577661431 994.0


 14%|█▍        | 23/160 [06:22<1:10:49, 31.02s/it]

150 False entropy StandardScaler() 0.9471680152308425 1010.0
Tune TP: 1010.0


 15%|█▌        | 24/160 [06:53<1:10:42, 31.20s/it]

150 False entropy  0.9462160875773441 1006.6


 16%|█▌        | 25/160 [07:20<1:07:30, 30.01s/it]

200 True gini StandardScaler() 0.9401872124385213 973.2


 16%|█▋        | 26/160 [07:47<1:04:41, 28.96s/it]

200 True gini  0.9405045216563541 966.8


 17%|█▋        | 27/160 [08:16<1:04:25, 29.06s/it]

200 True entropy StandardScaler() 0.9424083769633508 985.6


 18%|█▊        | 28/160 [08:48<1:05:30, 29.77s/it]

200 True entropy  0.9419324131366016 986.8


 18%|█▊        | 29/160 [09:35<1:16:31, 35.05s/it]

200 False gini StandardScaler() 0.9439949230525146 994.2


 19%|█▉        | 30/160 [10:23<1:24:13, 38.87s/it]

200 False gini  0.9430429953990164 995.4


 19%|█▉        | 31/160 [11:19<1:34:56, 44.16s/it]

200 False entropy StandardScaler() 0.9460574329684277 1011.6
Tune TP: 1011.6


 20%|██        | 32/160 [12:21<1:45:43, 49.56s/it]

200 False entropy  0.9466920514040933 1007.2


 21%|██        | 33/160 [13:09<1:43:51, 49.07s/it]

250 True gini StandardScaler() 0.9411391400920197 976.0


 21%|██▏       | 34/160 [14:05<1:47:04, 50.98s/it]

250 True gini  0.9408218308741869 973.0


 22%|██▏       | 35/160 [14:52<1:43:34, 49.71s/it]

250 True entropy StandardScaler() 0.9427256861811836 983.4


 22%|██▎       | 36/160 [15:48<1:46:51, 51.70s/it]

250 True entropy  0.9427256861811836 984.4


 23%|██▎       | 37/160 [17:10<2:04:52, 60.92s/it]

250 False gini StandardScaler() 0.9439949230525146 992.0


 24%|██▍       | 38/160 [18:50<2:27:28, 72.53s/it]

250 False gini  0.9451055053149294 997.6


 24%|██▍       | 39/160 [20:11<2:31:22, 75.06s/it]

250 False entropy StandardScaler() 0.9452641599238458 1009.8


 25%|██▌       | 40/160 [21:22<2:27:49, 73.91s/it]

250 False entropy  0.9471680152308425 1012.0
Tune TP: 1012.0


 26%|██▌       | 41/160 [23:10<2:46:48, 84.11s/it]

300 True gini StandardScaler() 0.9403458670474377 966.6


 26%|██▋       | 42/160 [24:53<2:56:44, 89.86s/it]

300 True gini  0.9416151039187688 973.2


 27%|██▋       | 43/160 [26:19<2:52:41, 88.56s/it]

300 True entropy StandardScaler() 0.9436776138346818 986.0


 28%|██▊       | 44/160 [27:46<2:50:27, 88.17s/it]

300 True entropy  0.9424083769633508 985.4


 28%|██▊       | 45/160 [29:48<3:08:09, 98.17s/it]

300 False gini StandardScaler() 0.9425670315722672 996.2


 29%|██▉       | 46/160 [31:49<3:19:39, 105.08s/it]

300 False gini  0.944946850706013 998.4


 29%|██▉       | 47/160 [33:14<3:06:25, 98.99s/it] 

300 False entropy StandardScaler() 0.9465333967951769 1012.4
Tune TP: 1012.4


 30%|███       | 48/160 [34:32<2:53:02, 92.70s/it]

300 False entropy  0.9484372521021736 1015.0
Tune TP: 1015.0


 31%|███       | 49/160 [35:28<2:31:11, 81.72s/it]

350 True gini StandardScaler() 0.9405045216563541 973.6


 31%|███▏      | 50/160 [36:22<2:14:57, 73.61s/it]

350 True gini  0.9406631762652705 971.8


 32%|███▏      | 51/160 [37:22<2:06:03, 69.39s/it]

350 True entropy StandardScaler() 0.942091067745518 984.6


 32%|███▎      | 52/160 [38:19<1:58:22, 65.77s/it]

350 True entropy  0.942091067745518 986.6


 33%|███▎      | 53/160 [39:41<2:05:37, 70.45s/it]

350 False gini StandardScaler() 0.944153577661431 996.8


 34%|███▍      | 54/160 [41:00<2:09:22, 73.23s/it]

350 False gini  0.9447881960970966 997.6


 34%|███▍      | 55/160 [42:32<2:17:36, 78.64s/it]

350 False entropy StandardScaler() 0.9471680152308425 1012.2


 35%|███▌      | 56/160 [45:00<2:52:28, 99.50s/it]

350 False entropy  0.9479612882754244 1011.8


 36%|███▌      | 57/160 [46:52<2:57:13, 103.23s/it]

400 True gini StandardScaler() 0.9409804854831033 970.8


 36%|███▋      | 58/160 [48:37<2:56:23, 103.76s/it]

400 True gini  0.9409804854831033 973.6


 37%|███▋      | 59/160 [50:45<3:07:06, 111.15s/it]

400 True entropy StandardScaler() 0.9425670315722672 988.8


 38%|███▊      | 60/160 [52:34<3:03:53, 110.34s/it]

400 True entropy  0.9432016500079328 986.6


 38%|███▊      | 61/160 [55:31<3:35:11, 130.42s/it]

400 False gini StandardScaler() 0.9452641599238458 996.8


 39%|███▉      | 62/160 [58:22<3:52:57, 142.62s/it]

400 False gini  0.9452641599238458 997.6


 39%|███▉      | 63/160 [1:01:38<4:16:40, 158.77s/it]

400 False entropy StandardScaler() 0.9457401237505949 1010.8


 40%|████      | 64/160 [1:05:25<4:46:40, 179.17s/it]

400 False entropy  0.9474853244486753 1016.6
Tune TP: 1016.6


 41%|████      | 65/160 [1:07:49<4:26:51, 168.54s/it]

450 True gini StandardScaler() 0.9417737585276852 971.4


 41%|████▏     | 66/160 [1:09:52<4:02:36, 154.86s/it]

450 True gini  0.9427256861811836 974.2


 42%|████▏     | 67/160 [1:12:04<3:49:27, 148.04s/it]

450 True entropy StandardScaler() 0.9416151039187688 983.4


 42%|████▎     | 68/160 [1:14:19<3:40:47, 144.00s/it]

450 True entropy  0.944153577661431 990.0


 43%|████▎     | 69/160 [1:17:46<4:07:29, 163.19s/it]

450 False gini StandardScaler() 0.9436776138346818 997.8


 44%|████▍     | 70/160 [1:22:01<4:45:56, 190.63s/it]

450 False gini  0.9454228145327622 999.0


 44%|████▍     | 71/160 [1:27:05<5:33:10, 224.61s/it]

450 False entropy StandardScaler() 0.9460574329684277 1013.4


 45%|████▌     | 72/160 [1:29:32<4:55:21, 201.38s/it]

450 False entropy  0.9460574329684277 1012.4


 46%|████▌     | 73/160 [1:31:06<4:05:18, 169.18s/it]

500 True gini StandardScaler() 0.9409804854831033 968.8


 46%|████▋     | 74/160 [1:32:28<3:24:55, 142.98s/it]

500 True gini  0.9414564493098525 975.0


 47%|████▋     | 75/160 [1:34:09<3:04:32, 130.26s/it]

500 True entropy StandardScaler() 0.9419324131366016 985.4


 48%|████▊     | 76/160 [1:36:00<2:54:26, 124.60s/it]

500 True entropy  0.9424083769633508 986.0


 48%|████▊     | 77/160 [1:39:08<3:18:42, 143.64s/it]

500 False gini StandardScaler() 0.9451055053149294 997.4


 49%|████▉     | 78/160 [1:41:08<3:06:32, 136.49s/it]

500 False gini  0.944153577661431 997.8


 49%|████▉     | 79/160 [1:43:14<3:00:08, 133.44s/it]

500 False entropy StandardScaler() 0.9463747421862605 1012.4


 50%|█████     | 80/160 [1:45:25<2:56:41, 132.52s/it]

500 False entropy  0.9471680152308425 1017.0
Tune TP: 1017.0


 51%|█████     | 81/160 [1:45:34<2:05:42, 95.47s/it] 

50 True gini StandardScaler() 0.9395525940028557 979.8


 51%|█████▏    | 82/160 [1:45:42<1:30:00, 69.24s/it]

50 True gini  0.9414564493098525 978.8


 52%|█████▏    | 83/160 [1:45:51<1:05:41, 51.19s/it]

50 True entropy StandardScaler() 0.9417737585276852 985.2


 52%|█████▎    | 84/160 [1:45:59<48:31, 38.31s/it]  

50 True entropy  0.9430429953990164 984.4


 53%|█████▎    | 85/160 [1:46:11<38:06, 30.49s/it]

50 False gini StandardScaler() 0.9443122322703474 1000.6


 54%|█████▍    | 86/160 [1:46:23<30:32, 24.77s/it]

50 False gini  0.9438362684435982 1004.8


 54%|█████▍    | 87/160 [1:46:35<25:42, 21.14s/it]

50 False entropy StandardScaler() 0.9452641599238458 1007.4


 55%|█████▌    | 88/160 [1:46:47<22:01, 18.36s/it]

50 False entropy  0.9425670315722672 999.0


 56%|█████▌    | 89/160 [1:47:04<21:01, 17.76s/it]

100 True gini StandardScaler() 0.9425670315722672 991.2


 56%|█████▋    | 90/160 [1:47:20<20:10, 17.29s/it]

100 True gini  0.944153577661431 985.8


 57%|█████▋    | 91/160 [1:47:37<19:45, 17.18s/it]

100 True entropy StandardScaler() 0.9433603046168492 992.4


 57%|█████▊    | 92/160 [1:47:53<19:08, 16.89s/it]

100 True entropy  0.9436776138346818 991.4


 58%|█████▊    | 93/160 [1:48:16<21:01, 18.82s/it]

100 False gini StandardScaler() 0.9474853244486753 1013.0


 59%|█████▉    | 94/160 [1:48:40<22:19, 20.29s/it]

100 False gini  0.9466920514040933 1012.4


 59%|█████▉    | 95/160 [1:49:04<23:14, 21.46s/it]

100 False entropy StandardScaler() 0.9452641599238458 1013.8


 60%|██████    | 96/160 [1:49:29<24:04, 22.57s/it]

100 False entropy  0.9471680152308425 1017.4
Tune TP: 1017.4


 61%|██████    | 97/160 [1:49:54<24:16, 23.12s/it]

150 True gini StandardScaler() 0.9438362684435982 987.2


 61%|██████▏   | 98/160 [1:50:17<24:03, 23.28s/it]

150 True gini  0.9425670315722672 991.8


 62%|██████▏   | 99/160 [1:50:42<24:09, 23.76s/it]

150 True entropy StandardScaler() 0.9432016500079328 993.6


 62%|██████▎   | 100/160 [1:51:06<23:48, 23.80s/it]

150 True entropy  0.9409804854831033 988.8


 63%|██████▎   | 101/160 [1:51:40<26:27, 26.91s/it]

150 False gini StandardScaler() 0.9466920514040933 1009.4


 64%|██████▍   | 102/160 [1:52:14<27:56, 28.90s/it]

150 False gini  0.9468507060130097 1014.4


 64%|██████▍   | 103/160 [1:52:49<29:11, 30.73s/it]

150 False entropy StandardScaler() 0.9462160875773441 1015.4


 65%|██████▌   | 104/160 [1:53:24<29:49, 31.96s/it]

150 False entropy  0.9471680152308425 1015.4


 66%|██████▌   | 105/160 [1:53:56<29:18, 31.98s/it]

200 True gini StandardScaler() 0.9446295414881802 990.6


 66%|██████▋   | 106/160 [1:54:27<28:37, 31.80s/it]

200 True gini  0.9430429953990164 988.4


 67%|██████▋   | 107/160 [1:55:00<28:17, 32.03s/it]

200 True entropy StandardScaler() 0.9452641599238458 993.2


 68%|██████▊   | 108/160 [1:55:31<27:38, 31.89s/it]

200 True entropy  0.9436776138346818 996.2


 68%|██████▊   | 109/160 [1:56:17<30:39, 36.07s/it]

200 False gini StandardScaler() 0.947802633666508 1012.8


 69%|██████▉   | 110/160 [1:57:02<32:20, 38.82s/it]

200 False gini  0.9462160875773441 1015.2


 69%|██████▉   | 111/160 [1:57:49<33:36, 41.15s/it]

200 False entropy StandardScaler() 0.9479612882754244 1021.0
Tune TP: 1021.0


 70%|███████   | 112/160 [1:58:35<34:03, 42.58s/it]

200 False entropy  0.9474853244486753 1019.4


 71%|███████   | 113/160 [1:59:15<32:44, 41.79s/it]

250 True gini StandardScaler() 0.9436776138346818 993.2


 71%|███████▏  | 114/160 [1:59:53<31:13, 40.72s/it]

250 True gini  0.9433603046168492 990.4


 72%|███████▏  | 115/160 [2:00:32<30:03, 40.08s/it]

250 True entropy StandardScaler() 0.9435189592257656 992.6


 72%|███████▎  | 116/160 [2:01:09<28:53, 39.41s/it]

250 True entropy  0.9454228145327622 994.0


 73%|███████▎  | 117/160 [2:02:05<31:46, 44.34s/it]

250 False gini StandardScaler() 0.9452641599238458 1014.0


 74%|███████▍  | 118/160 [2:03:01<33:29, 47.86s/it]

250 False gini  0.9470093606219261 1014.2


 74%|███████▍  | 119/160 [2:03:59<34:43, 50.82s/it]

250 False entropy StandardScaler() 0.9473266698397589 1019.6


 75%|███████▌  | 120/160 [2:04:56<35:02, 52.56s/it]

250 False entropy  0.9463747421862605 1016.6


 76%|███████▌  | 121/160 [2:05:42<33:01, 50.81s/it]

300 True gini StandardScaler() 0.9432016500079328 993.6


 76%|███████▋  | 122/160 [2:06:27<31:05, 49.10s/it]

300 True gini  0.9444708868792638 989.8


 77%|███████▋  | 123/160 [2:07:14<29:50, 48.39s/it]

300 True entropy StandardScaler() 0.9428843407901 994.2


 78%|███████▊  | 124/160 [2:08:00<28:38, 47.74s/it]

300 True entropy  0.9435189592257656 993.2


 78%|███████▊  | 125/160 [2:09:07<31:04, 53.27s/it]

300 False gini StandardScaler() 0.9458987783595113 1017.0


 79%|███████▉  | 126/160 [2:10:11<32:09, 56.74s/it]

300 False gini  0.9468507060130097 1014.6


 79%|███████▉  | 127/160 [2:11:19<32:57, 59.92s/it]

300 False entropy StandardScaler() 0.9482785974932572 1017.2


 80%|████████  | 128/160 [2:12:48<36:40, 68.77s/it]

300 False entropy  0.9476439790575916 1017.8


 81%|████████  | 129/160 [2:15:17<47:53, 92.68s/it]

350 True gini StandardScaler() 0.9424083769633508 991.6


 81%|████████▏ | 130/160 [2:17:21<51:07, 102.24s/it]

350 True gini  0.9438362684435982 990.6


 82%|████████▏ | 131/160 [2:18:34<45:11, 93.51s/it] 

350 True entropy StandardScaler() 0.9425670315722672 993.8


 82%|████████▎ | 132/160 [2:20:32<47:04, 100.86s/it]

350 True entropy  0.9444708868792638 994.6


 83%|████████▎ | 133/160 [2:23:28<55:28, 123.28s/it]

350 False gini StandardScaler() 0.9470093606219261 1013.0


 84%|████████▍ | 134/160 [2:26:33<1:01:30, 141.94s/it]

350 False gini  0.9473266698397589 1017.0


 84%|████████▍ | 135/160 [2:29:15<1:01:37, 147.90s/it]

350 False entropy StandardScaler() 0.947802633666508 1019.4


 85%|████████▌ | 136/160 [2:32:05<1:01:44, 154.36s/it]

350 False entropy  0.9460574329684277 1014.8


 86%|████████▌ | 137/160 [2:35:37<1:05:47, 171.63s/it]

400 True gini StandardScaler() 0.9435189592257656 990.4


 86%|████████▋ | 138/160 [2:38:45<1:04:47, 176.69s/it]

400 True gini  0.9438362684435982 993.4


 87%|████████▋ | 139/160 [2:40:23<53:32, 152.96s/it]  

400 True entropy StandardScaler() 0.9433603046168492 991.0


 88%|████████▊ | 140/160 [2:41:36<42:58, 128.91s/it]

400 True entropy  0.9443122322703474 992.6


 88%|████████▊ | 141/160 [2:46:09<54:30, 172.14s/it]

400 False gini StandardScaler() 0.9465333967951769 1016.4


 89%|████████▉ | 142/160 [2:49:21<53:29, 178.33s/it]

400 False gini  0.9463747421862605 1013.4


 89%|████████▉ | 143/160 [2:51:28<46:08, 162.88s/it]

400 False entropy StandardScaler() 0.947802633666508 1021.0


 90%|█████████ | 144/160 [2:53:02<37:56, 142.29s/it]

400 False entropy  0.9465333967951769 1016.6


 91%|█████████ | 145/160 [2:54:44<32:29, 129.99s/it]

450 True gini StandardScaler() 0.9430429953990164 991.4


 91%|█████████▏| 146/160 [2:56:03<26:46, 114.76s/it]

450 True gini  0.942091067745518 992.4


 92%|█████████▏| 147/160 [2:57:27<22:52, 105.59s/it]

450 True entropy StandardScaler() 0.9443122322703474 995.0


 92%|█████████▎| 148/160 [3:00:06<24:17, 121.48s/it]

450 True entropy  0.9436776138346818 993.6


 93%|█████████▎| 149/160 [3:02:07<22:14, 121.31s/it]

450 False gini StandardScaler() 0.9474853244486753 1015.6


 94%|█████████▍| 150/160 [3:04:23<20:57, 125.71s/it]

450 False gini  0.9471680152308425 1014.6


 94%|█████████▍| 151/160 [3:06:31<18:57, 126.43s/it]

450 False entropy StandardScaler() 0.947802633666508 1019.2


 95%|█████████▌| 152/160 [3:08:31<16:36, 124.54s/it]

450 False entropy  0.9466920514040933 1021.2
Tune TP: 1021.2


 96%|█████████▌| 153/160 [3:10:16<13:51, 118.75s/it]

500 True gini StandardScaler() 0.9424083769633508 993.4


 96%|█████████▋| 154/160 [3:11:54<11:16, 112.67s/it]

500 True gini  0.9433603046168492 993.4


 97%|█████████▋| 155/160 [3:13:47<09:23, 112.69s/it]

500 True entropy StandardScaler() 0.9425670315722672 992.0


 98%|█████████▊| 156/160 [3:15:37<07:27, 111.84s/it]

500 True entropy  0.9438362684435982 994.0


 98%|█████████▊| 157/160 [3:18:09<06:11, 123.86s/it]

500 False gini StandardScaler() 0.9466920514040933 1013.6


 99%|█████████▉| 158/160 [3:20:33<04:20, 130.02s/it]

500 False gini  0.9460574329684277 1015.2


 99%|█████████▉| 159/160 [3:22:53<02:12, 132.79s/it]

500 False entropy StandardScaler() 0.947802633666508 1021.0


100%|██████████| 160/160 [3:25:09<00:00, 76.93s/it] 

500 False entropy  0.9470093606219261 1016.6





In [9]:
bestParam

{'numEstimators': 450,
 'bootStrap': False,
 'criterion': 'entropy',
 'classWeight': '',
 'scaler': ''}

In [8]:
toRemove = tune.index.tolist()

In [9]:
SWC = pickle.load( open( "../Data/DataSets/SWC/SWC.p", "rb" ) )
SWCFeatNoTune = SWCAll.drop(tune.index)
SWCNoTune = SWC[~SWC['sID'].isin(toRemove)]

In [10]:
pickle.dump(SWCFeatNoTune, open( "Pickles/SWCFeatNoTune.p", "wb" ))
pickle.dump(SWCNoTune, open( "Pickles/SWCNoTune.p", "wb" ))
pickle.dump(bestParam, open( "Pickles/BestParam.p", "wb" ))