In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from utils import get_saved_data, get_feature_labels, get_binetflow_files

In [2]:
# Hyper-parameter search for the random forest classifier.
#
# GridSearchCV cross-validates one forest per parameter combination below and
# exposes the best-scoring combination as `clf.best_params_`.
tuned_parameters = [
    {
        'n_estimators': [12, 16, 20, 50],
        'criterion': ['gini', 'entropy'],
        # 'auto' is deliberately not listed: for RandomForestClassifier it was
        # an alias of 'sqrt' (so it only repeated the same candidates), it was
        # deprecated in scikit-learn 1.1 and removed in 1.3, where every
        # candidate using it fails to fit.
        'max_features': ['sqrt', 'log2', None],
        'max_depth': [5, None],
        # Single-valued entry: nothing is tuned here, the value is simply
        # forwarded to every candidate forest (and echoed in best_params_).
        'n_jobs': [2]
    }
]

# A fixed seed makes every candidate forest deterministic, so re-running the
# cell reports the same best parameters instead of a run-dependent winner.
clf = GridSearchCV(RandomForestClassifier(random_state=0), tuned_parameters)
binet_files = get_binetflow_files()
# NOTE(review): 0.15 and index 7 are forwarded to the project helper as-is;
# presumably an aggregation interval and the chosen capture file -- confirm
# against utils.get_saved_data.
data = get_saved_data(0.15, binet_files[7])
feature, label = get_feature_labels(data)

clf.fit(feature, label)
print('random forest:', clf.best_params_)

saved_data/saved_0.15s_16-3.pk1




random forest: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'n_estimators': 20, 'n_jobs': 2}


In [3]:
# Hyper-parameter search for the decision tree classifier.
#
# GridSearchCV cross-validates one tree per parameter combination below and
# exposes the best-scoring combination as `clf.best_params_`.
tuned_parameters = [
    {
        'criterion': ['gini', 'entropy'],
        'splitter': ['best', 'random'],
        'class_weight': ['balanced', None],
    }
]

# A fixed seed is required for a repeatable search: the 'random' splitter is
# stochastic by design, and scikit-learn also permutes the features at each
# split when splitter='best', so unseeded trees can differ between runs.
clf = GridSearchCV(DecisionTreeClassifier(random_state=0), tuned_parameters)
# The data is loaded again here so this cell runs on its own, without relying
# on variables left behind by an earlier cell.
binet_files = get_binetflow_files()
# NOTE(review): 0.15 and index 7 are forwarded to the project helper as-is;
# presumably an aggregation interval and the chosen capture file -- confirm
# against utils.get_saved_data.
data = get_saved_data(0.15, binet_files[7])
feature, label = get_feature_labels(data)

clf.fit(feature, label)
print('decision trees:', clf.best_params_)

saved_data/saved_0.15s_16-3.pk1




decision trees: {'class_weight': 'balanced', 'criterion': 'entropy', 'splitter': 'best'}
