In [1]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

# Fetch the train and test splits with the same stripping of headers, footers
# and quoted text; the generator yields the 'train' split first, then 'test'.
newsgroups_train, newsgroups_test = (
    fetch_20newsgroups(subset=split, remove=('headers', 'footers', 'quotes'))
    for split in ('train', 'test')
)

# The vocabulary is fitted on the training documents only; the test documents
# are then mapped onto that same vocabulary.
count_vect = CountVectorizer()
x_train_counts = count_vect.fit_transform(newsgroups_train.data)
x_test_counts = count_vect.transform(newsgroups_test.data)

# Fit the TF-IDF weighting on the training counts, then apply it to both splits.
tfidf_transformer = TfidfTransformer().fit(x_train_counts)
x_train_tfidf = tfidf_transformer.transform(x_train_counts)
x_test_tfidf = tfidf_transformer.transform(x_test_counts)

In [2]:
import classifiers

# Hyperparameter grid handed to classifiers.lr_fold: every combination of
# iteration cap and solver listed here appears in the recorded output.
# NOTE(review): the recorded scores do not change with max_iter for any
# solver, so the three iteration caps look redundant - confirm before pruning.
parameters = {
    'max_iter': [100, 500, 1000],
    'solver': ['lbfgs', 'liblinear', 'newton-cg'],
}

classifiers.lr_fold(
    x_train_tfidf,
    x_test_tfidf,
    newsgroups_train,
    newsgroups_test,
    parameters,
)

Best Parameters: {'max_iter': 100, 'solver': 'liblinear'}
Best Score: 0.7238821341903916
Parameters: {'max_iter': 100, 'solver': 'lbfgs'}, Score: 0.723 (+/- 0.005)
Parameters: {'max_iter': 100, 'solver': 'liblinear'}, Score: 0.724 (+/- 0.006)
Parameters: {'max_iter': 100, 'solver': 'newton-cg'}, Score: 0.723 (+/- 0.005)
Parameters: {'max_iter': 500, 'solver': 'lbfgs'}, Score: 0.723 (+/- 0.005)
Parameters: {'max_iter': 500, 'solver': 'liblinear'}, Score: 0.724 (+/- 0.006)
Parameters: {'max_iter': 500, 'solver': 'newton-cg'}, Score: 0.723 (+/- 0.005)
Parameters: {'max_iter': 1000, 'solver': 'lbfgs'}, Score: 0.723 (+/- 0.005)
Parameters: {'max_iter': 1000, 'solver': 'liblinear'}, Score: 0.724 (+/- 0.006)
Parameters: {'max_iter': 1000, 'solver': 'newton-cg'}, Score: 0.723 (+/- 0.005)
Test Accuracy: 0.6775


In [3]:
import classifiers

# Hyperparameter grid handed to classifiers.decision_tree_fold: split
# criterion crossed with splitter strategy (six combinations in the output).
# NOTE(review): scikit-learn documents 'entropy' and 'log_loss' as the same
# Shannon-information-gain criterion; the slightly different recorded scores
# presumably come from unseeded randomness - confirm and consider a fixed seed.
parameters = {
    'criterion': ['gini', 'entropy', 'log_loss'],
    'splitter': ['best', 'random'],
}

classifiers.decision_tree_fold(
    x_train_tfidf,
    x_test_tfidf,
    newsgroups_train,
    newsgroups_test,
    parameters,
)

Best Parameters: {'criterion': 'gini', 'splitter': 'random'}
Best Score: 0.44210743467451835
Parameters: {'criterion': 'gini', 'splitter': 'best'}, Score: 0.433 (+/- 0.017)
Parameters: {'criterion': 'gini', 'splitter': 'random'}, Score: 0.442 (+/- 0.012)
Parameters: {'criterion': 'entropy', 'splitter': 'best'}, Score: 0.298 (+/- 0.005)
Parameters: {'criterion': 'entropy', 'splitter': 'random'}, Score: 0.298 (+/- 0.014)
Parameters: {'criterion': 'log_loss', 'splitter': 'best'}, Score: 0.291 (+/- 0.006)
Parameters: {'criterion': 'log_loss', 'splitter': 'random'}, Score: 0.289 (+/- 0.013)
Test Accuracy: 0.4166


In [12]:
import classifiers

# Hyperparameter grid handed to classifiers.svc_fold: only the iteration cap
# is varied.
# NOTE(review): all three values give the same recorded score (0.760), so the
# cap does not appear to be the limiting factor - confirm.
parameters = {'max_iter': [100, 500, 1000]}

classifiers.svc_fold(
    x_train_tfidf,
    x_test_tfidf,
    newsgroups_train,
    newsgroups_test,
    parameters,
)

Best Parameters: {'max_iter': 100}
Best Score: 0.7601208930189379
Parameters: {'max_iter': 100}, Score: 0.760 (+/- 0.007)
Parameters: {'max_iter': 500}, Score: 0.760 (+/- 0.007)
Parameters: {'max_iter': 1000}, Score: 0.760 (+/- 0.007)
Test Accuracy: 0.6920


In [10]:
import classifiers

# Hyperparameter grid handed to classifiers.ada_boost_fold: ensemble size
# crossed with learning rate (nine combinations in the recorded output).
# NOTE(review): the recorded best combination sits at the largest value of
# both ranges, so the search space may be too narrow - consider widening.
parameters = {
    'n_estimators': [10, 25, 50],
    'learning_rate': [0.01, 0.1, 1],
}

classifiers.ada_boost_fold(
    x_train_tfidf,
    x_test_tfidf,
    newsgroups_train,
    newsgroups_test,
    parameters,
)

Best Parameters: {'learning_rate': 1, 'n_estimators': 50}
Best Score: 0.39923987664551763
Parameters: {'learning_rate': 0.01, 'n_estimators': 10}, Score: 0.167 (+/- 0.009)
Parameters: {'learning_rate': 0.01, 'n_estimators': 25}, Score: 0.230 (+/- 0.004)
Parameters: {'learning_rate': 0.01, 'n_estimators': 50}, Score: 0.258 (+/- 0.005)
Parameters: {'learning_rate': 0.1, 'n_estimators': 10}, Score: 0.204 (+/- 0.007)
Parameters: {'learning_rate': 0.1, 'n_estimators': 25}, Score: 0.290 (+/- 0.006)
Parameters: {'learning_rate': 0.1, 'n_estimators': 50}, Score: 0.324 (+/- 0.011)
Parameters: {'learning_rate': 1, 'n_estimators': 10}, Score: 0.217 (+/- 0.004)
Parameters: {'learning_rate': 1, 'n_estimators': 25}, Score: 0.316 (+/- 0.008)
Parameters: {'learning_rate': 1, 'n_estimators': 50}, Score: 0.399 (+/- 0.008)
Test Accuracy: 0.3747


In [8]:
import classifiers

# Hyperparameter grid handed to classifiers.random_forest_fold: forest size
# crossed with maximum tree depth (None is one of the depth options).
# 'n_jobs' has a single value, so it is not searched; it shows up unchanged
# in every recorded parameter combination.
parameters = {
    'n_estimators': [25, 50, 100],
    'max_depth': [10, 100, None],
    'n_jobs': [-1],
}

classifiers.random_forest_fold(
    x_train_tfidf,
    x_test_tfidf,
    newsgroups_train,
    newsgroups_test,
    parameters,
)

Best Parameters: {'max_depth': 100, 'n_estimators': 100, 'n_jobs': -1}
Best Score: 0.642125133768817
Parameters: {'max_depth': 10, 'n_estimators': 25, 'n_jobs': -1}, Score: 0.411 (+/- 0.005)
Parameters: {'max_depth': 10, 'n_estimators': 50, 'n_jobs': -1}, Score: 0.497 (+/- 0.008)
Parameters: {'max_depth': 10, 'n_estimators': 100, 'n_jobs': -1}, Score: 0.554 (+/- 0.005)
Parameters: {'max_depth': 100, 'n_estimators': 25, 'n_jobs': -1}, Score: 0.564 (+/- 0.016)
Parameters: {'max_depth': 100, 'n_estimators': 50, 'n_jobs': -1}, Score: 0.612 (+/- 0.007)
Parameters: {'max_depth': 100, 'n_estimators': 100, 'n_jobs': -1}, Score: 0.642 (+/- 0.009)
Parameters: {'max_depth': None, 'n_estimators': 25, 'n_jobs': -1}, Score: 0.552 (+/- 0.008)
Parameters: {'max_depth': None, 'n_estimators': 50, 'n_jobs': -1}, Score: 0.606 (+/- 0.009)
Parameters: {'max_depth': None, 'n_estimators': 100, 'n_jobs': -1}, Score: 0.633 (+/- 0.008)
Test Accuracy: 0.6022
