In [34]:
from sklearn.ensemble import RandomForestClassifier,BaggingClassifier, VotingClassifier

In [83]:
?BaggingClassifier

In [36]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score,precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [37]:
sms = pd.read_table("./sms.tsv", header=None, names=["label", "message"])
sms["label"] = sms.label.map({"ham":0, "spam":1})
X = sms.message
y = sms.label
X_test, X_train, y_test, y_train = train_test_split(X,y, test_size=1000,random_state=123)

In [89]:
steps = {
    "random": {
        "pipe": Pipeline([('counter',CountVectorizer(max_df=0.8,min_df=0.0)),
                          ('scaler',StandardScaler(with_mean=False)),
                          ('random',RandomForestClassifier())]),
        "param_grid":{
            'random__n_estimators':[10,20,50,100]
        }
    },
    
    "bagging":{
        "pipe":Pipeline([
            ("vectorizing",CountVectorizer()),
            ("bagging",BaggingClassifier())])
        ,
        "param_grid":{
            'bagging__n_estimators':[10,20,50,75],
            'bagging__max_samples':[1.0,0.5,0.3],
            'bagging__max_features':[1.0,0.5,0.3]
        } 
    }
}

In [93]:
for key in steps:
    pipe = steps[key]['pipe']
    param_grid = steps[key]['param_grid']
    gs = GridSearchCV(pipe, param_grid, cv = 5)
    gs.fit(X_train,y_train)
    accuracy = accuracy_score(gs.best_estimator_.predict(X_test),y_test)
    f1 =  f1_score(gs.best_estimator_.predict(X_test),y_test)
    results[key] = {"accuracy":accuracy,"best_params":gs.best_params_, "f1":f1}

In [94]:
import json
print(json.dumps(results, indent=4))

{
    "bagging": {
        "best_params": {
            "bagging__n_estimators": 50, 
            "bagging__max_samples": 1.0, 
            "bagging__max_features": 0.5
        }, 
        "f1": 0.8661710037174721, 
        "accuracy": 0.968503937007874
    }, 
    "random": {
        "best_params": {
            "random__n_estimators": 100
        }, 
        "f1": 0.7771084337349398, 
        "accuracy": 0.9514435695538058
    }
}
