In [9]:
#Comitê com votação entre os classificadores com AdaBoostClassifier
import numpy as np
import pandas
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn import tree
seed = 1075
np.random.seed(seed)

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv("dados/pima-indians-diabetes.csv", names = names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

# Create classifiers
dt = tree.DecisionTreeClassifier()
rf = RandomForestClassifier()

clf_array = [dt, rf]
for clf in clf_array:
    pima_scores = cross_val_score(clf, X, Y, cv=10)
    boosting_clf = AdaBoostClassifier(base_estimator = clf, n_estimators = 15, random_state = seed)
    boosting_scores = cross_val_score(boosting_clf, X, Y, cv = 10)    
    print('Media clf ', pima_scores.mean(), 'Desvio ', pima_scores.std(), 'Media Bagging', boosting_scores.mean(), 'Desvio ',boosting_scores.std())

Media clf  0.7069548872180451 Desvio  0.06439963671178817 Media Bagging 0.7069548872180451 Desvio  0.06992444662948437
Media clf  0.7643028024606973 Desvio  0.04854353516635274 Media Bagging 0.7707963089542036 Desvio  0.05129789583709469


In [8]:
# Get some classifiers to evaluate with BaggingClassifier
import numpy as np
import pandas
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier,RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.neural_network import MLPClassifier

seed = 1075
np.random.seed(seed)

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv("pima-indians-diabetes.csv", names = names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]

# Create classifiers
rf = RandomForestClassifier()
knn = KNeighborsClassifier()
mlpc = MLPClassifier(solver = 'lbfgs', alpha = 1e-5, hidden_layer_sizes = (8, 8), random_state = seed)
rg = RidgeClassifier()

clf_array = [rf, knn, mlpc, rg]
for clf in clf_array:
    pima_scores = cross_val_score(clf, X, Y, cv = 10)
    bagging_clf = BaggingClassifier(clf, max_samples = 0.8, random_state = seed)
    bagging_scores = cross_val_score(bagging_clf, X, Y, cv = 10)    
    print('Media clf ', pima_scores.mean(), 'Desvio ', pima_scores.std(), 'Media Bagging', bagging_scores.mean(), 'Desvio ',bagging_scores.std())

Media clf  0.7617224880382775 Desvio  0.049688240527885306 Media Bagging 0.7681818181818182 Desvio  0.05165900852844223
Media clf  0.7213773069036227 Desvio  0.04416817250533367 Media Bagging 0.7343814080656186 Desvio  0.04578758550270302
Media clf  0.6523581681476418 Desvio  0.004735120645828583 Media Bagging 0.6510594668489406 Desvio  0.003417634996582386
Media clf  0.7734962406015038 Desvio  0.03417526057730609 Media Bagging 0.7721633629528366 Desvio  0.032430101278137696


In [5]:

# Random Forest Classification
import pandas
from sklearn import tree
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv("dados/pima-indians-diabetes.csv", names = names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
seed = 7
num_trees = 21
max_features = 7

kfold = model_selection.StratifiedKFold(n_splits = 10, random_state = seed)
model = RandomForestClassifier(n_estimators = num_trees, max_features = max_features)
results = model_selection.cross_val_score(model, X, Y, cv = kfold)

print(results, results.mean())

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = 0.30, random_state = seed)

clf = tree.DecisionTreeClassifier(criterion = 'entropy', random_state = seed)
clf = clf.fit(X_train, y_train)

print("Acuracia: %0.3f" %  clf.score(X_test, y_test))

[0.68831169 0.83116883 0.72727273 0.67532468 0.74025974 0.79220779
 0.77922078 0.83116883 0.69736842 0.80263158] 0.7564935064935066
Acuracia: 0.736


In [8]:
# Voting Ensemble for Classification
import pandas
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import VotingClassifier
import warnings
warnings.filterwarnings("ignore")

names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = pandas.read_csv("dados/pima-indians-diabetes.csv", names = names)
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
seed = 5
kfold = model_selection.StratifiedKFold(n_splits = 10, random_state = seed)

# create the sub models
estimators = []
model1 = LogisticRegression()
estimators.append(('logistic', model1))
model2 = DecisionTreeClassifier()
estimators.append(('cart', model2))
model3 = MLPClassifier(solver='lbfgs', alpha = 1e-5, hidden_layer_sizes = (8, 8), random_state = 5)
estimators.append(('mlp', model3))
# create the ensemble model
ensemble = VotingClassifier(estimators)
results = model_selection.cross_val_score(ensemble, X, Y, cv = kfold)
print(results.mean())

0.7604408749145593
