In [52]:
import numpy as np
from sklearn.tree             import DecisionTreeClassifier,export_graphviz,DecisionTreeRegressor
from sklearn.model_selection  import train_test_split,GridSearchCV,ShuffleSplit
from sklearn.datasets         import make_classification,make_regression
from graphviz                 import Source
from sklearn.metrics          import *

In [53]:
data = make_classification(n_samples=10_000,n_classes=2,n_features=10)

In [54]:
X = data[0]
y = data[1]

In [55]:
xtrain ,xtest ,ytrain ,ytest = train_test_split(X,y,test_size=0.2,random_state=42)

In [56]:
tree = DecisionTreeClassifier()
tree.fit(xtrain,ytrain)

In [57]:
tree.get_depth(),tree.tree_.n_leaves,tree.tree_.node_count

(24, 444, 887)

In [58]:
pred = tree.predict(xtest)

print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91      1025
           1       0.90      0.91      0.91       975

    accuracy                           0.91      2000
   macro avg       0.91      0.91      0.91      2000
weighted avg       0.91      0.91      0.91      2000

[[930  95]
 [ 90 885]]


In [59]:
params = dict(
    max_depth = np.arange(3,20),
    min_samples_split = [100,150,200],
)
gridTree = GridSearchCV(estimator=DecisionTreeClassifier(max_features=5),param_grid=params,scoring='accuracy')
gridTree.fit(xtrain,ytrain)

print(gridTree.best_estimator_)
print(gridTree.best_score_)

DecisionTreeClassifier(max_depth=11, max_features=5, min_samples_split=100)
0.931625


In [60]:
tree = DecisionTreeClassifier(max_depth=17, max_features=5, min_samples_split=100)
tree.fit(xtrain,ytrain)
pred = tree.predict(xtest)

print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94      1025
           1       0.94      0.92      0.93       975

    accuracy                           0.93      2000
   macro avg       0.93      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000

[[968  57]
 [ 74 901]]


In [61]:
export_graphviz(decision_tree=tree,out_file='img/tree.dot')
with open('img/tree.dot','r') as file:
    dot = file.read()
#Source(dot)

In [62]:
from sklearn.base import ClassifierMixin
from statistics   import mode
class RandomFlorest(ClassifierMixin):
    treinamentos = []
    def __init__(self,Ninstance=1000,splits=10,CVtree=DecisionTreeClassifier()) -> None:
        self.Ninstance = splits
        self.splits    = Ninstance
        self.Tree      = CVtree

    def shuffle(self,X,y):
        pass

    def fit(self,X,y):
        self.treinamentos = [self.Tree for i in range(self.splits)] 
        for enum,(train,test) in enumerate(ShuffleSplit(n_splits=self.splits,train_size=self.Ninstance).split(X,y)):
            train = np.concatenate((train,test),axis=0)
            xtrainf =  X[train,:]
            ytrainf =  y[train]
            self.treinamentos[enum].fit(xtrainf,ytrainf)

    def predict(self,X):
        dados = []
        if np.size(X,axis=0) > 1:
            for test in X:
                moda = []
                for trees in self.treinamentos:
                    moda.append(trees.predict(np.array([test]))[0])
                dados.append(mode(moda))
            return dados
        
        
random = RandomFlorest(10,3,tree)
random.fit(xtrain,ytrain)

pred = random.predict(xtest)

In [63]:
accuracy_score(ytest,pred)

0.9335

In [64]:
print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.93      0.94      0.94      1025
           1       0.93      0.93      0.93       975

    accuracy                           0.93      2000
   macro avg       0.93      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000

[[959  66]
 [ 67 908]]


In [65]:
lista_original = [1, 2, 4, 5]
import random
# Função para criar subconjuntos aleatórios
def criar_subconjuntos_aleatorios(lista, num_subconjuntos, tamanho_subconjunto):
    print(lista)
    subconjuntos = []
    for _ in range(num_subconjuntos):
        # Embaralha a lista original
        random.shuffle(lista)
        print(lista)
        # Seleciona elementos para criar o subconjunto
    
        subconjunto = lista[:tamanho_subconjunto]
        subconjuntos.append(subconjunto)
    return subconjuntos

# Definindo parâmetros
num_subconjuntos = 2
tamanho_subconjunto = 2

# Criando os subconjuntos aleatórios
subconjuntos_aleatorios = criar_subconjuntos_aleatorios(lista_original, num_subconjuntos, tamanho_subconjunto)

# Exibindo os subconjuntos gerados
for i, subconjunto in enumerate(subconjuntos_aleatorios, start=1):
    print(f"Subconjunto {i}: {subconjunto}")

[1, 2, 4, 5]
[5, 4, 1, 2]
[4, 2, 5, 1]
Subconjunto 1: [5, 4]
Subconjunto 2: [4, 2]


### Adaboost

In [66]:
class AdaBoost(ClassifierMixin):
    models     = []
    estimators = []
    def __init__(self,estimator = DecisionTreeRegressor() , n_estimators = 3) -> None:
        self.estimator    = estimator
        self.n_estimators = n_estimators
    
    def fit(self,X,y):
        self.models = [self.estimator for i in range(self.estimator)]
        for model in self.models:
            model.fit(X,y)
            yn = model.predict(X)
            self.estimators.append(model)
            y  = y - yn

### Gradiente boost

In [77]:
class GradBoost(ClassifierMixin):
    models     = []
    estimators = []
    def __init__(self,estimator = DecisionTreeRegressor() , n_estimators = 3) -> None:
        self.estimator    = estimator
        self.n_estimators = n_estimators
    
    def fit(self,X,y):
        self.models = [self.estimator for i in range(self.n_estimators)]
        for model in self.models:
            model.fit(X,y)
            yn = model.predict(X)
            self.estimators.append(model)
            y  = y - yn

    def predict(self,X):
        return sum(model.predict(X) for model in self.estimators)


In [78]:
data = make_regression(n_features=3,n_samples=500)
X = data[0]
Y = data[1]

xtrain ,xtest ,ytrain ,ytest = train_test_split(X,Y,test_size=0.2,random_state=42)

In [79]:
grad = GradBoost()
grad.fit(xtrain,ytrain)
pred = grad.predict(xtest)

In [82]:
np.sqrt(mean_squared_error(ytest,pred)),mean_absolute_error(ytest,pred)

(73.1592748440089, 57.816782932024445)