In [12]:
import numpy as np
from sklearn.tree             import DecisionTreeClassifier,export_graphviz
from sklearn.model_selection  import train_test_split,GridSearchCV,ShuffleSplit
from sklearn.datasets         import make_classification
from graphviz                 import Source
from sklearn.metrics          import *

In [13]:
data = make_classification(n_samples=10_000,n_classes=2,n_features=10)

In [14]:
X = data[0]
y = data[1]

In [15]:
xtrain ,xtest ,ytrain ,ytest = train_test_split(X,y,test_size=0.2,random_state=42)

In [16]:
tree = DecisionTreeClassifier()
tree.fit(xtrain,ytrain)

In [17]:
tree.get_depth(),tree.tree_.n_leaves,tree.tree_.node_count

(28, 388, 775)

In [18]:
pred = tree.predict(xtest)

print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.91      0.95      0.93       970
           1       0.95      0.91      0.93      1030

    accuracy                           0.93      2000
   macro avg       0.93      0.93      0.93      2000
weighted avg       0.93      0.93      0.93      2000

[[921  49]
 [ 95 935]]


In [19]:
params = dict(
    max_depth = np.arange(3,20),
    min_samples_split = [100,150,200],
)
gridTree = GridSearchCV(estimator=DecisionTreeClassifier(max_features=5),param_grid=params,scoring='accuracy')
gridTree.fit(xtrain,ytrain)

print(gridTree.best_estimator_)
print(gridTree.best_score_)

DecisionTreeClassifier(max_depth=7, max_features=5, min_samples_split=100)
0.95625


In [20]:
tree = DecisionTreeClassifier(max_depth=17, max_features=5, min_samples_split=100)
tree.fit(xtrain,ytrain)
pred = tree.predict(xtest)

print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.97      0.94      0.95       970
           1       0.94      0.97      0.96      1030

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000

[[ 911   59]
 [  29 1001]]


In [21]:
export_graphviz(decision_tree=tree,out_file='img/tree.dot')
with open('img/tree.dot','r') as file:
    dot = file.read()
#Source(dot)

In [25]:
from sklearn.base import ClassifierMixin
from statistics   import mode
class RandomFlorest(ClassifierMixin):
    treinamentos = []
    def __init__(self,Ninstance=1000,splits=10,CVtree=DecisionTreeClassifier()) -> None:
        self.Ninstance = splits
        self.splits    = Ninstance
        self.Tree      = CVtree

    def shuffle(self,X,y):
        pass

    def fit(self,X,y):
        self.treinamentos = [self.Tree for i in range(self.splits)] 
        for enum,(train,test) in enumerate(ShuffleSplit(n_splits=self.splits,train_size=self.Ninstance).split(X,y)):
            train = np.concatenate((train,test),axis=0)
            xtrainf =  X[train,:]
            ytrainf =  y[train]
            self.treinamentos[enum].fit(xtrainf,ytrainf)

    def predict(self,X):
        dados = []
        if np.size(X,axis=0) > 1:
            for test in X:
                moda = []
                for trees in self.treinamentos:
                    moda.append(trees.predict(np.array([test]))[0])
                dados.append(mode(moda))
            return dados
        


        
random = RandomFlorest(1000,10,tree)
random.fit(xtrain,ytrain)

pred = random.predict(xtest)

[4070 5944 5951 ... 6142 6885 6222]
[4960  964 2676 ... 6272 2049 4298]
[5720 1724 2640 ... 3834 3717  286]
[2273 3595 4666 ... 1435 6993 6408]
[ 283 7624 3533 ... 4040 7568 2412]
[7739 5400  292 ... 4460  932 2335]
[7433 7527 4260 ... 3435 2933 7821]
[6177 6808 6322 ...  731  561 3420]
[4178  718 7167 ... 7422  496 3984]
[6132 2035 1437 ... 2629 1964 7197]
[4939  132 4460 ... 7264 5389 2302]
[2914 2735  981 ...  490 7135  424]
[4268 6620 7479 ...  461 2451 5650]
[ 200 3515 2123 ... 3695 5436   16]
[7606   62 3971 ... 6756 3414 4048]
[5477 3380  926 ...  522  169 4111]
[5408 5058 3143 ... 2147 5825 3405]
[5847 5074 5461 ... 2039 4923 3405]
[ 734 5313 4468 ... 4684 6353 1533]
[7991 7445  877 ... 3683 5331 1800]
[3536 7260 5991 ... 2258 2480 4888]
[1348 2900 2662 ... 2594 2967  475]
[3591 2084  743 ... 1775 7401 5971]
[ 672 1426 5520 ... 5120 1199  902]
[5725 7799 7589 ... 3750 6067  868]
[3557  536  785 ... 5154  686 4716]
[ 806 2123 3584 ... 4216 5113 2387]
[6485 5848 7794 ... 1787 558

In [26]:
accuracy_score(ytest,pred)

0.9605

In [28]:
print(classification_report(ytest,pred))
print(confusion_matrix(ytest,pred))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96       970
           1       0.96      0.96      0.96      1030

    accuracy                           0.96      2000
   macro avg       0.96      0.96      0.96      2000
weighted avg       0.96      0.96      0.96      2000

[[929  41]
 [ 38 992]]


In [33]:
lista_original = [1, 2, 4, 5]
import random
# Função para criar subconjuntos aleatórios
def criar_subconjuntos_aleatorios(lista, num_subconjuntos, tamanho_subconjunto):
    print(lista)
    subconjuntos = []
    for _ in range(num_subconjuntos):
        # Embaralha a lista original
        random.shuffle(lista)
        print(lista)
        # Seleciona elementos para criar o subconjunto
    
        subconjunto = lista[:tamanho_subconjunto]
        subconjuntos.append(subconjunto)
    return subconjuntos

# Definindo parâmetros
num_subconjuntos = 2
tamanho_subconjunto = 2

# Criando os subconjuntos aleatórios
subconjuntos_aleatorios = criar_subconjuntos_aleatorios(lista_original, num_subconjuntos, tamanho_subconjunto)

# Exibindo os subconjuntos gerados
for i, subconjunto in enumerate(subconjuntos_aleatorios, start=1):
    print(f"Subconjunto {i}: {subconjunto}")

[1, 2, 4, 5]
[4, 5, 1, 2]
[4, 1, 5, 2]
Subconjunto 1: [4, 5]
Subconjunto 2: [4, 1]


In [34]:
import random

# Sua lista original
minha_lista = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Embaralhar a lista original (opcional)
random.shuffle(minha_lista)

# Tamanho do primeiro subconjunto
tamanho_subconjunto_1 = 5  # Altere o tamanho conforme necessário

# Selecionar aleatoriamente os elementos para o primeiro subconjunto
subconjunto_1 = random.sample(minha_lista, tamanho_subconjunto_1)

# Remover os elementos selecionados para o primeiro subconjunto da lista original
for elemento in subconjunto_1:
    minha_lista.remove(elemento)

# O restante da lista será o segundo subconjunto
subconjunto_2 = minha_lista

# Exibindo os subconjuntos
print("Subconjunto 1:", subconjunto_1)
print("Subconjunto 2:", subconjunto_2)


Subconjunto 1: [3, 4, 2, 5, 9]
Subconjunto 2: [8, 6, 10, 1, 7]
