In [1]:
# Imports
import numpy as np
from nltk.corpus import stopwords
from sklearn.datasets import load_files
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Load the data from the 'dados' directory as UTF-8 text;
# bytes that cannot be decoded are replaced instead of raising an error
noticias = load_files(
    'dados',
    encoding = 'utf-8',
    decode_error = 'replace',
)

In [3]:
# Split the loaded bunch into input documents (X) and target labels (y)
X, y = noticias.data, noticias.target

In [4]:
# Accumulator for the (random_state, accuracy) results
d1 = list()

In [5]:
# Evaluate the stacking ensemble on 99 different train/test splits
# (random_state = 1..99), print the accuracy of each split, and finally
# report the split that obtained the highest test accuracy.
#
# NOTE(review): the "best" result is selected using test-set accuracy, so it is
# an optimistic figure; the spread of accuracies across seeds is the more
# honest picture of how the model performs.

# Reset the results here so that re-running this cell does not append
# duplicate entries to a list created in another cell.
d1 = []

# Stop words do not depend on the split, so build them once outside the loop.
# scikit-learn documents `stop_words` as 'english', a list, or None, so the
# set is converted to a (sorted, hence deterministic) list.
my_stop_words = sorted(set(stopwords.words('english')))

for x in range(1, 100):

    # Train/test split; `x` is used only as the shuffling seed
    X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size = 0.30, random_state = x)

    # Vectorization
    vectorizer = TfidfVectorizer(norm = None, stop_words = my_stop_words, max_features = 1000, decode_error = "ignore")

    # Fit the vocabulary on the training split only, then reuse it for the test split
    X_treino_vectors = vectorizer.fit_transform(X_treino)
    X_teste_vectors = vectorizer.transform(X_teste)

    # Base models
    modelos_base = [('rf', RandomForestClassifier(n_estimators = 100, random_state = 42)), ('nb', MultinomialNB())]

    # Stacking model
    stacking_model = StackingClassifier(estimators = modelos_base, final_estimator = LogisticRegression(multi_class = 'multinomial', random_state = 30, max_iter = 1000))

    # Accuracy on the held-out split.
    # `.toarray()` returns a plain ndarray; `.todense()` returns an np.matrix,
    # which recent scikit-learn releases refuse as estimator input.
    acuracia = stacking_model.fit(X_treino_vectors.toarray(), y_treino).score(X_teste_vectors.toarray(), y_teste)

    # Store the result for this seed
    d1.append((x, acuracia))

    print('-Random State:', x, '-Acurácia :', acuracia)

print('\nMelhores Resultados')
# Pick the (seed, accuracy) pair with the highest accuracy; on ties max() keeps the first one
mx = max(d1, key = lambda resultado: resultado[1])
print('-Random State:', mx[0], '-Acurácia :', mx[1])
print('\n')




-Random State: 1 -Acurácia : 0.9655688622754491




-Random State: 2 -Acurácia : 0.9700598802395209




-Random State: 3 -Acurácia : 0.9700598802395209




-Random State: 4 -Acurácia : 0.9625748502994012




-Random State: 5 -Acurácia : 0.9640718562874252




-Random State: 6 -Acurácia : 0.9505988023952096




-Random State: 7 -Acurácia : 0.9580838323353293




-Random State: 8 -Acurácia : 0.9730538922155688




-Random State: 9 -Acurácia : 0.9730538922155688




-Random State: 10 -Acurácia : 0.9625748502994012




-Random State: 11 -Acurácia : 0.9640718562874252




-Random State: 12 -Acurácia : 0.9715568862275449




-Random State: 13 -Acurácia : 0.9595808383233533




-Random State: 14 -Acurácia : 0.9670658682634731




-Random State: 15 -Acurácia : 0.9670658682634731




-Random State: 16 -Acurácia : 0.9655688622754491




-Random State: 17 -Acurácia : 0.9550898203592815




-Random State: 18 -Acurácia : 0.9730538922155688




-Random State: 19 -Acurácia : 0.9595808383233533




-Random State: 20 -Acurácia : 0.9655688622754491




-Random State: 21 -Acurácia : 0.9745508982035929




-Random State: 22 -Acurácia : 0.9700598802395209




-Random State: 23 -Acurácia : 0.9595808383233533




-Random State: 24 -Acurácia : 0.9595808383233533




-Random State: 25 -Acurácia : 0.9670658682634731




-Random State: 26 -Acurácia : 0.9700598802395209




-Random State: 27 -Acurácia : 0.9700598802395209




-Random State: 28 -Acurácia : 0.9670658682634731




-Random State: 29 -Acurácia : 0.9655688622754491




-Random State: 30 -Acurácia : 0.9790419161676647




-Random State: 31 -Acurácia : 0.9610778443113772




-Random State: 32 -Acurácia : 0.9655688622754491




-Random State: 33 -Acurácia : 0.968562874251497




-Random State: 34 -Acurácia : 0.9670658682634731




-Random State: 35 -Acurácia : 0.9580838323353293




-Random State: 36 -Acurácia : 0.9760479041916168




-Random State: 37 -Acurácia : 0.9655688622754491




-Random State: 38 -Acurácia : 0.9715568862275449




-Random State: 39 -Acurácia : 0.968562874251497




-Random State: 40 -Acurácia : 0.9580838323353293




-Random State: 41 -Acurácia : 0.9700598802395209




-Random State: 42 -Acurácia : 0.968562874251497




-Random State: 43 -Acurácia : 0.9730538922155688




-Random State: 44 -Acurácia : 0.9745508982035929




-Random State: 45 -Acurácia : 0.9730538922155688




-Random State: 46 -Acurácia : 0.9670658682634731




-Random State: 47 -Acurácia : 0.9730538922155688




-Random State: 48 -Acurácia : 0.9670658682634731




-Random State: 49 -Acurácia : 0.9610778443113772




-Random State: 50 -Acurácia : 0.9790419161676647




-Random State: 51 -Acurácia : 0.9670658682634731




-Random State: 52 -Acurácia : 0.9655688622754491




-Random State: 53 -Acurácia : 0.9640718562874252




-Random State: 54 -Acurácia : 0.9715568862275449




-Random State: 55 -Acurácia : 0.968562874251497




-Random State: 56 -Acurácia : 0.9640718562874252




-Random State: 57 -Acurácia : 0.9595808383233533




-Random State: 58 -Acurácia : 0.9670658682634731




-Random State: 59 -Acurácia : 0.9670658682634731




-Random State: 60 -Acurácia : 0.9625748502994012




-Random State: 61 -Acurácia : 0.9745508982035929




-Random State: 62 -Acurácia : 0.9535928143712575




-Random State: 63 -Acurácia : 0.9610778443113772




-Random State: 64 -Acurácia : 0.9730538922155688




-Random State: 65 -Acurácia : 0.9520958083832335




-Random State: 66 -Acurácia : 0.9655688622754491




-Random State: 67 -Acurácia : 0.9640718562874252




-Random State: 68 -Acurácia : 0.9625748502994012




-Random State: 69 -Acurácia : 0.9700598802395209




-Random State: 70 -Acurácia : 0.9655688622754491




-Random State: 71 -Acurácia : 0.968562874251497




-Random State: 72 -Acurácia : 0.9580838323353293




-Random State: 73 -Acurácia : 0.9565868263473054




-Random State: 74 -Acurácia : 0.9775449101796407




-Random State: 75 -Acurácia : 0.9565868263473054




-Random State: 76 -Acurácia : 0.9640718562874252




-Random State: 77 -Acurácia : 0.9610778443113772




-Random State: 78 -Acurácia : 0.9595808383233533




-Random State: 79 -Acurácia : 0.9715568862275449




-Random State: 80 -Acurácia : 0.9610778443113772




-Random State: 81 -Acurácia : 0.9700598802395209




-Random State: 82 -Acurácia : 0.9640718562874252




-Random State: 83 -Acurácia : 0.9700598802395209




-Random State: 84 -Acurácia : 0.9700598802395209




-Random State: 85 -Acurácia : 0.9565868263473054




-Random State: 86 -Acurácia : 0.9670658682634731




-Random State: 87 -Acurácia : 0.9730538922155688




-Random State: 88 -Acurácia : 0.9670658682634731




-Random State: 89 -Acurácia : 0.9655688622754491




-Random State: 90 -Acurácia : 0.9700598802395209




-Random State: 91 -Acurácia : 0.9760479041916168




-Random State: 92 -Acurácia : 0.968562874251497




-Random State: 93 -Acurácia : 0.9820359281437125




-Random State: 94 -Acurácia : 0.9760479041916168




-Random State: 95 -Acurácia : 0.9640718562874252




-Random State: 96 -Acurácia : 0.9580838323353293




-Random State: 97 -Acurácia : 0.9655688622754491




-Random State: 98 -Acurácia : 0.9640718562874252




-Random State: 99 -Acurácia : 0.9625748502994012

Melhores Resultados
-Random State: 93 -Acurácia : 0.9820359281437125




