# 0. Configurações para usar o Google Colab

In [None]:
!pip install qiskit-machine-learning pylatexenc nltk kagglehub

# 1. Carregar o dataset e explorar a estrutura do dataframe

## 1.1 Caso esteja usando Google Colab

In [None]:
import kagglehub
import pandas as pd
import os

# Download the dataset through kagglehub and report where the files landed
path = kagglehub.dataset_download("saurabhshahane/fake-news-classification")
print("Path to dataset files:", path)

# Full path of the CSV inside the downloaded dataset folder
dataset_path = os.path.join(path, "WELFake_Dataset.csv")

# Load the CSV, shuffle the rows, rebuild the index and drop the leftover
# "Unnamed: 0" column. The shuffle is seeded because the reduced datasets
# created later are "first N rows" slices of this frame: without a seed every
# run would train and evaluate on different articles.
dataframe = (
    pd.read_csv(dataset_path)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
    .drop("Unnamed: 0", axis="columns")
)

## 1.2 Caso esteja usando sua máquina local

In [None]:
import pandas as pd

# Load the CSV, shuffle the rows, rebuild the index and drop the leftover
# "Unnamed: 0" column. The shuffle is seeded because the reduced datasets
# created later are "first N rows" slices of this frame: without a seed every
# run would train and evaluate on different articles.
dataframe = (
    pd.read_csv('../dataset/WELFake_Dataset.csv')
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
    .drop("Unnamed: 0", axis="columns")
)

## 1.3 Visualizar os dados

In [None]:
# Preview the first five rows (heading and table printed on separate lines)
print("Primeiras 5 linhas do dataframe:", dataframe.head(), sep="\n")

# Report the (rows, columns) shape of the dataframe
shape_rows_cols = dataframe.shape
print(f"\nQuantidade de linhas x colunas: {shape_rows_cols}")

# 2. Pré processamento

## 2.1 Remover rows com campos nulos

In [None]:
# Column dtypes and non-null counts before cleaning
print("Informações sobre os tipos de dados e quantidades de nulos:")
dataframe.info()

# Drop every row whose "title" or "text" is missing.
# inplace=True mutates `dataframe` itself instead of returning a new frame.
required_columns = ["title", "text"]
dataframe.dropna(subset=required_columns, inplace=True)

# Same summary again, now that the incomplete rows are gone
print("\nApós apagar os nulos:")
dataframe.info()

## 2.2 Verificar a distribuição entre notícias reais e falsas

In [None]:
# Class balance once the rows with missing title/text have been removed
print("\nContagem de labels após remover nulos  (1 = real, 0 = fake):")
label_counts = dataframe['label'].value_counts()
print(label_counts)

## 2.3 Criar dataframes com tamanho reduzido para clássicos e quânticos

In [None]:
# Datasets for the classical models: independent copies of the first N rows of
# the shuffled dataframe, plus a copy of the complete dataframe.
(
    df_classic_0k,
    df_classic_1k,
    df_classic_10k,
    df_classic_30k,
    df_classic_50k,
) = (
    dataframe[:n_rows].copy()
    for n_rows in (100, 1000, 10000, 30000, 50000)
)
df_classic = dataframe.copy()

# Datasets for the quantum models: one independent copy of each classical frame,
# so the two model families start from the same rows without sharing objects.
(
    df_quantum_0k,
    df_quantum_1k,
    df_quantum_10k,
    df_quantum_30k,
    df_quantum_50k,
    df_quantum,
) = (
    classic_frame.copy()
    for classic_frame in (
        df_classic_0k,
        df_classic_1k,
        df_classic_10k,
        df_classic_30k,
        df_classic_50k,
        df_classic,
    )
)

In [None]:
# Print the size and the label distribution of every classical dataset
for size_tag, frame in (
    ("0k", df_classic_0k),
    ("1k", df_classic_1k),
    ("10k", df_classic_10k),
    ("30k", df_classic_30k),
    ("50k", df_classic_50k),
    ("Full", df_classic),
):
    print(f"Classic {size_tag} - Total samples: {len(frame)}")
    print(frame['label'].value_counts())
    print()

## 2.4 Aplicar pré-processamento para todos os dataframes

In [None]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import string
import nltk
import re

# Download the NLTK resources this notebook relies on:
#   punkt / punkt_tab -> tokenizer models behind word_tokenize
#   stopwords         -> word lists behind stopwords.words(...)
#   wordnet           -> lexical database behind WordNetLemmatizer
for resource in ("punkt", 'punkt_tab', "stopwords", 'wordnet'):
    nltk.download(resource)

# English stop words kept in a set for fast membership tests
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess_text(text):
    """Normalize one article body into a space-separated string of lemmas.

    Steps, in order: lowercase, strip URLs, strip @mentions and #hashtags,
    delete punctuation and digits, tokenize, drop English stop words,
    lemmatize each remaining token and join the result with single spaces.

    Args:
        text: The raw text to clean (must be a ``str``).

    Returns:
        The cleaned text as a single string.
    """
    lowered = text.lower()

    # URLs go first so that their punctuation does not leave fragments behind
    without_urls = re.sub(r'http\S+|www\S+|https\S+', '', lowered, flags=re.MULTILINE)

    # Mentions and hashtags (common in social-media text)
    without_tags = re.sub(r'@\w+|#\w+', '', without_urls)

    # Delete (not replace) every punctuation character and digit
    removal_table = str.maketrans('', '', string.punctuation + string.digits)
    letters_only = without_tags.translate(removal_table)

    # Tokenize, filter stop words and lemmatize in a single pass
    lemmas = (
        lemmatizer.lemmatize(token)
        for token in word_tokenize(letters_only)
        if token not in stop_words
    )

    return ' '.join(lemmas)

In [None]:
# Aplicar pré-processamento nos dataframes clássicos
df_classic_0k['text_clean'] = df_classic_0k['text'].apply(preprocess_text)
df_classic_1k['text_clean'] = df_classic_1k['text'].apply(preprocess_text)
df_classic_10k['text_clean'] = df_classic_10k['text'].apply(preprocess_text)
df_classic_30k['text_clean'] = df_classic_30k['text'].apply(preprocess_text)
df_classic_50k['text_clean'] = df_classic_50k['text'].apply(preprocess_text)
df_classic['text_clean'] = df_classic['text'].apply(preprocess_text)

# Aplicar pré-processamento nos dataframes quânticos
df_quantum_0k['text_clean'] = df_quantum_0k['text'].apply(preprocess_text)
df_quantum_1k['text_clean'] = df_quantum_1k['text'].apply(preprocess_text)
df_quantum_10k['text_clean'] = df_quantum_10k['text'].apply(preprocess_text)
df_quantum_30k['text_clean'] = df_quantum_30k['text'].apply(preprocess_text)
df_quantum_50k['text_clean'] = df_quantum_50k['text'].apply(preprocess_text)
df_quantum['text_clean'] = df_quantum['text'].apply(preprocess_text)

# 3. Treinamento dos modelos

## 3.1 Preparação dos dados para o treinamento dos modelos

### 3.1.1 Prepara dados para modelos clássicos usando TF-IDF

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectFromModel
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

def prepare_classical_data(df):
    """Build TF-IDF train/test matrices for the classical models.

    The documents are split *before* the vectorizer is fitted, so the
    vocabulary and IDF weights are learned from the training documents only.
    Fitting on the whole dataset first would leak test-set text statistics
    into the features and inflate the reported scores. The split itself
    (80/20, ``random_state=42``) selects the same rows as before.

    Args:
        df: DataFrame with a ``text_clean`` column (preprocessed text) and a
            ``label`` column.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, vectorizer)`` where the
        ``X`` values are sparse TF-IDF matrices and ``vectorizer`` is the
        fitted ``TfidfVectorizer``.
    """
    y = df['label'].values

    # Train/test split on the raw documents
    text_train, text_test, y_train, y_test = train_test_split(
        df['text_clean'], y, test_size=0.2, random_state=42
    )

    # Fit TF-IDF on the training documents only, then project the test documents
    vectorizer = TfidfVectorizer(max_features=5000)
    X_train = vectorizer.fit_transform(text_train)
    X_test = vectorizer.transform(text_test)

    return X_train, X_test, y_train, y_test, vectorizer

In [None]:
# Preparar dados para modelos clássicos
X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k, vectorizer_classic_0k = prepare_classical_data(df_classic_0k)

X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k, vectorizer_classic_1k = prepare_classical_data(df_classic_1k)

X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k, vectorizer_classic_10k = prepare_classical_data(df_classic_10k)

X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k, vectorizer_classic_30k = prepare_classical_data(df_classic_30k)

X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k, vectorizer_classic_50k = prepare_classical_data(df_classic_50k)

X_train_classic, X_test_classic, y_train_classic, y_test_classic, vectorizer_classic = prepare_classical_data(df_classic)

### 3.1.2 Prepara dados para modelos quânticos usando TF-IDF + Lasso Selector + Scaling + PCA

In [None]:
def prepare_quantum_data(df, max_features=12, n_components=8):
    """Build low-dimensional train/test features for the quantum models.

    Pipeline: TF-IDF -> L1-regularised feature selection -> scaling -> PCA.
    The documents are split *before* any step is fitted, and every step is
    fitted on the training portion only. This matters most for the selector,
    which is supervised: fitting it on the full dataset would let the test
    labels choose the features. The split (80/20, ``random_state=42``)
    selects the same rows as before.

    Args:
        df: DataFrame with a ``text_clean`` column (preprocessed text) and a
            ``label`` column.
        max_features: Upper bound on the number of TF-IDF features kept by the
            L1 selector.
        n_components: Requested number of PCA components; capped by the number
            of selected features and of training samples.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test, vectorizer, lasso_selector,
        scaler, pca)`` where the ``X`` values are dense arrays of PCA
        components and the remaining objects are the fitted transformers.
    """
    y = df['label'].values

    # Train/test split on the raw documents
    text_train, text_test, y_train, y_test = train_test_split(
        df['text_clean'], y, test_size=0.2, random_state=42
    )

    # TF-IDF fitted on the training documents only
    vectorizer = TfidfVectorizer(max_features=5000)
    X_train_tfidf = vectorizer.fit_transform(text_train)
    X_test_tfidf = vectorizer.transform(text_test)

    # Supervised feature selection with an L1-penalised logistic regression,
    # fitted with the training labels only
    lasso_selector = SelectFromModel(
        LogisticRegression(random_state=42, penalty='l1', solver='liblinear'),
        max_features=max_features
    )
    X_train_selected = lasso_selector.fit_transform(X_train_tfidf, y_train)
    X_test_selected = lasso_selector.transform(X_test_tfidf)

    # Scale without centering so the sparse matrices stay sparse
    scaler = StandardScaler(with_mean=False)
    X_train_scaled = scaler.fit_transform(X_train_selected)
    X_test_scaled = scaler.transform(X_test_selected)

    # PCA cannot produce more components than there are features or samples
    n_components_or_n_features = min(n_components, *X_train_scaled.shape)

    # PCA needs dense input
    pca = PCA(n_components=n_components_or_n_features)
    X_train = pca.fit_transform(X_train_scaled.toarray())
    X_test = pca.transform(X_test_scaled.toarray())

    print(f"PCA: {X_train_scaled.shape[1]} features → {X_train.shape[1]} componentes")
    print(f"Variância explicada: {sum(pca.explained_variance_ratio_):.2%}")

    return X_train, X_test, y_train, y_test, vectorizer, lasso_selector, scaler, pca

In [None]:
# Preparar dados para modelos quânticos
X_train_quantum_0k, X_test_quantum_0k, y_train_quantum_0k, y_test_quantum_0k, vectorizer_quantum_0k, lasso_selector_quantum_0k, scaler_quantum_0k, pca_quantum_0k = prepare_quantum_data(df_quantum_0k)

X_train_quantum_1k, X_test_quantum_1k, y_train_quantum_1k, y_test_quantum_1k, vectorizer_quantum_1k, lasso_selector_quantum_1k, scaler_quantum_1k, pca_quantum_1k = prepare_quantum_data(df_quantum_1k)

X_train_quantum_10k, X_test_quantum_10k, y_train_quantum_10k, y_test_quantum_10k, vectorizer_quantum_10k, lasso_selector_quantum_10k, scaler_quantum_10k, pca_quantum_10k = prepare_quantum_data(df_quantum_10k)

X_train_quantum_30k, X_test_quantum_30k, y_train_quantum_30k, y_test_quantum_30k, vectorizer_quantum_30k, lasso_selector_quantum_30k, scaler_quantum_30k, pca_quantum_30k = prepare_quantum_data(df_quantum_30k)

X_train_quantum_50k, X_test_quantum_50k, y_train_quantum_50k, y_test_quantum_50k, vectorizer_quantum_50k, lasso_selector_quantum_50k, scaler_quantum_50k, pca_quantum_50k = prepare_quantum_data(df_quantum_50k)

X_train_quantum, X_test_quantum, y_train_quantum, y_test_quantum, vectorizer_quantum, lasso_selector_quantum, scaler_quantum, pca_quantum = prepare_quantum_data(df_quantum)

## 3.2 Treinamento dos modelos clássicos

### 3.2.1 Treinamento com Regressão Logística

In [None]:
def train_logistic_regression(X_train, y_train):
    """Fit a logistic-regression classifier and print the training time.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` model.
    """
    import time
    from sklearn.linear_model import LogisticRegression

    classifier = LogisticRegression(random_state=42, max_iter=1000)

    began = time.time()
    classifier.fit(X_train, y_train)
    duration = time.time() - began

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(duration)} seconds")
    return classifier

### 3.2.2 Treinamento com Floresta Aleatória

In [None]:
def train_random_forest(X_train, y_train):
    """Fit a random-forest classifier and print the training time.

    The forest is seeded (``random_state=42``) like the other seeded
    estimators in this notebook, so repeated runs on the same data build the
    same trees and report the same scores.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The fitted ``RandomForestClassifier`` model.
    """
    from sklearn.ensemble import RandomForestClassifier
    import time

    model = RandomForestClassifier(n_estimators=100, random_state=42)

    start = time.time()
    model.fit(X_train, y_train)
    elapsed = time.time() - start

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(elapsed)} seconds")
    return model

### 3.2.3 Treinamento com Support Vector Classifier (SVC)

In [None]:
def train_svm(X_train, y_train):
    """Fit a linear-kernel support vector classifier and print the training time.

    ``probability=True`` makes scikit-learn fit an extra probability
    calibration using an internal, randomly shuffled cross-validation; the
    model is seeded (``random_state=42``) so those probability estimates are
    reproducible between runs.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The fitted ``SVC`` model.
    """
    from sklearn.svm import SVC
    import time

    model = SVC(kernel='linear', probability=True, random_state=42)

    start = time.time()
    model.fit(X_train, y_train)
    elapsed = time.time() - start

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(elapsed)} seconds")
    return model

### 3.2.4 Treinamento com Naive Bayes

In [None]:
def train_naive_bayes(X_train, y_train):
    """Fit a multinomial Naive Bayes classifier and print the training time.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.

    Returns:
        The fitted ``MultinomialNB`` model.
    """
    import time
    from sklearn.naive_bayes import MultinomialNB

    classifier = MultinomialNB()

    began = time.time()
    classifier.fit(X_train, y_train)
    duration = time.time() - began

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(duration)} seconds")
    return classifier

### 3.2.5 Treinamento com K-Nearest Neighbors (KNN)

In [None]:
def train_knn(X_train, y_train, n_neighbors=5):
    """Fit a k-nearest-neighbours classifier and print the training time.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels.
        n_neighbors: Number of neighbours consulted for each prediction.

    Returns:
        The fitted ``KNeighborsClassifier`` model.
    """
    import time
    from sklearn.neighbors import KNeighborsClassifier

    classifier = KNeighborsClassifier(n_neighbors=n_neighbors)

    began = time.time()
    classifier.fit(X_train, y_train)
    duration = time.time() - began

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(duration)} seconds")
    return classifier

## 3.3 Treinamento dos modelos quânticos

### 3.3.1 Treinamento com Variational Quantum Classifier (VQC)

In [None]:
def train_vqc(X_train, y_train, num_features):
    """Fit a Variational Quantum Classifier and print the training time.

    The classifier combines a one-repetition ``ZZFeatureMap`` (data encoding),
    a two-repetition ``RealAmplitudes`` ansatz (trainable part) and a COBYLA
    optimizer limited to 50 iterations.

    Args:
        X_train: Training feature matrix with ``num_features`` columns.
        y_train: Training labels.
        num_features: Number of input features; also used as the number of
            qubits of the ansatz.

    Returns:
        The fitted ``VQC`` model.
    """
    import time
    from qiskit_machine_learning.algorithms.classifiers import VQC
    from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
    from qiskit_machine_learning.optimizers import COBYLA

    classifier = VQC(
        # data-encoding circuit
        feature_map=ZZFeatureMap(feature_dimension=num_features, reps=1),
        # trainable circuit
        ansatz=RealAmplitudes(num_qubits=num_features, reps=2),
        # gradient-free optimizer
        optimizer=COBYLA(maxiter=50),
    )

    began = time.time()
    classifier.fit(X_train, y_train)
    duration = time.time() - began

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(duration)} seconds")
    return classifier

In [None]:
from qiskit import QuantumCircuit
from qiskit.circuit import Parameter
from qiskit.quantum_info import SparsePauliOp
from qiskit_machine_learning.neural_networks import EstimatorQNN
# NOTE(review): this is the V1 reference primitive; confirm it is still
# exported by the Qiskit version that the unpinned install cell pulls in.
from qiskit.primitives import Estimator
# COBYLA is taken from qiskit_machine_learning.optimizers, the same module
# train_vqc imports it from; the separate qiskit_algorithms package is not
# among the packages installed by the setup cell.
from qiskit_machine_learning.optimizers import COBYLA
from qiskit_machine_learning.algorithms.classifiers import NeuralNetworkClassifier
import time

def train_qnn(X_train, y_train, num_features):
    """Fit a small quantum neural network classifier and print the training time.

    Circuit layout on ``num_features`` qubits:
      1. RY(input_i) on qubit i      -- encodes the features,
      2. RY(weight_i) on qubit i     -- trainable layer,
      3. a chain of CNOTs i -> i + 1 -- entanglement.
    The network output is the expectation value of Z on the last qubit.

    The trainable layer uses RY rather than RZ: an RZ layer followed only by
    CNOTs and a Z measurement changes nothing but the phases of the
    computational-basis amplitudes, which a Z expectation value cannot see, so
    RZ weights would never influence the output and the optimizer would have
    nothing to train.

    NOTE(review): the network output lies in [-1, 1]; confirm that the caller
    encodes the two classes in a way the classifier expects.

    Args:
        X_train: Training feature matrix with ``num_features`` columns.
        y_train: Training labels (two classes).
        num_features: Number of input features, used as the number of qubits.

    Returns:
        The fitted ``NeuralNetworkClassifier``.
    """
    qc = QuantumCircuit(num_features)

    # One circuit parameter per feature and one trainable weight per qubit
    input_params = [Parameter(f'input_{i}') for i in range(num_features)]
    weight_params = [Parameter(f'weight_{i}') for i in range(num_features)]

    # Feature encoding: Y rotations driven by the inputs
    for i in range(num_features):
        qc.ry(input_params[i], i)

    # Trainable layer: Y rotations driven by the weights (see docstring)
    for i in range(num_features):
        qc.ry(weight_params[i], i)

    # Entanglement: linear chain of CNOTs
    for i in range(num_features - 1):
        qc.cx(i, i + 1)

    # Pauli labels are written with the highest-index qubit first, so this is
    # Z on qubit num_features - 1 (the end of the CNOT chain), identity elsewhere.
    observable = SparsePauliOp("Z" + "I" * (num_features - 1))

    estimator = Estimator()
    qnn = EstimatorQNN(
        circuit=qc,
        observables=observable,
        input_params=input_params,
        weight_params=weight_params,
        estimator=estimator
    )

    model = NeuralNetworkClassifier(
        neural_network=qnn,
        optimizer=COBYLA(maxiter=50),
        initial_point=[0.0] * len(weight_params)  # start from all-zero weights
    )

    start = time.time()
    model.fit(X_train, y_train)
    elapsed = time.time() - start

    # Duration is reported rounded to whole seconds
    print(f"Training time: {round(elapsed)} seconds")
    return model

# 4. Avaliação dos modelos

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def plot_confusion_matrix(y_test, y_pred, model_name, dataset_size):
    """Plot the fake/real confusion matrix and print its four cells.

    The label order is pinned to ``[0, 1]`` (0 = fake, 1 = real) so the matrix
    is always 2x2 and lines up with the ``['Fake', 'Real']`` tick labels, even
    when one class is absent from both ``y_test`` and ``y_pred``.

    Args:
        y_test: True labels (0 = fake, 1 = real).
        y_pred: Predicted labels, same encoding as ``y_test``.
        model_name: Model name shown in the plot title.
        dataset_size: Dataset tag shown in the plot title.
    """
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Fake', 'Real'])
    disp.plot(cmap='Blues')
    plt.title(f'Matriz de Confusão - {model_name} ({dataset_size})')
    plt.show()

    # Rows are true labels and columns are predictions, so the flattened
    # matrix reads: true negatives, false positives, false negatives, true positives.
    tn, fp, fn, tp = cm.ravel()
    print(f"Verdadeiros Negativos (Fake): {tn}")
    print(f"Falsos Positivos: {fp}")
    print(f"Falsos Negativos: {fn}")
    print(f"Verdadeiros Positivos (Real): {tp}")
    print()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import time

def evaluate_model(y_test, y_pred, model_name="Modelo", dataset_size="Dataset"):
    """Print accuracy, precision, recall and F1, then show the confusion matrix.

    Args:
        y_test: True labels.
        y_pred: Predicted labels.
        model_name: Model name forwarded to the confusion-matrix plot.
        dataset_size: Dataset tag forwarded to the confusion-matrix plot.
    """
    # All four metrics are computed before anything is printed
    accuracy, precision, recall, f1 = [
        scorer(y_test, y_pred)
        for scorer in (accuracy_score, precision_score, recall_score, f1_score)
    ]

    for heading, value in (
        ('Accuracy:', accuracy),
        ('Precision:', precision),
        ('Recall:', recall),
    ):
        print(heading, value)
    print('F1 Score:', f1, "\n")

    plot_confusion_matrix(y_test, y_pred, model_name, dataset_size)

In [None]:
import time

def train_and_evaluate_model(train_function, X_train, X_test, y_train, y_test, model_name, dataset_size):
    """Train a model, time its predictions on the test set and evaluate them.

    Args:
        train_function: Callable ``(X_train, y_train) -> fitted model``; the
            returned model must provide ``predict``.
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels.
        model_name: Model name used in the evaluation report.
        dataset_size: Dataset tag used in the evaluation report.

    Returns:
        Tuple ``(model, y_pred)``: the fitted model and its test predictions.
    """
    fitted = train_function(X_train, y_train)

    # Only the predict call is timed
    began = time.time()
    predictions = fitted.predict(X_test)
    duration = time.time() - began
    print(f"Inference time: {duration:.4f} seconds")

    evaluate_model(y_test, predictions, model_name, dataset_size)

    return fitted, predictions

In [None]:
import time

def train_and_evaluate_vqc(X_train, X_test, y_train, y_test, model_name, dataset_size, num_features):
    """Train a VQC, time its predictions on the test set and evaluate them.

    Thin wrapper that binds ``num_features`` to ``train_vqc`` and hands the
    train / predict / evaluate sequence to ``train_and_evaluate_model``.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels.
        model_name: Model name used in the evaluation report.
        dataset_size: Dataset tag used in the evaluation report.
        num_features: Number of features (qubits) passed to ``train_vqc``.

    Returns:
        Tuple ``(model, y_pred)``: the fitted model and its test predictions.
    """
    return train_and_evaluate_model(
        lambda features, labels: train_vqc(features, labels, num_features),
        X_train, X_test,
        y_train, y_test,
        model_name, dataset_size,
    )

In [None]:
import time

def train_and_evaluate_knn(X_train, X_test, y_train, y_test, model_name, dataset_size, n_neighbors=5):
    """Train a KNN classifier, time its predictions on the test set and evaluate them.

    Thin wrapper that binds ``n_neighbors`` to ``train_knn`` and hands the
    train / predict / evaluate sequence to ``train_and_evaluate_model``.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels.
        y_test: Test labels.
        model_name: Model name used in the evaluation report.
        dataset_size: Dataset tag used in the evaluation report.
        n_neighbors: Number of neighbours passed to ``train_knn``.

    Returns:
        Tuple ``(model, y_pred)``: the fitted model and its test predictions.
    """
    return train_and_evaluate_model(
        lambda features, labels: train_knn(features, labels, n_neighbors),
        X_train, X_test,
        y_train, y_test,
        model_name, dataset_size,
    )

In [None]:
import time

def train_and_evaluate_qnn(X_train, X_test, y_train, y_test, model_name, dataset_size, num_features):
    """Train the QNN classifier, time its predictions and evaluate them.

    The dataset labels are 0 (fake) / 1 (real), but the QNN outputs a single
    expectation value in [-1, 1] and the classifier turns it into a
    prediction by taking its sign. The model is therefore trained on labels
    re-encoded as -1 / +1, and its predictions are decoded back to 0 / 1
    before evaluation, so the binary metrics compare like with like.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        y_train: Training labels encoded as 0 / 1.
        y_test: Test labels encoded as 0 / 1.
        model_name: Model name used in the evaluation report.
        dataset_size: Dataset tag used in the evaluation report.
        num_features: Number of features (qubits) passed to ``train_qnn``.

    Returns:
        Tuple ``(model, y_pred)``: the fitted model and its test predictions
        as a 1-D integer array of 0 / 1 labels.
    """
    import numpy as np

    # 0 / 1  ->  -1 / +1, the range of the network output
    y_train_signed = 2 * np.asarray(y_train) - 1
    model = train_qnn(X_train, y_train_signed, num_features)

    # Only the predict call is timed
    start = time.time()
    raw_pred = model.predict(X_test)
    elapsed = time.time() - start
    print(f"Inference time: {elapsed:.4f} seconds")

    # Positive output -> 1 (real); zero or negative -> 0 (fake). Flattened
    # because predictions may come back as a column vector.
    y_pred = (np.asarray(raw_pred).ravel() > 0).astype(int)

    evaluate_model(y_test, y_pred, model_name, dataset_size)

    return model, y_pred

## 4.1 Avaliação da Regressão Logística

In [None]:
# Logistic regression: train and evaluate on every classical dataset size
print("=" * 50)
print("REGRESSÃO LOGÍSTICA")
print("=" * 50)

lcr_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k),
    ("1k", X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k),
    ("10k", X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k),
    ("30k", X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k),
    ("50k", X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k),
    ("Full", X_train_classic, X_test_classic, y_train_classic, y_test_classic),
):
    print(f"\n--- Classic {size_tag} ---")
    lcr_runs.append(
        train_and_evaluate_model(
            train_logistic_regression,
            X_tr, X_te,
            y_tr, y_te,
            "Regressão Logística", size_tag
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (lcr_0k, y_pred_lcr_0k),
    (lcr_1k, y_pred_lcr_1k),
    (lcr_10k, y_pred_lcr_10k),
    (lcr_30k, y_pred_lcr_30k),
    (lcr_50k, y_pred_lcr_50k),
    (lcr_full, y_pred_lcr_full),
) = lcr_runs

## 4.2 Avaliação da Floresta Aleatória

In [None]:
# Random forest: train and evaluate on every classical dataset size
print("=" * 50)
print("FLORESTA ALEATÓRIA")
print("=" * 50)

rfc_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k),
    ("1k", X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k),
    ("10k", X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k),
    ("30k", X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k),
    ("50k", X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k),
    ("Full", X_train_classic, X_test_classic, y_train_classic, y_test_classic),
):
    print(f"\n--- Classic {size_tag} ---")
    rfc_runs.append(
        train_and_evaluate_model(
            train_random_forest,
            X_tr, X_te,
            y_tr, y_te,
            "Floresta Aleatória", size_tag
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (rfc_0k, y_pred_rfc_0k),
    (rfc_1k, y_pred_rfc_1k),
    (rfc_10k, y_pred_rfc_10k),
    (rfc_30k, y_pred_rfc_30k),
    (rfc_50k, y_pred_rfc_50k),
    (rfc_full, y_pred_rfc_full),
) = rfc_runs

## 4.3 Avaliação do Support Vector Classifier

In [None]:
# Support vector classifier: train and evaluate on every classical dataset size
print("=" * 50)
print("SUPPORT VECTOR CLASSIFIER")
print("=" * 50)

svm_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k),
    ("1k", X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k),
    ("10k", X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k),
    ("30k", X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k),
    ("50k", X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k),
    ("Full", X_train_classic, X_test_classic, y_train_classic, y_test_classic),
):
    print(f"\n--- Classic {size_tag} ---")
    svm_runs.append(
        train_and_evaluate_model(
            train_svm,
            X_tr, X_te,
            y_tr, y_te,
            "Support Vector Classifier", size_tag
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (svm_0k, y_pred_svm_0k),
    (svm_1k, y_pred_svm_1k),
    (svm_10k, y_pred_svm_10k),
    (svm_30k, y_pred_svm_30k),
    (svm_50k, y_pred_svm_50k),
    (svm_full, y_pred_svm_full),
) = svm_runs

## 4.4 Avaliação do Naive Bayes

In [None]:
# Naive Bayes: train and evaluate on every classical dataset size
print("=" * 50)
print("NAIVE BAYES")
print("=" * 50)

nb_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k),
    ("1k", X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k),
    ("10k", X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k),
    ("30k", X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k),
    ("50k", X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k),
    ("Full", X_train_classic, X_test_classic, y_train_classic, y_test_classic),
):
    print(f"\n--- Classic {size_tag} ---")
    nb_runs.append(
        train_and_evaluate_model(
            train_naive_bayes,
            X_tr, X_te,
            y_tr, y_te,
            "Naive Bayes", size_tag
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (nb_0k, y_pred_nb_0k),
    (nb_1k, y_pred_nb_1k),
    (nb_10k, y_pred_nb_10k),
    (nb_30k, y_pred_nb_30k),
    (nb_50k, y_pred_nb_50k),
    (nb_full, y_pred_nb_full),
) = nb_runs

## 4.5 Avaliação do K-Nearest Neighbors

In [None]:
# K-nearest neighbours: train and evaluate on every classical dataset size
print("=" * 50)
print("K-NEAREST NEIGHBORS")
print("=" * 50)

knn_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_classic_0k, X_test_classic_0k, y_train_classic_0k, y_test_classic_0k),
    ("1k", X_train_classic_1k, X_test_classic_1k, y_train_classic_1k, y_test_classic_1k),
    ("10k", X_train_classic_10k, X_test_classic_10k, y_train_classic_10k, y_test_classic_10k),
    ("30k", X_train_classic_30k, X_test_classic_30k, y_train_classic_30k, y_test_classic_30k),
    ("50k", X_train_classic_50k, X_test_classic_50k, y_train_classic_50k, y_test_classic_50k),
    ("Full", X_train_classic, X_test_classic, y_train_classic, y_test_classic),
):
    print(f"\n--- Classic {size_tag} ---")
    knn_runs.append(
        train_and_evaluate_knn(
            X_tr, X_te,
            y_tr, y_te,
            "K-Nearest Neighbors", size_tag, n_neighbors=5
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (knn_0k, y_pred_knn_0k),
    (knn_1k, y_pred_knn_1k),
    (knn_10k, y_pred_knn_10k),
    (knn_30k, y_pred_knn_30k),
    (knn_50k, y_pred_knn_50k),
    (knn_full, y_pred_knn_full),
) = knn_runs

## 4.6 Avaliação do Variational Quantum Classifier

In [None]:
# Variational Quantum Classifier: train and evaluate on every quantum dataset size
print("=" * 50)
print("VARIATIONAL QUANTUM CLASSIFIER")
print("=" * 50)

vqc_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_quantum_0k, X_test_quantum_0k, y_train_quantum_0k, y_test_quantum_0k),
    ("1k", X_train_quantum_1k, X_test_quantum_1k, y_train_quantum_1k, y_test_quantum_1k),
    ("10k", X_train_quantum_10k, X_test_quantum_10k, y_train_quantum_10k, y_test_quantum_10k),
    ("30k", X_train_quantum_30k, X_test_quantum_30k, y_train_quantum_30k, y_test_quantum_30k),
    ("50k", X_train_quantum_50k, X_test_quantum_50k, y_train_quantum_50k, y_test_quantum_50k),
    ("Full", X_train_quantum, X_test_quantum, y_train_quantum, y_test_quantum),
):
    print(f"\n--- Quantum {size_tag} ---")
    vqc_runs.append(
        train_and_evaluate_vqc(
            X_tr, X_te,
            y_tr, y_te,
            # the number of feature columns sets the circuit width
            "Variational Quantum Classifier", size_tag, X_tr.shape[1]
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (vqc_0k, y_pred_vqc_0k),
    (vqc_1k, y_pred_vqc_1k),
    (vqc_10k, y_pred_vqc_10k),
    (vqc_30k, y_pred_vqc_30k),
    (vqc_50k, y_pred_vqc_50k),
    (vqc_full, y_pred_vqc_full),
) = vqc_runs

## 4.7 Avaliação do Quantum Neural Network

In [None]:
# Quantum Neural Network: train and evaluate on every quantum dataset size
print("=" * 50)
print("QUANTUM NEURAL NETWORK")
print("=" * 50)

qnn_runs = []
for size_tag, X_tr, X_te, y_tr, y_te in (
    ("0k", X_train_quantum_0k, X_test_quantum_0k, y_train_quantum_0k, y_test_quantum_0k),
    ("1k", X_train_quantum_1k, X_test_quantum_1k, y_train_quantum_1k, y_test_quantum_1k),
    ("10k", X_train_quantum_10k, X_test_quantum_10k, y_train_quantum_10k, y_test_quantum_10k),
    ("30k", X_train_quantum_30k, X_test_quantum_30k, y_train_quantum_30k, y_test_quantum_30k),
    ("50k", X_train_quantum_50k, X_test_quantum_50k, y_train_quantum_50k, y_test_quantum_50k),
    ("Full", X_train_quantum, X_test_quantum, y_train_quantum, y_test_quantum),
):
    print(f"\n--- Quantum {size_tag} ---")
    qnn_runs.append(
        train_and_evaluate_qnn(
            X_tr, X_te,
            y_tr, y_te,
            # the number of feature columns sets the circuit width
            "Quantum Neural Network", size_tag, X_tr.shape[1]
        )
    )

# Bind each (model, predictions) pair to its per-size name
(
    (qnn_0k, y_pred_qnn_0k),
    (qnn_1k, y_pred_qnn_1k),
    (qnn_10k, y_pred_qnn_10k),
    (qnn_30k, y_pred_qnn_30k),
    (qnn_50k, y_pred_qnn_50k),
    (qnn_full, y_pred_qnn_full),
) = qnn_runs