In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the raw dataset; 'datetime' is parsed so the window filters below
# operate on timestamps.
df = pd.read_csv('data_training.csv', parse_dates=['datetime'])

# First and last calendar day (both inclusive) of each window.
# NOTE(review): 2024-03-31 belongs to neither window -- confirm the gap is intended.
train_start, train_end = '2024-01-01', '2024-03-30'
test_start, test_end = '2024-04-01', '2024-06-30'


def _rows_in_window(frame, first_day, last_day):
    """Return a copy of the rows of *frame* dated from first_day to last_day.

    A date-only bound is interpreted as midnight, so comparing the raw
    timestamp with ``<= last_day`` would discard every row stamped later on
    that final day. The upper bound is therefore checked against the row's
    calendar day (timestamp floored to midnight), which keeps the whole of
    last_day inside the window.
    """
    day = frame['datetime'].dt.normalize()
    in_window = (frame['datetime'] >= first_day) & (day <= last_day)
    return frame[in_window].copy()


treino = _rows_in_window(df, train_start, train_end)
validacao = _rows_in_window(df, test_start, test_end)

# Snapshots taken before the column drop below, so the complete rows stay available.
treino_original = treino.copy()
validacao_original = validacao.copy()

# Columns removed before modelling; whatever remains besides 'trend' is used as a feature.
non_feature_columns = [
    'datetime', 'date', 'close', 'open', 'low', 'high', 'volume',
    'average', 'amount_stock', 'id_ticker', 'business',
]
for df_loop in [treino, validacao]:
    df_loop.drop(columns=non_feature_columns, inplace=True)

# 'trend' is the classification target.
X_train = treino.drop(columns=['trend'])
y_train = treino['trend']

X_valid = validacao.drop(columns=['trend'])
y_valid = validacao['trend']

# Standardise with statistics learned from the training window only, then
# apply the same transform to the validation window.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)

# 9-fold stratified cross-validation over the training window.
# NOTE(review): shuffle=True lets a fold train on rows dated after the rows it
# is scored on -- confirm this is acceptable for this dataset.
skf = StratifiedKFold(n_splits=9, shuffle=True, random_state=42)

# Out-of-fold predictions: each position is filled exactly once, by the model
# of the fold in which that row was held out. The buffer uses the labels' own
# dtype so predictions are stored as labels rather than coerced to floats.
train_preds_cv = np.empty(len(X_train), dtype=y_train.to_numpy().dtype)
train_scores = []
val_scores = []

# Only the labels and the row count determine the split, so the unscaled
# frame is passed here.
for train_idx, val_idx in skf.split(X_train, y_train):
    # Fit the scaler on this fold's training rows only. Reusing the scaler
    # fitted on the whole training window would let the held-out rows
    # contribute to the mean/variance the fold's model is trained with.
    fold_scaler = StandardScaler()
    X_tr = fold_scaler.fit_transform(X_train.iloc[train_idx])
    X_val = fold_scaler.transform(X_train.iloc[val_idx])
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

    # Same configuration and seed for every fold.
    mlp = MLPClassifier(
        hidden_layer_sizes=(100,),
        early_stopping=False,
        random_state=42
    )
    mlp.fit(X_tr, y_tr)

    y_tr_pred = mlp.predict(X_tr)
    y_val_pred = mlp.predict(X_val)

    # In-fold (fitted rows) vs. held-out accuracy for this fold.
    train_scores.append(accuracy_score(y_tr, y_tr_pred))
    val_scores.append(accuracy_score(y_val, y_val_pred))

    train_preds_cv[val_idx] = y_val_pred

# Refit one model on the full scaled training window, using the same
# configuration as the cross-validation folds, and predict the validation window.
final_model = MLPClassifier(hidden_layer_sizes=(100,), early_stopping=False, random_state=42)
final_model.fit(X_train_scaled, y_train)

val_preds = final_model.predict(X_valid_scaled)

# Accuracy summary: mean over the CV folds, then the validation window.
mean_cv_train_acc = np.mean(train_scores)
mean_cv_val_acc = np.mean(val_scores)
final_valid_acc = accuracy_score(y_valid, val_preds)
print(f"Acurácia média no treino (CV): {mean_cv_train_acc:.4f}")
print(f"Acurácia média na validação (CV): {mean_cv_val_acc:.4f}")
print(f"Acurácia final no conjunto de validação: {final_valid_acc:.4f}")

# Each confusion matrix is computed once and shared by the text and plot output.
cm_train_oof = confusion_matrix(y_train, train_preds_cv)
cm_valid = confusion_matrix(y_valid, val_preds)

print("\nMatriz de confusão - Treino (Out-of-Fold):")
print(cm_train_oof)

print("\nMatriz de confusão - Validação:")
print(cm_valid)

# Side-by-side heatmaps: out-of-fold training matrix on the left,
# validation matrix on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
heatmap_panels = (
    (cm_train_oof, 'Blues', "Matriz de Confusão - Treino (CV Out-of-Fold)"),
    (cm_valid, 'Greens', "Matriz de Confusão - Validação Final"),
)
for panel_ax, (panel_matrix, panel_cmap, panel_title) in zip(axes, heatmap_panels):
    sns.heatmap(panel_matrix, annot=True, fmt='d', cmap=panel_cmap, ax=panel_ax)
    panel_ax.set_title(panel_title)
plt.tight_layout()
plt.show()

def _attach_predictions(frame, predictions):
    """Return a copy of *frame* with 'predicted_trend' and 'ERRORS' appended.

    Despite its name, 'ERRORS' is a hit flag: 1 where the prediction equals
    'trend', 0 where it does not.
    """
    annotated = frame.copy()
    annotated['predicted_trend'] = predictions
    is_hit = annotated['trend'] == annotated['predicted_trend']
    annotated['ERRORS'] = is_hit.astype(int)  # 1 = correct, 0 = wrong
    return annotated


# Training rows receive the out-of-fold predictions, cast to integer labels;
# validation rows receive the final model's predictions as returned.
results_treino_df = _attach_predictions(treino_original, train_preds_cv.astype(int))
results_valid_df = _attach_predictions(validacao_original, val_preds)

# Output layout: original market columns, then the indicator columns,
# then the target and the prediction outcome.
final_columns_order = (
    ['datetime', 'id_ticker', 'open', 'close', 'high', 'low', 'average',
     'volume', 'business', 'amount_stock', 'date']
    + ['Bands_Norm', 'NSMA_3', 'NSMA_5', 'NSMA_7', 'NSMA_9', 'NSMA_11']
    + ['trend', 'predicted_trend', 'ERRORS']
)

# Training rows first, validation rows after, restricted to the layout above.
final_results_df = pd.concat([results_treino_df, results_valid_df])[final_columns_order]

print("Início do Período (dados de treino):")
print(final_results_df.head())
print("\nFinal do Período (dados de validação):")
print(final_results_df.tail())

# Persist the combined table without the DataFrame index.
nome_do_arquivo = 'data_with_errors_v2.csv'
final_results_df.to_csv(nome_do_arquivo, index=False)

print(f"\nArquivo '{nome_do_arquivo}' com {len(final_results_df)} linhas salvo com sucesso!")