In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from collections import deque
import random
import matplotlib.pyplot as plt

class DQNAgent:
    def __init__(self, n_states, n_actions, learning_rate=0.001, gamma=0.99, epsilon=1.0, buffer_size=10000):
        self.n_states = n_states
        self.n_actions = n_actions
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.memory = deque(maxlen=buffer_size)

        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(256, input_dim=self.n_states, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(self.n_actions, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.n_actions)
        q_values = self.model.predict(state.reshape(1, -1), batch_size=1, verbose=0)
        return np.argmax(q_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state.reshape(1, -1), batch_size=1, verbose=0)[0]))
            target_f = self.model.predict(state.reshape(1, -1), batch_size=1, verbose=0)
            target_f[0][action] = target
            self.model.fit(state.reshape(1, -1), target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

def calculate_reward(action, acerto):
    if action == 1:
        if acerto == 1:
            return 10
        elif acerto == 2:
            return -5
    return 0

def normalize_data(features):
    scaler = StandardScaler()
    return scaler.fit_transform(features)

def main():
    data = pd.read_csv('/home/darkcover/Documentos/Out/dados/data_final.csv')
    features = data[['odd_entrada', 'media5', 'media10', 'media20', 'media40', 'media80', 'media160', 'media320', 'media640', 'acerto', 'level', 'contagem']].values
    rewards = data['apostar'].values
    features = normalize_data(features)

    X_train, X_test, y_train, y_test = train_test_split(features, rewards, test_size=0.2, random_state=42)

    n_epochs = 25
    batch_size = 64

    n_states = X_train.shape[1]
    n_actions = 2
    dqn_agent = DQNAgent(n_states, n_actions)

    epoch_accuracies = []
    epoch_directional_accuracies = []
    epoch_weighted_directional_accuracies = []

    for epoch in range(n_epochs):
        correct_predictions = 0
        overestimations = 0
        weighted_correct_predictions = 0
        weighted_overestimations = 0

        for i in range(len(X_train) - 1):
            state = X_train[i]
            action = dqn_agent.get_action(state)
            true_action = y_train[i]
            acerto = state[-3]
            reward = calculate_reward(action, acerto)
            next_state = X_train[i + 1]
            done = (i == len(X_train) - 2)

            dqn_agent.remember(state, action, reward, next_state, done)
            if done:
                dqn_agent.replay(batch_size)

            if action == true_action:
                correct_predictions += 1
                weighted_correct_predictions += 2
            elif action > true_action:
                overestimations += 1
                weighted_overestimations += 1

        epoch_accuracy = correct_predictions / len(X_test)
        epoch_directional_accuracy = (correct_predictions + overestimations) / len(X_test)
        epoch_weighted_directional_accuracy = (weighted_correct_predictions + weighted_overestimations) / len(X_test)

        epoch_accuracies.append(epoch_accuracy)
        epoch_directional_accuracies.append(epoch_directional_accuracy)
        epoch_weighted_directional_accuracies.append(epoch_weighted_directional_accuracy)

        print(f'Época {epoch + 1}/{n_epochs} - Precisão: {epoch_accuracy:.4f}, Acurácia Direcional: {epoch_directional_accuracy:.4f}, Acurácia Direcional Ponderada: {epoch_weighted_directional_accuracy:.4f}')

    plt.figure(figsize=(12, 6))
    plt.plot(epoch_accuracies, label='Precisão')
    plt.plot(epoch_directional_accuracies, label='Acurácia Direcional')
    plt.plot(epoch_weighted_directional_accuracies, label='Acurácia Direcional Ponderada')
    plt.xlabel('Época')
    plt.ylabel('Métrica')
    plt.legend()
    plt.title('Desempenho do Modelo ao Longo das Épocas')
    plt.show()

if __name__ == "__main__":
    main()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Época 1/25 - Precisão: 2.0032, Acurácia Direcional: 3.9621, Acurácia Direcional Ponderada: 5.9653
Época 2/25 - Precisão: 2.0085, Acurácia Direcional: 3.9611, Acurácia Direcional Ponderada: 5.9696
Época 3/25 - Precisão: 2.0044, Acurácia Direcional: 3.9605, Acurácia Direcional Ponderada: 5.9649
