In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import random
from collections import deque
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# from tensorflow.keras.callbacks import ModelCheckpoint

class DQNAgent:
    """Double-DQN agent with an online network, a target network and a replay buffer.

    The online network (``model``) picks the best next action and the target
    network (``target_model``) scores it when the training targets are built.
    Actions are selected epsilon-greedily; epsilon is decayed after every
    replay step until it reaches ``epsilon_min``.
    """

    def __init__(self, state_size, action_size):
        """Create the agent.

        Args:
            state_size: Length of the flattened state vector fed to the networks.
            action_size: Number of discrete actions (width of the output layer).
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # replay buffer; oldest entries drop off
        self.gamma = 0.95  # discount factor applied to the bootstrapped value
        self.epsilon = 1.0  # current exploration probability
        self.epsilon_min = 0.01  # floor for epsilon
        self.epsilon_decay = 0.99  # multiplicative decay applied per replay step
        self.learning_rate = 0.001
        self.model = self._build_model()
        self.target_model = self._build_model()
        self.update_target_model()

    def _build_model(self):
        """Build and compile the Q-network: two 64-unit ReLU layers, linear output."""
        model = Sequential()
        model.add(Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=self.learning_rate))
        return model

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an action index for ``state`` using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the argmax of the online network's output for ``state``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def _decay_epsilon(self):
        """Multiply epsilon by ``epsilon_decay`` without going below ``epsilon_min``."""
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def replay(self, batch_size, callbacks=None):
        """Train the online network on a random minibatch from the replay buffer.

        Args:
            batch_size: Number of transitions to sample. ``random.sample``
                raises ``ValueError`` if the buffer holds fewer than this.
            callbacks: Optional list of Keras callbacks forwarded to ``fit``.
        """
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Each stored state is indexed with [0] (as a (1, state_size) row would be);
        # stack those rows so every network is queried once per minibatch
        # instead of once per transition.
        states = np.array([s[0] for s in states])
        next_states = np.array([s[0] for s in next_states])
        actions = np.array(actions, dtype=int)
        rewards = np.array(rewards, dtype=float)
        dones = np.array(dones, dtype=bool)

        # np.array(...) copies, so the targets can be modified in place safely.
        targets = np.array(self.model.predict(states, verbose=0))
        online_next = self.model.predict(next_states, verbose=0)
        target_next = self.target_model.predict(next_states, verbose=0)

        # Online net chooses the next action, target net evaluates it.
        rows = np.arange(len(minibatch))
        best_next = np.argmax(online_next, axis=1)
        bootstrapped = rewards + self.gamma * target_next[rows, best_next]
        # Terminal transitions use the raw reward with no bootstrapped value.
        targets[rows, actions] = np.where(dones, rewards, bootstrapped)

        # Fit exactly once per replay step (the minibatch used to be fitted twice).
        history = self.model.fit(states, targets, epochs=1, verbose=0, callbacks=callbacks, validation_split=0.1)
        print(f"Loss: {history.history['loss'][0]}, Val_loss: {history.history['val_loss'][0]}")

        # Without this, epsilon stays at 1.0 and act() never consults the network.
        self._decay_epsilon()




def load_data(filename):
    """Read the CSV file at ``filename`` and return it as a pandas DataFrame."""
    return pd.read_csv(filename)


def preprocess_data(df):
    """Return the OHLCV columns of ``df`` rescaled with a freshly fitted MinMaxScaler.

    The result is the array produced by ``fit_transform`` with columns in the
    order open, high, low, close, volume.
    """
    feature_columns = ['open', 'high', 'low', 'close', 'volume']
    return MinMaxScaler().fit_transform(df[feature_columns])

def train_ddqn(episodes, training_data, window_size=10):
    """Train a DQNAgent on a price series and return it with per-episode stats.

    Each state is a sliding window of ``window_size`` consecutive rows,
    flattened to one row vector. Action 1 buys at the current row's
    second-to-last column (the close price per the original inline comment),
    action 2 sells the oldest held position, and any other action holds.
    The reward is the realised profit of a sale and 0 otherwise.

    Args:
        episodes: Number of full passes over ``training_data``.
        training_data: 2-D array indexed as ``[row, feature]``.
        window_size: Number of consecutive rows that make up one state.

    Returns:
        Tuple ``(agent, all_profits, all_episode_lengths)``: the trained
        agent, the total profit of each episode and the number of steps
        taken in each episode.
    """
    state_size = window_size * training_data.shape[1]
    action_size = 3
    agent = DQNAgent(state_size, action_size)
    batch_size = 64

    all_profits = []
    all_episode_lengths = []

    # Add EarlyStopping and ModelCheckpoint callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=5)
    model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min')
    callbacks = [early_stopping, model_checkpoint]

    last_step = len(training_data) - 1

    for e in range(episodes):
        print(f"Starting episode {e + 1}")
        total_profit = 0
        # FIFO of purchase prices; deque gives O(1) removal of the oldest buy.
        inventory = deque()
        state = training_data[np.newaxis, :window_size, :]

        for t in range(window_size, len(training_data)):
            flat_state = np.reshape(state, (1, state_size))
            action = agent.act(flat_state)
            next_state = training_data[np.newaxis, t - window_size + 1:t + 1, :]
            flat_next_state = np.reshape(next_state, (1, state_size))

            close_price = training_data[t][-2]
            reward = 0

            if action == 1:  # Buy
                inventory.append(close_price)  # Buy at Close price
            elif action == 2 and inventory:  # Sell
                profit = close_price - inventory.popleft()
                total_profit += profit
                reward = profit
                print(f"Step: {t}, Profit: {profit}")  # Print profit at each step

            done = t == last_step
            agent.remember(flat_state, action, reward, flat_next_state, done)
            state = next_state

            if done:
                print(f"Episode: {e + 1}/{episodes}, Total Profit: {total_profit}")
                all_profits.append(total_profit)
                all_episode_lengths.append(t - window_size + 1)

            if len(agent.memory) > batch_size:
                # Pass the callbacks list to the replay method
                agent.replay(batch_size, callbacks=callbacks)

        # Sync the target network with the online network once per episode.
        agent.update_target_model()
        print(f"Finished episode {e + 1}")

    return agent, all_profits, all_episode_lengths

def plot_results(all_profits, all_episode_lengths):
    """Show two stacked line charts: profit per episode and episode length."""
    _, axes = plt.subplots(2, 1, figsize=(12, 8))

    # (series, legend label, y-axis label) for the top and bottom panels.
    panels = (
        (all_profits, "Profits per Episode", "Profit"),
        (all_episode_lengths, "Episode Length", "Length"),
    )
    for axis, (series, legend_label, y_label) in zip(axes, panels):
        axis.plot(series, label=legend_label)
        axis.set_xlabel("Episode")
        axis.set_ylabel(y_label)
        axis.legend()

    plt.tight_layout()
    plt.show()

def generate_signals(agent, training_data, window_size=10):
    """Return the buy and sell signals the agent's online network produces.

    Signals come from the greedy policy (argmax of ``agent.model``'s output)
    rather than from ``agent.act``, whose epsilon-greedy exploration would
    inject random, non-reproducible actions into the result.

    Args:
        agent: Object exposing a Keras-style ``model.predict``.
        training_data: 2-D array indexed as ``[row, feature]``.
        window_size: Number of consecutive rows that make up one state.

    Returns:
        Tuple ``(buy_signals, sell_signals)``; each is a list of
        ``(t, price)`` pairs where ``price`` is ``training_data[t][-2]``.
    """
    buy_signals = []
    sell_signals = []

    steps = range(window_size, len(training_data))
    if not steps:
        # Not enough rows to form a single window; nothing to predict on.
        return buy_signals, sell_signals

    # The state used at step t is the window of rows [t - window_size, t),
    # flattened row-major. Build them all and predict in a single batch.
    flat_states = np.stack(
        [np.reshape(training_data[t - window_size:t], -1) for t in steps]
    )
    q_values = agent.model.predict(flat_states, verbose=0)
    actions = np.argmax(q_values, axis=1)

    for t, action in zip(steps, actions):
        price = training_data[t][-2]
        if action == 1:  # Buy
            buy_signals.append((t, price))
        elif action == 2:  # Sell
            sell_signals.append((t, price))

    return buy_signals, sell_signals


def visualize_signals(df, buy_signals, sell_signals):
    """Plot the close price with buy (green ^) and sell (red v) markers.

    Args:
        df: DataFrame holding the price series in a ``close`` column
            (``Close`` is accepted as a fallback).
        buy_signals: Sequence of ``(step, price)`` pairs.
        sell_signals: Sequence of ``(step, price)`` pairs.

    Markers are drawn at the DataFrame's close price for each ``step``
    (a positional row index). The price stored in the signal is not used:
    ``main`` in this file builds signals from the MinMax-scaled array, and
    those values would not line up with the raw price curve.
    """
    # The other code in this file reads the lowercase 'close' column; the
    # original 'Close' lookup raised KeyError on that data.
    close_column = 'close' if 'close' in df.columns else 'Close'
    close_prices = df[close_column].to_numpy()

    plt.figure(figsize=(12, 6))
    plt.plot(close_prices, label='Close Price', alpha=0.5)

    marker_styles = (
        (buy_signals, '^', 'g', 'Buy'),
        (sell_signals, 'v', 'r', 'Sell'),
    )
    for signals, marker, color, label in marker_styles:
        steps = [signal[0] for signal in signals]
        plt.scatter(steps, close_prices[steps], marker=marker, color=color, label=label)

    plt.xlabel('Time Step')
    plt.ylabel('Close Price')
    plt.legend()
    plt.show()

def save_agent(agent, filename):
    """Pickle ``agent`` and write the bytes to ``filename``, overwriting it."""
    serialized = pickle.dumps(agent)
    with open(filename, 'wb') as handle:
        handle.write(serialized)

def load_agent(filename):
    """Return the object unpickled from ``filename``.

    Unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(filename, 'rb') as handle:
        payload = handle.read()
    return pickle.loads(payload)

def main():
    """Run the full pipeline: load, scale, train, plot, signal, and persist."""
    csv_path = 'GOOGL_data.csv'
    agent_path = 'trained_agent.pkl'
    n_episodes = 20
    lookback = 10

    # Load the raw prices and show the first rows as a sanity check.
    df = load_data(csv_path)
    print(df.head())

    # Scale the features, then train the agent on them.
    scaled = preprocess_data(df)
    agent, profits, episode_lengths = train_ddqn(n_episodes, scaled, lookback)

    print("Training completed.")
    print("Profits per episode:", profits)

    plot_results(profits, episode_lengths)

    # Derive buy/sell signals on the training data and report them.
    buys, sells = generate_signals(agent, scaled, lookback)
    print("Buy signals:", buys)
    print("Sell signals:", sells)

    # Overlay the signals on the price chart.
    visualize_signals(df, buys, sells)

    # Persist the agent, then read it back; the reloaded copy is not used further.
    save_agent(agent, agent_path)
    load_agent(agent_path)

# Run the pipeline only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


      date      open      high       low     close   volume   Name
0   2/8/13  390.4551  393.7283  390.1698  393.0777  6031199  GOOGL
1  2/11/13  389.5892  391.8915  387.2619  391.6012  4330781  GOOGL
2  2/12/13  391.2659  394.3440  390.0747  390.7403  3714176  GOOGL
3  2/13/13  390.4551  393.0677  390.3750  391.8214  2393946  GOOGL
4  2/14/13  390.2549  394.7644  389.2739  394.3039  3466971  GOOGL
Metal device set to: Apple M2 Max
Starting episode 1
Step: 14, Profit: 0.03715157014569015
Step: 17, Profit: 0.07271313761971343
Step: 27, Profit: 0.000978166947873449
Step: 29, Profit: 0.006071663244847425
Step: 34, Profit: -0.02035858098399168
Step: 35, Profit: 0.0014937263972696346
Step: 36, Profit: 0.00853392896255123
Step: 39, Profit: -0.0465914588503169
Step: 42, Profit: -0.0112753958760059
Step: 43, Profit: 0.016865196367496393
Step: 46, Profit: 0.01182994721653241
Step: 47, Profit: -0.05846424900895619
Step: 49, Profit: 0.04381100004573146
Step: 56, Profit: 0.06519588611479477
Step: 

2023-05-03 18:20:03.230621: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Loss: 0.0020242510363459587, Val_loss: 0.002939658472314477
Loss: 0.0008132655057124794, Val_loss: 0.0007232683128677309
Loss: 0.0004590284952428192, Val_loss: 0.001025667181238532
Loss: 0.0007345132762566209, Val_loss: 0.0006471142987720668
Loss: 0.0011163384187966585, Val_loss: 0.0012309107696637511
Loss: 0.0009283949038945138, Val_loss: 0.002898678183555603
Step: 80, Profit: -0.027750191950822067
Loss: 0.0006553537677973509, Val_loss: 0.0014018997317180037
Loss: 0.0005986051401123405, Val_loss: 0.0003862700832542032
Loss: 0.0006116404547356069, Val_loss: 0.0013120460789650679
Loss: 0.0010164744453504682, Val_loss: 0.0013090759748592973
Loss: 0.0008552299113944173, Val_loss: 0.0012353641213849187
Loss: 0.0007867600652389228, Val_loss: 0.0006975340656936169
Loss: 0.0009142427006736398, Val_loss: 0.00047831941628828645
Loss: 0.0009960019960999489, Val_loss: 0.00323806912638247
Step: 88, Profit: 0.022286031997420075
Loss: 0.0011585220927372575, Val_loss: 0.001245090621523559
Loss: 0.001