In [1]:
import pandas as pd
import numpy as np

# Load the dataset
url = "https://datatraining.site/data/GLE-PA-data.csv"
data = pd.read_csv(url)

# Keep only the columns used below. The explicit .copy() makes `data` an
# independent frame, so the column assignments that follow can never be
# treated as writes to a slice of the raw frame (SettingWithCopyWarning).
data = data[['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']].copy()

# Convert the Date column to datetime so the sort below is chronological
data['Date'] = pd.to_datetime(data['Date'])

# Sort the data by date (row labels are left as loaded; the reward function
# indexes rows by position with .iloc)
data = data.sort_values('Date')

# Normalize every numeric column by dividing it by its own maximum.
# The maximum is taken over the whole column, i.e. over all dates.
for column in ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']:
    data[column] = data[column] / data[column].max()

# Q-learning hyperparameters
gamma = 0.95    # discount factor applied to the next state's best Q-value
alpha = 0.1     # learning rate: step size of each Q-table update
epsilon = 0.1   # probability of picking a random action instead of the greedy one

# Action labels; their order fixes the column order of the Q-table
actions = ['buy', 'sell', 'hold']

# One state per row of `data`, identified by the row's position
states = range(len(data))

# Q-table: one row per state, one column per action, all zeros to start
Q = np.zeros(shape=(len(states), len(actions)), dtype=float)

# Reward for a single action taken between two rows of `data`
def get_reward(action, current_state, next_state):
    """Return the one-step reward for ``action`` between two rows of ``data``.

    ``current_state`` and ``next_state`` are positional row indices into the
    module-level ``data`` frame (looked up with ``.iloc``).

    - ``'buy'``  -> ``Adj Close`` at ``next_state`` minus ``Adj Close`` at
      ``current_state``.
    - ``'sell'`` -> ``Adj Close`` at ``current_state`` minus ``Adj Close`` at
      ``next_state``.
    - anything else (including ``'hold'``) -> ``0``, without reading ``data``.
    """
    if action not in ('buy', 'sell'):
        return 0

    adj_close = data['Adj Close']
    price_now = adj_close.iloc[current_state]
    price_next = adj_close.iloc[next_state]

    if action == 'buy':
        return price_next - price_now
    return price_now - price_next

# Q-learning training loop.
# Each of the 1000 episodes starts on a randomly drawn state and steps forward
# one state at a time until the last one. The action chosen only influences
# the reward; the next state is always state + 1.
for episode in range(1000):
    state = np.random.choice(states)
    last_state = len(states) - 1
    while state < last_state:
        # Epsilon-greedy selection: explore with probability epsilon,
        # otherwise take the action with the largest current Q-value.
        if np.random.uniform(0, 1) < epsilon:
            action = np.random.choice(actions)
        else:
            action = actions[np.argmax(Q[state, :])]
        action_idx = actions.index(action)  # column of this action in Q

        next_state = state + 1
        reward = get_reward(action, state, next_state)

        # Move Q[state, action] a fraction alpha towards the bootstrapped
        # target: reward plus the discounted best value of the next state.
        td_target = reward + gamma * np.max(Q[next_state, :])
        Q[state, action_idx] += alpha * (td_target - Q[state, action_idx])

        state = next_state

# Print the learned Q-values
print(Q)

# Greedy policy lookup on the learned Q-table
def make_decision(state):
    """Return the action label with the highest Q-value for ``state``.

    ``state`` indexes a row of the module-level ``Q`` table; the winning
    column index is mapped to its label through ``actions``. When several
    actions share the maximum, ``np.argmax`` picks the first of them.
    """
    q_row = Q[state]
    best_index = int(np.argmax(q_row))
    return actions[best_index]

# Example: print the greedy action for every state except the last one,
# walking through the states in order starting from state 0.
current_state = 0
final_state = len(states) - 1  # the last state is never printed
while current_state < final_state:
    decision = make_decision(current_state)
    print(f"Day {current_state}: Action - {decision}")
    current_state += 1

[[ 0.          0.          0.        ]
 [-0.00062271  0.00062271  0.        ]
 [-0.00299881  0.00320786  0.        ]
 ...
 [-0.00079741  0.03025734  0.01449493]
 [-0.0149246   0.01551658  0.        ]
 [ 0.          0.          0.        ]]
Day 0: Action - buy
Day 1: Action - sell
Day 2: Action - sell
Day 3: Action - sell
Day 4: Action - buy
Day 5: Action - buy
Day 6: Action - sell
Day 7: Action - buy
Day 8: Action - sell
Day 9: Action - sell
Day 10: Action - buy
Day 11: Action - buy
Day 12: Action - buy
Day 13: Action - sell
Day 14: Action - sell
Day 15: Action - sell
Day 16: Action - buy
Day 17: Action - sell
Day 18: Action - buy
Day 19: Action - buy
Day 20: Action - buy
Day 21: Action - sell
Day 22: Action - buy
Day 23: Action - buy
Day 24: Action - buy
Day 25: Action - buy
Day 26: Action - buy
Day 27: Action - sell
Day 28: Action - buy
Day 29: Action - buy
Day 30: Action - buy
Day 31: Action - hold
Day 32: Action - buy
Day 33: Action - sell
Day 34: Action - sell
Day 35: Action - sel