In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-06-29,3.8,5.0,3.508,4.778,4.778,93831500
1,2010-06-30,5.158,6.084,4.66,4.766,4.766,85935500
2,2010-07-01,5.0,5.184,4.054,4.392,4.392,41094000
3,2010-07-02,4.6,4.62,3.742,3.84,3.84,25699000
4,2010-07-06,4.0,4.0,3.166,3.222,3.222,34334500


In [None]:
data = pd.read_csv('TSLA.csv')
data.head()

In [8]:
data = data[['Date', 'Open']]  # Keep only the required columns
data.rename(columns={'Open': 'price'}, inplace=True)  # Rename for consistency
data['returns'] = data['price'].pct_change().fillna(0)  # Calculate percentage change

# Print the processed dataset for verification
print(data.head())


         Date  price   returns
0  2010-06-29  3.800  0.000000
1  2010-06-30  5.158  0.357368
2  2010-07-01  5.000 -0.030632
3  2010-07-02  4.600 -0.080000
4  2010-07-06  4.000 -0.130435


In [9]:
# Define the environment
class PortfolioManagementEnv:
    def __init__(self, data, initial_balance=1000):
        self.data = data.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0
        self.current_step = 0
        self.state_size = 2  # [current price, position]
        self.done = False

    def reset(self):
        self.balance = self.initial_balance
        self.position = 0
        self.current_step = 0
        self.done = False
        return self._get_state()

    def _get_state(self):
        return [self.data.iloc[self.current_step]['price'], self.position]

    def step(self, action):
        """
        Actions: 0 = Hold, 1 = Buy, 2 = Sell
        """
        if self.done:
            raise Exception("Episode has ended. Please reset the environment.")

        price = self.data.iloc[self.current_step]['price']
        reward = 0

        if action == 1:  # Buy
            self.position += 1
            self.balance -= price
        elif action == 2:  # Sell
            if self.position > 0:
                self.position -= 1
                self.balance += price

        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            self.done = True

        portfolio_value = self.balance + (self.position * price)
        reward = portfolio_value - self.initial_balance

        return self._get_state(), reward, self.done

In [10]:

# Define the Q-Learning Agent
class QLearningAgent:
    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.95, epsilon=1.0, epsilon_decay=0.995):
        self.state_size = state_size
        self.action_size = action_size
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay

    def get_state_key(self, state):
        return tuple(np.round(state, 2))

    def get_action(self, state):
        if np.random.rand() < self.epsilon:
            return np.random.choice(range(self.action_size))
        state_key = self.get_state_key(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_size)
        return np.argmax(self.q_table[state_key])

    def update_q_table(self, state, action, reward, next_state, done):
        state_key = self.get_state_key(state)
        next_state_key = self.get_state_key(next_state)
        if state_key not in self.q_table:
            self.q_table[state_key] = np.zeros(self.action_size)
        if next_state_key not in self.q_table:
            self.q_table[next_state_key] = np.zeros(self.action_size)

        q_update = reward
        if not done:
            q_update += self.discount_factor * np.max(self.q_table[next_state_key])
        self.q_table[state_key][action] += self.learning_rate * (q_update - self.q_table[state_key][action])

    def decay_epsilon(self):
        self.epsilon *= self.epsilon_decay

In [None]:

env = PortfolioManagementEnv(data)
agent = QLearningAgent(state_size=env.state_size, action_size=3)  # Actions: Hold, Buy, Sell

episodes = 500
for episode in range(episodes):
    state = env.reset()
    total_reward = 0
    while True:
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        agent.update_q_table(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        if done:
            break
    agent.decay_epsilon()
    print(f"Episode {episode + 1}/{episodes}, Total Reward: {total_reward:.2f}")


Episode 1/500, Total Reward: 5396393.09
Episode 2/500, Total Reward: 8355744.45
Episode 3/500, Total Reward: 3404809.93
Episode 4/500, Total Reward: 13956811.19
Episode 5/500, Total Reward: 10274247.32
Episode 6/500, Total Reward: 12700933.24
Episode 7/500, Total Reward: 4662238.86
Episode 8/500, Total Reward: 5571698.53
Episode 9/500, Total Reward: 10802670.58
Episode 10/500, Total Reward: 3709787.70
Episode 11/500, Total Reward: 9289545.52
Episode 12/500, Total Reward: 18613703.33
Episode 13/500, Total Reward: 23190681.57
Episode 14/500, Total Reward: 21272099.71
Episode 15/500, Total Reward: 8979753.09
Episode 16/500, Total Reward: 7868627.30
Episode 17/500, Total Reward: 12166065.38
Episode 18/500, Total Reward: 5500136.73
Episode 19/500, Total Reward: 14950977.67
Episode 20/500, Total Reward: 7965682.55
Episode 21/500, Total Reward: 8055477.37
Episode 22/500, Total Reward: 12200845.13
Episode 23/500, Total Reward: 24680805.93
Episode 24/500, Total Reward: 6559304.28
Episode 25/500

In [None]:
plt.figure(figsize=(10, 6))
plt.plot(range(1, episodes + 1), episode_rewards, label='Total Reward')
plt.title('Total Reward per Episode')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.legend()
plt.grid()
plt.show()

In [None]:
state = env.reset()
total_reward = 0
while True:
    action = np.argmax(agent.q_table.get(agent.get_state_key(state), np.zeros(3)))
    state, reward, done = env.step(action)
    total_reward += reward
    if done:
        break
print(f"Test Total Reward: {total_reward:.2f}")