In [37]:
import random
import numpy as np
import tensorflow as tf

In [38]:
random.seed(0)

buff = [0] * 100 

buff_sample = random.sample(buff,10)

print(buff_sample)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [39]:
np_arr = np.zeros(100)
print(np_arr)
s = tf.convert_to_tensor(np_arr,tf.float32)
print(s)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
tf.Tensor(
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.], shape=(100,), dtype=float32)


In [70]:
import numpy as np
import random
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
import gym

class DQN:
    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99, epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01, batch_size=32):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.memory = []
        self.model = self._build_model()

    def _build_model(self):
        # Simple Q-network with fully connected layers
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        # Strip any additional metadata or nested structures from next_state
        next_state = np.array(next_state)  # Ensure next_state is a NumPy array
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        print("State shape:", state.shape)
        print("State:", state)
        state_tensor = tf.convert_to_tensor(state, dtype=tf.float32)
        act_values = self.model.predict(state_tensor)
        # act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        states, targets = [], []
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_state_tensor = tf.convert_to_tensor(next_state, dtype=tf.float32)
                target = (reward + self.gamma * np.amax(self.model.predict(next_state_tensor)[0]))
            print("State type:", type(state))
            print("State:", state)
            print("Next state type:", type(next_state))
            print("Next state:", next_state)
            state_tensor = tf.convert_to_tensor(state, dtype=tf.float32)
            target_f = self.model.predict(state_tensor)
            target_f[0][action] = target
            states.append(state[0])
            targets.append(target_f[0])
        states_tensor = tf.convert_to_tensor(states, dtype=tf.float32)
        targets_tensor = tf.convert_to_tensor(targets, dtype=tf.float32)
        self.model.fit(states_tensor, targets_tensor, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay




    def train(self, env, episodes):
        for e in range(episodes):
            state = env.reset()
            state = np.expand_dims(state, axis=0)
            for time in range(500):  # Limiting the number of steps per episode to 500
                action = self.act(state)
                next_state, reward, done, _ = env.step(action)[:4]
                reward = reward if not done else -10  # Penalty for termination
                next_state = np.expand_dims(next_state, axis=0)
                self.remember(state, action, reward, next_state, done)
                state = next_state
                if done:
                    print("episode: {}/{}, score: {}, e: {:.2}".format(e, episodes, time, self.epsilon))
                    break
            self.replay()

def main():
    # Initialize environment
    env = gym.make('CartPole-v1')  # Example environment (CartPole)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    
    # Initialize DQN agent
    agent = DQN(state_size, action_size)
    
    # Train the agent
    episodes = 1000  # Number of episodes for training
    agent.train(env, episodes)

if __name__ == "__main__":
    main()


episode: 0/1000, score: 16, e: 1.0
episode: 1/1000, score: 39, e: 1.0
State type: <class 'numpy.ndarray'>
State: [[-0.09397387 -0.76313007  0.07319549  1.1415876 ]]
Next state type: <class 'numpy.ndarray'>
Next state: [[-0.10923646 -0.5690371   0.09602724  0.8727279 ]]
State type: <class 'numpy.ndarray'>
State: [[0.11716747 0.15467612 0.11186315 0.9555584 ]]
Next state type: <class 'numpy.ndarray'>
Next state: [[0.120261   0.3481302  0.13097432 0.7000102 ]]
State type: <class 'numpy.ndarray'>
State: [[ 0.05523582  0.94115067  0.07186548 -0.3533125 ]]
Next state type: <class 'numpy.ndarray'>
Next state: [[ 0.07405884  0.7450843   0.06479923 -0.03886184]]
State type: <class 'numpy.ndarray'>
State: [[-0.14290428 -0.18977392  0.15924773  0.5325201 ]]
Next state type: <class 'numpy.ndarray'>
Next state: [[-0.14669976  0.00279216  0.16989812  0.29394704]]
State type: <class 'numpy.ndarray'>
State: [[-0.04823403 -0.5670598   0.00587451  0.8257928 ]]
Next state type: <class 'numpy.ndarray'>
Ne

ValueError: setting an array element with a sequence.