In [1]:
import random
from collections import deque

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Define your DQN class and functions
class DQN(nn.Module):
    """Fully connected Q-network with two 64-unit ReLU hidden layers.

    Takes a flat state vector of length ``state_size`` and produces one
    Q-value per action (``action_size`` outputs).
    """

    def __init__(self, state_size, action_size):
        super().__init__()
        hidden_units = 64
        # Layers are built in input -> output order.
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Return the raw (un-activated) Q-values for input ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Load and preprocess your video dataset
# NOTE(review): absolute, machine-specific path -- point this at your own video
# (ideally relative to a configurable data directory) before running.
video_dataset = '/Users/martinprabhu/pythonObject1/output.mp4'

# Probe the file once so an unreadable path fails here, loudly, instead of
# silently producing zero-length episodes later (cap.read() just returns
# ret=False). The training loop opens its own capture for every episode, so
# this probe handle is released right away rather than leaked.
cap = cv2.VideoCapture(video_dataset)
try:
    if not cap.isOpened():
        raise IOError("Could not open video file: {}".format(video_dataset))
finally:
    cap.release()

def preprocess_frame(frame, new_width=224, new_height=224):
    """Resize a video frame to a fixed spatial size.

    Args:
        frame: Image array accepted by ``cv2.resize`` (for example a frame
            returned by ``cv2.VideoCapture.read``).
        new_width: Target width in pixels. Defaults to 224.
        new_height: Target height in pixels. Defaults to 224.

    Returns:
        The resized frame as returned by ``cv2.resize``.

    Note:
        The network input size in this notebook is ``224 * 224 * 3``; keep it
        in step with these dimensions if you change the defaults.
    """
    # cv2.resize expects the target size as (width, height).
    return cv2.resize(frame, (new_width, new_height))

# Initialize necessary variables and hyperparameters
state_size = 224 * 224 * 3  # Modify according to your input state dimensions
action_size = 2  # Modify according to your action space size

batch_size = 64
learning_rate = 0.001
num_episodes = 500
num_steps = 100
target_update_freq = 10
num_frames = 3  # Number of frames to stack for the state (currently unused in this cell)

epsilon = 1.0
epsilon_decay = 0.99
epsilon_min = 0.01
gamma = 0.99

# Seed every RNG this cell relies on (exploration, replay sampling, weight
# initialisation) so that a Restart & Run All reproduces the same run.
seed = 0
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Create an instance of the DQN class
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dqn = DQN(state_size, action_size).to(device)
# The target network starts as an exact copy of the online network and is only
# ever used for inference, hence eval().
target_dqn = DQN(state_size, action_size).to(device)
target_dqn.load_state_dict(dqn.state_dict())
target_dqn.eval()

optimizer = optim.Adam(dqn.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

# Initialize replay memory
# Bounded FIFO buffer: every transition stores two flattened frames (state and
# next_state), so an unbounded list would keep growing for all
# num_episodes * num_steps transitions. Assuming 8-bit frames that is roughly
# 300 KB per transition -- TODO confirm and tune the capacity for your machine.
replay_memory_size = 5000
replay_memory = deque(maxlen=replay_memory_size)

# Initialize variables for average episode length and average return
# (running sums during training; divided by the episode count afterwards)
avg_episode_length = 0
avg_return = 0

# Initialize a list to store the episode lengths
episode_lengths = []

# Start training the DQN network
for episode in range(num_episodes):
    # Re-open the video for every episode; released in the finally below.
    cap = cv2.VideoCapture(video_dataset)
    done = False
    total_reward = 0
    episode_length = 0

    try:
        for step in range(num_steps):
            ret, frame = cap.read()
            if not ret:
                # No frame could be read (e.g. end of the video): end the episode.
                break

            frame = preprocess_frame(frame)

            state = frame.flatten()  # The flattened frame is used as the state

            # Choose an action using epsilon-greedy policy
            if np.random.rand() < epsilon:
                action = np.random.randint(action_size)
            else:
                state_tensor = torch.FloatTensor(state).unsqueeze(0).to(device)
                with torch.no_grad():
                    # Act greedily w.r.t. the online network; the target
                    # network is a lagged copy used only for bootstrap targets.
                    q_values = dqn(state_tensor)
                action = torch.argmax(q_values).item()

            # Perform the action and get the next state, reward, and done signal
            # Perform your object tracking action here and update the state, reward, and done signal accordingly
            # NOTE(review): these are still placeholders -- next_state is the
            # same frame as state, reward is always 0 and done is never True.
            next_state = frame.flatten()  # Update the next state
            reward = 0  # Update the reward
            done = False  # Update the done signal

            # Store the transition in the replay memory
            replay_memory.append((state, action, reward, next_state, done))

            # Update the state
            state = next_state
            total_reward += reward
            episode_length += 1

            # Perform the DQN network update once enough transitions are stored
            if len(replay_memory) >= batch_size:
                batch = random.sample(replay_memory, batch_size)
                state_batch, action_batch, reward_batch, next_state_batch, done_batch = zip(*batch)

                # Stack into single arrays first; building a tensor from a
                # tuple of arrays element by element is much slower.
                state_batch_array = np.array(state_batch)
                state_batch_tensor = torch.FloatTensor(state_batch_array).to(device)
                action_batch_tensor = torch.LongTensor(action_batch).unsqueeze(1).to(device)
                reward_batch_tensor = torch.FloatTensor(reward_batch).unsqueeze(1).to(device)
                next_state_batch_array = np.array(next_state_batch)
                next_state_batch_tensor = torch.FloatTensor(next_state_batch_array).to(device)
                done_batch_tensor = torch.FloatTensor(done_batch).unsqueeze(1).to(device)

                # Q(s, a) of the actions that were actually taken
                q_values = dqn(state_batch_tensor).gather(1, action_batch_tensor)

                # Bootstrap target r + gamma * max_a' Q_target(s', a'); no
                # gradient is needed through the target network.
                with torch.no_grad():
                    next_q_max = target_dqn(next_state_batch_tensor).max(1)[0].unsqueeze(1)
                # (1 - done) removes the bootstrap term for terminal transitions
                target_q_values = reward_batch_tensor + gamma * next_q_max * (1 - done_batch_tensor)

                loss = criterion(q_values, target_q_values)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Update the target network
            if step % target_update_freq == 0:
                target_dqn.load_state_dict(dqn.state_dict())

            if done:
                break
    finally:
        # Release the capture even if a step raised, so the handle never leaks.
        cap.release()

    # Decay epsilon
    epsilon = max(epsilon * epsilon_decay, epsilon_min)

    # Update average episode length and average return
    avg_episode_length += episode_length
    avg_return += total_reward

    # Append the episode length to the list
    episode_lengths.append(episode_length)

    # Print episode statistics
    print("Episode: {}, Total Reward: {}, Epsilon: {:.4f}".format(episode, total_reward, epsilon))

# Calculate the average episode length and average return
# Average over the episodes that were actually recorded, so an empty run
# (num_episodes == 0) does not raise ZeroDivisionError.
completed_episodes = len(episode_lengths)
if completed_episodes:
    avg_episode_length /= completed_episodes
    avg_return /= completed_episodes

# Print the average episode length and average return
print("Average Episode Length: {:.2f}".format(avg_episode_length))
print("Average Return: {:.2f}".format(avg_return))

# Plot the length of each episode.
# The x-range is derived from the recorded data so both sequences always have
# the same length; the labels describe what is plotted (one raw length per
# episode, not an average per epoch).
fig, ax = plt.subplots()
ax.plot(range(1, completed_episodes + 1), episode_lengths)
ax.set_xlabel("Episode")
ax.set_ylabel("Episode Length")
ax.set_title("Episode Length per Episode")
plt.show()


Episode: 0, Total Reward: 0, Epsilon: 0.9900
Episode: 1, Total Reward: 0, Epsilon: 0.9801
Episode: 2, Total Reward: 0, Epsilon: 0.9703
Episode: 3, Total Reward: 0, Epsilon: 0.9606
Episode: 4, Total Reward: 0, Epsilon: 0.9510
Episode: 5, Total Reward: 0, Epsilon: 0.9415
Episode: 6, Total Reward: 0, Epsilon: 0.9321
Episode: 7, Total Reward: 0, Epsilon: 0.9227
Episode: 8, Total Reward: 0, Epsilon: 0.9135
Episode: 9, Total Reward: 0, Epsilon: 0.9044
Episode: 10, Total Reward: 0, Epsilon: 0.8953
Episode: 11, Total Reward: 0, Epsilon: 0.8864
Episode: 12, Total Reward: 0, Epsilon: 0.8775
Episode: 13, Total Reward: 0, Epsilon: 0.8687
Episode: 14, Total Reward: 0, Epsilon: 0.8601
Episode: 15, Total Reward: 0, Epsilon: 0.8515
Episode: 16, Total Reward: 0, Epsilon: 0.8429
Episode: 17, Total Reward: 0, Epsilon: 0.8345
Episode: 18, Total Reward: 0, Epsilon: 0.8262
Episode: 19, Total Reward: 0, Epsilon: 0.8179
Episode: 20, Total Reward: 0, Epsilon: 0.8097
Episode: 21, Total Reward: 0, Epsilon: 0.801