### Reinforcement Learning Using Deep Learning

#### Verify CUDA usage

In [1]:
import torch
print(torch.cuda.is_available())
print(torch.version.cuda) 

True
11.8


#### Imports

In [2]:
import numpy as np
import random
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque

In [3]:
# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


#### Environment

In [4]:
class TFTEnvironment:
    def __init__(self):
        self.state_size = 50  # Example state size
        self.action_size = 10  # Example action size
        self.state = np.zeros(self.state_size)
        self.episode_length = 200  # Longer episodes
        self.current_step = 0

    def reset(self):
        self.state = np.zeros(self.state_size)
        self.current_step = 0
        return self.state

    def step(self, action):
        reward = self.calculate_reward(action)
        self.state[action] += 1
        self.current_step += 1
        done = self.current_step >= self.episode_length
        return self.state, reward, done

    def calculate_reward(self, action):
        base_reward = random.random() * (action + 1)
        synergy_bonus = self.get_synergy_bonus(action)
        placement_penalty = self.get_placement_penalty()
        damage_dealt_bonus = self.get_damage_dealt_bonus(action)
        survival_time_bonus = self.get_survival_time_bonus()
        reward = base_reward + synergy_bonus + damage_dealt_bonus + survival_time_bonus - placement_penalty
        return reward

    def get_synergy_bonus(self, action):
        return random.random() * 2 if self.state[action] >= 2 else 0

    def get_placement_penalty(self):
        return random.random() * 1.5 if self.current_step < self.episode_length / 2 else 0

    def get_damage_dealt_bonus(self, action):
        return random.random() * action

    def get_survival_time_bonus(self):
        return random.random() * (self.episode_length - self.current_step) / self.episode_length

#### DQN (Deep-Q-Network) Agent

In [5]:
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 24)
        self.fc2 = nn.Linear(24, 24)
        self.fc3 = nn.Linear(24, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = DQN(state_size, action_size).to(device)
        self.target_model = DQN(state_size, action_size).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)
        self.update_target_model()

    def update_target_model(self):
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state).unsqueeze(0).to(device)
        with torch.no_grad():
            act_values = self.model(state)
        return torch.argmax(act_values).item()

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            state = torch.FloatTensor(state).unsqueeze(0).to(device)
            next_state = torch.FloatTensor(next_state).unsqueeze(0).to(device)
            target = self.model(state).detach().clone()
            if done:
                target[0][action] = reward
            else:
                t = self.target_model(next_state).detach()
                target[0][action] = reward + self.gamma * torch.max(t)
            self.optimizer.zero_grad()
            loss = nn.MSELoss()(self.model(state), target)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_state_dict(torch.load(name))

    def save(self, name):
        torch.save(self.model.state_dict(), name)


In [6]:
# Create the environment and agent
env = TFTEnvironment()
agent = DQNAgent(state_size=env.state_size, action_size=env.action_size)

#### Training the DQN Agent

In [None]:
# Training loop
episodes = 1000
batch_size = 32

for e in range(episodes):
    state = env.reset()
    for time in range(env.episode_length):
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            agent.update_target_model()
            print(f"Episode {e}/{episodes} finished")
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

agent.save("models/dqn_model.pth")

In [7]:
# Load the original PyTorch model
original_model = DQN(env.state_size, env.action_size)
original_model.load_state_dict(torch.load("models/dqn_model.pth"))

def check_for_nans(model):
    for name, param in model.named_parameters():
        if torch.isnan(param).any():
            print(f"NaNs found in {name}")
            return True
    return False

if check_for_nans(original_model):
    print("NaNs found in model weights. Reinitializing weights.")
    original_model.apply(lambda m: m.reset_parameters() if hasattr(m, 'reset_parameters') else None)
else:
    print("No NaNs found in model weights.")

torch.save(original_model.state_dict(), 'models/dqn_model.pth')

original_model.to(device)
original_model.eval()

# Generate a random input
input_data = np.random.random((1, env.state_size)).astype(np.float32)
torch_input = torch.tensor(input_data).to(device)

No NaNs found in model weights.


#### Convert the PyTorch model to ONNX format

In [8]:
import torch.onnx

# Convert the reinitialized model to ONNX format
onnx_file_path = 'models/dqn_model.onnx'
torch.onnx.export(
    original_model, 
    torch_input, 
    onnx_file_path, 
    export_params=True, 
    opset_version=10, 
    do_constant_folding=True, 
    input_names=['input'], 
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)

print(f"Model successfully saved to {onnx_file_path}")

Model successfully saved to models/dqn_model.onnx


#### Optimize the ONNX model using TensorRT

In [9]:
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_file_path, engine_file_path):
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
        config = builder.create_builder_config()
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1GB

        with open(onnx_file_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('Failed to parse the ONNX file.')
                for error in range(parser.num_errors()):
                    print(parser.get_error(error))
                return None

        # Check and print network inputs and outputs
        print(f"Number of network inputs: {network.num_inputs}")
        for i in range(network.num_inputs):
            input = network.get_input(i)
            print(f"Input {i}: {input.name}, shape: {input.shape}, dtype: {input.dtype}")

        print(f"Number of network outputs: {network.num_outputs}")
        for i in range(network.num_outputs):
            output = network.get_output(i)
            print(f"Output {i}: {output.name}, shape: {output.shape}, dtype: {output.dtype}")

        # Mark the input and output for optimization profile
        input_tensor = network.get_input(0)
        profile = builder.create_optimization_profile()
        profile.set_shape(input_tensor.name, (1, 50), (16, 50), (32, 50))
        config.add_optimization_profile(profile)

        # Attempt to build the engine and catch any errors
        try:
            serialized_engine = builder.build_serialized_network(network, config)
            if serialized_engine is None:
                print("Failed to build the engine.")
                return None

            with open(engine_file_path, 'wb') as f:
                f.write(serialized_engine)
            return serialized_engine

        except Exception as e:
            print(f"Error during engine build: {e}")
            return None

onnx_file_path = 'models/dqn_model.onnx'
engine_file_path = 'models/dqn_model.trt'

engine = build_engine(onnx_file_path, engine_file_path)
if engine:
    print(f"Model successfully optimized and saved to {engine_file_path}")
else:
    print("Failed to optimize the model.")

Number of network inputs: 1
Input 0: input, shape: (-1, 50), dtype: DataType.FLOAT
Number of network outputs: 1
Output 0: output, shape: (-1, 10), dtype: DataType.FLOAT
Model successfully optimized and saved to models/dqn_model.trt


#### Perform inference with the optimized TensorRT model

In [11]:
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import gc  # Import garbage collector

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def allocate_buffers(engine, context, batch_size=1):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda.Stream()

    for binding in range(engine.num_io_tensors):
        name = engine.get_tensor_name(binding)
        shape = engine.get_tensor_shape(name)
        
        # Replace dynamic dimensions with the actual batch size
        shape = [batch_size if dim == -1 else dim for dim in shape]
        
        dtype = trt.nptype(engine.get_tensor_dtype(name))
        
        print(f"Binding {binding}: {name}, shape: {shape}, dtype: {dtype}")
        
        size = trt.volume(shape)
        
        # Check available GPU memory
        free_mem, total_mem = cuda.mem_get_info()
        print(f"Available GPU Memory: {free_mem} bytes, Total GPU Memory: {total_mem} bytes")

        print(f"Memory Needed: {size * np.dtype(dtype).itemsize}")
        
        # Ensure enough memory is available
        if free_mem < size * np.dtype(dtype).itemsize:
            raise MemoryError("Not enough GPU memory to allocate buffer")

        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        
        bindings.append(int(device_mem))
        if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
            inputs.append((host_mem, device_mem))
        else:
            outputs.append((host_mem, device_mem))
    
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Transfer input data to the GPU
    for host_mem, device_mem in inputs:
        cuda.memcpy_htod(device_mem, host_mem)
    
    # Run inference
    context.execute_v2(bindings)
    
    # Transfer predictions back from the GPU
    for host_mem, device_mem in outputs:
        cuda.memcpy_dtoh(host_mem, device_mem)
    
    # Synchronize the stream
    stream.synchronize()
    
    return [host_mem for host_mem, device_mem in outputs]

# Load the TensorRT engine
with open('models/dqn_model.trt', 'rb') as f, trt.Runtime(TRT_LOGGER) as runtime:
    serialized_engine = f.read()
    engine = runtime.deserialize_cuda_engine(serialized_engine)

context = engine.create_execution_context()

# Set the input shape dynamically
input_shape = (1, env.state_size)  # Assuming batch size of 1
context.set_input_shape('input', input_shape)

inputs, outputs, bindings, stream = allocate_buffers(engine, context, batch_size=input_shape[0])

# Example input
input_data = np.random.random(input_shape).astype(np.float32)
inputs[0][0][:] = input_data.ravel()  # Filling the host input buffer

# Print the size and type of the input data
print(f"Input data shape: {input_data.shape}, size: {input_data.size}, dtype: {input_data.dtype}")

# Run garbage collection to free up memory
gc.collect()

# Perform inference
output = do_inference(context, bindings, inputs, outputs, stream)
print(output)

Binding 0: input, shape: [1, 50], dtype: <class 'numpy.float32'>
Available GPU Memory: 7229931520 bytes, Total GPU Memory: 8585084928 bytes
Memory Needed: 200
Binding 1: output, shape: [1, 10], dtype: <class 'numpy.float32'>
Available GPU Memory: 7229931520 bytes, Total GPU Memory: 8585084928 bytes
Memory Needed: 40
Input data shape: (1, 50), size: 50, dtype: float32
[array([ 0.10805288,  0.06681406,  0.12057865, -0.00891978, -0.10379699,
        0.16202234,  0.09430063,  0.08437087,  0.02555529, -0.29642096],
      dtype=float32)]


#### Compare PyTorch and TensorRT Models

In [12]:
import time
import tracemalloc
import os
import psutil

# Measure throughput
def measure_throughput(model, input_data, device, num_iterations=1000):
    start_time = time.time()
    with torch.no_grad():
        for _ in range(num_iterations):
            _ = model(input_data)
    end_time = time.time()
    return num_iterations / (end_time - start_time)

def measure_trt_throughput(context, bindings, inputs, outputs, stream, num_iterations=1000):
    start_time = time.time()
    for _ in range(num_iterations):
        _ = do_inference(context, bindings, inputs, outputs, stream)
    end_time = time.time()
    return num_iterations / (end_time - start_time)

# Measure memory usage
def measure_memory_usage(model, input_data, device):
    tracemalloc.start()
    with torch.no_grad():
        _ = model(input_data)
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    return current, peak

# Measure latency
def measure_latency(model, input_data, device, num_iterations=1000):
    latencies = []
    with torch.no_grad():
        for _ in range(num_iterations):
            start_time = time.time()
            _ = model(input_data)
            latencies.append(time.time() - start_time)
    return np.mean(latencies), np.std(latencies)

def measure_trt_latency(context, bindings, inputs, outputs, stream, num_iterations=1000):
    latencies = []
    for _ in range(num_iterations):
        start_time = time.time()
        _ = do_inference(context, bindings, inputs, outputs, stream)
        latencies.append(time.time() - start_time)
    return np.mean(latencies), np.std(latencies)

# Measure power consumption
def measure_power_consumption(pid, duration=60):
    process = psutil.Process(pid)
    start_time = time.time()
    start_energy = process.cpu_times().user + process.cpu_times().system
    time.sleep(duration)
    end_energy = process.cpu_times().user + process.cpu_times().system
    end_time = time.time()
    power_consumption = (end_energy - start_energy) / (end_time - start_time)
    return power_consumption

# Measure PyTorch throughput
pytorch_throughput = measure_throughput(original_model, torch_input, device)

# Measure TensorRT throughput
trt_throughput = measure_trt_throughput(context, bindings, inputs, outputs, stream)

# Measure memory usage for PyTorch
pytorch_memory_current, pytorch_memory_peak = measure_memory_usage(original_model, torch_input, device)

# Measure latency for PyTorch
pytorch_latency_mean, pytorch_latency_std = measure_latency(original_model, torch_input, device)

# Measure latency for TensorRT
trt_latency_mean, trt_latency_std = measure_trt_latency(context, bindings, inputs, outputs, stream)

# Measure power consumption
pid = os.getpid()
pytorch_power_consumption = measure_power_consumption(pid)

# Compare accuracy
def compare_accuracy(pytorch_model, trt_context, trt_bindings, trt_inputs, trt_outputs, trt_stream, input_data, device):
    torch_input = torch.tensor(input_data).to(device)
    with torch.no_grad():
        pytorch_output = pytorch_model(torch_input).cpu().numpy()
    trt_inputs[0][0][:] = input_data.ravel()
    trt_output = do_inference(trt_context, trt_bindings, trt_inputs, trt_outputs, trt_stream)
    return pytorch_output, trt_output[0]

pytorch_output, trt_output = compare_accuracy(original_model, context, bindings, inputs, outputs, stream, input_data, device)
mae = np.mean(np.abs(pytorch_output - trt_output))
mse = np.mean((pytorch_output - trt_output) ** 2)

# Print results
print(f"PyTorch Throughput: {pytorch_throughput:.2f} inferences/second")
print(f"TensorRT Throughput: {trt_throughput:.2f} inferences/second")
print(f"PyTorch Memory Usage: Current = {pytorch_memory_current} bytes, Peak = {pytorch_memory_peak} bytes")
print(f"PyTorch Latency: Mean = {pytorch_latency_mean:.6f} seconds, Std = {pytorch_latency_std:.6f} seconds")
print(f"TensorRT Latency: Mean = {trt_latency_mean:.6f} seconds, Std = {trt_latency_std:.6f} seconds")
print(f"PyTorch Power Consumption: {pytorch_power_consumption:.6f} watts")
print(f"Mean Absolute Error between PyTorch and TensorRT outputs: {mae}")
print(f"Mean Squared Error between PyTorch and TensorRT outputs: {mse}")

PyTorch Throughput: 1062.21 inferences/second
TensorRT Throughput: 8430.12 inferences/second
PyTorch Memory Usage: Current = 88 bytes, Peak = 728 bytes
PyTorch Latency: Mean = 0.000534 seconds, Std = 0.001903 seconds
TensorRT Latency: Mean = 0.000114 seconds, Std = 0.001975 seconds
PyTorch Power Consumption: 0.000000 watts
Mean Absolute Error between PyTorch and TensorRT outputs: 2.5729090339154936e-05
Mean Squared Error between PyTorch and TensorRT outputs: 1.0547458462184522e-09
