1. Generate expert dataset from CSV file

In [None]:
import pandas as pd
from tianshou.data import ReplayBuffer, Batch
import numpy as np
import pickle

# Load the CSV file
df = pd.read_csv('pendulum_data_new.csv')


def _parse_vector(text):
    """Turn a bracketed vector string such as "[0.1, 0.2, 0.3]" into a float array.

    Commas and any run of whitespace are both accepted as separators, so the
    parse does not depend on the exact ", " spacing used when the CSV was
    written.

    Args:
        text: The cell value read from the CSV (converted with ``str``).

    Returns:
        A 1-D ``numpy`` array of dtype ``float``.

    Raises:
        ValueError: If any token cannot be converted to ``float``.
    """
    stripped = str(text).strip().strip('[]')
    return np.array(stripped.replace(',', ' ').split(), dtype=float)


# Initialize the Replay Buffer with one slot per CSV row
buffer_size = len(df)
expert_buffer = ReplayBuffer(buffer_size)

# Populate the Replay Buffer, one transition per CSV row.
# itertuples walks the rows in file order without a label lookup per cell.
for row in df.itertuples(index=False):
    obs = _parse_vector(row.observations)
    act = np.array([row.actions], dtype=float)
    rew = row.rewards
    done = row.terminals
    next_obs = _parse_vector(row.next_observations)

    # Assuming no truncation information is available, use the done flag for terminated
    # and False for truncated (change this based on your specific case)
    terminated = done
    truncated = False

    # NOTE(review): `done` is passed alongside terminated/truncated exactly as before;
    # which of these keys the installed Tianshou version actually stores should be confirmed.
    expert_buffer.add(
        Batch(
            obs=obs,
            act=act,
            rew=rew,
            done=done,
            obs_next=next_obs,
            terminated=terminated,
            truncated=truncated
        )
    )
print("Data loaded into Tianshou Replay Buffer.")

Check data format in CSV file (obsolete)

In [None]:
# Print every transition held in expert_buffer and collect a trimmed copy of
# each one (only the five fields below) in transition_data.
# Nothing is written to disk in this cell.
transition_data = []
fields = ("obs", "act", "rew", "done", "obs_next")
for i in range(len(expert_buffer)):
    transition = expert_buffer[i]
    print("--- Transition {} ---".format(i))
    # Same print order as the field tuple: obs, act, rew, done, obs_next
    for field in fields:
        print(transition[field])
    print("--- End of Transition {} ---".format(i))
    transition_data.append(
        Batch(**{name: transition[name] for name in fields})
    )

Save dataset to pickle file

In [None]:
# Serialize the CSV-derived expert_buffer to disk with pickle (binary mode).
with open("test_expert_dataset.pkl", "wb") as pickle_out:
    pickle.dump(expert_buffer, pickle_out)
    # Confirmation is printed while the file handle is still open.
    print("Data saved to test_expert_dataset.pkl")

Check dataset structure

In [None]:
# Show which fields are stored in the buffer's internal `_meta` object,
# followed by the buffer's `_reserved_keys` attribute.
meta_batch = expert_buffer._meta
print(meta_batch.__dict__.keys())
print(expert_buffer._reserved_keys)
# Optional: view the stored actions as float32
# expert_buffer_act = expert_buffer._meta.__dict__['act'].astype(np.float32)
# print(expert_buffer_act)

2. Check the format of data read from a pickle file

In [None]:
def load_buffer(file_name):
    """Unpickle and return the object stored in ``file_name``.

    Failures are reported on stdout instead of being raised: a missing file
    and any other exception each print a message and yield ``None``.

    NOTE: ``pickle.load`` can execute arbitrary code, so only pass paths to
    files you created or trust.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The unpickled object, or ``None`` if loading failed.
    """
    result = None
    try:
        with open(file_name, "rb") as stream:
            unpickled = pickle.load(stream)
        print("Loaded expert buffer from {}".format(file_name))
        result = unpickled
    except FileNotFoundError:
        print("File not found: {}".format(file_name))
    except Exception as error:
        print("An error occurred while loading the buffer: {}".format(error))
    return result

# Specify the file name directly
file_name = "expert_SAC_Pendulum-v1.pkl"
# file_name = "expert_SAC_JModelicaCSSingleZoneEnv-action-v2.pkl"

# Load the buffer (load_buffer prints the reason and returns None on failure)
buffer = load_buffer(file_name)

if buffer is None:
    # Avoid an unrelated AttributeError on None; the real cause was already printed.
    print("No buffer available from {}; skipping inspection.".format(file_name))
else:
    # Print the attributes of the buffer
    print(buffer._meta.__dict__.keys())
    # Stored observations, viewed as float32
    print(buffer._meta.__dict__['obs'].astype(np.float32))

In [None]:
# Load the pickle file
# NOTE: pickle.load can execute arbitrary code; only open files you created or trust.
file_name = "expert_SAC_Pendulum-v1.pkl"  # Replace with your pickle file name
with open(file_name, "rb") as f:
    loaded_data = pickle.load(f)

# Initialize the Replay Buffer with one slot per stored transition
buffer_size = len(loaded_data)
expert_buffer = ReplayBuffer(buffer_size)

# Decide how to walk the loaded object.
if isinstance(loaded_data, ReplayBuffer):
    # A Tianshou buffer: visit only the indices it reports as filled.
    # NOTE(review): iterating the buffer object directly relies on integer
    # indexing and appears to run over its whole capacity, including unfilled
    # slots; sample_indices(0) is documented to return all available indices.
    transitions = (loaded_data[index] for index in loaded_data.sample_indices(0))
else:
    # Any other iterable (e.g. a list of Batch objects) is consumed as-is.
    transitions = loaded_data

# Populate the Replay Buffer
for data in transitions:
    # If data is already a Batch object or similar
    expert_buffer.add(data)
    # If data is not a Batch object, you need to convert it. Example:
    # expert_buffer.add(
    #     Batch(
    #         obs=data['obs'],
    #         act=data['act'],
    #         rew=data['rew'],
    #         done=data['done'],
    #         obs_next=data['obs_next'],
    #         terminated=data.get('terminated', data['done']),  # Use 'done' if 'terminated' is not available
    #         truncated=data.get('truncated', False)  # Default to False if not available
    #     )
    # )

print("Data loaded into Tianshou Replay Buffer.")

In [None]:
# Print the attributes of the buffer
print(expert_buffer._meta.__dict__.keys())
# print(expert_buffer._meta.__dict__['obs'].astype(np.float32))

# print('====Following is for checking====')
# # Example: inspecting the first few transitions in the buffer
# for i in range(min(len(expert_buffer), 5)):  # Print first 5 transitions
#     transition = expert_buffer[i]
#     print(f"  Transition {i}: {transition}")

# A notebook only displays a bare expression when it is the last statement of
# the cell, so the buffer is placed here rather than at the top.
expert_buffer

In [None]:
# Serialize the rebuilt expert_buffer to disk with pickle (binary mode).
# NOTE(review): another cell in this notebook writes new_vector_buffer to the
# same path, so whichever cell runs last determines the file's content.
with open("test_expert_dataset_Pendulum-v1.pkl", "wb") as pickle_out:
    pickle.dump(expert_buffer, pickle_out)
    # Confirmation is printed while the file handle is still open.
    print("Data saved to test_expert_dataset_Pendulum-v1.pkl")

Generate vectorized replay buffer/dataset from pickle file

In [None]:
from tianshou.data import VectorReplayBuffer, ReplayBuffer
import pickle

# Load the pickle file
# NOTE: pickle.load can execute arbitrary code; only open files you created or trust.
file_name = "expert_SAC_Pendulum-v1.pkl"  # Replace with your pickle file name
with open(file_name, "rb") as f:
    original_vector_buffer = pickle.load(f)

# Report the layout of the loaded buffer
num_buffers = len(original_vector_buffer.buffers)
print("Number of buffers: {}".format(num_buffers))
buffer_size = len(original_vector_buffer)
print("Buffer size: {}".format(buffer_size))

# Size the new buffer so that every sub-buffer can hold the fullest original
# sub-buffer. Using the total transition count alone could overflow a
# sub-buffer when the originals are filled unevenly; when they are filled
# evenly this equals buffer_size.
# NOTE(review): assumes VectorReplayBuffer splits total_size evenly across buffer_num.
largest_sub_buffer = max((len(buf) for buf in original_vector_buffer.buffers), default=0)
total_capacity = largest_sub_buffer * num_buffers
new_vector_buffer = VectorReplayBuffer(total_capacity, num_buffers)

# Populate the new VectorReplayBuffer through its own add(), routing each
# transition to the matching sub-buffer with buffer_ids.
# NOTE(review): writing straight into new_vector_buffer.buffers[i] is believed
# to skip the vector buffer's own length/storage bookkeeping, leaving the
# vector buffer looking empty - confirm against the installed Tianshou version.
for buf_index, buf in enumerate(original_vector_buffer.buffers):
    # sample_indices(0) is documented to return every available index; indexing
    # with a one-element list keeps the leading batch dimension add() expects.
    for index in buf.sample_indices(0):
        new_vector_buffer.add(buf[[index]], buffer_ids=[buf_index])

print("Data loaded into new VectorReplayBuffer.")

In [None]:
# Print the attributes of the first sub-buffer
print(new_vector_buffer.buffers[0]._meta.__dict__.keys())
# print(new_vector_buffer._meta.__dict__['obs'].astype(np.float32))

# print('====Following is for checking====')
# # Example: Inspecting the first few transitions in each sub-buffer
# for buf_index, buf in enumerate(new_vector_buffer.buffers):
#     print(f"Buffer {buf_index}:")
#     for i in range(min(len(buf), 5)):  # Print first 5 transitions of each buffer
#         transition = buf[i]
#         print(f"  Transition {i}: {transition}")
#     print()  # Blank line for readability

# A notebook only displays a bare expression when it is the last statement of
# the cell, so the buffer is placed here rather than at the top.
new_vector_buffer

In [None]:
# Serialize new_vector_buffer to disk with pickle (binary mode).
# NOTE(review): another cell in this notebook writes expert_buffer to the same
# path, so whichever cell runs last determines the file's content.
with open("test_expert_dataset_Pendulum-v1.pkl", "wb") as pickle_out:
    pickle.dump(new_vector_buffer, pickle_out)
    # Confirmation is printed while the file handle is still open.
    print("Data saved to test_expert_dataset_Pendulum-v1.pkl")

Read expert dataset from saved pickle file

In [None]:
import pickle
# from tianshou.data import VectorReplayBuffer  # Uncomment if needed

# Path to your pickle file containing the VectorReplayBuffer
pickle_file_path = 'expert_SAC_Pendulum-v1.pkl'

# Load the buffer from the pickle file
# NOTE: pickle.load can execute arbitrary code; only open files you created or trust.
with open(pickle_file_path, 'rb') as file:
    # `buffer` stays bound to the vectorized replay buffer because the next two
    # cells read buffer.buffer_num and buffer.buffers from it.
    buffer = pickle.load(file)

# One of the replay buffers inside the vectorized buffer (used in this cell only).
# It gets its own name so that `buffer` is not overwritten.
first_buffer = buffer.buffers[0]
first_meta = first_buffer._meta.__dict__

# Print the attribute names available on the sub-buffer
print(dir(first_buffer))

# Print the keys of the metadata
print(first_meta.keys())

# Print the index of true in the done, truncated and terminated
print(np.where(first_meta['done']))
print(np.where(first_meta['truncated']))
print(np.where(first_meta['terminated']))

# Print all transitions when done is True (restricted to the first len(first_buffer) slots)
for i in np.flatnonzero(first_meta['done'][:len(first_buffer)]):
    print("--- Transition {} ---".format(i))
    print(first_meta['obs'][i])
    print(first_meta['act'][i])
    print(first_meta['rew'][i])
    print(first_meta['done'][i])
    print(first_meta['obs_next'][i])
    print("--- End of Transition {} ---".format(i))

The following is based on the vectorized replay buffer object

In [None]:
# Number of transitions currently stored, obtained through len()
buffer_size = len(buffer)
print(f"Current Size of the Buffer: {buffer_size}")

# Capacity, read from the buffer's maxsize attribute
buffer_capacity = buffer.maxsize
print(f"Maximum Capacity of the Buffer: {buffer_capacity}")

# Further attributes read from the buffer object
buffer_num = buffer.buffer_num
print(f"Number of Buffers: {buffer_num}")

stack_num = buffer.stack_num
print(f"Number of Stacked Frames: {stack_num}")

# Take the first entry of buffer.buffers (requires `buffer` to expose `.buffers`)
single_buffer = buffer.buffers[0]

if len(single_buffer) == 0:
    # Nothing stored, so there is no transition to show
    print("The buffer is empty.")
else:
    # Integer indexing yields the transition stored at position 0
    first_transition = single_buffer[0]

    # List each field of that transition as "key: value"
    print("First Transition Contents:")
    for key, value in first_transition.items():
        print(f"{key}: {value}")

Access the expert dataset in a single buffer

In [None]:
# Pick the first sub-buffer out of buffer.buffers
specific_buffer = buffer.buffers[0]

# Show the sub-buffer itself
print("Specific Buffer:", specific_buffer)

# Show how many transitions it reports through len()
specific_buffer_size = len(specific_buffer)
print("Size of the Specific Buffer:", specific_buffer_size)