In [1]:
import gym

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import sklearn
from sklearn.metrics import mean_squared_error, r2_score

import pickle

import os

In [2]:
torch.cuda.empty_cache()

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
# Initialize environment
#env_name = 'CartPole-v1'
#env_name = 'MountainCarContinuous-v0'
#env_name = 'MountainCar-v0'
env_name = 'Pendulum-v1'

#Defining bounds for each state variable
bounds = {'CartPole-v1' : [[-4.8, 4.8], ['-inf', 'inf'], [-0.418, 0.418], ['-inf', 'inf']], 
        'MountainCar-v0' : [['-inf', 'inf'], ['-inf', 'inf']],
          'MountainCarContinuous-v0' : [['-inf', 'inf'], ['-inf', 'inf']],
          'Pendulum-v1' : [[-1, 1], [-1, 1], [-8, 8]]
         }

# Create the CartPole environment
env = gym.make(env_name)

num_samples = 50000

# Store the data from environment in the form (state,action)-> next state

In [4]:
def add_noise(state, terminated, bound):
    # Add noise to the next state
    out_bound = True
#     print(bound)
#     print(type(bound))
#     print(type(bound[0]))
#     print(type(bound[0][0]))
    while out_bound:
        noisy_state = state + np.random.normal(loc=0, scale=1e-8, size=state.shape)  # You can adjust the scale of noise as needed
        for i in range(len(bound)):
            if (isinstance(bound[i][0], str) or state[i] > bound[i][0]) and (isinstance(bound[i][1], str) or state[i] < bound[i][1]):
                out_bound = False
            if (noisy_state[i] - state[i]) / state[i] > 0.1:
                out_bound = True
        if out_bound:
            print(state)
            print(noisy_state)
        if terminated:
            out_bound = False
    return noisy_state

# Function to generate episodes
def generate_episodes(num_samples, max_episode_length, bound):
    episodes = []
    episode_inputs = []
    episode_outputs = []
    samples = []
    sample_count = 0
    state_size = env.observation_space.shape[0]

    while sample_count < num_samples:
        state = env.reset()[0]
        done = False
        episode_length = 0

        while episode_length < max_episode_length:
            # Choose a random action for exploration
            action = env.action_space.sample()
            next_state, reward, terminated, truncated , info = env.step(action)
            
            #print(type(action))
            #print(len(action))
            
            if isinstance(action, int):
                input_tensor = torch.tensor(np.concatenate((state, [action])), dtype=torch.float32)
            elif isinstance(action, np.ndarray):
                input_tensor = torch.tensor(np.concatenate((state, action)), dtype=torch.float32)
            # Concatenate the current state and action
            
            next_state = add_noise(next_state, terminated, bound)
            
            output_tensor = torch.tensor(np.concatenate((next_state, [reward], [float(terminated)])), dtype=torch.float32)
            
            sample = list(state) + [str(action)] + [reward] + list(next_state)
            #print(sample)
            samples.append(sample)
            #output_tensor = torch.tensor(next_state, dtype=torch.float32)

            episode_inputs.append(input_tensor)
            episode_outputs.append(output_tensor)
            
            sample_count = sample_count + 1
            episode_length = episode_length + 1

            if terminated:
                break
            state = next_state
        

        episodes.append((episode_inputs, episode_outputs))
    episode_inputs = torch.stack(episode_inputs)
    episode_outputs = torch.stack(episode_outputs)
    df = pd.DataFrame(samples)
    
    columns = [f'variable_{i}' for i in range(state_size)] + ['action','reward'] + [f'nx_variable_{i}' for i in range(state_size)]
    print(columns)
    columns = df.columns
    return episode_inputs, episode_outputs, df




# Generating Samples for all four environments with 10k, 20k, 30k, 40k, 50k training samples.

In [5]:
env_names = ['CartPole-v1', 'MountainCarContinuous-v0', 'MountainCar-v0', 'Pendulum-v1']
# env_names = ['CartPole-v1']

for env_name in env_names:

    env = gym.make(env_name)

    for num_samples in range(10000, 55000, 10000):
        episodes_input, episodes_output, df = generate_episodes(num_samples, 200, bounds[env_name])

        num_training_samples = int(0.8 * len(episodes_input))

        train_x = episodes_input[1:num_training_samples]
        train_y = episodes_output[1:num_training_samples]

        test_x = episodes_input[num_training_samples:]
        test_y = episodes_output[num_training_samples:]


        print(len(episodes_input))
        print(len(episodes_output))

        train_dataset = torch.utils.data.TensorDataset(train_x, train_y)
        test_dataset = torch.utils.data.TensorDataset(test_x, test_y)
        
        # Save datasets
        with open(env_name + '_' + str(int(num_samples/1000)) + 'k_train_dataset.pkl', 'wb') as f:
            pickle.dump(train_dataset, f)

        with open(env_name + '_' + str(int(num_samples/1000)) + 'k_test_dataset.pkl', 'wb') as f:
            pickle.dump(test_dataset, f)

        df.to_csv(env_name + '_' + str(int(num_samples/1000)) + 'k_sample.csv', index=False)
        
#         break
#     break

  if not isinstance(terminated, (bool, np.bool8)):


['variable_0', 'variable_1', 'variable_2', 'variable_3', 'action', 'reward', 'nx_variable_0', 'nx_variable_1', 'nx_variable_2', 'nx_variable_3']
10006
10006


## Testing values in stored Dataloader

In [51]:
num_samples = 50000

# Load the training dataset
with open(env_name + '_' + str(int(num_samples/1000)) + 'k_train_dataset.pkl', 'rb') as f:
    train_dataset = pickle.load(f)

# Load the testing dataset
with open(env_name + '_' + str(int(num_samples/1000)) + 'k_test_dataset.pkl', 'rb') as f:
    test_dataset = pickle.load(f)

# Function to print the first 5 values of the dataset
def print_first_5_values(dataset, dataset_name):
#     print(f"First 5 values of {dataset_name}:")
    count  = 0
    print(len(dataset))
    for i in range(11855, 11865):
#         print(dataset[i])
        input_tensor, output_tensor = dataset[i]
        if output_tensor[-1] == 0:
            count = count+1
        print(f"Sample {i + 1}:")
        print("Input:", input_tensor)
        print("Output:", output_tensor)
#     print(dataset[1])
    print(count)

# Print the first 5 values of the training dataset
print_first_5_values(train_dataset, "training dataset")

# Print the first 5 values of the testing dataset
# print_first_5_values(test_dataset, "testing dataset")

39999
Sample 11856:
Input: tensor([-1.6052,  2.3752,  6.8472, -0.7606])
Output: tensor([ 1.5264,  1.0805,  6.2406, -7.2393,  0.0000])
Sample 11857:
Input: tensor([ 1.5264,  1.0805,  6.2406, -1.9458])
Output: tensor([ 1.2596,  0.0835,  7.9672, -9.3669,  0.0000])
Sample 11858:
Input: tensor([ 1.2596,  0.0835,  7.9672, -1.0572])
Output: tensor([ -1.3964,   1.1590,   6.9604, -11.4406,   0.0000])
Sample 11859:
Input: tensor([-1.3964,  1.1590,  6.9604,  0.1044])
Output: tensor([ -0.6795,   1.1073,   8.0195, -13.7633,   0.0000])
Sample 11860:
Input: tensor([-0.6795,  1.1073,  8.0195, -1.3090])
Output: tensor([ -1.4767,  -1.6258,  11.1175, -14.3073,   0.0000])
Sample 11861:
Input: tensor([-1.4767, -1.6258, 11.1175,  0.7687])
Output: tensor([  0.0296,  -0.8379,   6.5337, -11.8767,   0.0000])
Sample 11862:
Input: tensor([ 0.0296, -0.8379,  6.5337, -0.1866])
Output: tensor([-1.0686, -0.4587,  7.9813, -9.8577,  0.0000])
Sample 11863:
Input: tensor([-1.0686, -0.4587,  7.9813, -0.8348])
Output: tens

### Finding Max and Min Bounds

In [41]:
# Initialize CartPole environment
#env_name = 'CartPole-v1'
#env_name = 'MountainCarContinuous-v0'
#env_name = 'MountainCar-v0'
env_name = 'Pendulum-v1'

# Create the CartPole environment
env = gym.make(env_name)

#num_samples = 10000

In [7]:
state_size = env.observation_space.shape[0]

max_value = 0
min_value = 0


for num_samples in range(50000, 60000, 10000):
    with open(env_name + '_' + str(int(num_samples/1000)) + 'k_train_dataset.pkl', 'rb') as f:
        train_dataset = pickle.load(f)

    with open(env_name + '_' + str(int(num_samples/1000)) + 'k_test_dataset.pkl', 'rb') as f:
        test_dataset = pickle.load(f)
        
    
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)
    
#     print("Train Data")

    for data in train_loader:
        inputs, labels = data
        
        max_input = (inputs.max(dim=0)[0])
        max_label = (labels.max(dim=0)[0])
        
        print('max_input', max_input)
        print('max_label', max_label)
        
        max_input = max_input[:state_size]
        max_label = max_label[:-2]

#         print('max_input', max_input)
#         print('max_label', max_label)

        max_all_state = torch.max(max_input, max_label)

#         print('max_all_state', max_all_state)

        if isinstance(max_value, int):
#             max_value = max_all_state
            max_value = max_input
        else:
#             max_value = torch.max(max_all_state, max_value)
            max_value = torch.max(max_input, max_value)
#             print('max_value', max_value)
            
        min_input = (inputs.min(dim=0)[0])
        min_label = (labels.min(dim=0)[0])
        
        print('min_input', min_input)
        print('min_label', min_label)
        
        min_input = min_input[:state_size]
        min_label = min_label[:-2]

#         print('max_input', min_input)
#         print('max_label', min_label)

        min_all_state = torch.min(min_input, min_label)

#         print('max_all_state', min_all_state)

        if isinstance(min_value, int):
#             min_value = min_all_state
            min_value = min_input
        else:
#             min_value = torch.min(min_all_state, min_value)
            min_value = torch.min(min_input, min_value)
#             print('min_value', min_value)
    
#     print("Test Data")
    for data in test_loader:
        inputs, labels = data
        
        max_input = (inputs.max(dim=0)[0])[:state_size]
        max_label = (labels.max(dim=0)[0])[:-2]

#         print('max_input', max_input)
#         print('max_label', max_label)

        max_all_state = torch.max(max_input, max_label)

#         print('max_all_state', max_all_state)

#         max_value = torch.max(max_all_state, max_value)
        max_value = torch.max(max_input, max_value)
#         print('max_value', max_value)   
        
        min_input = (inputs.min(dim=0)[0])
        min_label = (labels.min(dim=0)[0])
        
#         print('min_input', min_input)
#         print('min_label', min_label)
        
        min_input = min_input[:state_size]
        min_label = min_label[:-2]

#         print('max_input', min_input)
#         print('max_label', min_label)

        min_all_state = torch.min(min_input, min_label)

#         print('max_all_state', min_all_state)

#        min_value = torch.min(min_all_state, min_value)
        min_value = torch.max(min_input, min_value)
#         print('min_value', min_value)
        

print(min_value)
print(max_value)

max_input tensor([ 4.9642,  4.4407, 11.1886,  2.0000])
max_label tensor([ 4.9642e+00,  4.4407e+00,  1.1189e+01, -2.9544e-04,  0.0000e+00])
min_input tensor([ -5.1541,  -4.7110, -11.1756,  -2.0000])
min_label tensor([ -5.1541,  -4.7110, -11.1756, -16.2625,   0.0000])
tensor([ -4.5428,  -4.1662, -10.3733])
tensor([ 4.9642,  4.5456, 11.1886])


In [6]:
train_dataset = torch.utils.data.TensorDataset(train_x, train_y)
test_dataset = torch.utils.data.TensorDataset(test_x, test_y)

# Save datasets
with open(env_name + '_' + str(int(num_samples/1000)) + 'k_train_dataset.pkl', 'wb') as f:
    pickle.dump(train_dataset, f)

with open(env_name + '_' + str(int(num_samples/1000)) + 'k_test_dataset.pkl', 'wb') as f:
    pickle.dump(test_dataset, f)
    
df.to_csv(env_name + '_' + str(int(num_samples/1000)) + 'k_sample.csv', index=False)