In [None]:
import gym
import d4rl # Import required to register environments
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

import seaborn as sns
from sklearn.manifold import TSNE

In [None]:
# Global seaborn appearance for every figure in this notebook:
# "ticks" axes style, "paper" context scaled 1.5x, with thicker lines.
sns.set_style(style="ticks")
sns.set_context(
    context="paper",
    font_scale=1.5,
    rc={"lines.linewidth": 2},
)

In [None]:
# Build the environment registered under `name` and fetch its offline dataset.
name = "halfcheetah-medium-v2"
env = gym.make(name)
dataset = env.get_dataset()

# Pull out the three arrays used by the rest of the notebook.
rewards, actions, observations = (
    dataset[key] for key in ("rewards", "actions", "observations")
)

# Frequency Histogram

In [None]:
# Draw one histogram per action dimension (one column of `actions` each).
# Each figure is titled and labelled so the plots can be told apart when
# the notebook output is skimmed.
for dim, action_values in enumerate(actions.T):
    plt.hist(action_values)
    plt.title(f"Action dimension {dim}")
    plt.xlabel("Action value")
    plt.ylabel("Frequency")
    plt.show()

# Clustering

## Split the data into a large-reward part and a small-reward part

In [None]:
# Slice the flat reward array into consecutive fixed-length chunks and plot the
# mean reward (+/- one standard deviation) at each within-chunk step.
# NOTE(review): assumes every trajectory is exactly `trajectory_length` steps
# long and that they are stored back to back — confirm against the dataset.
trajectory_length = 1000
n_trajectories = len(rewards) // trajectory_length

# Shape: (n_trajectories, trajectory_length); a trailing partial chunk is dropped.
trajectory_set = np.array([
    rewards[i * trajectory_length: (i + 1) * trajectory_length]
    for i in range(n_trajectories)
])

# Reduce over axis 0 (across trajectories) -> one value per step.
trajectory_mean = trajectory_set.mean(axis=0)
trajectory_std = trajectory_set.std(axis=0)

# The x axis must match the per-step statistics, not the trajectory count;
# the two only coincide when there are exactly `trajectory_length` trajectories.
x = np.arange(trajectory_mean.shape[0])
trajectory_lower = trajectory_mean - trajectory_std
trajectory_upper = trajectory_mean + trajectory_std

# Shared vertical limits for the marker line and the axes, with a margin of 1.
y_min = trajectory_lower.min() - 1
y_max = trajectory_upper.max() + 1

plt.plot(x, trajectory_mean)
plt.fill_between(x, trajectory_lower, trajectory_upper, alpha=0.3)
plt.vlines(x=100, ymin=y_min, ymax=y_max, label='step 100')
plt.ylim(y_min, y_max)
plt.ylabel('Reward')
plt.xlabel('Step')
plt.legend()
plt.show()

## T-SNE

In [None]:
# Embed the first `max_length` observations in 2-D with t-SNE and colour each
# point by whether its position inside a 1000-step chunk is below `threshold`.
max_length = 5000
threshold = 100
X = observations[:max_length, :]

# Label 0: step index (mod 1000) is below `threshold`; label 1: otherwise.
# Sized from X rather than `max_length` so the labels always match the number
# of embedded points, even when fewer than `max_length` observations exist.
labels = [0 if step % 1000 < threshold else 1 for step in range(len(X))]

# Fixed random_state keeps the embedding reproducible across runs.
tsne = TSNE(n_components=2, random_state=41)
X_reduced = tsne.fit_transform(X)

plt.scatter(X_reduced[:, 0], X_reduced[:, 1],
            c=labels, cmap='jet',
            s=15, alpha=0.5)
plt.axis('off')
plt.show()

# Plot as 1-Dimensional Time Series Data

In [None]:
# Reduce actions and observations to one value per step, using either PCA
# (first principal component) or KMeans (cluster index out of 10 clusters).
dimensionality_reduction = 'pca'

if dimensionality_reduction == 'pca':
    # fit_transform fits and projects in a single call.
    action_feature = PCA(n_components=1).fit_transform(actions)
    state_feature = PCA(n_components=1).fit_transform(observations)
elif dimensionality_reduction == 'kmeans':
    # fit_predict already fits the model; a preceding .fit() would cluster twice.
    action_feature = KMeans(n_clusters=10).fit_predict(actions)
    state_feature = KMeans(n_clusters=10).fit_predict(observations)
else:
    # Fail loudly instead of leaving action_feature/state_feature undefined.
    raise ValueError(
        f"no such feature: {dimensionality_reduction!r} (expected 'pca' or 'kmeans')"
    )

# Scatter the first 10000 rewards against their step index.
# NOTE(review): action_feature / state_feature are computed above but are not
# plotted in this cell — confirm whether they were meant to be shown here.
reward_window = rewards[:10000]
plt.figure(figsize=(20, 10))
plt.scatter(np.arange(len(reward_window)), reward_window)
plt.show()