# Signatures and RL

In [None]:
%run base.ipynb

import gym
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
import torch
import tqdm

import reinforcement_learning
import utils

In [None]:
# Shared environment used by every cell below (random play, training, rendering).
# NOTE(review): Gym registers MountainCar-v0 with a 200-step time limit, so
# confirm that `play`/`train` behave as intended with `steps = 300`.
env = gym.make('MountainCar-v0')

# Passed as the step argument to `reinforcement_learning.play` and `.train`
# (presumably the per-episode step budget -- verify against those functions).
steps = 300
# Number of random-play runs below, and the episode argument passed to `train`.
episodes = 2000

## Random play exploration

Unsurprisingly, exploring with purely random play rarely succeeds.

In [None]:
# Baseline: run `episodes` independent episodes with a random policy and count
# how many succeed. `play` returns a pair whose second element is the success
# flag (unpacked as `states, success` in the final cell of this notebook).
random_policy = reinforcement_learning.RandomPolicy(env)
successes = [
    reinforcement_learning.play(env, random_policy, steps, render=False)[1]
    # Send the progress bar to stdout so it appears with the cell's output.
    for _ in tqdm.trange(episodes, file=sys.stdout)
]
# f-string for consistency with the other formatted print in this notebook.
print(f"Number of successes: {sum(successes)}")

## Training a policy

In [None]:
# Build the two policies compared in this notebook, both from the same
# environment: a signature-based policy and an RNN-based policy.
sigpolicy = reinforcement_learning.SigPolicy(env)
rnnpolicy = reinforcement_learning.RNNPolicy(env)

In [None]:
# Train the signature policy with the shared `steps`/`episodes` settings.
# The returned history is indexable; its element [2] is what gets plotted later.
sighistory = reinforcement_learning.train(env, sigpolicy, steps, episodes)

In [None]:
# Train the RNN policy with the same `steps`/`episodes` settings as the
# signature policy, so that the two histories can be plotted side by side.
rnnhistory = reinforcement_learning.train(env, rnnpolicy, steps, episodes)

## Plot Results

In [None]:
# Parameter counts of the two policies, shown as (signature, RNN).
(utils.count_parameters(sigpolicy), utils.count_parameters(rnnpolicy))

In [None]:
plt.figure(num=2, figsize=(10, 5))

# Element [2] of each training history, smoothed with a 100-entry rolling mean.
sigp = pd.Series(sighistory[2])
rnnp = pd.Series(rnnhistory[2])
sigma = sigp.rolling(window=100).mean()
rnnma = rnnp.rolling(window=100).mean()

# Draw both smoothed curves on the same axes (signatures first, then RNN).
plt.plot(sigma, label="Signatures")
plt.plot(rnnma, label="RNN")

plt.xlabel('Generation')
plt.ylabel('Final position')
# Legend is expanded horizontally and anchored along the top edge of the axes.
plt.legend(
    prop={'size': 16},
    ncol=3,
    mode='expand',
    bbox_to_anchor=(0, 1, 1, 0),
)
plt.show()

## Play

In [None]:
# Play one episode with the trained signature policy, with rendering enabled,
# and report whether it succeeded. `states` is not used further in this cell.
states, success = reinforcement_learning.play(env, sigpolicy, steps, render=True)
print(f"Success: {success}")