In [1]:
import os, sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from utils.helpers import launch_env, wrap_env, view_results_ipython, force_done
from utils.helpers import SteeringToWheelVelWrapper, ResizeWrapper, ImgWrapper

import numpy as np

import torch
import torch.nn as nn
from torch import optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

INFO:aido-protocols:aido-protocols 5.0.18
[2m17:29:18|[0mzj[2m|__init__.py:4|<module>(): [0m[32mzuper-ipce 5.1.0[0m
[2m17:29:18|[0mzuper-typing[2m|__init__.py:5|<module>(): [0m[32mzuper-typing 5.0.12[0m
[2m17:29:18|[0mzuper-commons[2m|__init__.py:9|<module>(): [0m[32mzuper-commons 5.0.6[0m
[2m17:29:18|[0mzuper-typing[2m|zeneric2.py:48|<module>(): [0m[32mIn Python 3.6[0m
[2m17:29:18|[0mzuper-nodes[2m|__init__.py:8|<module>(): [0m[32mzuper-nodes 5.0.8[0m


# Generating Data from a Teacher

In order to use imitation learning in practice, we need to have _demonstrations_. However, demonstrations need to be gathered; in general, we can collect the demonstrations that we need in one of four ways:

* Human demonstrator teleoperating the robot
* Data logs or historical data
* Learned policy (i.e from reinforcement learning) is rolled out
* Hard-coded expert is rolled out

While these trajectories can be gathered on real robots, to speed up collection, we work mainly in simulation. Duckietown has a [vast](https://logs.duckietown.org) collection of logs gathered over years of running programs on Duckiebots, but here, we focus on the last data collection method: a hard-coded expert.

**<font color='red'>Question 1:</font> What are some pros and cons of each approach? List two pros and two cons for each of the four methods listed above.**

We first introduce a _pure-pursuit expert_ - often, in robotic imitation learning, we have controllers to control many of our robots and systems; a pure-pursuit expert is about the simplest controller that we can have for a Duckiebot.

Our expert drives with ground-truth state data; while more complicated controllers incorporate and fuse observational data to estimate a state, we use data that'd a robot would not normally have access to.

In [64]:
class PurePursuitExpert:
    def __init__(self, env, ref_velocity=0.04, position_threshold=0.8, gain=10,
                 following_distance=0.3, max_iterations=1000):
        self.env = env.unwrapped
        self.following_distance = following_distance
        self.max_iterations = max_iterations
        self.ref_velocity = ref_velocity
        self.gain = gain
        self.position_threshold = position_threshold

    def predict(self, observation):  
        # Our expert drives with "cheating" data, something your implementation will not have access to
        closest_point, closest_tangent = self.env.closest_curve_point(self.env.cur_pos, self.env.cur_angle)

        iterations = 0
        lookup_distance = self.following_distance
        curve_point = None
        while iterations < self.max_iterations:
            # Project a point ahead along the curve tangent,
            # then find the closest point to to that
            follow_point = closest_point + closest_tangent * lookup_distance
            curve_point, _ = self.env.closest_curve_point(follow_point, self.env.cur_angle)

            # If we have a valid point on the curve, stop
            if curve_point is not None:
                break

            iterations += 1
            lookup_distance *= 0.5

        # Compute a normalized vector to the curve point
        point_vec = curve_point - self.env.cur_pos
        point_vec /= np.linalg.norm(point_vec)

        dot = np.dot(self.env.get_right_vec(), point_vec)
        steering = self.gain * -dot

        return self.ref_velocity, steering

In [65]:
nsteps = 2500

In [66]:
local_env = launch_env()
local_env = wrap_env(local_env)
local_env = ResizeWrapper(local_env)
local_env = ImgWrapper(local_env)

local_env.reset()
wrapper = SteeringToWheelVelWrapper()

# Create an demonstrator
expert = PurePursuitExpert(env=local_env,ref_velocity=0.4, position_threshold=0.8, 
                           gain=5, following_distance=0.8)

observations = []
actions = []

# Collect samples

for steps in range(0, nsteps):
    # use our 'expert' to predict the next action.
    action = expert.predict(None)
    action = wrapper.convert(action)
    observation, reward, done, info = local_env.step(action)
    observations.append(observation)
    actions.append(action)

    if done:
        local_env.reset()
        
local_env.close()

print('\nDone!\n')

[2m18:49:46|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:49:46|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:578|_load_map(): [0m[35mloading map file "/duckietown/simulation/src/gym_duckietown/maps/loop_empty.yaml"[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.13719561 0.         1.88880899] corresponds to tile at (7, 3) which is not drivable: {'coords': (7, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[3

[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.57292165 0.         1.88314807][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.48172154 0.         1.84866924][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.66412176 0.         1.91762689][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.61429624 0.         1.77370793][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.50690495 0.         2.23654429] corresponds to tile at (5, 3) which is not drivable: {'coords': (5, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gy

[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.50609039 0.         2.05544313][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.12972415 0.         2.27350789] corresponds to tile at (7, 3) which is not drivable: {'coords': (7, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.022444   0.         2.32019802][0m
[2m18:49:46|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.98353555 0.         2.23079789][0m
[2m18:49:46|[0mgym-duckietown[2m|

[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.38071712 0.         2.71640816][0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.28873639 0.         2.68406941][0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.47269785 0.         2.74874692][0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.41952363 0.         2.60603129][0m
[2m18:49:51|[0mgym-duckietown[2m|simulator.py:559|reset(): [0m[32mStarting at [2.64153703 0.         2.88468928] 4.836049502872963[0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.33902017 0.         2.82524844] correspon

[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.36666698 0.         0.75300265][0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.32870397 0.         0.56173371][0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.23292411 0.         0.68014598][0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.30655235 0.         1.18763146] corresponds to tile at (5, 2) which is not drivable: {'coords': (5, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:15|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:15|[0mgym-duckietown[2

[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.49681812 0.         1.96007661] corresponds to tile at (5, 3) which is not drivable: {'coords': (5, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.61244082 0.         1.97797609][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.59752459 0.         2.07432834][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.62735705 0.         1.88162384][0m
[2m18:50:24|[0mgym-duckietown[2m|

[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.01598834 0.         2.11758689][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.99679017 0.         2.02199568][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [4.03518651 0.         2.21317811][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.1306978  0.         2.09454909][0m
[2m18:50:24|[0mgym-duckietown[2m|simulator.py:559|reset(): [0m[32mStarting at [3.78554996 0.         2.10374274] 4.702071320710585[0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.33832405 0.         2.81924571] corresponds to tile at (3, 4) which is not drivable: {'coords': (3, 4), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:26|

[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.11137141 0.         1.21021764][0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.48637281 0.         1.37703924] corresponds to tile at (5, 2) which is not drivable: {'coords': (5, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.51607019 0.         1.28417205][0m
[2m18:50:26|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.54576757 0.         1.19130485][0m
[2m18:50:26|[0mgym-duckietown[2m|

[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.9520602  0.         1.08939365][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.52468862 0.         0.53311341] corresponds to tile at (4, 0) which is not drivable: {'coords': (4, 0), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.54130968 0.         0.62918625][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.52468862 0.         0.53311341][0m
[2m18:50:36|[0mgym-duckietown[2m|

[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.83092254 0.         0.60243768][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.868816   0.         0.69227273][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.79302907 0.         0.51260264][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.72312048 0.         0.64790984][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.58312267 0.         1.21305648] corresponds to tile at (4, 2) which is not drivable: {'coords': (4, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1250

[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.55826295 0.         0.5754657 ] corresponds to tile at (4, 0) which is not drivable: {'coords': (4, 0), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.55826295 0.         0.5754657 ][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.64534792 0.         0.53161986][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.47117798 0.         0.61931154][0m
[2m18:50:36|[0mgym-duckietown[2m|

[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.55826064 0.         0.64954754][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.46078006 0.         0.65149372][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.65574121 0.         0.64760137][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.55592523 0.         0.53257085][0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.86842933 0.         1.24497133] corresponds to tile at (4, 2) which is not drivable: {'coords': (4, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:50:36|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:50:36|


Done!



In [1]:
view_results_ipython(local_env)

NameError: name 'view_results_ipython' is not defined

**<font color='red'>Question 2:</font> When you visualize the results, what are two major issues? Play with the expert's code and the execution code above, and list five changes that you tried, as well as their _qualitative_ effects on performance (i.e cover the most distance). DO NOT RESEED THE ENVIRONMENT**

# Defining a Model

While the above expert isn't great, it's a start. What's best is that we now have image `observations` and real-valued `actions` that we can use to train a neural network in Pytorch. Our imitation learner will driver directly from observations, and will be trained with a popular imitation learning loss: Mean Squared Error.

In [69]:
class Model(nn.Module):
    def __init__(self, action_dim, max_action):
        super(Model, self).__init__()

        # TODO: You'll need to change this!
        flat_size = 31968
        
        ###########################################
        # QUESTION 3. What does the next line do? #
        ###########################################
        # these define the activation functions for the 
        self.lr = nn.LeakyReLU()
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

        self.conv1 = nn.Conv2d(3, 32, 8, stride=2)
        self.conv2 = nn.Conv2d(32, 32, 4, stride=2)

        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(32)

        #Changed from 0.1
        self.dropout = nn.Dropout(.2)

        self.lin1 = nn.Linear(flat_size, 100)
        self.lin2 = nn.Linear(100, action_dim)

        self.max_action = max_action

    def forward(self, x):
        x = self.bn1(self.relu(self.conv1(x)))
        x = self.bn2(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # flatten
        x = self.dropout(x)
        x = self.lr(self.lin1(x))

        x = self.lin2(x)
        x = self.max_action * self.tanh(x)
        
        return x

# Training from the Teacher Data

We can then write our _training loop_ : the piece of code that implements the process of stochastic gradient descent to minimize the loss between our network's predicted actions and those implemented by our expert.

In [70]:
nepochs = 400
batchsize = 4

actions = np.array(actions)
observations = np.array(observations)

model = Model(action_dim=2, max_action=1.)
model.train().to(device)

# weight_decay is L2 regularization, helps avoid overfitting
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    weight_decay=0.5e-3
)

avg_loss = 0
for epoch in range(nepochs):
    optimizer.zero_grad()

    batch_indices = np.random.randint(0, observations.shape[0], (batchsize))
    obs_batch = torch.from_numpy(observations[batch_indices]).float().to(device)
    act_batch = torch.from_numpy(actions[batch_indices]).float().to(device)

    model_actions = model(obs_batch)

    loss = (model_actions - act_batch).norm(2).mean()
    loss.backward()
    optimizer.step()

    loss = loss.data.item()
    avg_loss = avg_loss * 0.995 + loss * 0.005

    print('epoch %d, loss=%.3f' % (epoch, avg_loss))

    # Periodically save the trained model
    if epoch % 5 == 0:
        torch.save(model.state_dict(), 'models/imitate.pt')
        
print('\nDone!\n')

epoch 0, loss=0.006
epoch 1, loss=0.009
epoch 2, loss=0.013
epoch 3, loss=0.015
epoch 4, loss=0.017
epoch 5, loss=0.021
epoch 6, loss=0.023
epoch 7, loss=0.026
epoch 8, loss=0.029
epoch 9, loss=0.030
epoch 10, loss=0.031
epoch 11, loss=0.034
epoch 12, loss=0.036
epoch 13, loss=0.038
epoch 14, loss=0.041
epoch 15, loss=0.042
epoch 16, loss=0.044
epoch 17, loss=0.045
epoch 18, loss=0.047
epoch 19, loss=0.048
epoch 20, loss=0.049
epoch 21, loss=0.051
epoch 22, loss=0.053
epoch 23, loss=0.054
epoch 24, loss=0.055
epoch 25, loss=0.056
epoch 26, loss=0.058
epoch 27, loss=0.059
epoch 28, loss=0.061
epoch 29, loss=0.063
epoch 30, loss=0.064
epoch 31, loss=0.065
epoch 32, loss=0.067
epoch 33, loss=0.068
epoch 34, loss=0.069
epoch 35, loss=0.070
epoch 36, loss=0.072
epoch 37, loss=0.075
epoch 38, loss=0.076
epoch 39, loss=0.078
epoch 40, loss=0.080
epoch 41, loss=0.080
epoch 42, loss=0.081
epoch 43, loss=0.082
epoch 44, loss=0.083
epoch 45, loss=0.085
epoch 46, loss=0.087
epoch 47, loss=0.088
ep

epoch 379, loss=0.163
epoch 380, loss=0.162
epoch 381, loss=0.162
epoch 382, loss=0.162
epoch 383, loss=0.162
epoch 384, loss=0.162
epoch 385, loss=0.162
epoch 386, loss=0.163
epoch 387, loss=0.162
epoch 388, loss=0.162
epoch 389, loss=0.162
epoch 390, loss=0.162
epoch 391, loss=0.162
epoch 392, loss=0.162
epoch 393, loss=0.162
epoch 394, loss=0.162
epoch 395, loss=0.162
epoch 396, loss=0.161
epoch 397, loss=0.161
epoch 398, loss=0.161
epoch 399, loss=0.161

Done!



**<font color='red'>Question 3:</font> Qualitatively explain at least 2 changes you made to both the expert and network (architecture, hyperparameters, episode lengths, number of training episodes / epochs, etc.) (including partial points if we find that you didn't make changes to any part of our code - hyperparameters, network, etc.)**


**<font color='red'>Question 4:</font> Explain the issues with the imitation learning loop above. Specifically, comment on the loss function and training objective. Explain at least one issue, and propose a way that could help solve the issues you've brought up.**

In [71]:
force_done(local_env)
local_env = launch_env()
local_env = wrap_env(local_env)
local_env = ResizeWrapper(local_env)
local_env = ImgWrapper(local_env)

obs = local_env.reset()

done = False
rewards = []
nsteps = 500
for steps in range(0, nsteps):
    obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
    action = model(obs)
    action = action.squeeze().data.cpu().numpy()
    obs, reward, done, info = local_env.step(action) 
    rewards.append(reward)
    
    if done:
        local_env.reset()
        print("Reset!")

print(info)
        
local_env.close()


print("\nDone!\n")
    

[2m18:53:47|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:53:47|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:578|_load_map(): [0m[35mloading map file "/duckietown/simulation/src/gym_duckietown/maps/loop_empty.yaml"[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.13719561 0.         1.88880899] corresponds to tile at (7, 3) which is not drivable: {'coords': (7, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[3

[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.57292165 0.         1.88314807][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.48172154 0.         1.84866924][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.66412176 0.         1.91762689][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.61429624 0.         1.77370793][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.50690495 0.         2.23654429] corresponds to tile at (5, 3) which is not drivable: {'coords': (5, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gy

[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.50609039 0.         2.05544313][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.12972415 0.         2.27350789] corresponds to tile at (7, 3) which is not drivable: {'coords': (7, 3), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.022444   0.         2.32019802][0m
[2m18:53:47|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.98353555 0.         2.23079789][0m
[2m18:53:47|[0mgym-duckietown[2m|

[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.38071712 0.         2.71640816][0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.28873639 0.         2.68406941][0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.47269785 0.         2.74874692][0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.41952363 0.         2.60603129][0m
[2m18:53:52|[0mgym-duckietown[2m|simulator.py:559|reset(): [0m[32mStarting at [2.64153703 0.         2.88468928] 4.836049502872963[0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[1.06046908 0.         0.57816269] correspon

[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.36666698 0.         0.75300265][0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.32870397 0.         0.56173371][0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.23292411 0.         0.68014598][0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.30655235 0.         1.18763146] corresponds to tile at (5, 2) which is not drivable: {'coords': (5, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1, 1, 1])}[0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:54:05|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:54:05|[0mgym-duckietown[2

Reset!
{'Simulator': {'action': [0.50564337, 0.49128446], 'lane_position': {'dist': -0.07664491667525605, 'dot_dir': 0.987074670420132, 'angle_deg': -9.222047162193872, 'angle_rad': -0.1609550867544826}, 'robot_speed': 0.29857731501100476, 'proximity_penalty': 0.0, 'cur_pos': [3.761992665057099, 0.0, 1.1587016141755275], 'cur_angle': -1.383795027392724, 'wheel_velocities': [0.60677207, 0.5895414], 'timestamp': 1.8000000000000023, 'tile_coords': [6, 1], 'msg': ''}}

Done!



In [72]:
view_results_ipython(local_env)

**<font color='red'>Question 5:</font> Copy the value of _info_ , after simulating for 500 steps. If your simulation fails earlier, save the results _before_ the failure (i.e. when the simulation returns `done = True`.  DO NOT RESEED THE ENVIRONMENT** 

# A nastier environment

Once your solution is able to pass a curve while staying in the lane, you can try to see what happens if you modify the test environment with respect to the one used to generate the training dataset. 

To do this, create a new environment called *new_environment* by using the **launch_env()** function as above. This time passing the argument *domain_rand=True*. Basically it randomizes the environment. Once you have the new environment, run again the model without retraining. 

Then, visualize the results. 

**<font color='red'>Question 6:</font> Comment the performance of your solution on the new environment, name two reasons that justify the performance.**

In [52]:
# TODO: Run again the agent in the new randomized environment as explained above

new_env = launch_env(domain_rand=True)
new_env = wrap_env(new_env)
new_env = ResizeWrapper(new_env)
new_env = ImgWrapper(new_env)

obs = new_env.reset()

done = False
rewards = []
nsteps = 300
for steps in range(0, nsteps):
    obs = torch.from_numpy(obs).float().to(device).unsqueeze(0)
    action = model(obs)
    action = action.squeeze().data.cpu().numpy()
    obs, reward, done, info = new_env.step(action) 
    rewards.append(reward)
    
    if done:
        new_env.reset()

new_env.close()

print("\nDone!\n")

[2m18:39:11|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:39:11|[0mgym-duckietown[2m|graphics.py:121|create_frame_buffers(): [0m[35mFalling back to non-multisampled frame buffer[0m
[2m18:39:11|[0mgym-duckietown[2m|simulator.py:578|_load_map(): [0m[35mloading map file "/duckietown/simulation/src/gym_duckietown/maps/loop_empty.yaml"[0m
[2m18:39:11|[0mgym-duckietown[2m|graphics.py:60|load_texture(): [0m[35mloading texture "curve_left_3.png"[0m
[2m18:39:11|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[0.54829264 0.         2.5248345 ] corresponds to tile at (0, 4) which is not drivable: {'coords': (0, 4), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([0.82985124, 1.15541818, 0.90280541])}[0m
[2m18:39:11|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[

[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.07112609 0.         1.33627461][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.97837341 0.         1.30622148][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [4.16387877 0.         1.36632773][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.10718984 0.         1.22497139][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.49738929 0.         1.1729982 ] corresponds to tile at (5, 2) which is not drivable: {'coords': (5, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([0.9986692 , 1.10691105, 1.10118672])}[0m
[2m18:39:17|[0mgym-ducki

[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.06918932 0.         1.26595126][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.43004286 0.         1.37655549] corresponds to tile at (5, 2) which is not drivable: {'coords': (5, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([0.9986692 , 1.10691105, 1.10118672])}[0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.52677835 0.         1.36436963][0m
[2m18:39:17|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.43004286 0.         1.37655549][0m
[2m18:39

[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[3.60959406 0.         2.94426744] corresponds to tile at (6, 5) which is not drivable: {'coords': (6, 5), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1.02314891, 0.89307918, 0.87773796])}[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.67500036 0.         2.8719607 ][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.60959406 0.         2.94426744][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.74040667 0.         2.79965396][0m
[2m18:39

[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.05066809 0.         2.88372711][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.96758823 0.         2.83269782][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [4.13374794 0.         2.93475639][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.11190323 0.         2.78403128][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.16613406 0.         2.48080121] corresponds to tile at (7, 4) which is not drivable: {'coords': (7, 4), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1.16440902, 1.17414572, 1.05009426])}[0m
[2m18:39:23|[0mgym-ducki

[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [4.08773243 0.         2.73522416][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[4.14702153 0.         2.72105114] corresponds to tile at (7, 4) which is not drivable: {'coords': (7, 4), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1.16440902, 1.17414572, 1.05009426])}[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.3[0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [4.07695674 0.         2.78885361][0m
[2m18:39:23|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [4.00689194 0.         2.85665607][0m
[2m18:39

[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable area: False[0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1251|_valid_pose(): [0m[35msafety_factor: 1.0[0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [3.61858433 0.         2.83948592][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [3.68986748 0.         2.86280357][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [3.54730118 0.         2.81616827][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [3.59060315 0.         2.9250257 ][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1397|_compute_done_reward(): [0m[32mStopping the simulator because we are at an invalid pose.[0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.30922501 0.         0.50034314] 

[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1252|_valid_pose(): [0m[35mpos: [2.15630702 0.         0.63038413][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1253|_valid_pose(): [0m[35ml_pos: [2.21600015 0.         0.5532934 ][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1254|_valid_pose(): [0m[35mr_pos: [2.0966139  0.         0.70747486][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1255|_valid_pose(): [0m[35mf_pos: [2.2488159  0.         0.70201588][0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1140|_drivable_pos(): [0m[35m[2.04309221 0.         1.17431775] corresponds to tile at (3, 2) which is not drivable: {'coords': (3, 2), 'kind': 'floor', 'angle': 0, 'drivable': False, 'texture': <simulation.src.gym_duckietown.graphics.Texture object at 0x7fb2c32047b8>, 'color': array([1.04122034, 0.90729052, 0.85726043])}[0m
[2m18:39:25|[0mgym-duckietown[2m|simulator.py:1250|_valid_pose(): [0m[35mInvalid pose. Collision free: True On drivable ar


Done!



In [53]:
# TODO: visualize the results
view_results_ipython(new_env)