<a href="https://colab.research.google.com/github/romankoshlyak/mlis_judge/blob/master/mlis-pytorch/notebooks/MLIS_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>


# Tensor
What is a tensor?

Let's start with a vector, for example the Python list [1.0, 2.5, 5.0, 3.5]. That would be a 1-dimensional tensor.

In [None]:
import torch

# Build a 1-dimensional tensor from a plain Python list
values = [1.0, 2.5, 5.0, 3.5]
x = torch.tensor(values)
print("x={}".format(x))
# PyTorch uses tensors to abstract data computation in a GPU-friendly way
# Multiplying a tensor by a number scales every element
doubled = 2*x
print("2*x={}".format(doubled))
# Many other operations are available, e.g. the element-wise sigmoid
activated = torch.sigmoid(x)
print("sigmoid(x)={}".format(activated))

Okay, if it is just a vector, why introduce a new term for it? Because a vector is just a 1-dimensional tensor, and a matrix is a 2-dimensional tensor. For example, the matrix [[0,1,2],[3,4,5]] is a 2-dimensional tensor.

In [None]:
import torch

# Two rows of three values give a 2-dimensional tensor
rows = [[0,1,2],[3,4,5]]
x = torch.tensor(rows)
print("x={}".format(x))
# size() reports the size/shape of the tensor
shape = x.size()
print("x.size()={}".format(shape))
# view(-1) presents the same values with a different shape (here: one dimension)
flat = x.view(-1)
print("x.view(-1)={}".format(flat))

As you can guess, a tensor can have 3, 4, 5 or any number of dimensions. Let's see some examples.

In [None]:
import torch

# Random tensor with 3 dimensions of sizes 2, 3 and 4
shape = (2, 3, 4)
x = torch.rand(*shape)
print("x={}".format(x))

# Derivative intro video

In [None]:
#@title Video explains derivative
from IPython.display import IFrame

# Embedded YouTube player (last expression of the cell, so it is displayed)
IFrame(
    "https://www.youtube.com/embed/9vKqVkMQHKk",
    width="800",
    height="450",
    frameborder="0",
    allowfullscreen="allowfullscreen",
)


# Derivative

Derivative:
$$\frac{df(x)}{dx}=\lim_{\Delta{x} \to 0}\frac{f(x+\Delta{x})-f(x)}{\Delta{x}}$$


---


Gradient is a multi-variable generalization of the derivative:
$$\frac{\partial{f(x,y)}}{\partial{x}}=\lim_{\Delta{x} \to 0}\frac{f(x+\Delta{x},y)-f(x,y)}{\Delta{x}}$$
$$\frac{\partial{f(x,y)}}{\partial{y}}=\lim_{\Delta{y} \to 0}\frac{f(x,y+\Delta{y})-f(x,y)}{\Delta{y}}$$


---


Deep learning is based on gradient descent. This is a method based on moving in the direction of the negative gradient. PyTorch automates the process of calculating the gradient.


---


Let's start with the derivative.
For some functions we can calculate the derivative analytically. For example:
$$f(x)=3*x^2$$
$$\frac{df}{dx}=\frac{d}{dx}\left(3*x^2\right)=3*2*x^{(2-1)}=6*x$$

In [None]:
import torch

x = 2.0
delta_x = 0.01
def f(x):
  """Return 3*x**2; used below with a float and with a scalar tensor."""
  return 3*x**2

print("x={}".format(x))
y = f(x)
print("y={}".format(y))
# Finite-difference estimate of the derivative: (f(x+dx)-f(x))/dx
rise = f(x+delta_x)-f(x)
grad = rise/delta_x
print(f"grad={grad} with delta_x={delta_x}")

# The same derivative computed by torch autograd.
# Here x becomes a 0-dimensional (scalar) tensor that tracks gradients.
x = torch.tensor(float(x), requires_grad=True)
print(f"x={x}")
y = f(x)
print(f"y={y}")
# backward() computes dy/dx and stores it in x.grad
y.backward()
print(f"grad={x.grad}")

# Derivative visualization

We can visualize the derivative at a point x as the line that touches the graph at that point (the tangent line). The slope of this line equals the derivative of the function at that point.

In [None]:
import torch
import matplotlib.pyplot as plt
import numpy as np
from ipywidgets import interact
# Default figure size (16x16) for every plot drawn after this line
plt.rcParams["figure.figsize"] = (16,16)

def f(x):
  """Cubic demo curve 0.1*x**3+0.5; called below with floats and with tensors."""
  return 0.1*x**3+0.5

def visualize_deriviative(x=2.0, delta_x=0.1, grad_use_delta=True):
  """Plot f together with the line through (x, f(x)) whose slope is the derivative.

  Args:
    x: point at which the slope is evaluated.
    delta_x: step of the finite-difference estimate (used when grad_use_delta).
    grad_use_delta: True -> slope from (f(x+delta_x)-f(x))/delta_x,
      False -> slope from torch autograd.
  """
  if not grad_use_delta:
    # Slope from autograd
    t_x = torch.tensor(x, requires_grad=True)
    f(t_x).backward()
    grad = t_x.grad.item()
  else:
    # Slope from the finite difference
    grad = (f(x+delta_x)-f(x))/delta_x

  xlim = [-3.0, 3.0]
  ylim = [-3.0, 3.0]
  curve_x = torch.arange(*xlim, 0.01)
  curve_y = f(curve_x)

  plt.xlim(*xlim)
  plt.ylim(*ylim)
  plt.title('f(x) and deriviative line')
  plt.xlabel('x')
  plt.ylabel('f(x)')
  plt.plot(curve_x.numpy(), curve_y.numpy(), color='blue')
  plt.scatter([x], [f(x)], color='green')
  if grad_use_delta:
    # Second point used by the finite difference
    plt.scatter([x+delta_x], [f(x+delta_x)], color='red')
  # Line with slope `grad`, shifted vertically so that it passes through (x, f(x))
  line_x = np.array([xlim[0],x,xlim[1]])
  line_y = line_x*grad
  line_y = line_y-(line_y[1]-f(x))
  plt.plot(line_x, line_y, color='green')
  plt.show()

# Sliders for x and delta_x, checkbox for grad_use_delta
interact(
    visualize_deriviative,
    x=(-3.0, 3.0, 0.1),
    delta_x=(1e-7, 1.0, 0.1),
    grad_use_delta=True,
)

# Video introduction to neural networks and gradient descent

In [None]:
#@title Video introduction to neural networks
from IPython.display import IFrame

# Embedded YouTube player (last expression of the cell, so it is displayed)
IFrame(
    "https://www.youtube.com/embed/aircAruvnKk",
    width="800",
    height="450",
    frameborder="0",
    allowfullscreen="allowfullscreen",
)


In [None]:
#@title Video introduction to gradient descent
from IPython.display import IFrame

# Embedded YouTube player (last expression of the cell, so it is displayed)
IFrame(
    "https://www.youtube.com/embed/IHZwWFHWa-w",
    width="800",
    height="450",
    frameborder="0",
    allowfullscreen="allowfullscreen",
)


# Gradient descent utils

The next cell contains the imports and definitions needed for gradient descent. Make sure you run this cell.

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
import math
from ipywidgets import interact
# Default figure size (16x16) for every plot drawn after this line
plt.rcParams["figure.figsize"] = (16,16)

def np_points(grid=(-3.0, 3.0, 0.01)):
    """Return evenly spaced numpy points for grid=(start, stop, step).

    A tiny epsilon is added to `stop` so that the end point itself is included.
    """
    start, stop, step = grid[0], grid[1], grid[2]
    return np.arange(start, stop+1e-7, step)

def visualize_points(points_x, points_y, test_points=None, target_func=None, model_prediction_func=None, xgrid=(-3.0, 3.0, 0.01), ygrid=(-3.0, 3.0), labels=('Points', 'x', 'y')):
    """Scatter data points and optionally overlay target and model curves.

    Args:
        points_x, points_y: coordinates of the main scatter plot.
        test_points: optional (x, y) pair scattered in yellow.
        target_func: optional; either a tuple that is unpacked straight into
            ax.plot, or a callable evaluated on the x grid (drawn in green).
        model_prediction_func: optional callable evaluated on the x grid (red).
        xgrid: (start, stop, step) of the x grid; also sets the x limits.
        ygrid: (bottom, top) y limits.
        labels: (title, x label, y label).
    """
    fig, ax = plt.subplots()
    ax.scatter(points_x, points_y)
    if test_points is not None:
        ax.scatter(*test_points, c='y')

    grid_x = np_points(xgrid)
    if target_func is not None:
        if type(target_func) is not tuple:
            ax.plot(grid_x, target_func(grid_x), 'g')
        else:
            # Pre-computed (xs, ys) data
            ax.plot(*target_func, 'g')
    if model_prediction_func is not None:
        ax.plot(grid_x, model_prediction_func(grid_x), 'r')

    title, x_label, y_label = labels[0], labels[1], labels[2]
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_xlim(left=xgrid[0], right=xgrid[1])
    ax.set_ylim(bottom=ygrid[0], top=ygrid[1])
    plt.show()

def visualize_gradient_descent(moves, func, show_annotation=True, anotated_steps=None, xgrid=(-3.0, 3.0, 0.01), ygrid=(-3.0, 3.0, 0.01), labels=('Arrows shows direction of -gradient at every step', 'x', 'y'), min_dist_between_steps=0.0):
    """Draw the contour map of func and the path taken by gradient descent.

    Args:
        moves: sequence of (x, y) positions, one per step (first = start point).
        func: callable func(x, y); evaluated on the whole meshgrid and on
            single points for the annotations.
        show_annotation: when True, mark the annotated steps, the start point,
            the end point and the minimum found on the grid.
        anotated_steps: optional dict {index into moves: label prefix}.
        xgrid, ygrid: (start, stop, step) of the evaluation grid.
        labels: (title, x label, y label).
        min_dist_between_steps: an arrow is drawn only once the path has moved
            further than this distance (measured relative to the grid extent)
            from the last drawn point.
    """
    # Avoid a shared mutable default argument
    if anotated_steps is None:
        anotated_steps = {}

    X, Y = np.meshgrid(np_points(xgrid), np_points(ygrid))
    Z = func(X, Y)
    fig, ax = plt.subplots()
    ax.set_title(labels[0])
    ax.set_xlabel(labels[1])
    ax.set_ylabel(labels[2])
    # Filled background plus a few labelled contour lines on top
    ax.contourf(X, Y, Z, levels=100)
    cs = ax.contour(X, Y, Z, levels=5, colors='k')
    ax.clabel(cs, inline=1, fontsize=10)

    x_extent = xgrid[1]-xgrid[0]
    y_extent = ygrid[1]-ygrid[0]
    # Offsets used to place annotation texts next to their points
    xshift = 0.02*x_extent
    yshift = 0.02*y_extent

    prev_x, prev_y = moves[0]
    for x, y in moves:
        # Distance from the last drawn point, normalised by the grid extent
        if math.sqrt(((prev_x-x)/x_extent)**2+((prev_y-y)/y_extent)**2) > min_dist_between_steps:
            if prev_x != x or prev_y != y:
                ax.annotate(None, (x, y), xytext=(prev_x, prev_y), arrowprops={'arrowstyle':'->','color':'red'})
                prev_x, prev_y = x, y

    if show_annotation:
        for key in anotated_steps:
            value = anotated_steps[key]
            x, y = moves[key]
            ax.scatter(x, y, color='green')
            ax.annotate((value+" z({:.4f},{:.4f})={:.4f}").format(x, y, func(x, y)), (x, y), xytext=(x+xshift, y), arrowprops={'arrowstyle':'->'}, color='black')

        start_x, start_y = moves[0]
        ax.scatter(start_x, start_y, color='green')
        ax.annotate("Start point z({:.4f},{:.4f})={:.4f}".format(start_x, start_y, func(start_x, start_y)), (start_x, start_y), xytext=(start_x+xshift, start_y+yshift), arrowprops={'arrowstyle':'->'}, color='black')
        end_x, end_y = moves[-1]
        ax.scatter(end_x, end_y, color='green')
        ax.annotate("End point z({:.4f},{:.4f})={:.4f}".format(end_x, end_y, func(end_x,end_y)), (end_x, end_y), xytext=(end_x+xshift, end_y+yshift), arrowprops={'arrowstyle':'->'}, color='black')
        # Minimum of func over the evaluated grid (not necessarily the true minimum)
        min_ind = np.unravel_index(Z.argmin(), Z.shape)
        min_x = X[min_ind]
        min_y = Y[min_ind]
        ax.scatter(min_x, min_y, color='green')
        ax.annotate("Min point z({:.4f},{:.4f})={:.4f}".format(min_x, min_y, func(min_x, min_y)), (min_x, min_y), xytext=(min_x+xshift, min_y-yshift), arrowprops={'arrowstyle':'->'}, color='black')
    plt.show()

# Gradient descent using delta

In [None]:
def func(x,y):
    """Surface to minimise: z(x,y) = sqrt(x^2/5 + y^2)."""
    return (x*x/5.0+y*y)**0.5

def gradient_descent_grad_delta(steps_number=10, learning_rate=0.5, start_x=-2.5, start_y=2.5):
    """Run gradient descent on func using finite-difference gradients and plot the path.

    Args:
        steps_number: number of descent steps.
        learning_rate: factor applied to the gradient at every step.
        start_x, start_y: starting point.
    """
    delta = 1e-7
    x, y = start_x, start_y
    moves = [(x,y)]
    for _ in range(steps_number):
        # Forward-difference estimate of both partial derivatives
        grad_x = (func(x+delta,y)-func(x,y))/delta
        grad_y = (func(x,y+delta)-func(x,y))/delta
        # Move against the gradient; the learning rate controls the step length
        x, y = x-learning_rate*grad_x, y-learning_rate*grad_y
        moves.append((x,y))
    visualize_gradient_descent(moves=moves, func=func)

# One slider per argument of gradient_descent_grad_delta
interact(
    gradient_descent_grad_delta,
    steps_number=(1,100,1),
    learning_rate=(0.01,2.0,0.01),
    start_x=(-3.0, 3.0, 0.01),
    start_y=(-3.0, 3.0, 0.01),
)

# Gradient descent using tensors

In [None]:
def func(x,y):
    """Surface to minimise: z(x,y) = sqrt(x^2/5 + y^2); works on floats and tensors."""
    return (x*x/5.0+y*y)**0.5

def gradient_descent_grad_tensor(steps_number=10, learning_rate=0.5, start_x=-2.5, start_y=2.5):
    """Run gradient descent on func using torch autograd gradients and plot the path.

    Args:
        steps_number: number of descent steps.
        learning_rate: factor applied to the gradient at every step.
        start_x, start_y: starting point.
    """
    x, y = torch.tensor(start_x), torch.tensor(start_y)
    moves = [(x.item(),y.item())]
    for _ in range(steps_number):
        # The tensors produced by the update below do not track gradients,
        # so tracking is switched on again at the start of every step
        x.requires_grad = True
        y.requires_grad = True
        # backward() fills x.grad and y.grad
        func(x,y).backward()
        # The update itself must not be recorded by autograd
        with torch.no_grad():
            x, y = x-learning_rate*x.grad, y-learning_rate*y.grad
            moves.append((x.item(),y.item()))

    visualize_gradient_descent(moves, func)

# One slider per argument of gradient_descent_grad_tensor
interact(
    gradient_descent_grad_tensor,
    steps_number=(1,100,1),
    learning_rate=(0.01,2.0,0.01),
    start_x=(-3.0, 3.0, 0.01),
    start_y=(-3.0, 3.0, 0.01),
)

# Gradient descent with model

Why do we need all this extra machinery to solve the same problem? Because our current problem is very simple; once we get to more complicated problems, all this extra machinery will pay off.

We will introduce a few classes:
* MyModel - represents the current parameters that we are optimizing and can evaluate the function that we need to optimize
* MyL1Loss - can evaluate the distance between our target values and our current values
* MyOptimizer - can perform one gradient step

These classes may look arbitrary, but they are very similar to what PyTorch uses.



In [None]:
class MyModel:
    """Holds the two optimised values x and y and evaluates func on them."""

    def __init__(self, start_x, start_y):
        # Both coordinates are gradient-tracking scalar tensors
        self.x, self.y = (torch.tensor(start, requires_grad=True) for start in (start_x, start_y))

    def forward(self):
        """Evaluate func at the current (x, y)."""
        return func(self.x, self.y)

    def __call__(self, *input, **kwargs):
        # Arguments are accepted but ignored: the model has no inputs
        return self.forward()

    def parameters(self):
        """Return the parameters as a list with a single parameter group."""
        group = {'params':[self.x, self.y]}
        return [group]

class MyL1Loss:
    """Element-wise absolute difference between input and target."""

    def forward(self, input, target):
        difference = input-target
        return difference.abs()

    def __call__(self, *input, **kwargs):
        # Only positional arguments are forwarded
        return self.forward(*input)

class MyOptimizer:
    """Minimal gradient-descent optimizer.

    Args:
        param_groups: list of dicts, each with a 'params' list of tensors.
        learning_rate: factor applied to the gradient in step().
    """

    def __init__(self, param_groups, learning_rate):
        self.param_groups = param_groups
        self.learning_rate = learning_rate

    def zero_grad(self):
        """Reset the gradient of every parameter that already has one."""
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is not None:
                    p.grad.detach_()
                    p.grad.zero_()

    def step(self):
        """Update every parameter in place: p -= learning_rate * p.grad."""
        # The update must not be recorded by autograd
        with torch.no_grad():
            for group in self.param_groups:
                for p in group['params']:
                    # Parameters that took no part in backward() have no gradient
                    if p.grad is None:
                        continue
                    # Keyword `alpha` replaces the deprecated add_(alpha, tensor) overload
                    p.add_(p.grad, alpha=-self.learning_rate)

def func(x,y):
    """Surface to minimise: z(x,y) = sqrt(x^2/5 + y^2); works on floats and tensors."""
    return (x*x/5.0+y*y)**0.5

def gradient_descent_with_model(steps_number=10, learning_rate=0.5, start_x=-2.5, start_y=2.5):
    """Minimise func with the hand-written MyModel / MyL1Loss / MyOptimizer and plot the path.

    Args:
        steps_number: number of optimisation steps.
        learning_rate: learning rate handed to MyOptimizer.
        start_x, start_y: starting point.
    """
    model = MyModel(start_x, start_y)
    loss = MyL1Loss()
    optimizer = MyOptimizer(model.parameters(), learning_rate)
    # The loss measures the distance of the function value to 0
    target = torch.tensor(0.0)

    def current_position():
        return (model.x.item(),model.y.item())

    moves = [current_position()]
    for _ in range(steps_number):
        optimizer.zero_grad()
        error = loss(model(), target)
        error.backward()
        optimizer.step()
        moves.append(current_position())

    visualize_gradient_descent(moves, func)

# One slider per argument of gradient_descent_with_model
interact(
    gradient_descent_with_model,
    steps_number=(1,100,1),
    learning_rate=(0.01,2.0,0.01),
    start_x=(-3.0, 3.0, 0.01),
    start_y=(-3.0, 3.0, 0.01),
)

# Gradient descent with Pytorch model

In [None]:
class PytorchModel(nn.Module):
    """nn.Module holding x and y as parameters; forward() evaluates func on them."""

    def __init__(self, start_x, start_y):
        super().__init__()
        # Registered in this order, so model.parameters() yields x first, then y
        self.x = nn.Parameter(torch.tensor(start_x))
        self.y = nn.Parameter(torch.tensor(start_y))

    def forward(self):
        x, y = self.x, self.y
        return func(x, y)

def func(x,y):
    """Surface to minimise: z(x,y) = sqrt(x^2/5 + y^2); works on floats and tensors."""
    return (x*x/5.0+y*y)**0.5

def gradient_descent_with_pytorch_model(steps_number=10, learning_rate=0.5, start_x=-2.5, start_y=2.5):
    """Minimise func with PytorchModel, nn.L1Loss and optim.SGD and plot the path.

    Args:
        steps_number: number of optimisation steps.
        learning_rate: learning rate handed to optim.SGD.
        start_x, start_y: starting point.
    """
    model = PytorchModel(start_x, start_y)
    loss = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), learning_rate)
    # The loss measures the distance of the function value to 0
    target = torch.tensor(0.0)

    def current_position():
        return (model.x.item(),model.y.item())

    moves = [current_position()]
    for _ in range(steps_number):
        optimizer.zero_grad()
        error = loss(model(), target)
        error.backward()
        optimizer.step()
        moves.append(current_position())

    visualize_gradient_descent(moves, func)

# One slider per argument of gradient_descent_with_pytorch_model
interact(
    gradient_descent_with_pytorch_model,
    steps_number=(1,100,1),
    learning_rate=(0.01,2.0,0.01),
    start_x=(-3.0, 3.0, 0.01),
    start_y=(-3.0, 3.0, 0.01),
)

# Machine learning
In the previous examples, the function we were optimizing was defined by the formula:
$z(x,y)=\sqrt{\frac{x^2}{5}+y^2}$

---


When we do gradient descent on a function defined by data, we could call it machine learning.

---


Let's say we need to find a line that approximates data points. In this case, we don't have an explicit formula for the function to optimize; instead the function is defined by data. But we can still use gradient descent.

In [None]:
class LineModel(nn.Module):
    """Line y = line_coeficient*x + line_offset with both values as parameters."""

    def __init__(self, start_x, start_y):
        super().__init__()
        # start_x initialises the slope, start_y the offset
        self.line_coeficient = nn.Parameter(torch.tensor(start_x))
        self.line_offset = nn.Parameter(torch.tensor(start_y))

    def forward(self, x):
        slope, offset = self.line_coeficient, self.line_offset
        return slope*x+offset

# Function defined by data: noisy samples of the line 0.75*x - 1.0
line_coeficient = 0.75
line_offset = -1.0
number_of_points = 200
# x values drawn uniformly from [-3, 3], noise drawn uniformly from [-0.5, 0.5]
points_x = torch.FloatTensor(number_of_points).uniform_(-3.0, 3.0)
noise = torch.FloatTensor(number_of_points).uniform_(-0.5, 0.5)
points_y = line_coeficient*points_x+line_offset + noise
def func(x,y):
    """Mean absolute error on the data of the line with coefficient x and offset y."""
    with torch.no_grad():
        candidate = LineModel(x,y)
        prediction = candidate(points_x)
        return nn.L1Loss()(prediction, points_y)
# Vectorised so that it can be evaluated on a whole meshgrid
func = np.vectorize(func)

def gradient_descent_with_line_model(steps_number=10, learning_rate=0.5, start_x=-2.5, start_y=2.5, show_input_space=True, show_model_space=True):
    """Fit LineModel to the global data points with SGD and visualise the result.

    Args:
        steps_number: number of optimisation steps.
        learning_rate: learning rate handed to optim.SGD.
        start_x: initial line coefficient.
        start_y: initial line offset.
        show_input_space: plot the data, the true line and the fitted line.
        show_model_space: plot the error surface over (coefficient, offset)
            together with the path taken by the optimiser.
    """
    target = points_y
    model = LineModel(start_x, start_y)
    loss = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), learning_rate)
    moves = [(model.line_coeficient.item(),model.line_offset.item())]
    for step in range(steps_number):
        optimizer.zero_grad()
        output = model(points_x)
        error = loss(output, target)
        error.backward()
        optimizer.step()
        moves.append((model.line_coeficient.item(),model.line_offset.item()))

    def model_prediction(x):
        # x is a numpy array; evaluate the model once, without autograd.
        # Local names deliberately differ from the global points_x/points_y.
        with torch.no_grad():
            inputs = torch.from_numpy(x).view(-1, 1).float()
            return model(inputs).view(-1).numpy()

    if show_input_space:
        visualize_points(points_x, points_y, target_func=lambda x: x*line_coeficient+line_offset, model_prediction_func=model_prediction)

    if show_model_space:
        visualize_gradient_descent(
            moves,
            func,
            xgrid=(-3.0, 3.0, 1.0),
            ygrid=(-3.0, 3.0, 1.0),
            labels=('Mean of error', 'Line coeficient', 'Line offset'))

# Sliders for the numeric arguments; the show_* flags keep their defaults
interact(
    gradient_descent_with_line_model,
    steps_number=(1,100,1),
    learning_rate=(0.01,2.0,0.01),
    start_x=(-3.0, 3.0, 0.01),
    start_y=(-3.0, 3.0, 0.01),
)

# Data Provider

Here we define a class, used for all problems, that provides us with data. In this case it will be a slightly upgraded line-fitting data set with 5 segments.

In [None]:
import torch

class DataProvider:
    """Generates noisy samples of a random piecewise-linear function."""

    def __create_data(self, seed, x_start, x_end, y_min, y_max, y_noise, number_of_segments, number_of_points_per_segment):
        """Sample points along random line segments.

        Seeds torch's global random generator with `seed`, so the same seed
        gives the same data.

        Returns:
            (data, target, target_func): shuffled x values, shuffled noisy y
            values, and a tuple (list of [x_start, x_end], list of
            [y_start, y_end]) with one entry per segment.
        """
        torch.manual_seed(seed)
        number_of_knots = number_of_segments+1
        total_points = number_of_segments*number_of_points_per_segment

        # Random segment lengths, accumulated and rescaled to span [x_start, x_end]
        segment_distances = torch.FloatTensor(number_of_knots).uniform_(0.0, 1.0)
        segment_distances[0] = 0.0
        segment_points_x = segment_distances.cumsum(dim=0)
        segment_points_x = segment_points_x*((x_end-x_start)/segment_points_x[-1])+x_start
        segment_points_y = torch.FloatTensor(number_of_knots).uniform_(y_min, y_max)

        data = torch.FloatTensor(total_points)
        # target starts as pure noise; the segment values are added below
        target = torch.FloatTensor(total_points).uniform_(-y_noise, y_noise)
        segments_x, segments_y = [], []
        for segment_id in range(number_of_segments):
            first = segment_id*number_of_points_per_segment
            last = first + number_of_points_per_segment
            seg_x0, seg_x1 = segment_points_x[segment_id], segment_points_x[segment_id+1]
            seg_y0, seg_y1 = segment_points_y[segment_id], segment_points_y[segment_id+1]
            segments_x.append([seg_x0, seg_x1])
            segments_y.append([seg_y0, seg_y1])
            # Random positions along the segment, as fractions in [0, 1]
            p = torch.FloatTensor(number_of_points_per_segment).uniform_(0.0, 1.0)
            data[first:last] = p*(seg_x1-seg_x0)+seg_x0
            target[first:last] += p*(seg_y1-seg_y0)+seg_y0

        # Shuffle so that the points are no longer ordered by segment
        perm = torch.randperm(data.size(0))
        return (data[perm], target[perm], (segments_x, segments_y))

    def create_case_data(self, case):
        """Create the data set for `case` (used as the random seed).

        Returns:
            (train_data, test_data, target_func): train_data and test_data are
            (x, y) pairs of column tensors holding the first and the second
            half of the points.
        """
        data, target, target_func = self.__create_data(
            seed=case,
            x_start=-3.0,
            x_end=3.0,
            y_min=-2.0,
            y_max=2.0,
            y_noise=0.1,
            number_of_segments=5,
            number_of_points_per_segment=20)
        half = data.size(0)//2
        train_data = (data[:half].view(-1, 1), target[:half].view(-1, 1))
        test_data = (data[half:].view(-1, 1), target[half:].view(-1, 1))
        return (train_data, test_data, target_func)

# Build the data set for case 7 and plot training points, test points and the target segments
train_data, test_data, __target_func = DataProvider().create_case_data(case=7)
visualize_points(train_data[0], train_data[1], test_points=test_data, target_func=__target_func)

# Neural network

Since a line model cannot fit 5 segments, we will turn to the help of a neural network.

In [None]:
class TutorialModel(nn.Module):
    """Network with one hidden layer of 8 sigmoid units: 1 input -> 8 -> 1 output."""

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(1, 8)
        self.linear2 = nn.Linear(8, 1)

    def forward(self, x):
        hidden = torch.sigmoid(self.linear1(x))
        return self.linear2(hidden)

    def calc_error(self, output, target):
        """Loss function: mean of the squared differences."""
        squared_difference = (output-target)**2
        return squared_difference.mean()

class TrainingContext:
    """Bookkeeping for one training run: step counter, step budget and saved parameters."""

    def __init__(self, number_of_steps):
        self.number_of_steps = number_of_steps
        self.step = 0
        # Parameter snapshots appended by the training loop
        self.moves = []

    def increase_step(self):
        """Advance the step counter by one."""
        self.step = self.step + 1

class Solution():
    """Trains a TutorialModel and records its parameters after every step."""

    # Return trained model
    def train_model(self, train_data, train_target, context):
        """Train a fresh TutorialModel with SGD.

        Args:
            train_data: model inputs.
            train_target: values the model output is compared against.
            context: object with `step`, `number_of_steps`, `moves` and
                `increase_step()`; one flattened parameter vector is appended
                to `context.moves` before training and after every step.

        Returns:
            The trained model.
        """
        # Model represent our neural network
        model = TutorialModel()
        # Optimizer used for training neural network
        optimizer = optim.SGD(model.parameters(), lr=0.2)
        # save initial parameters; taken without gradient tracking, like every
        # later snapshot, so the stored vector is not tied to the autograd graph
        with torch.no_grad():
            context.moves.append(torch.nn.utils.parameters_to_vector(model.parameters()))
        # NOTE: with `<=` the body runs number_of_steps+1 times when step starts
        # at 0, which also lets print_stats report the step number_of_steps
        while context.step <= context.number_of_steps:
            # model.parameters()...gradient set to zero
            optimizer.zero_grad()
            # evaluate model => model.forward(data)
            output = model(train_data)
            # calculate error
            error = model.calc_error(output, train_target)
            # calculate derivative of the error and put it in model.parameters()...gradient
            error.backward()
            # print progress of the learning
            self.print_stats(context.step, error)
            # update model: model.parameters() -= lr * gradient
            optimizer.step()
            context.increase_step()
            # save parameters for visualization
            with torch.no_grad():
                context.moves.append(torch.nn.utils.parameters_to_vector(model.parameters()))
        return model

    def print_stats(self, step, error):
        """Print the error on every 1000th step."""
        if step % 1000 == 0:
            print("Step = {} Error = {}".format(step, error.item()))

seed = 7
number_of_steps = 10000
train_data, test_data, __target_func = DataProvider().create_case_data(seed)
context = TrainingContext(number_of_steps)
model = Solution().train_model(*train_data, context)
# Evaluation only: no autograd graph is needed for the test error
with torch.no_grad():
    output = model(test_data[0])
    error = model.calc_error(output, test_data[1])
# Print the plain number (as print_stats does) instead of the tensor repr
print("Test error = {}".format(error.item()))

def visualize_all(vis_dim1 = 0, vis_dim2 = 1, vis_step = 10, vis_base_step = number_of_steps-1, min_dist_between_steps = 0.05, show_input_space = True, show_model_space = True):
    """Visualise the trained model and a 2-D slice of its error surface.

    Args:
        vis_dim1, vis_dim2: indices into the flattened parameter vector that
            form the two axes of the model-space plot.
        vis_step: number of grid steps per axis of the error surface.
        vis_base_step: index into context.moves; all parameters other than
            the two visualised ones are taken from this snapshot.
        min_dist_between_steps: forwarded to visualize_gradient_descent to
            thin out the arrows.
        show_input_space: plot data, target function and model prediction.
        show_model_space: plot the error surface with the training path.
    """
    if show_input_space:
        def model_prediction(x):
            # x is a numpy array; the model expects a float column tensor
            with torch.no_grad():
                inputs = torch.from_numpy(x).view(-1, 1).float()
                return model(inputs).view(-1).numpy()
        visualize_points(*train_data, test_points=test_data, target_func=__target_func, model_prediction_func=model_prediction)

    if show_model_space:
        # Work with plain Python floats so that no (possibly gradient-tracking)
        # tensors leak into the numpy grid computations
        moves = [(snapshot[vis_dim1].item(), snapshot[vis_dim2].item()) for snapshot in context.moves]
        dim1_values = [move[0] for move in moves]
        dim2_values = [move[1] for move in moves]
        min_x, max_x = min(dim1_values), max(dim1_values)
        min_y, max_y = min(dim2_values), max(dim2_values)
        # A parameter that never moved would give a zero grid step; fall back to 1.0
        diff_x = (max_x-min_x) or 1.0
        diff_y = (max_y-min_y) or 1.0
        final_parameters = context.moves[vis_base_step].detach()
        # One scratch model is reused for every grid point; all of its
        # parameters are overwritten on each call of func
        probe_model = TutorialModel()
        def func(x,y):
            with torch.no_grad():
                model_parameters = final_parameters.clone()
                model_parameters[vis_dim1] = x
                model_parameters[vis_dim2] = y
                torch.nn.utils.vector_to_parameters(model_parameters, probe_model.parameters())
                output = probe_model(train_data[0])
                return probe_model.calc_error(output, train_data[1]).item()
        func = np.vectorize(func)

        eps = 1e-7
        visualize_gradient_descent(
            moves,
            func,
            xgrid=(min_x-diff_x/2, max_x+diff_x/2+eps, (2*diff_x)/vis_step),
            ygrid=(min_y-diff_y/2, max_y+diff_y/2+eps, (2*diff_y)/vis_step),
            anotated_steps={vis_base_step: 'Visualization base'},
            labels=('Mean of error', 'Vis dim1', 'Vis dim2'),
            min_dist_between_steps=min_dist_between_steps)

# Slider ranges follow from the recorded training run:
# one index per model parameter, one base step per saved snapshot
parameters_size = context.moves[0].size(0)
moves_size = len(context.moves)
interact(
    visualize_all,
    vis_dim1=(0, parameters_size-1, 1),
    vis_dim2=(0, parameters_size-1, 1),
    vis_step=(3, 1000, 1),
    vis_base_step=(-1,moves_size-1,1),
    min_dist_between_steps=(0.0, 1.0, 0.01),
)