In [1]:
from IPython.core.display import HTML
# Read the contents of ../style.css into `css`.
with open ("../style.css", "r") as file:
    css = file.read()
# Last expression of the cell: the stylesheet text is wrapped in an HTML
# display object, which Jupyter renders as the output of this cell.
HTML(css)

# 2D Random Walk

In [2]:
import random as rnd
import math
import numpy as np
rnd.seed(42)

The function `walk` takes one parameter:
* `n` is the number of steps that are taken.

The function performs a two-dimensional [random walk](https://en.wikipedia.org/wiki/Random_walk) of length `n` in a square grid.  At every step, the random walker chooses a direction at random from the set
$$ \{ \texttt{north},  \texttt{east},  \texttt{south},  \texttt{west} \} $$
and walks a distance of length $1$ in the chosen direction. 

The function returns two `numpy` arrays `X` and `Y` that are both of length `n + 1`.
For all $i \in \{0, 1,\cdots, n\}$ we have that `X[i]` is the *x-coordinate* of
the position of the random walker after the $i^\textrm{th}$ step, while `Y[i]` is the *y-coordinate*.


In [None]:
def walk(n):
    """Perform a two-dimensional random walk of `n` unit steps on a square grid.

    The walker starts at the origin.  At every step it moves a distance of 1
    towards north, east, south or west; the direction is picked with
    `rnd.choice`, i.e. from the module-level generator of the `random`
    module, so the walk is reproducible after a call of `rnd.seed`.

    Parameters
    ----------
    n : int
        Number of steps that are taken.

    Returns
    -------
    X, Y : numpy.ndarray
        Two integer arrays of length `n + 1`.  `X[i]` and `Y[i]` are the
        x- and y-coordinate of the walker after the i-th step, hence
        `X[0] == Y[0] == 0`.
    """
    X = np.zeros(n + 1, dtype=int)   # initialize X to contain n+1 zeros
    Y = np.zeros(n + 1, dtype=int)   # initialize Y to contain n+1 zeros
    # Unit displacements (dx, dy) for north, east, south and west.
    directions = ((0, 1), (1, 0), (0, -1), (-1, 0))
    for i in range(1, n + 1):
        dx, dy = rnd.choice(directions)
        # The new position is the previous position plus the chosen unit step.
        X[i] = X[i - 1] + dx
        Y[i] = Y[i - 1] + dy
    return X, Y

In [None]:
# Try `walk` with 10 steps; the trailing tuple expression makes the cell
# display both coordinate arrays.
X, Y = walk(10)
X, Y

In [None]:
import matplotlib.pyplot as plt
import seaborn           as sns

The function `show_walk(n)` creates a random walk of `n` steps and displays it.

In [None]:
def show_walk(n):
    """Create a random walk of `n` steps and draw its path.

    The path is plotted into a new 12 x 12 inch figure using the seaborn
    'darkgrid' style.  Axis ticks are placed in the half-open interval
    [-s, s) with s = 2 * trunc(sqrt(n)); the tick spacing grows with `n`
    (1 up to 1_001 steps, 10 up to 40_000 steps, 20 beyond that).

    Parameters
    ----------
    n : int
        Number of steps of the walk.
    """
    xs, ys = walk(n)
    plt.figure(figsize=(12, 12))
    # The style is set before the first axes are created by the calls below.
    sns.set(style='darkgrid')
    half_width = 2 * math.trunc(math.sqrt(n))
    # Coarser ticks for longer walks.
    tick_step = 1 if n <= 1_001 else (10 if n <= 40_000 else 20)
    tick_positions = range(-half_width, half_width, tick_step)
    plt.xticks(ticks=tick_positions)
    plt.yticks(ticks=tick_positions)
    plt.plot(xs, ys)
    plt.title(f'Random walk of length {n}')

In [None]:
# Draw one random walk for each of the listed numbers of steps.
for n in [100, 1_000, 10_000, 40_000, 160_000, 250_000]:
    show_walk(n)

The function `average_distance(trials, n)` performs `trials` random walks of length `n`.  It returns a  `numpy` array of length `n + 1` containing the *average* distances of the walker from the origin after `i` steps where $i \in \{0,1,\cdots, n\}$.

In [None]:
def average_distance(trials, n):
    """Estimate the average distance of a random walker from the origin.

    Calls `walk(n)` `trials` times and averages, separately for every step
    count, the Euclidean distance between the walker and the origin.

    Parameters
    ----------
    trials : int
        Number of random walks to average over; must be positive.
    n : int
        Number of steps of every walk.

    Returns
    -------
    numpy.ndarray
        Float array of length `n + 1`; entry `i` is the average distance of
        the walker from the origin after `i` steps.

    Raises
    ------
    ValueError
        If `trials` is not positive, since the average is undefined then.
    """
    if trials <= 0:
        raise ValueError(f'trials must be positive, got {trials}')
    total = np.zeros(n + 1)
    for _ in range(trials):
        X, Y = walk(n)
        # np.hypot(X, Y) is sqrt(X**2 + Y**2), computed element-wise.
        total += np.hypot(X, Y)
    return total / trials

For testing, define `trials` as `10_000`.  To get accurate results, we should set the number of trials to a million. 

In [None]:
%%time
trials = 10_000
# Result of `trials` walks of 100 steps each; the later plotting and
# regression cells read `Distance`.
Distance = average_distance(trials, 100)
Distance

In [None]:
def plot_distance(Distance):
    """Scatter-plot the average distance against the number of steps.

    Parameters
    ----------
    Distance : sequence or numpy.ndarray
        `Distance[i]` is drawn at x-position `i` for every index `i`.
    """
    count = len(Distance)
    # One x value per entry, so that results for any number of steps can be
    # plotted, not only those of walks with exactly 100 steps.
    X = np.arange(count)
    Y = Distance
    plt.figure(figsize=(12, 10))
    sns.set(style='darkgrid')
    # Roughly ten ticks; for 101 entries these are 0, 10, ..., 100.
    tick_step = max(1, (count - 1) // 10)
    plt.xticks(ticks=range(0, count, tick_step))
    plt.scatter(X, Y, c='b', s=4)
    plt.xlabel('number of steps')
    plt.ylabel('distance from origin')
    plt.title('Average distance of a random walker from the origin')

In [None]:
# Plot the average distance against the number of steps on linear axes.
plot_distance(Distance)

In [None]:
def plot_logarithmic_distance(Distance):
    """Scatter-plot log(average distance) against log(number of steps).

    The entry at index 0 is left out because the logarithm of the step
    count 0 is not finite.

    Parameters
    ----------
    Distance : sequence or numpy.ndarray
        `Distance[i]` is the value belonging to step count `i`.
    """
    # Step counts 1, ..., len(Distance) - 1, so that results for any number of
    # steps can be plotted, not only those of walks with exactly 100 steps.
    X = np.log(np.arange(1, len(Distance)))
    Y = np.log(Distance[1:])
    plt.figure(figsize=(12, 10))
    sns.set(style='darkgrid')
    plt.scatter(X, Y, c='b', s=4)
    plt.xlabel('logarithm of number of steps')
    plt.ylabel('logarithm of distance from origin')
    plt.title('Average distance of a random walker from the origin, log scale')

In [None]:
# Plot the same data with logarithms taken on both axes.
plot_logarithmic_distance(Distance)

In [None]:
import sklearn.linear_model as lm

The function `linear_regression` takes one input:
* `Distance` is a numpy array.  `Distance[i]` is the average distance
  of the random walker after the $i^\textrm{th}$ step.
  
Our assumption is that $i$ and `Distance[i]` are related by a power law
of the form
$$ \texttt{Distance}[i] = \alpha \cdot i^\beta. $$
The function estimates $\alpha$ and $\beta$ via an appropriate variation of linear regression and returns the pair $(\alpha, \beta)$.

In [None]:
def linear_regression(Distance):
    """Fit the power law `Distance[i] = alpha * i**beta` and return `(alpha, beta)`.

    Taking logarithms turns the power law into the straight line
    `log(Distance[i]) = log(alpha) + beta * log(i)`.  This line is fitted
    by least squares with `np.polyfit`.  Index 0 is excluded because
    `log(0)` is not finite.

    Parameters
    ----------
    Distance : sequence or numpy.ndarray
        One-dimensional; `Distance[i]` is the average distance of the
        random walker after the i-th step.

    Returns
    -------
    tuple of float
        The pair `(alpha, beta)` of the fitted power law.

    Raises
    ------
    ValueError
        If `Distance` is not one-dimensional, has fewer than 3 entries
        (a line needs two points and index 0 is unusable), or has an entry
        at an index >= 1 that is not strictly positive.
    """
    distances = np.asarray(Distance, dtype=float)
    if distances.ndim != 1 or len(distances) < 3:
        raise ValueError('Distance must be one-dimensional with at least 3 entries')
    if not np.all(distances[1:] > 0):
        raise ValueError('Distance[i] must be positive for all i >= 1')
    log_steps = np.log(np.arange(1, len(distances)))
    log_distances = np.log(distances[1:])
    # A degree-1 fit returns the coefficients as (slope, intercept).
    beta, log_alpha = np.polyfit(log_steps, log_distances, 1)
    return float(np.exp(log_alpha)), float(beta)

In [None]:
# Estimate the two parameters of the power law from the simulated distances.
𝛼,𝛽 = linear_regression(Distance)

Finally, we can print the formula we have found.

In [None]:
# Print the power law with the estimated parameters inserted.
print(f'Distance[i] = {𝛼} * i^{𝛽}')