In [1]:
import pandas as pd
import numpy as np
import math
from scipy.stats import describe, randint
from scipy.interpolate import interp1d
import plotly.graph_objects as go
import os
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import seaborn as sns
import scipy.sparse


sns.set()

# Methods for exporting tables and images to latex
def saveimg(filename, fig):
    file_path = os.path.join(os.getcwd(), 'images')
    os.makedirs(file_path, exist_ok=True)
    fig.savefig(os.path.join(file_path, filename))
    
def savetable(filename, df):
    file_path = os.path.join(os.getcwd(), 'tables')
    os.makedirs(file_path, exist_ok=True)
    pd.DataFrame(df).to_latex(os.path.join(file_path, filename),escape = False, index = False)
    
val = [1,4,12,36,100,284,780,2172,5916,16268,44100,120292,324932,881500,2374444,6416596,17245332,46466676,124658732,335116620,897697164,2408806028,6444560484,17266613812,46146397316,123481354908,329712786220,881317491628]

# Methods used in the exercise

In [2]:
def random_neighbour(points):

    # Getting dimensions of points
    dimension = np.shape(points)
    
    # Generate if going up or down, do this for all points
    up_or_down = 2*np.random.randint(2, size=dimension[0]) - 1
    
    # Generate which diretion to go in for all points
    direction = np.random.randint(dimension[1], size=dimension[0])
    
    # Create new matrix, create a binary matrix of the direction
    indptr = range(len(direction)+1)
    data = np.ones(len(direction))
    points_to_change = scipy.sparse.csr_matrix((data, direction, indptr)).toarray()
    
    # Add the binary matrix multiplied by direction to the old points
    new_points = points + (points_to_change.T*up_or_down).T
    
    # Generating probabilities
    probabilities = np.ones(dimension[0])*1/(dimension[1]*2)
    
    return new_points, probabilities

def random_neighbour_avoiding(random_walks):
    
    # Start by swapping axis i and k so it is possible to iterate over all random walks
    random_walks_list = np.swapaxes(random_walks, 0, 1)
    
    # Creating lists for new points probabilities
    new_points = []
    probabilities = []
    
    # Iterate over random walks
    for random_walk in random_walks_list:
        
        # Making rw to list so it's possible to use 'not in'
        random_walk = random_walk.tolist()
        last_point = random_walk[-1]
        
        # Creating list for possible points
        viable_points = []
        
        # Get dimension for last point
        dimension = np.shape(last_point)[0]
        
        # Generate possible neigbours
        for n in range(dimension):
            
            change = np.zeros(dimension)
            change[n] = 1
            
            point_up = last_point - change
            point_down = last_point + change
            
            viable_points.append(list(point_up))
            viable_points.append(list(point_down))
        
        # Sort out points thats already in walk
        viable_points = list(filter(lambda x: x not in random_walk, viable_points))
        length = len(viable_points)
        
        # If no points left, append last one
        if length == 0:
            new_points.append(last_point)
            probabilities.append(0)
            
        # Select on of the remaining points and calculate probability
        else:   
            probabilities.append(1/length)
            new_point_index = np.random.randint(length, size=1)[0]
            new_points.append(viable_points[new_point_index])
        
    return new_points, probabilities

def is_self_avoiding(random_walk):
    return int(len(np.unique(random_walk, axis=0)) == len(random_walk))

def generate_weights(random_walks, weights, probabilities, do_resample=False):
        
    # Get indexes where probabilities is different from zero
    indexes_zero_prob = np.nonzero(probabilities)[0]
    
    # Get indexes where weights is different from zero
    indexes = np.intersect1d(np.nonzero(weights)[0], indexes_zero_prob)
    
    # Create new weights
    new_weights = np.zeros(len(weights))
    
    # Start by swapping axis i and k so it is possible to iterate over all random walks
    random_walks_list = np.swapaxes(random_walks, 0, 1)
    
    # Looping over and update weights
    if(do_resample):
        for index in indexes_zero_prob:
            new_weights[index] = is_self_avoiding(random_walks_list[index])/probabilities[index]
    else:
        for index in indexes:
            new_weights[index] = is_self_avoiding(random_walks_list[index])/probabilities[index]*weights[index]
        
    return new_weights      

def resample(random_walks, weights):
    
    # Calculate probabilities
    probabilities = weights/np.sum(weights)
    random_walks_list = np.swapaxes(random_walks, 0, 1)
    
    # Randomize replacement indices
    replacement_index = np.random.choice(np.arange(len(random_walks_list)), len(random_walks_list), p = probabilities)
    
    # Create resampled random_walks
    resamples_random_walks = list(map(lambda x: random_walks_list[x], replacement_index))
    
    return np.swapaxes(resamples_random_walks, 0, 1).tolist()

def init_walk (d, k, N , self_avoiding = True, do_resample = True):
    
    # The random_walk array has the following structure compared 
    # to the lecture notes random_walk[k][i][d], where:
    # k = step number k
    # i = simulation number
    # d = dimension 
    
    random_walks = []
    weights = [[1] * N]
    
    random_walks.append(np.zeros((N, d)))
    
    for step in range(k):
        
        if self_avoiding:
            step_all, probabilities = random_neighbour_avoiding(random_walks)
        else:
            step_all, probabilities = random_neighbour(random_walks[step])
            
        random_walks.append(step_all)
        weights.append(generate_weights(random_walks, weights[step], probabilities, do_resample=do_resample))
        
        if do_resample:
            random_walks = resample(random_walks, weights[-1])
            
        print("-", end="")
        
    if do_resample:
        return np.cumprod(np.mean(weights[1:], axis=1))
    else:
        return np.mean(weights[1:], axis=1), np.sqrt(np.var(weights[1:], axis=1)/N)*1.96
    
def estimate_parameters(cn):
    
    length = len(cn)
    
    # Finding optimal parameters
    y = np.log(cn)
    X = np.hstack((np.ones(length), np.arange(length) + 1, np.log(np.arange(length) + 1))).reshape(3,length).T
    theta =(np.linalg.inv(X.T@X)@X.T)@y

    # Un transform
    theta = [np.e**theta[0], np.e**theta[1], theta[2] + 1]
    
    return theta 

## Task 3

In [3]:
# Simulating data
n = 100
cn,interval = init_walk(2, n, 10**4, self_avoiding=False, do_resample=False)

----------------------------------------------------------------------------------------------------

In [4]:
# Generating table for report
pd.set_option("display.precision", 2)
pd.set_option('display.float_format', lambda x: '%.1f' % x if (x < 10**5) else '%.3e' % x)

df = pd.DataFrame(cn, columns=["$\tilde{c}_n(2)$"])
df["$I_{95\%}$"] = interval
df["$n$"] = pd.Series(np.arange(1,101))
df = df[["$n$", "$\tilde{c}_n(2)$", "$I_{95\%}$"]]
df = df.loc[[0,1,2,3,4,9,19,24]]

savetable("random_walk_results.tex",df)

## Task 4

In [5]:
# Simulating data
n = 100
cn,interval = init_walk(2, n, 10**4, self_avoiding=True, do_resample=False)

----------------------------------------------------------------------------------------------------

In [8]:
# Generating table for report
pd.set_option("display.precision", 2)
pd.set_option('display.float_format', lambda x: '%.1f' % x if (x < 10**5) else '%.3e' % x)

df = pd.DataFrame(cn, columns=["$\tilde{c}_n(2)$"])
df["$I_{95\%}$"] = interval
df["$n$"] = pd.Series(np.arange(1,101))
df = df[["$n$", "$\tilde{c}_n(2)$", "$I_{95\%}$"]]
df = df.loc[[0,1,2,3,4,9,19,49,99]]

savetable("self_avoding_results.tex",df)

## Task 5

In [20]:
# Simulating data
n = 100
num_tries = 10
cn_all = np.zeros((num_tries, n))
for sim in range(num_tries):
    cn_all[sim] = init_walk(2, n, 10**4, self_avoiding=True, do_resample=True)

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [23]:
cn = np.mean(cn_all, axis = 0)
cn_interval = np.sqrt(np.var(cn_all, axis=0)/num_tries)*1.96

In [30]:
# Generating table for report
pd.set_option("display.precision", 2)
pd.set_option('display.float_format', lambda x: '%.1f' % x if (x < 10**5) else '%.3e' % x)
df = pd.DataFrame(cn, columns=["$\tilde{c}_n(2)$"])
df["$I_{95\%}$"] = cn_interval
df["$n$"] = pd.Series(np.arange(1,101))
df = df[["$n$", "$\tilde{c}_n(2)$", "$I_{95\%}$"]]
df = df.loc[[0,1,2,3,4,9,19,49,99]]

savetable("resampling_results.tex",df)

## Task 6

In [85]:
# Simulating data
n = 100
cn = init_walk(2, n, 10**4, self_avoiding=True, do_resample=True)

----------------------------------------------------------------------------------------------------

In [93]:
pd.set_option("display.precision", 3)
pd.set_option('display.float_format', lambda x: '%.3f' % x if (x < 10**5) else '%.3e' % x)

steps = [5, 10, 15, 20, 30, 50, 100]
parameters = []

for step in steps:
    parameter = estimate_parameters(cn[:step])
    parameters.append({"$n$" : step, "$A_2$": parameter[0], "$\mu_2$": parameter[1], "$\gamma_2$": parameter[2]})
    
df = pd.DataFrame(parameters)
savetable("parameters_2d.tex", df)

## Variance of Theta

In [35]:
cn = init_walk(2, 50, 10**2, self_avoiding=True, do_resample=True)
lncn = np.log(cn)
theta = estimate_parameters(cn)
theta = np.array([np.log(theta[0]), np.log(theta[1]), theta[2] - 1])
N = np.arange(1, len(cn) + 1)
X = np.vstack((np.ones(len(N)), N, np.log(N)))
y = theta@X
error = np.array(lncn - y)
variance_lncn = np.var(error)*np.linalg.inv(X@X.T)

--------------------------------------------------

## Multiple runs of Theta

In [73]:
thetas = []
for attempt in np.arange(1,11):
    cn = init_walk(2, 100, 10**4, self_avoiding = True, do_resample = True)
    theta = estimate_parameters(cn)
    thetas.append({"Run" : attempt, "$A_2$" : theta[0], "$\mu_2$" : theta[1], "$\gamma_2$" : theta[2]})


----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [82]:
pd.reset_option('all')
df = pd.DataFrame(thetas)
df_var = pd.DataFrame(df.var()).transpose()
df_var = df_var.set_index("Run")
savetable("multiple_theta.tex", df)
savetable("multiple_theta_variance.tex", df_var)


: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.





: boolean
    use_inf_as_null had been deprecated and will be removed in a future
    version. Use `use_inf_as_na` instead.




## Task 9

In [3]:
dimensions = [4,5,6,7,8,9,10,15,20,50]

parameters = []

for dimension in dimensions:
    cn = init_walk(dimension, 100, 10**4, self_avoiding=True, do_resample=True)
    parameter = estimate_parameters(cn)
    
    parameters.append({"$d$" : dimension, "$A_2$": parameter[0], "$\mu_2$": parameter[1], "$\gamma_2$": parameter[2]})

----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [32]:
pd.set_option("display.precision", 3)

df = pd.DataFrame(parameters)
df.columns = ["$d$", "$A_d$", "$\mu_d$", "$\gamma_d$"]
df["$\mu_d^{Graham}$"] = 2*df["$d$"]-1-1/(2*df["$d$"])-3/(2*df["$d$"])**2-16/(2*df["$d$"])**3
savetable("parameters_for_dimensions.tex", df[1:])

In [31]:
df

Unnamed: 0,$d$,$A_d$,$\mu_d$,$\gamma_d$,$\mu_d^{Graham}$
0,4,1.171,6.771,1.067,6.797
1,5,1.145,8.835,1.024,8.854
2,6,1.113,10.877,1.011,10.887
3,7,1.091,12.901,1.008,12.907
4,8,1.077,14.918,1.005,14.922
5,9,1.067,16.93,1.003,16.932
6,10,1.058,18.939,1.003,18.941
7,15,1.038,28.962,1.001,28.963
8,20,1.027,38.972,1.0,38.973
9,50,1.01,98.99,1.0,98.99


In [21]:
df.transpose()

Unnamed: 0,0,1,2,3,4
Test,1,2,3,4,5
Test2,1,2,3,4,5
