# Código eficiente

## 1. Benchmarking

Usaremos las siguientes herramientas:
- @btime
- @benchmark
- BenchmarkTools.DEFAULT_PAREMETERS.samples
- BenchmarkTools.DEFAULT_PARAMETERS.seconds

In [None]:
using BenchmarkTools
using LinearAlgebra

In [None]:
A = rand(1000,1000)
@btime norm(A)

In [None]:
function tiempo_funcion()
    B = rand(1000, 1000)
    @btime norm($B)
end

tiempo_funcion()

In [None]:
A = rand(10_000, 10_000)
b = @benchmark norm(A)

In [None]:
b.times

In [None]:
b.times.*10^-9

In [None]:
b = @benchmark(norm($A), samples=10)

In [None]:
BenchmarkTools.DEFAULT_PARAMETERS.samples = 50

In [None]:
b = @benchmark norm($A)

In [None]:
BenchmarkTools.DEFAULT_PARAMETERS.seconds = 50

In [None]:
b = @benchmark norm($A)

## 2. Evitar tipos de datos abstractos 

Usemos las funciones `supertype()` y `subtype()`

In [None]:
function registro_juegos_ganados(ngames)
    games_won = []   # inicializa un arreglo
    for i = 1:ngames
        r = rand()
        if r >= 0.5
            push!(games_won, i)
        end
    end
    return games_won
end

ntrials = 1000
@btime registro_juegos_ganados(2)
@btime registro_juegos_ganados(ntrials)

In [None]:
function registro_juegos_ganados_v2(ngames)
    games_won = Int64[]   # inicializa un arreglo de tipo Int64
    for i = 1:ngames
        r = rand()
        if r >= 0.5
            push!(games_won, i)
        end
    end
    return games_won
end

ntrials = 1000
@btime registro_juegos_ganados_v2(2)
@btime registro_juegos_ganados_v2(ntrials);

In [None]:
h = []
g = Int64[]

In [None]:
@show typeof(h)

In [None]:
@show typeof(g)

## 3. Evitar variables de ámbito global

In [6]:
ntrials = 1000
allgames = rand(ntrials)

function registro_juegos_ganados_vglobal()
    games_won = Int64[]
    for (curi, curgame) in enumerate(allgames)
        if curgame >= 0.5
            push!(games_won, curi)
        end
    end
    return games_won
end


registro_juegos_ganados_vglobal();
registro_juegos_ganados_vglobal();

In [7]:
@btime registro_juegos_ganados_vglobal();

  120.114 μs (4499 allocations: 140.98 KiB)


In [9]:
function registro_juegos_ganados_vlocal(ntrials)
    allgames = rand(ntrials)
    games_won = Int64[]
    for (curi, curgame) in enumerate(allgames)
        if curgame >= 0.5
            push!(games_won, curi)
        end
    end
    return games_won
end


registro_juegos_ganados_vlocal(ntrials);
registro_juegos_ganados_vlocal(ntrials);

In [11]:
@btime registro_juegos_ganados_vlocal(ntrials);

  10.443 μs (10 allocations: 16.27 KiB)


Si es indispensable utilizar variables en el ámbito global, se decrararán con `const`. Aquí `const` no significa que el valor de la variable declarada no cambia, si no se refiere a que el tipo de la variable permancerá constante.  

In [12]:
const myglobalint = 1
myglobalint = 2
@show myglobalint

myglobalint = 2




2

In [13]:
myglobalint = 1.5

LoadError: invalid redefinition of constant myglobalint

## 4. Prealojamiento de memoria

En lugar de usar un vector de enteros que se inicializa vacío y añadirle elementos con la función `push!()`, es mejor crear un vector con una dimensión específica y se va agregando elemento a elemento en una posición determinada.

In [14]:
function record_games_won_v2(ngames)
    games_won = Int64[]
    for i = 1:ngames
        r = rand()
        if r >= 0.5
            push!(games_won,i)
        end
    end
    return games_won
end

ntrials = 1000
record_games_won_v2(ntrials);

In [15]:
@btime record_games_won_v2(ntrials);

  16.221 μs (9 allocations: 8.33 KiB)


In [16]:
function record_games_won_preallocate(ntrials)
    allgames = rand(ntrials)
    games_won = Vector{Int64}(undef,ntrials)
    gi = 1
    for (curi,curgame) in enumerate(allgames)
        if curgame >= 0.5
            games_won[gi] = curi
            gi += 1
        end
    end
    return games_won[1:gi-1]
end

ntrials = 1000
record_games_won_preallocate(ntrials);

In [17]:
@btime record_games_won_preallocate(ntrials);

  6.702 μs (3 allocations: 19.69 KiB)


## 5. Usar operaciones vectorizadas (Broadcasting)

In [21]:
function record_games_won_preallocate(ntrials) 
    allgames = rand(ntrials)
    games_won = findall(allgames.>= 0.5)
    return games_won
end

ntrials = 1000;

In [22]:
record_games_won_preallocate(ntrials); 
@btime record_games_won_preallocate(ntrials); 

  3.246 μs (5 allocations: 16.24 KiB)


In [None]:
?findall

## 6. No es necesario vectorizar siempre las funciones

In [26]:
function find_hypotenuse_vectorized(b,hi)
    return sqrt.(b.^2 + hi.^2)
end

b = rand(ntrials)
hi = rand(ntrials);

In [29]:
@btime find_hypotenuse_vectorized($b,$hi);  # usa la funcion vectorizada
@btime find_hypotenuse_vectorized.($b,$hi);  # vectoriza la llamada

  4.603 μs (4 allocations: 31.75 KiB)
  1.274 μs (1 allocation: 7.94 KiB)


In [30]:
function find_hypotenuse_forloop(b,hi)
    accum_vec = similar(b)  # preallocating memory
    for i = 1:length(b)
        accum_vec[i] = sqrt(b[i]^2 + hi[i]^2)
    end
    return accum_vec
end

b = rand(ntrials)
hi = rand(ntrials);

In [32]:
@btime find_hypotenuse_forloop($b,$hi);  # usa una funcion convencional con prealojamiento de memoria

  2.130 μs (1 allocation: 7.94 KiB)


Se recomienda utilizar la operación de _broadcasting_ vectorizando la llamada a las funciones que reciben como argumentos vectores.

## 7. Reutilizar la memoria

In [33]:
function find_sum_of_sqrt_vectors(nvectors)
    sumvector = Vector{Float64}(undef, nvectors)
    v = sqrt.(1:nvectors)
    for i = 1:nvectors
        sumvector[i] = sum(v[1:i])
    end
    return sumvector
end

ntrials = 1000
find_sum_of_sqrt_vectors(ntrials);

In [34]:
@btime find_sum_of_sqrt_vectors(ntrials);

  546.477 μs (1002 allocations: 3.98 MiB)


In [36]:
function find_sum_of_sqrt_vectors_reusemem(nvectors)
    sumvector = Vector{Float64}(undef, nvectors)
    v = Vector{Float64}(undef, nvectors)
    for i = 1:nvectors
        v[1:i] .= sqrt.(1:i)
        sumvector[i] = sum(v)
        v .= 0
    end
    return sumvector
end

ntrials = 1000
find_sum_of_sqrt_vectors_reusemem(ntrials);

In [37]:
@btime find_sum_of_sqrt_vectors_reusemem(ntrials);

  1.044 ms (2 allocations: 15.88 KiB)


En la segunda función se reutiliza la memoria apartada (_preallocated_) en el vector `v` por lo que no se hace ningún solicitud de memoria extra.

## 9. Use `@view` cuando no se necesite una copia de datos

In [39]:
using SparseArrays
using LinearAlgebra
using Random

In [49]:
A = sprand(500, 500, 0.1)

function set_sum(A, rowids, colids)
    S = sum(A[rowids, colids])
end

set_sum(A, randperm(100), randperm(100));


In [50]:
@btime set_sum($A, randperm(100), randperm(100));

  58.840 μs (17 allocations: 27.09 KiB)


In [51]:
function set_sum_view(A, rowids, colids)
    S = sum(view(A, rowids, colids))
end

set_sum_view(A, randperm(100), randperm(100));

In [52]:
@btime set_sum_view($A, randperm(100), randperm(100));

  265.419 μs (2 allocations: 1.75 KiB)


In [54]:
function find_sum_of_sqrt_vectors_copies(nvectors)
    sumvector =  Vector{Float64}(undef, nvectors)
    v = sqrt.(1:nvectors)
    for i = 1:nvectors
        sumvector[i] = sum(v[1:i])
    end
    return sumvector
end

ntrials = 1000
@btime find_sum_of_sqrt_vectors_copies(ntrials);

  539.435 μs (1002 allocations: 3.98 MiB)


In [55]:
function find_sum_of_sqrt_vectors_views(nvectors)
    sumvector =  Vector{Float64}(undef, nvectors)
    v = sqrt.(1:nvectors)
    for i = 1:nvectors
        sumvector[i] = sum(@view v[1:i])
    end
    return sumvector
end

ntrials = 1000
@btime find_sum_of_sqrt_vectors_views(ntrials);


  54.560 μs (2 allocations: 15.88 KiB)


El subarreglo provisto por `@view` solo "observa" los datos pero no hace copia ni referencia a ellos.