# What's new in Mojo🔥 SDK v0.5

## Keyword parameters

In [None]:
from tensor import Tensor
from algorithm import vectorize

struct SquareMatrix[dtype: DType = DType.float32, dim: Int = 4]():
  """A `dim` x `dim` matrix backed by a `Tensor[dtype]`.

  Parameters:
    dtype: Element type of the matrix (defaults to `DType.float32`).
    dim: Number of rows and of columns (defaults to 4).
  """
  var mat: Tensor[dtype]

  fn __init__(inout self, val: SIMD[dtype,1] = 5):
    """Allocates the backing tensor and sets every element to `val`.

    Args:
      val: Value written to every element (defaults to 5).
    """
    self.mat = Tensor[dtype](self.dim,self.dim)
    alias simd_width = simdwidthof[dtype]()
    @parameter
    fn fill_val[width: Int](idx: Int) -> None:
        # Broadcast `val` straight into a vector of the requested width;
        # loading the old contents first only to call `splat` on them is
        # a wasted memory read.
        self.mat.simd_store[width](idx, SIMD[dtype, width].splat(val))
    vectorize[simd_width, fill_val](self.mat.num_elements())

  fn __getitem__(self,x:Int,y:Int)->SIMD[dtype,1]:
    """Returns the element stored at `self.mat[x, y]`.

    Args:
      x: First index into the backing tensor.
      y: Second index into the backing tensor.

    Returns:
      The scalar at that position.
    """
    return self.mat[x,y]

  fn print(self):
    """Prints the backing tensor."""
    print(self.mat)

In [None]:
# No parameters or arguments given: the struct defaults apply
# (dtype=DType.float32, dim=4) and the fill value defaults to 5.
SquareMatrix().print()

In [None]:
# Default parameters, with the fill value passed as a keyword argument.
SquareMatrix(val=12).print()

In [None]:
# `dtype` is given positionally; `dim` keeps its default of 4.
SquareMatrix[DType.float64](10).print()

In [None]:
# Positional `dtype` combined with the keyword parameter `dim`.
SquareMatrix[DType.float64,dim=3](1).print()

In [None]:
# Every parameter and the fill-value argument are passed by keyword.
SquareMatrix[dtype=DType.float64,dim=3](val=1.5).print()

Keyword argument in `__getitem__()`

In [None]:
# Build a matrix with the struct's default parameters and fill value.
let sm = SquareMatrix()
sm.print()

print()
print('Keyword argument in __getitem__()')
# The subscript arguments are passed by keyword, using the `x` and `y`
# argument names from `__getitem__`'s signature.
print(sm[x=0, y=3])

## Automatic parameterization of functions

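* If an argument's type is parametric and its parameters are left unbound (e.g. `sm: SquareMatrix`), those parameters are automatically added as input parameters on the function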
* Function argument input parameters can now be referenced within the signature of the function

In [None]:
from math import mul
fn multiply(sm: SquareMatrix, val: SIMD[sm.dtype,1]) -> Tensor[sm.dtype]:
    """Returns a new tensor holding each element of `sm.mat` times `val`.

    `sm` is declared with unbound parameters, so its `dtype` is referenced
    from the signature as `sm.dtype`.

    Args:
      sm: The matrix whose elements are scaled.
      val: The scalar factor, of the same element type as `sm`.

    Returns:
      A tensor with the same shape as `sm.mat` containing the products.
    """
    alias nelts: Int = simdwidthof[sm.dtype]()
    let scaled = Tensor[sm.dtype](sm.mat.shape())

    @parameter
    fn scale_chunk[width: Int](offset: Int) -> None:
        # Load `width` elements, multiply them by `val`, store the result
        # at the same offset in the output tensor.
        let chunk = sm.mat.simd_load[width](offset)
        scaled.simd_store[width](offset, mul[sm.dtype, width](chunk, val))

    vectorize[nelts, scale_chunk](sm.mat.num_elements())
    return scaled

fn main():
    """Builds a default-parameter matrix filled with 5, scales it by 100.0 and prints the result."""
    let matrix = SquareMatrix(5)
    print(multiply(matrix, 100.0))
main()

## Load and save Tensors + String enhancements

In [None]:
from tensor import Tensor
from algorithm import vectorize
from time import now
from memory import memcpy

struct SquareMatrix[dtype: DType = DType.float32, dim: Int = 4]():
  """A `dim` x `dim` matrix backed by a `Tensor[dtype]`, with file save/load helpers.

  Parameters:
    dtype: Element type of the matrix (defaults to `DType.float32`).
    dim: Number of rows and of columns (defaults to 4).
  """
  var mat: Tensor[dtype]

  fn __init__(inout self, val:SIMD[dtype,1] = 5):
    """Allocates the backing tensor and sets every element to `val`.

    Args:
      val: Value written to every element (defaults to 5).
    """
    self.mat = Tensor[dtype](self.dim,self.dim)
    alias simd_width = simdwidthof[dtype]()
    @parameter
    fn fill_val[width: Int](idx: Int) -> None:
        # Broadcast `val` straight into a vector of the requested width;
        # loading the old contents first only to call `splat` on them is
        # a wasted memory read.
        self.mat.simd_store[width](idx, SIMD[dtype, width].splat(val))
    vectorize[simd_width, fill_val](self.mat.num_elements())

  fn print(self):
    """Prints the backing tensor."""
    print(self.mat)

  fn prepare_filename(self, fname: String)->String:
    """Builds the path a matrix is saved under from a user-supplied name.

    `_<tensor spec>` is inserted in front of the file extension. When the
    final path component has no extension, `.data` is appended. When the
    name has no directory component at all, `./` is prepended.

    Args:
      fname: File name or path, with or without an extension.

    Returns:
      The decorated path.
    """
    let spec_tag = "_" + self.mat.spec().__str__()
    let last_slash = fname.rfind('/')
    let last_dot = fname.rfind('.')
    # Default: no usable extension, so tag the name and add `.data`.
    var fpath = fname + spec_tag + '.data'
    # Only a dot inside the final path component, and not as its first
    # character, separates an extension. Dots in `./`, `../` or in
    # directory names must be left untouched.
    if last_dot > last_slash + 1:
        fpath = fname[0:last_dot] + spec_tag + fname[last_dot:len(fname)]
    # `find` returns -1 when nothing is found, so compare explicitly
    # instead of relying on the integer's truthiness.
    if fpath.find('/') == -1:
        fpath = './'+fpath
    return fpath

  fn save(self, fname: String='saved_matrix') raises -> String:
    """Writes the backing tensor to a file and reports where it went.

    Args:
      fname: Base name or path; it is decorated by `prepare_filename`.

    Returns:
      The path the tensor was written to.
    """
    let fpath = self.prepare_filename(fname)
    self.mat.tofile(fpath)
    print('File saved:',fpath)
    return fpath

  @staticmethod
  fn load[dtype: DType,dim: Int](fpath:String) raises -> Tensor[dtype]:
    """Reads a tensor from `fpath` and copies it into a `dim` x `dim` tensor.

    Parameters:
      dtype: Element type to read the file as.
      dim: Number of rows and of columns of the returned tensor.

    Args:
      fpath: Path of the file to read.

    Returns:
      A `dim` x `dim` tensor holding the loaded elements.

    Raises:
      An error if the file holds more elements than `dim * dim`.
    """
    let load_mat = Tensor[dtype].fromfile(fpath)
    let new_tensor = Tensor[dtype](dim,dim)
    # Guard the raw copy below: copying more elements than the
    # destination holds would write past the end of its buffer.
    if load_mat.num_elements() > new_tensor.num_elements():
        raise Error('File holds more elements than a dim x dim tensor: ' + fpath)
    memcpy(new_tensor.data(),load_mat.data(),load_mat.num_elements())
    # NOTE(review): presumably keeps `load_mat` alive until the memcpy
    # has finished reading from its buffer - confirm.
    _ = load_mat
    return new_tensor
    

In [None]:
# Build a default matrix, show it, then write it to disk.
# `save` returns the path it wrote to; the next cell reuses it.
let m = SquareMatrix()
m.print()
let fpath = m.save('saved_matrix')

In [None]:
# Read the file written by the previous cell back in.
print('Loading Tensor from file:',fpath)
print()
# `load` is a static method with its own `dtype` and `dim` parameters and
# returns a `Tensor`, not a `SquareMatrix`.
let load_mat = SquareMatrix.load[DType.float32,4](fpath)
print(load_mat)

## Benchmark enhancements

Benchmark the row-wise `mean()` of a matrix by vectorizing across columns and parallelizing across rows

In [None]:
from random import rand
# Create a 5 x 7 float32 tensor with `rand` and print it.
let tx = rand[DType.float32](5,7)
print(tx)

In [None]:
from tensor import Tensor
from random import rand
import benchmark
from time import sleep
from algorithm import vectorize, parallelize

# Element type used for the benchmark input in `main`.
alias dtype = DType.float32
# SIMD width for float32; `row_mean_fast` vectorizes with twice this width.
alias simd_width = simdwidthof[DType.float32]()

fn row_mean_naive[dtype: DType](t: Tensor[dtype]) -> Tensor[dtype]:
    """Computes the mean of each row of `t` with plain nested loops.

    Parameters:
      dtype: Element type of the input and of the result.

    Args:
      t: Input tensor, indexed as `t[row, column]`.

    Returns:
      A `t.dim(0)` x 1 tensor whose i-th element is the mean of row i.
    """
    let rows = t.dim(0)
    let cols = t.dim(1)
    var means = Tensor[dtype](rows, 1)
    for r in range(rows):
        # Sum the row element by element, then divide by the column count.
        var total: SIMD[dtype, 1] = 0
        for c in range(cols):
            total += t[r, c]
        means[r] = total / cols
    return means

fn row_mean_fast[dtype: DType](t: Tensor[dtype]) -> Tensor[dtype]:
    """Computes the mean of each row of `t`, vectorized within a row and parallelized across rows.

    Parameters:
      dtype: Element type of the input and of the result.

    Args:
      t: Input tensor; row `r` is read from flat offset `r * t.dim(1)`.

    Returns:
      A `t.dim(0)` x 1 tensor whose i-th element is the mean of row i.
    """
    let rows = t.dim(0)
    let cols = t.dim(1)
    var means = Tensor[dtype](rows, 1)

    @parameter
    fn mean_of_row(row: Int) -> None:
        # Flat offset of the first element of this row.
        let row_start = row * cols

        @parameter
        fn add_chunk[width: Int](col: Int) -> None:
            # Load `width` elements of the row and add their sum to the
            # row's running total.
            means[row] += t.simd_load[width](row_start + col).reduce_add()

        vectorize[2*simd_width, add_chunk](cols)
        means[row] /= cols

    # One work item per row, with as many workers requested as rows.
    parallelize[mean_of_row](rows, rows)
    return means

fn main():
    """Benchmarks `row_mean_naive` against `row_mean_fast` on a random 1000 x 100000 tensor.

    Prints both benchmark reports followed by the ratio of their mean
    run times.
    """
    let t = rand[dtype](1000,100000)

    # The benchmarked closures take no arguments, so they capture `t`.
    @parameter
    fn bench_mean():
        _ = row_mean_naive(t)

    @parameter
    fn bench_mean_fast():
        _ = row_mean_fast(t)

    let report = benchmark.run[bench_mean]()
    let report_fast = benchmark.run[bench_mean_fast]()
    report.print()
    report_fast.print()
    print("Speed up:",report.mean()/report_fast.mean())

main()

## SIMD enhancements

In [None]:
def main():
    """Prints two SIMD vectors and the result of joining them with `SIMD.join()`."""
    alias dtype = DType.float32

    # Two vectors with every lane set to the same value.
    let a = SIMD[dtype].splat(0.5)
    let b = SIMD[dtype].splat(2.5)

    print("SIMD a:",a)
    print("SIMD b:",b)
    print()
    print("SIMD a.join(b):",a.join(b))
main()