In [334]:
import torch

print('libraries imported')

libraries imported


##### 1. Mask it with the *idx* list for easier access and use only slicing instead of a loop.

In [335]:
x = torch.full(size=(13, 13), fill_value=1)
# x.new_full(size = (2, 2), fill_value = 2)
idx1 = [1, 6, 11] 
idx2 = [3, 4, 8, 9]
x[idx1] = 2     ## access the 2nd, 7th, 12th row using the mask
x[:, idx1] = 2  
x[idx2, 3:5] = 3
x[idx2, 8:10] = 3
x

tensor([[1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 3, 3, 1, 2, 1, 3, 3, 1, 2, 1],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1]])

##### 2. Eigendecomposition

In [336]:
square_matrix_size = (20, 20)

x = torch.randn(square_matrix_size, dtype = torch.float64)  ## use same dtypes or the multiplicaion fails
y = torch.diagflat(torch.arange(1, 21, dtype = torch.float64))

z = torch.mm(torch.mm(x, y), x.inverse()) ## M * [1..20] diagonals * M_inverse

result = torch.linalg.eigvals(z) ## directly computes the eigenvalues instead of torch.linalg.eig
result

tensor([ 1.0000+0.j, 20.0000+0.j, 19.0000+0.j, 18.0000+0.j,  2.0000+0.j,  3.0000+0.j,
         4.0000+0.j, 17.0000+0.j, 16.0000+0.j, 15.0000+0.j,  5.0000+0.j,  6.0000+0.j,
        14.0000+0.j, 13.0000+0.j, 12.0000+0.j,  7.0000+0.j,  8.0000+0.j, 11.0000+0.j,
        10.0000+0.j,  9.0000+0.j], dtype=torch.complex128)

##### 3. Flops per second

In [337]:
from time import perf_counter

from torch import square
time_start = perf_counter()

square_matrix_size = (5000, 5000)

x = torch.randn(square_matrix_size, dtype = torch.float64)
y = torch.randn(square_matrix_size, dtype = torch.float64)

z = torch.mm(x, y)
flops_estimate = pow(2 * square_matrix_size[0], 3) ## case for a MM multiplication

time_stop = perf_counter()
total_time = time_stop - time_start

print('({} x {}) multiplication took {} seconds'.format(square_matrix_size[0], square_matrix_size[1], total_time))
print('Estimated {} floating point operations per second.'.format(flops_estimate / total_time))
print('With Result: {}'.format(z))

(5000 x 5000) multiplication took 2.0359696999985317 seconds
Estimated 491166445159.1402 floating point operations per second.
With Result: tensor([[  15.4884,  -24.6764,   81.7239,  ...,   -9.3399,  -44.9573,
          -30.7097],
        [  46.1723,  -16.1209,   70.5612,  ...,   17.6019,    5.7808,
         -108.8492],
        [  41.6562,   75.4598,   10.2209,  ...,   -3.7426,  -17.2806,
           22.4497],
        ...,
        [  70.2391,    3.2419,  -13.0488,  ...,  -14.0357,   -4.9220,
          -36.7500],
        [ -58.3854,   15.0983, -144.9469,  ...,  -27.2994,   58.5320,
         -101.4557],
        [  44.7101, -132.8069,  -49.0600,  ...,  -95.6766,   -0.7823,
           70.8347]], dtype=torch.float64)


##### 4. Playing with strides

Building the same function that performs a row multiplication, one time using for loops and plain Python and the other time using tensors in PyTorch, to ultimately check the time difference as a performance measure.

In [348]:
def mul_row(t1): ## t1 -> tensor
    if (t1.dim() > 2):
        print('Tensor has more than two dimensions!')
        raise OverflowError
    else:
        flattened_tensor = t1.view(-1)
        total_size = flattened_tensor.size(dim=0)
        multiplying_factor = 1.0
        
        for i in range(1, total_size):
            if (i % t1.size()[1]  == 0):
                multiplying_factor += 1.0
            flattened_tensor[i] *= multiplying_factor
    
    result = flattened_tensor.view(t1.size()) ## back to original shape
    return result

def mul_row_fast(t1):
    n_rows = t1.size()[0]
    
    t2 = torch.arange(1.0, n_rows + 1).view(n_rows, 1) ## t2 can be seen as the multiplicaton tensor
    result = torch.mul(t1, t2) 
    
    return result

Checking the performance time difference.

In [357]:
from time import perf_counter

m = torch.full((2000, 800), 2.0)

time_start = perf_counter()
print(mul_row(m))
time_stop = perf_counter()    
total_time = time_stop - time_start
print('Original row multiplication took {} seconds'.format(total_time))

print('')

time_start = perf_counter()
print(mul_row_fast(m))
time_stop = perf_counter()    
total_time = time_stop - time_start
print('Fast PyTorch row multiplication took {} seconds'.format(total_time))      

tensor([[2.0000e+00, 2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00,
         2.0000e+00],
        [4.0000e+00, 4.0000e+00, 4.0000e+00,  ..., 4.0000e+00, 4.0000e+00,
         4.0000e+00],
        [6.0000e+00, 6.0000e+00, 6.0000e+00,  ..., 6.0000e+00, 6.0000e+00,
         6.0000e+00],
        ...,
        [3.9960e+03, 3.9960e+03, 3.9960e+03,  ..., 3.9960e+03, 3.9960e+03,
         3.9960e+03],
        [3.9980e+03, 3.9980e+03, 3.9980e+03,  ..., 3.9980e+03, 3.9980e+03,
         3.9980e+03],
        [4.0000e+03, 4.0000e+03, 4.0000e+03,  ..., 4.0000e+03, 4.0000e+03,
         4.0000e+03]])
Original row multiplication took 16.533173599998918 seconds

tensor([[2.0000e+00, 2.0000e+00, 2.0000e+00,  ..., 2.0000e+00, 2.0000e+00,
         2.0000e+00],
        [8.0000e+00, 8.0000e+00, 8.0000e+00,  ..., 8.0000e+00, 8.0000e+00,
         8.0000e+00],
        [1.8000e+01, 1.8000e+01, 1.8000e+01,  ..., 1.8000e+01, 1.8000e+01,
         1.8000e+01],
        ...,
        [7.9840e+06, 7.9840e+06, 7.9840