## **Improve code efficiency using Linear Algebra**

### **Import libraries**

In [2]:
# !conda install -c pytorch pytorch --yes

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 23.3.1
  latest version: 23.5.0

Please update conda by running

    $ conda update -n base -c defaults conda

Or to minimize the number of packages updated during conda update use

     conda install conda=23.5.0



## Package Plan ##

  environment location: /home/ifkash/miniconda3/envs/py39

  added / updated specs:
    - pytorch


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2023.5.7           |   py39h06a4308_0         152 KB
    cudatoolkit-11.3.1         |       h2bc3f7f_2       549.3 MB
    cudnn-8.2.1                |       cuda11.3_0       865.2 MB
    cupti-11.3.1               |                0         8.0 MB
    future-0.18.3              |   py39h06a4308_0         671 KB
    libprotobuf-3.20.3         |       he621ea3_0         2.4 MB
    magma-2.7.0         

In [3]:
import torch

In [6]:
import random

random.seed(123)

### **For-loops and dot products**

In [4]:
b = 0.
x = [1.2, 2.2]
w = [3.3, 4.3]

output = b
for xj, wj in zip(x, w):
    output += xj * wj

output

13.42

In [5]:
b = torch.tensor([0.])
x = torch.tensor([1.2, 2.2])
w = torch.tensor([3.3, 4.3])

x.dot(w) + b

tensor([13.4200])

### **Benchmark**

In [7]:
b = 0.
x = [random.random() for _ in range(10_000)]
w = [random.random() for _ in range(10_000)]

In [10]:
t_b = torch.tensor(b)
t_x = torch.tensor(x)
t_w = torch.tensor(w)

In [11]:
def for_loop(x, w, b):
    output = b
    
    for xj, wj in zip(x, w):
        output += xj * wj
    
    return output

In [12]:
def pytorch_dot(x, w, b):
    return x.dot(w) + b

In [13]:
%timeit for_loop(x, w, b)

394 µs ± 14.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [14]:
%timeit pytorch_dot(t_x, t_w, t_b)

2.96 µs ± 21.4 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


### **Dealing with multiple training examples via matrix multiplication**

In [17]:
b = 0.
X = [
    [1.2, 2.2],
    [4.4, 5.5]
]
w = [3.3, 4.3]

outputs = []

for x in X:
    output = b

    for xj, wj in zip(x, w):
        output += xj * wj
    
    outputs.append(output)

outputs

[13.42, 38.17]

In [18]:
b = torch.tensor([0.])
X = torch.tensor([
    [1.2, 2.2],
    [4.4, 5.5]
])
w = torch.tensor([3.3, 4.4])

X.matmul(w) + b

tensor([13.6400, 38.7200])

### **Benchmark**

In [19]:
b = 0.
X = [
    [random.random() for _ in range(10_000)]
    for i in range(1_000)
]
w = [random.random() for _ in range(10_000)]

In [20]:
def for_loop(X, w, b):
    outputs = []

    for x in X:
        output = b

        for xj, wj in zip(x, w):
            output += xj * wj

        outputs.append(output)

    return outputs

In [21]:
%timeit for_loop(X, w, b)

378 ms ± 7.41 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [22]:
t_b = torch.tensor(b)
t_X = torch.tensor(X)
t_w = torch.tensor(w)

In [23]:
def pytorch_implementation(X, w, b):
    return X.matmul(w) + b

In [24]:
%timeit pytorch_implementation(t_X, t_w, t_b)

927 µs ± 49.5 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## **Multiplying two matrices**

In [25]:
X = torch.rand(100, 10)
W = torch.rand(50, 10)

R = torch.matmul(X, W.T)

In [26]:
R.shape

torch.Size([100, 50])

## **Broadcasting: computations and unequal tensor shapes**

In [27]:
a = torch.tensor([1.1, 2.1, 3.1, 4.1])
b = torch.tensor([5.4, 5.5, 5.6, 5.7])

a + b

tensor([6.5000, 7.6000, 8.7000, 9.8000])

In [28]:
A = torch.tensor([[1.1, 2.1, 3.1, 4.1],
                  [1.2, 2.2, 3.2, 4.2]])
b = torch.tensor([5.4, 5.5, 5.6, 5.7])

A + b

tensor([[6.5000, 7.6000, 8.7000, 9.8000],
        [6.6000, 7.7000, 8.8000, 9.9000]])