In [None]:
import numpy as np
import torch
from torch import nn
import tensorflow as tf

# Torch Tensor

Reference:
* https://pytorch.org/docs/stable/tensors.html

In [None]:
# Build a 2x2 tensor directly from a nested Python list.
data = [[1, 2], [3, 4]]
x_data = torch.tensor(data)

In [None]:
# Tensors can also be created from a NumPy array; here the array is built from
# the same nested list used for x_data.
np_array = np.array(data)
x_np = torch.from_numpy(np_array)

In [None]:
# Create new tensors that take their shape from an existing tensor (x_data).
x_ones = torch.ones_like(x_data) # retains the properties of x_data
print(f"Ones Tensor: \n {x_ones} \n")

# x_data was built from integers, so a floating-point dtype is requested explicitly here.
x_rand = torch.rand_like(x_data, dtype=torch.float) # overrides the datatype of x_data
print(f"Random Tensor: \n {x_rand} \n")

In [None]:
# A single shape tuple drives all three factory functions below.
shape = (2, 3)

rand_tensor = torch.rand(shape)
ones_tensor = torch.ones(shape)
zeros_tensor = torch.zeros(shape)

# Display each tensor under its label.
print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

In [None]:
# Use the GPU when CUDA is available and fall back to the CPU otherwise, so the
# cell also runs on machines without a GPU (a hard-coded "cuda" raises there).
device = "cuda" if torch.cuda.is_available() else "cpu"
tensor = torch.rand(3, 4).to(device)

# Inspect the basic attributes of the tensor.
print(f"Shape of tensor: {tensor.shape}")
print(f"Datatype of tensor: {tensor.dtype}")
print(f"Device tensor is stored on: {tensor.device}")

In [None]:
# Move the tensor onto the GPU, but only when CUDA is actually available;
# otherwise it is left where it is.
if torch.cuda.is_available():
    tensor = tensor.to('cuda')

In [None]:
# NumPy-style indexing and slicing on a 4x4 tensor of ones.
tensor = torch.ones(4, 4)

print('First row: ', tensor[0])
print('First column: ', tensor[:, 0])
print('Last column:', tensor[..., -1])

# Slice assignment writes into the tensor in place: zero out the second column.
tensor[:, 1] = 0
print(tensor)

In [None]:
# Concatenate three copies of `tensor` along dim=1, i.e. side by side (more columns).
t1 = torch.cat([tensor, tensor, tensor], dim=1)
print(t1)

In [None]:
# Concatenate three copies of `tensor` along dim=0, i.e. stacked vertically (more rows).
# Note: this rebinds `t1`, replacing the dim=1 result from the previous cell.
t1 = torch.cat([tensor, tensor, tensor], dim=0)
print(t1)

In [None]:
# Show the shape of `tensor` before the arithmetic examples that follow.
print(f"tensor.shape: {tensor.shape}\n")

In [None]:
# Matrix multiplication, written three equivalent ways: the @ operator, the
# .matmul method, and torch.matmul writing into a pre-allocated tensor.
# y1, y2 and y3 will have the same value.
y1 = tensor @ tensor.T
y2 = tensor.matmul(tensor.T)

# y3 only serves as an output buffer; its random contents are overwritten by out=y3.
# NOTE(review): rand_like(tensor) matches the product's shape only when `tensor` is square — confirm.
y3 = torch.rand_like(tensor)
torch.matmul(tensor, tensor.T, out=y3)


# Element-wise product, again three equivalent ways: the * operator, the .mul
# method, and torch.mul with an output buffer. z1, z2 and z3 will have the same value.
z1 = tensor * tensor
z2 = tensor.mul(tensor)

# z3 is the output buffer for the out= form; being the last expression, the
# result of this call is also what the cell displays.
z3 = torch.rand_like(tensor)
torch.mul(tensor, tensor, out=z3)

In [None]:
# Reduce all elements of `tensor` to a single-element tensor.
agg = tensor.sum()
# .item() extracts that single element as a plain Python number.
agg_item = agg.item()
print(agg_item, type(agg_item))

In [None]:
# In-place operation: add_ (trailing underscore) modifies `tensor` itself instead
# of returning a new tensor. Because the change is applied to the existing
# object, re-running this cell adds 5 again.
print(tensor, "\n")
tensor.add_(5)
print(tensor)

In [None]:
# Conversion between NumPy arrays and tensors.
# Per the PyTorch documentation, a CPU tensor and the array returned by
# .numpy() / passed to from_numpy() share memory; the prints below make that visible.

# Tensor -> NumPy array.
t = torch.ones(5)
print(f"t: {t}")
n = t.numpy()
print(f"n:{n}")

# Mutate the tensor in place; n is printed again without being reassigned.
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

# NumPy array -> tensor, then mutate the array in place (out=n) and print the tensor.
n = np.zeros(5)
t = torch.from_numpy(n)
np.add(n, 1, out=n)
print(f"t: {t}")
print(f"n: {n}")

# Keras sparse_categorical_crossentropy() vs. Torch nn.CrossEntropyLoss()

Reference:
* https://pytorch.org/docs/1.9.1/generated/torch.nn.CrossEntropyLoss.html#torch.nn.CrossEntropyLoss
* https://www.tensorflow.org/api_docs/python/tf/keras/metrics/sparse_categorical_crossentropy

1. Formula

Binary cross entropy - https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html#cross-entropy:
$$ \mathbb{J}(W) = -\frac{1}{N} \sum_{i=1}^N [y_i \log(\hat{y_i}) + (1-y_i) \log(1 - \hat{y_i})] $$

Categorical cross entropy - https://datascience.stackexchange.com/questions/41921/sparse-categorical-crossentropy-vs-categorical-crossentropy-keras-accuracy#answer-41923:
$$ \mathbb{J}(W) = -\frac{1}{N} \sum_{s \in S} \sum_{c \in C} 1_{s \in c} \log p(s \in c) $$

2. Implementation

Keras' SparseCategoricalCrossentropy:
* https://github.com/keras-team/keras/blob/v2.8.0/keras/losses.py#L1834-L1863

In [None]:
# Toy batch of two samples over three classes.
# y_true holds the integer class index of each sample (sparse labels);
# y_pred holds one row of per-class probabilities per sample (each row sums to 1).
y_true = [1, 2]
y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]

In [None]:
# Shape of the predictions as a tensor: one row per sample, one column per class.
torch.tensor(y_pred).shape

In [None]:
# Shape of the labels as a long tensor: one class index per sample.
torch.tensor(y_true, dtype=torch.long).shape

In [None]:
# Keras loss with reduction disabled (Reduction.NONE): the loss object is called
# on the labels and predictions, then the result is averaged manually with NumPy.
loss = tf.keras.losses.SparseCategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(y_true, y_pred)
np.average(loss.numpy())

In [None]:
# Same Keras loss with its default reduction.
# NOTE(review): expected to equal the manual average from the reduction=NONE cell — confirm on the installed TF version.
loss = tf.keras.losses.SparseCategoricalCrossentropy()(y_true, y_pred)
loss.numpy()

In [None]:
# Manual calculation for SparseCategoricalCrossentropy: for every sample, take
# -log of the probability that y_pred assigns to the sample's true class.
[
    -np.log(y_pred[sample_idx][true_class])
    for sample_idx, true_class in enumerate(y_true)
]

In [None]:
# `weight` (optional per-class rescaling) is left at its default of None; the
# previous dangling `weight=` was a syntax error.
# NOTE: nn.CrossEntropyLoss applies log-softmax to its input, i.e. it expects
# raw logits. y_pred holds probabilities, so these per-sample values are not
# expected to match the Keras results computed from the same y_pred.
nn.CrossEntropyLoss(reduction="none")(torch.tensor(y_pred), torch.tensor(y_true, dtype=torch.long))

In [None]:
# Display y_pred converted to a tensor.
torch.tensor(y_pred)

In [None]:
# Random example batch: a 3x5 tensor of standard-normal values that tracks
# gradients, and 3 integer targets filled in place by random_(5).
# NOTE(review): the name `input` shadows the Python builtin; it is read again by a later cell.
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)

In [None]:
# Display the raw prediction list.
y_pred

In [None]:
# Zero tensor with the same shape as y_pred.
torch.zeros(torch.tensor(y_pred).shape)

In [None]:
from torch.nn.functional import one_hot

def sparse_cross_entropy(y_true, y_pred):
    """Mean sparse categorical cross-entropy computed from class probabilities.

    For every sample, the probability that ``y_pred`` assigns to the sample's
    true class is selected, ``-log`` is applied, and the results are averaged
    over the batch.

    Args:
        y_true: Integer class indices, one per sample (list, NumPy array or tensor).
        y_pred: Per-class probabilities, one row per sample
            (nested list, NumPy array or tensor).

    Returns:
        A 0-dim tensor holding the mean loss.
    """
    # as_tensor reuses an existing tensor instead of copying it, so a y_pred
    # that requires grad stays attached to the autograd graph. torch.tensor()
    # on a tensor would copy, detach and emit a UserWarning.
    probs = torch.as_tensor(y_pred)
    labels = torch.as_tensor(y_true, dtype=torch.long, device=probs.device)
    # Select probs[i, labels[i]] for every row i.
    true_class_probs = probs.gather(1, labels.unsqueeze(1)).squeeze(1)
    return torch.mean(-torch.log(true_class_probs))

In [None]:
# Evaluate the manual implementation on the toy batch.
# NOTE(review): expected to agree with the Keras mean loss computed earlier — confirm.
sparse_cross_entropy(y_true, y_pred)

In [None]:
# Display the random `input` tensor created earlier (this name shadows the Python builtin).
input

# Torch Autograd

Reference:
* https://towardsdatascience.com/papers-simplified-gradients-without-backpropagation-96e8533943fc

In [None]:
import torch

def function(theta):
    """Return (theta[0] + theta[1]) * theta[2] for an indexable ``theta``."""
    summed = theta[0] + theta[1]
    return summed * theta[2]


# theta tracks gradients, so autograd can differentiate `function` with respect to it.
theta = torch.tensor([3., -1., 2.], requires_grad=True)
result = function(theta)

In [None]:
# Backpropagate from the scalar `result`; autograd stores d(result)/d(theta) in theta.grad.
result.backward()
print(result)
print(theta.grad)