In [1]:
import numpy as np

In [2]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_grad(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred): # wrt y_pred
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true*np.log(y_pred)) 

def cross_entropy_grad(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true/y_pred) 

In [3]:
y_pred = np.random.randn(4)
y_pred = np.exp(y_pred)/np.sum( np.exp(y_pred) ) 
y = np.zeros(4)
y[2] = 1 

In [4]:
print(y)
print(y_pred)

[0. 0. 1. 0.]
[0.10982305 0.29057753 0.13858901 0.46101041]


In [5]:
cross_entropy_grad(y,y_pred)

-1.8038947767041258

In [6]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    """
    Calculate the cross-entropy loss.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - float: Cross-entropy loss.
    """
    epsilon = 1e-15  # Small constant to avoid log(0)
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate cross-entropy loss
    loss = -np.sum(y_true * np.log(y_pred)) / num_samples
    return loss

def cross_entropy_loss_grad(y_true, y_pred):
    """
    Calculate the gradient of the cross-entropy loss with respect to the predicted probabilities.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - 2D array: Gradient of the cross-entropy loss with respect to y_pred (shape: [num_samples, num_classes]).
    """
    epsilon = 1e-15  # Small constant to avoid division by zero
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid division by zero
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate the gradient of the cross-entropy loss
    grad = (-y_true / y_pred) / num_samples
    return grad

# Example usage
num_classes = 3
num_samples = 5

# Generate random one-hot encoded true labels and predicted probabilities
y_true = np.eye(num_classes)[np.random.choice(num_classes, num_samples)]
y_pred = np.random.rand(num_samples, num_classes)

# Calculate cross-entropy loss
loss = cross_entropy_loss(y_true, y_pred)
print(f"Cross-entropy loss: {loss:.4f}")

# Calculate the gradient of cross-entropy loss
grad = cross_entropy_loss_grad(y_true, y_pred)
print("Gradient of cross-entropy loss:")
print(grad)


Cross-entropy loss: 0.7182
Gradient of cross-entropy loss:
[[-0.         -0.23898495 -0.        ]
 [-0.24996778 -0.         -0.        ]
 [-0.66807594 -0.         -0.        ]
 [-0.         -0.         -1.39910054]
 [-0.         -0.20787214 -0.        ]]


In [7]:
y = [2,3, 4]

np.argmax(y)

2

In [8]:
y = np.array([i for i in range(16)])

In [14]:
y.reshape(2,8)

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [15]:
import numpy as np

# Example arrays
array1 = np.array([1, 2, 3, 4, 5])
array2 = np.array(['a', 'b', 'c', 'd', 'e'])

# Generate shuffled indices
shuffled_indices = np.arange(len(array1))
np.random.shuffle(shuffled_indices)

# Shuffle both arrays using the same indices
shuffled_array1 = array1[shuffled_indices]
shuffled_array2 = array2[shuffled_indices]

# Print the original and shuffled arrays
print("Original array1:", array1)
print("Original array2:", array2)
print("\nShuffled array1:", shuffled_array1)
print("Shuffled array2:", shuffled_array2)


Original array1: [1 2 3 4 5]
Original array2: ['a' 'b' 'c' 'd' 'e']

Shuffled array1: [4 5 3 2 1]
Shuffled array2: ['d' 'e' 'c' 'b' 'a']


In [19]:
xx = (np.arange(10))
np.random.shuffle(xx)
xx

array([3, 0, 2, 7, 9, 6, 5, 8, 1, 4])

In [20]:
xx[:4]

array([3, 0, 2, 7])

In [21]:
xx[4:8]

array([9, 6, 5, 8])

In [22]:
xx[8:12]

array([1, 4])

In [23]:
8/3

2.6666666666666665

In [24]:
8//3

2

In [29]:
batch_size = 8
(17+batch_size-1)//batch_size

3

In [34]:
x = np.array([_ for _ in range(16)]).reshape(4,4)

In [42]:
x = x.reshape(4,2,2)
x

array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15]]])

In [45]:
for i in range(3):
    print(np.sum(x, axis=i))
    print()

[[24 28]
 [32 36]]

[[ 2  4]
 [10 12]
 [18 20]
 [26 28]]

[[ 1  5]
 [ 9 13]
 [17 21]
 [25 29]]



In [74]:
x = np.arange(16).reshape(-1,2)

In [51]:
np.size(x, axis=1)

2

In [53]:
x

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [55]:
np.mean(x, axis=1, keepdims=True)

array([[ 0.5],
       [ 2.5],
       [ 4.5],
       [ 6.5],
       [ 8.5],
       [10.5],
       [12.5],
       [14.5]])

In [75]:
x = x-np.max(x,axis=0)
x

array([[-14, -14],
       [-12, -12],
       [-10, -10],
       [ -8,  -8],
       [ -6,  -6],
       [ -4,  -4],
       [ -2,  -2],
       [  0,   0]])

In [76]:
x = np.exp(x) / np.sum( np.exp(x), axis=0, keepdims=True)
x

array([[7.18993625e-07, 7.18993625e-07],
       [5.31268423e-06, 5.31268423e-06],
       [3.92557218e-05, 3.92557218e-05],
       [2.90062731e-04, 2.90062731e-04],
       [2.14328979e-03, 2.14328979e-03],
       [1.58368885e-02, 1.58368885e-02],
       [1.17019658e-01, 1.17019658e-01],
       [8.64664814e-01, 8.64664814e-01]])

In [78]:
np.sum(x, axis=0,keepdims=True)

array([[1., 1.]])

In [126]:
x = np.arange(12).reshape(4,3)

In [127]:
x

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [128]:
out_grad = np.random.randn(4,1)
out_grad

array([[ 0.56187735],
       [-0.40159985],
       [ 1.56968335],
       [-0.4155947 ]])

In [129]:
n = np.size(x,axis=0)

In [134]:
input = x.T[1]
np.dot( (np.identity(n) - input.T )*input, out_grad) 

array([[-28.9294163 ],
       [-31.09769306],
       [-18.50351023],
       [-33.64724065]])

In [131]:
grad = np.hstack([ np.dot( (np.identity(n) - input.T )*input, out_grad) for input in x.T  ])

In [132]:
grad

array([[-19.23103107, -28.9294163 , -41.25653381],
       [-20.43583063, -31.09769306, -44.38828776],
       [ -9.812931  , -18.50351023, -29.82282174],
       [-22.97138337, -33.64724065, -46.95183021]])

In [None]:
input = x.T[2]
