In [1]:
import numpy as np

In [2]:
def mse(y_true, y_pred):
    return np.mean(np.power(y_true - y_pred, 2))

def mse_grad(y_true, y_pred):
    return 2 * (y_pred - y_true) / np.size(y_true)

def binary_cross_entropy(y_true, y_pred):
    return np.mean(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))

def binary_cross_entropy_grad(y_true, y_pred): # wrt y_pred
    return ((1 - y_true) / (1 - y_pred) - y_true / y_pred) / np.size(y_true)

def cross_entropy(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true*np.log(y_pred)) 

def cross_entropy_grad(y_true,y_pred,epsilon=1e-15):
    y_pred = np.clip(y_pred, epsilon, 1.0-epsilon)
    return np.mean(-y_true/y_pred) 

In [3]:
y_pred = np.random.randn(4,3)
y_pred = np.exp(y_pred)/np.sum( np.exp(y_pred),axis=0) 
y_pred

array([[0.63352933, 0.07021526, 0.19086505],
       [0.18790572, 0.15119385, 0.08012539],
       [0.13921242, 0.61820793, 0.53989059],
       [0.03935252, 0.16038296, 0.18911897]])

In [4]:
y = np.array( [np.random.randint(0,4) for i in range(3)] )
y
y = np.array([[1 if val==i else 0 for i in range(4)] for val in y]).T
y

array([[0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 0]])

In [5]:
np.sum(np.argmax(y_pred,axis=0) == np.argmax(y,axis=0))

0

In [6]:
cross_entropy_grad(y,y_pred)

-1.5863830274245758

In [7]:
import numpy as np

def cross_entropy_loss(y_true, y_pred):
    """
    Calculate the cross-entropy loss.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - float: Cross-entropy loss.
    """
    epsilon = 1e-15  # Small constant to avoid log(0)
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate cross-entropy loss
    loss = -np.sum(y_true * np.log(y_pred)) / num_samples
    return loss

def cross_entropy_loss_grad(y_true, y_pred):
    """
    Calculate the gradient of the cross-entropy loss with respect to the predicted probabilities.

    Parameters:
    - y_true: 2D array with one-hot encoded true class labels (shape: [num_samples, num_classes]).
    - y_pred: 2D array with predicted probabilities (shape: [num_samples, num_classes]).

    Returns:
    - 2D array: Gradient of the cross-entropy loss with respect to y_pred (shape: [num_samples, num_classes]).
    """
    epsilon = 1e-15  # Small constant to avoid division by zero
    num_samples = len(y_true)

    # Clip predicted probabilities to avoid division by zero
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)

    # Calculate the gradient of the cross-entropy loss
    grad = (-y_true / y_pred) / num_samples
    return grad

# Example usage
num_classes = 3
num_samples = 5

# Generate random one-hot encoded true labels and predicted probabilities
y_true = np.eye(num_classes)[np.random.choice(num_classes, num_samples)]
y_pred = np.random.rand(num_samples, num_classes)

# Calculate cross-entropy loss
loss = cross_entropy_loss(y_true, y_pred)
print(f"Cross-entropy loss: {loss:.4f}")

# Calculate the gradient of cross-entropy loss
grad = cross_entropy_loss_grad(y_true, y_pred)
print("Gradient of cross-entropy loss:")
print(grad)


Cross-entropy loss: 1.1623
Gradient of cross-entropy loss:
[[-0.         -1.44399623 -0.        ]
 [-0.         -2.3464741  -0.        ]
 [-0.34301329 -0.         -0.        ]
 [-0.         -0.35776464 -0.        ]
 [-0.25717843 -0.         -0.        ]]


In [8]:
y = [2,3, 4]

np.argmax(y)

2

In [9]:
y = np.array([i for i in range(16)])

In [10]:
y.reshape(2,8)

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])

In [11]:
import numpy as np

# Example arrays
array1 = np.array([1, 2, 3, 4, 5])
array2 = np.array(['a', 'b', 'c', 'd', 'e'])

# Generate shuffled indices
shuffled_indices = np.arange(len(array1))
np.random.shuffle(shuffled_indices)

# Shuffle both arrays using the same indices
shuffled_array1 = array1[shuffled_indices]
shuffled_array2 = array2[shuffled_indices]

# Print the original and shuffled arrays
print("Original array1:", array1)
print("Original array2:", array2)
print("\nShuffled array1:", shuffled_array1)
print("Shuffled array2:", shuffled_array2)


Original array1: [1 2 3 4 5]
Original array2: ['a' 'b' 'c' 'd' 'e']

Shuffled array1: [5 4 1 2 3]
Shuffled array2: ['e' 'd' 'a' 'b' 'c']


In [12]:
xx = (np.arange(10))
np.random.shuffle(xx)
xx

array([8, 7, 1, 2, 6, 5, 4, 9, 3, 0])

In [13]:
xx[:4]

array([8, 7, 1, 2])

In [14]:
xx[4:8]

array([6, 5, 4, 9])

In [15]:
xx[8:12]

array([3, 0])

In [16]:
8/3

2.6666666666666665

In [17]:
8//3

2

In [18]:
batch_size = 8
(17+batch_size-1)//batch_size

3

In [19]:
x = np.array([_ for _ in range(16)]).reshape(4,4)

In [20]:
x = x.reshape(4,2,2)
x

array([[[ 0,  1],
        [ 2,  3]],

       [[ 4,  5],
        [ 6,  7]],

       [[ 8,  9],
        [10, 11]],

       [[12, 13],
        [14, 15]]])

In [21]:
for i in range(3):
    print(np.sum(x, axis=i))
    print()

[[24 28]
 [32 36]]

[[ 2  4]
 [10 12]
 [18 20]
 [26 28]]

[[ 1  5]
 [ 9 13]
 [17 21]
 [25 29]]



In [22]:
x = np.arange(16).reshape(-1,2)

In [23]:
np.size(x, axis=1)

2

In [24]:
x

array([[ 0,  1],
       [ 2,  3],
       [ 4,  5],
       [ 6,  7],
       [ 8,  9],
       [10, 11],
       [12, 13],
       [14, 15]])

In [25]:
np.mean(x, axis=1, keepdims=True)

array([[ 0.5],
       [ 2.5],
       [ 4.5],
       [ 6.5],
       [ 8.5],
       [10.5],
       [12.5],
       [14.5]])

In [26]:
x = x-np.max(x,axis=0)
x

array([[-14, -14],
       [-12, -12],
       [-10, -10],
       [ -8,  -8],
       [ -6,  -6],
       [ -4,  -4],
       [ -2,  -2],
       [  0,   0]])

In [27]:
x = np.exp(x) / np.sum( np.exp(x), axis=0, keepdims=True)
x

array([[7.18993625e-07, 7.18993625e-07],
       [5.31268423e-06, 5.31268423e-06],
       [3.92557218e-05, 3.92557218e-05],
       [2.90062731e-04, 2.90062731e-04],
       [2.14328979e-03, 2.14328979e-03],
       [1.58368885e-02, 1.58368885e-02],
       [1.17019658e-01, 1.17019658e-01],
       [8.64664814e-01, 8.64664814e-01]])

In [28]:
np.sum(x, axis=0,keepdims=True)

array([[1., 1.]])

In [29]:
x = np.arange(12).reshape(4,3)

In [30]:
x

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [31]:
out_grad = np.random.randn(4,3)
out_grad

array([[ 1.15073218, -0.48409823, -0.37161561],
       [ 0.40576168,  1.88404322,  0.75831481],
       [ 1.19988577, -0.69621211,  0.61551282],
       [-0.09065144,  0.64985617,  0.20136055]])

In [32]:
out_grad[:,1:2]

array([[-0.48409823],
       [ 1.88404322],
       [-0.69621211],
       [ 0.64985617]])

In [33]:
n = np.size(x,axis=0)

In [34]:
index = 2
input = x[:,index:index+1]
print(f"{input}")
print(f"{out_grad[:,index:index+1]}")
np.dot( (np.identity(n) - input.T )*input, out_grad[:,index:index+1]) 

[[ 2]
 [ 5]
 [ 8]
 [11]]
[[-0.37161561]
 [ 0.75831481]
 [ 0.61551282]
 [ 0.20136055]]


array([[ -21.11805416],
       [ -47.14548333],
       [ -76.57518923],
       [-109.84656011]])

In [35]:
np.size(x, axis=1) == np.size(out_grad, axis=1)

True

In [36]:
print(np.hstack( [np.dot( (np.identity(n)-x[:,i:i+1].T)*x[:,i:i+1], out_grad[:,i:i+1]) for i in range(np.size(x,axis=1)) ] ))
print()
print(np.hstack( [[np.dot( (np.identity(n)-x[:,i:i+1].T)*x[:,i:i+1], out_grad[:,i:i+1])] for i in range(np.size(x,axis=1)) ] ))


[[   0.           -9.16124985  -21.11805416]
 [ -21.58492488  -27.1724336   -47.14548333]
 [ -38.40510522  -65.61354608  -76.57518923]
 [ -69.22249272  -80.27295444 -109.84656011]]

[[[   0.        ]
  [ -21.58492488]
  [ -38.40510522]
  [ -69.22249272]
  [  -9.16124985]
  [ -27.1724336 ]
  [ -65.61354608]
  [ -80.27295444]
  [ -21.11805416]
  [ -47.14548333]
  [ -76.57518923]
  [-109.84656011]]]


In [37]:
grad = np.hstack([ np.dot( (np.identity(n) - input.T )*input, og.T) for input,og in zip(x.T, out_grad.T)  ])

In [38]:
grad

array([-39.50497575, -38.28769072, -32.30566116, -40.32083874,
       -61.01591553, -52.99564443, -65.40530206, -54.03325556,
       -81.9720865 , -77.43728125, -76.30475273, -79.01388919])

In [39]:
input = x.T[2]


In [40]:


z = np.random.randn(4,3)

In [41]:
z

array([[ 1.04787812,  0.0807142 , -0.72862933],
       [ 0.18005803,  0.25579053,  0.55576527],
       [-0.03993665,  0.01745217,  1.78805094],
       [ 0.67513089,  0.31858005,  1.4928962 ]])

In [42]:
p = np.zeros(z.shape)

In [43]:
p

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [44]:
z = np.array([_ for _ in range(16)]).reshape(4,4)
z

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [45]:
z*z

array([[  0,   1,   4,   9],
       [ 16,  25,  36,  49],
       [ 64,  81, 100, 121],
       [144, 169, 196, 225]])

In [46]:
eps = 1e-8

In [47]:
np.sqrt(z)+eps

array([[1.00000000e-08, 1.00000001e+00, 1.41421357e+00, 1.73205082e+00],
       [2.00000001e+00, 2.23606799e+00, 2.44948975e+00, 2.64575132e+00],
       [2.82842713e+00, 3.00000001e+00, 3.16227767e+00, 3.31662480e+00],
       [3.46410163e+00, 3.60555129e+00, 3.74165740e+00, 3.87298336e+00]])

In [48]:
func = lambda x,y : x**2 + y

In [49]:
func(4,1)

17

In [50]:
from sklearn.preprocessing import StandardScaler

In [68]:
x = np.arange(16).reshape(4,4)

In [69]:
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [70]:
(x-np.mean(x,axis=0,keepdims=True)) / np.std(x,axis=0,keepdims=True)

array([[-1.34164079, -1.34164079, -1.34164079, -1.34164079],
       [-0.4472136 , -0.4472136 , -0.4472136 , -0.4472136 ],
       [ 0.4472136 ,  0.4472136 ,  0.4472136 ,  0.4472136 ],
       [ 1.34164079,  1.34164079,  1.34164079,  1.34164079]])

In [66]:
np.std(x,axis=0,keepdims=True)

array([[4.47213595, 4.47213595, 4.47213595, 0.70710678]])

In [64]:
scaler = StandardScaler()
x[:,3:4] = scaler.fit_transform(x[:,3:4])
x

array([[ 0,  1,  2, -1],
       [ 4,  5,  6,  0],
       [ 8,  9, 10,  0],
       [12, 13, 14,  1]])

In [55]:
x

array([[ 0,  1,  2, -1],
       [ 4,  5,  6,  0],
       [ 8,  9, 10,  0],
       [12, 13, 14,  1]])

In [71]:
scaler = StandardScaler()
scaler.fit_transform(x)

array([[-1.34164079, -1.34164079, -1.34164079, -1.34164079],
       [-0.4472136 , -0.4472136 , -0.4472136 , -0.4472136 ],
       [ 0.4472136 ,  0.4472136 ,  0.4472136 ,  0.4472136 ],
       [ 1.34164079,  1.34164079,  1.34164079,  1.34164079]])