In [2]:
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from mlp_latest.mlp.ActivationFunctions import ActivationFunction
from TensorT.tensor_scratch import TensorT
from mlp_latest.mlp.mlp import MLP


ModuleNotFoundError: No module named 'ActivationFunctions'

In [None]:
a = TensorT([[1, 2], [3, 4]])
b = TensorT([[5, 6], [7, 8]])

c = a * b  # calls modified __add__

# Check result data
print("Result:", c)
print("Result shape:", c.shape)      # Should print (2, 2)
print("Result data:", c.data)

# Check graph info
print("Operation:", c._op)           # Should print 'add'
print("Parents:", c._parent)        # Should show (a, b)


Result: tensor:
[[5, 12],
 [21, 32]], shape: (2, 2)
Result shape: (2, 2)
Result data: [[5, 12], [21, 32]]
Operation: mul
Parents: (tensor:
[[1, 2],
 [3, 4]], shape: (2, 2), tensor:
[[5, 6],
 [7, 8]], shape: (2, 2))


In [None]:
a = TensorT([[1, 2], [3, 4]])
b = TensorT([[5, 6], [7, 8]])

a.grad = None
b.grad = None
# Call backward on c with default grad (i.e., dL/dc=1)
# c = a.tmatmul(b) 
c = a-b
# Should create computation graph node with backward_fn defined
c.backward()

print("Gradient w.r.t a:")
print(a.grad)  # Expect all ones matrix (same shape as a) because d(c)/d(a)=1 elementwise

print("Gradient w.r.t b:")
print(b.grad)  # Same as above

# print("Gradient w.r.t c:")
# print(c.grad)
# print("----")
# print(c.data)

Gradient w.r.t a:
[[1.0, 1.0], [1.0, 1.0]]
Gradient w.r.t b:
[[-1.0, -1.0], [-1.0, -1.0]]


In [None]:
# Test tsum_axis
t = TensorT([[1, 2, 3], [4, 5, 6]])  # shape (2,3)
print(t.tsum_axis(axis=1, keepdims=True))  # Should be [[6], [15]] shape (2,1)
print(t.tsum_axis(axis=0, keepdims=True))  # Should be [[5, 7, 9]] shape (1,3)

# Test tmaximum  
t2 = TensorT([[-1, 2], [3, -4]])
print(t2.tmaximum(0))  # Should be [[0, 2], [3, 0]] - ReLU effect

# Test tclip
t3 = TensorT([[-600, 2], [3, 600]])  
print(t3.tclip(-500, 500))  # Should be [[-500, 2], [3, 500]]


tensor:
[[6],
 [15]], shape: (2, 1)
tensor:
[[5, 7, 9]], shape: (1, 3)
tensor:
[[0, 2],
 [3, 0]], shape: (2, 2)
tensor:
[[-500, 2],
 [3, 500]], shape: (2, 2)


In [None]:
a = TensorT([[2.0]])
b = TensorT([[3.0]])
c = a + b

for i in range(5):   # 5 backward calls
    c.backward()
    print(f"After {i+1} backward calls: a.grad={a.grad}, b.grad={b.grad}")


After 1 backward calls: a.grad=[[1.0]], b.grad=[[1.0]]
After 2 backward calls: a.grad=[[2.0]], b.grad=[[2.0]]
After 3 backward calls: a.grad=[[3.0]], b.grad=[[3.0]]
After 4 backward calls: a.grad=[[4.0]], b.grad=[[4.0]]
After 5 backward calls: a.grad=[[5.0]], b.grad=[[5.0]]


In [None]:
import numpy as np
import math
# from tensor_scratch import TensorT

def test_activations():
    print("=== Testing Activation Functions ===")
    
    # Test input
    input_data = [[0.5, 0.0], [-1.0, 2.0], [0.25, -0.456]]
    z = TensorT(input_data)
    print(f"Input: {input_data}")
    print(f"Input tensor ID: {id(z)}")
    print()
    
    # Test ReLU
    print("--- ReLU Test ---")
    relu_out = ActivationFunction.relu(z)
    print(f"Output: {relu_out.data}")
    print(f"Expected: {[[max(0, x) for x in row] for row in input_data]}")
    print(f"Op tracked: '{relu_out._op}'")
    print(f"Parent ID matches: {id(z) == id(relu_out._parent[0])}")
    
    # Test gradient
    grad_input = [[1.0, 1.0], [1.0, 1.0]]  # Ones gradient from upstream
    relu_grad = relu_out.backward_fn(grad_input)
    expected_relu_grad = [[1.0 if x > 0 else 0.0 for x in row] for row in input_data]
    print(f"Gradient: {relu_grad}")
    print(f"Expected gradient: {expected_relu_grad}")
    print()
    
    # Test Sigmoid
    print("--- Sigmoid Test ---")
    sigmoid_out = ActivationFunction.sigmoid(z)
    numpy_sigmoid = 1 / (1 + np.exp(-np.array(input_data)))
    # print(f"Output: {np.array(sigmoid_out.data)}")
    print(f"Output: {sigmoid_out}")
    print(f"NumPy expected: {numpy_sigmoid}")
    print(f"Values match: {np.allclose(sigmoid_out.data, numpy_sigmoid, atol=1e-6)}")
    print(f"Op tracked: '{sigmoid_out._op}'")
    print(f"Parent ID matches: {id(z) == id(sigmoid_out._parent[0])}")
    
    # Test sigmoid gradient  
    sigmoid_grad = sigmoid_out.backward_fn(grad_input)
    # Expected: grad * sigmoid_out * (1 - sigmoid_out)
    s_vals = np.array(sigmoid_out.data)
    expected_sigmoid_grad = s_vals * (1 - s_vals)
    print(f"Gradient: {np.array(sigmoid_grad)}")
    print(f"Expected gradient: {expected_sigmoid_grad}")
    print()
    
    # Test Tanh
    print("--- Tanh Test ---")
    tanh_out = ActivationFunction.tanh(z)
    numpy_tanh = np.tanh(np.array(input_data))
    print(f"Output: {np.array(tanh_out.data)}")
    print(f"NumPy expected: {numpy_tanh}")
    print(f"Values match: {np.allclose(tanh_out.data, numpy_tanh, atol=1e-6)}")
    print(f"Op tracked: '{tanh_out._op}'")
    print(f"Parent ID matches: {id(z) == id(tanh_out._parent[0])}")
    
    # Test tanh gradient
    tanh_grad = tanh_out.backward_fn(grad_input)
    # Expected: grad * (1 - tanh²(x))
    t_vals = np.array(tanh_out.data)
    expected_tanh_grad = 1 - t_vals**2
    print(f"Gradient: {np.array(tanh_grad)}")
    print(f"Expected gradient: {expected_tanh_grad}")
    print()
    
    # Test computational graph structure
    print("--- Graph Structure Test ---")
    print(f"All operations tracked correctly: {all([relu_out._op == 'relu',sigmoid_out._op == 'sigmoid',tanh_out._op == 'tanh'])}")

    print(f"All parents point to same input: {all([relu_out._parent[0] is z,sigmoid_out._parent[0] is z,tanh_out._parent[0] is z])}")

    print(f"All have backward functions: {all([relu_out.backward_fn is not None,sigmoid_out.backward_fn is not None,tanh_out.backward_fn is not None])}")
# Run the test
test_activations()


=== Testing Activation Functions ===
Input: [[0.5, 0.0], [-1.0, 2.0], [0.25, -0.456]]
Input tensor ID: 139774007851136

--- ReLU Test ---
Output: [[0.5, 0], [0, 2.0], [0.25, 0]]
Expected: [[0.5, 0], [0, 2.0], [0.25, 0]]
Op tracked: 'relu'
Parent ID matches: True
Gradient: ([[1.0, 0], [0, 1.0]],)
Expected gradient: [[1.0, 0.0], [0.0, 1.0], [1.0, 0.0]]

--- Sigmoid Test ---
Output: tensor:
[[0.6224593312018546, 0.5],
 [0.2689414213699951, 0.8807970779778823],
 [0.5621765008857981, 0.3879351629339386]], shape: (3, 2)
NumPy expected: [[0.62245933 0.5       ]
 [0.26894142 0.88079708]
 [0.5621765  0.38793516]]
Values match: True
Op tracked: 'sigmoid'
Parent ID matches: True
Gradient: [[[0.23500371 0.25      ]
  [0.19661193 0.10499359]]]
Expected gradient: [[0.23500371 0.25      ]
 [0.19661193 0.10499359]
 [0.24613408 0.23744147]]

--- Tanh Test ---
Output: [[ 0.46211716  0.        ]
 [-0.76159416  0.96402758]
 [ 0.24491866 -0.4268185 ]]
NumPy expected: [[ 0.46211716  0.        ]
 [-0.7615941

In [None]:
# def test_loss_functions():
#     """Test all loss functions with simple 2x3 tensors"""
    
#     # Create test data
#     y_true = TensorT([[0, 1, 0], [1, 0, 1]])
#     y_pred = TensorT([[0.1, 0.8, 0.1], [0.9, 0.2, 0.7]])
    
#     print("Testing all loss functions...")
    
#     # Test Cross Entropy Loss
#     try:
#         ce_loss = LossFunction.cross_entropy_loss(y_true, y_pred)
#         print(f"✓ Cross Entropy Loss: {ce_loss.data}")
#         grad_ce = ce_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_ce)}")
#     except Exception as e:
#         print(f"✗ Cross Entropy Loss failed: {e}")
    
#     # Test Mean Squared Error
#     try:
#         mse_loss = LossFunction.mean_squared_error(y_true, y_pred)
#         print(f"✓ Mean Squared Error: {mse_loss.data}")
#         grad_mse = mse_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_mse)}")
#     except Exception as e:
#         print(f"✗ Mean Squared Error failed: {e}")
    
#     # Test Categorical Cross Entropy
#     try:
#         cat_ce_loss = LossFunction.categorical_cross_entropy_loss(y_true, y_pred)
#         print(f"✓ Categorical Cross Entropy: {cat_ce_loss.data}")
#         grad_cat = cat_ce_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_cat)}")
#     except Exception as e:
#         print(f"✗ Categorical Cross Entropy failed: {e}")
    
#     # Test Hinge Loss
#     try:
#         hinge_loss = LossFunction.hinge_loss(y_true, y_pred)
#         print(f"✓ Hinge Loss: {hinge_loss.data}")
#         grad_hinge = hinge_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_hinge)}")
#     except Exception as e:
#         print(f"✗ Hinge Loss failed: {e}")
    
#     # Test Binary Cross Entropy
#     try:
#         bce_loss = LossFunction.binary_cross_entropy_loss(y_true, y_pred)
#         print(f"✓ Binary Cross Entropy: {bce_loss.data}")
#         grad_bce = bce_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_bce)}")
#     except Exception as e:
#         print(f"✗ Binary Cross Entropy failed: {e}")
    
#     # Test Mean Absolute Error
#     try:
#         mae_loss = LossFunction.mean_absolute_error(y_true, y_pred)
#         print(f"✓ Mean Absolute Error: {mae_loss.data}")
#         grad_mae = mae_loss.backward_fn([[1.0]])
#         print(f"  Gradients computed: {type(grad_mae)}")
#     except Exception as e:
#         print(f"✗ Mean Absolute Error failed: {e}")
    
#     # Test get() method
#     try:
#         loss_fn, grad_fn = LossFunction.get('mean_squared_error')
#         print(f"✓ get() method works: {loss_fn.__name__}, {grad_fn}")
#     except Exception as e:
#         print(f"✗ get() method failed: {e}")
    
#     print("\nAll loss functions have backward_fn attribute:")
#     for name in ['cross_entropy_loss', 'mean_squared_error', 'categorical_cross_entropy_loss', 
#                 'hinge_loss', 'binary_cross_entropy_loss', 'mean_absolute_error']:
#         loss_result = getattr(LossFunction, name)(y_true, y_pred)
#         has_backward = hasattr(loss_result, 'backward_fn')
#         print(f"  {name}: {has_backward}")

# # Run the test
# test_loss_functions()


In [None]:
# Test sigmoid
z = TensorT([[1.0, -1.0], [2.0, -2.0]])
sig_out = ActivationFunction.sigmoid(z)
print("Sigmoid shape:", sig_out.shape)  # Should be (2, 2)

# Test softmax  
z = TensorT([[1.0, 2.0], [3.0, 4.0]])
soft_out = ActivationFunction.softmax(z)
print("Softmax shape:", soft_out.shape)  # Should be (2, 2)
# Column sums should be approximately 1
col_sums = [sum(col) for col in zip(*soft_out.data)]
print("Column sums:", col_sums)  # Should be close to [1.0, 1.0]

# Test ReLU
z = TensorT([[-1.0, 2.0], [0.0, -3.0]])
relu_out = ActivationFunction.relu(z)
print("ReLU output:", relu_out.data)  # Should be [[0.0, 2.0], [0.0, 0.0]]


Sigmoid shape: (2, 2)
Softmax shape: (2, 2)
Column sums: [0.9999999999999999, 0.9999999999999999]
ReLU output: [[0, 2.0], [0, 0]]


In [None]:
# Test 1: Check types after initialization
mlp = MLP(input_size=3, hidden_layers=[4, 2], output_size=1)
print(f"Weight 1 type: {type(mlp.weights[0])}")  # Should be <class 'TensorT'>
print(f"Bias 1 type: {type(mlp.biases)}")    # Should be <class 'TensorT'>

# Test 2: Check shapes
print(f"Weight 1 shape: {mlp.weights.shape}")  # Should be (4, 3)
print(f"Bias 1 shape: {mlp.biases.shape}")    # Should be (4, 1)

# Test 3: Check bias values are zero
print(f"Bias 1 data: {mlp.biases.data}")      # Should be [[0.0], [0.0], [0.0], [0.0]]

# Test 4: Verify all layers
for i, (w, b) in enumerate(zip(mlp.weights, mlp.biases)):
    print(f"Layer {i+1}: W{w.shape}, b{b.shape}")


NameError: name 'MLP' is not defined