<a href="https://colab.research.google.com/github/leodavidfan/AI_Books/blob/main/Tensor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import torch
import tensorflow as tf

# PyTorch scalar operations
scalar_pt = torch.tensor (5.0)
log_scalar = torch.log( scalar_pt )
exp_scalar = torch.exp( scalar_pt )

# TensorFlow scalar operations
scalar_tf = tf. constant (5.0)
log_scalar = tf. math .log( scalar_tf )
exp_scalar = tf. math .exp( scalar_tf )


In [8]:
# (Vector Operations). Implementation of basic vector operations:

# Create vectors
u = torch . tensor ([1. , 2., 3.])
v = torch . tensor ([4. , 5., 6.])

# Basic operations
sum_vec = u + v
scaled = 2 * u
dot_product = torch . dot(u, v)
norm = torch . norm (u)

# Vector transformations
normalized = u / norm
projection = ( torch .dot(u, v) / torch . dot(u, u)) * u

In [9]:
# (Matrix Operations). Implementation of matrix operations:

# Create matrices
A = torch . tensor ([[1. , 2.] , [3. , 4.]])
B = torch . tensor ([[5. , 6.] , [7. , 8.]])

# Basic operations
sum_matrix = A + B
product = torch . matmul (A, B)
transpose = A.t()
determinant = torch . det(A)
inverse = torch . inverse (A)
trace = torch . trace (A)

# Eigendecomposition
eigenvalues , eigenvectors = torch . linalg .eig(A)

In [12]:
# Create a rank -4 tensor
T = torch.randn (2, 3, 3, 2)

# Contract over middle indices
C = torch.einsum ('ijjk ->ik', T)

In [18]:
# Create a tensor
T = torch . randn (4, 3)

# Compute SVD
U, S, V = torch.linalg.svd (T)

# Reconstruct original tensor
T_reconstructed = U @ torch.diag(S) @ V

RuntimeError: mat1 and mat2 shapes cannot be multiplied (4x4 and 3x3)

In [21]:
# Memory - efficient tensor creation
efficient = torch . randn (1000 , 1000 ,dtype = torch . float32 ) # 4MB

 # Memory - inefficient tensor
inefficient = torch . randn (1000 , 1000 ,dtype = torch . float64 ) # 8MB

 # Use in - place operations
efficient . add_ (1) # In - place addition

tensor([[ 0.4966,  0.5558,  0.5378,  ...,  1.0561, -0.2736,  1.2252],
        [-0.0530, -0.9015,  1.1406,  ...,  0.9508,  1.3669,  1.4165],
        [ 0.6605,  2.2796,  2.2498,  ..., -0.6056,  0.6745, -0.4631],
        ...,
        [ 0.7781,  0.1003,  1.8459,  ...,  1.0867,  0.8836,  1.3513],
        [-0.6450,  0.9343,  0.6779,  ...,  0.2078,  0.3838, -0.1355],
        [ 1.0522,  1.8059,  2.0398,  ...,  0.6334,  0.3335,  2.3760]])

In [28]:
# Performance Comparison

import time
def slow_operation ( tensor ):
  result = torch . zeros_like ( tensor )
  for i in range ( tensor . shape [0]) :
    for j in range ( tensor . shape [1]) :
      result [i,j] = torch .sin ( tensor [i,j])
  return result

def fast_operation ( tensor ):
  return torch .sin ( tensor )

# Compare performance
x = torch . randn (1000 , 1000)

start = time . time ()
slow_result = slow_operation (x)
print (f" Loop time : { time . time () - start :.2f}s")

start = time . time ()
fast_result = fast_operation (x)
print (f" Vectorized time : { time . time () - start :.2f}s")

 Loop time : 17.94s
 Vectorized time : 0.00s


In [31]:
# Efficient Implementation

import numpy as np
from scipy import linalg
def analyze_matrix (A):
  """
  Comprehensive analysis of a matrix using eigenvalue
  decomposition .
  Parameters :
  -----------
  A : ndarray
  Square matrix to analyze

  Returns :
  --------
  dict
  Dictionary containing eigenvalues , eigenvectors , condition
  number ,
  and stability analysis
  """
    # Compute eigendecomposition
  eigenvals , eigenvecs = linalg .eig(A)

  # Compute condition number
  cond_num = np. linalg . cond (A)

  # Analyze stability
  is_stable = np.all(np. real ( eigenvals ) < 0)

  # Verify diagonalization
  D = np. diag ( eigenvals )
  P = eigenvecs
  P_inv = np. linalg .inv(P)
  reconstruction_error = np. linalg . norm (A - P @ D @ P_inv )

  return {
  'eigenvalues': eigenvals ,
  'eigenvectors': eigenvecs ,
  'condition_number': cond_num ,
  'is_stable': is_stable ,
  'reconstruction_error': reconstruction_error
 }

In [38]:
import numpy as np
import pandas as pd
class MovieData :
  def __init__ (self , ratings_file , movies_file ):
    """ Initialize MovieData with rating and movie information .
    """
    self . ratings = pd. read_csv ( ratings_file )
    self . movies = pd. read_csv ( movies_file )
    self . _prepare_matrices ()

  def _prepare_matrices ( self ):
    """
    Convert ratings into a 2D NumPy array ( users x movies ).
    Missing values remain NaN.
    """
    self . ratings_matrix = self . ratings . pivot (
    index ='user_id',
    columns ='movie_id',
    values ='rating'
    ). values

  # Compute the mean rating per user ( ignoring NaN)
    self. user_means = np. nanmean ( self . ratings_matrix , axis =1, keepdims = True )

  # Create a centered rating matrix (user - means subtracted )
    self. centered_ratings = self . ratings_matrix - self .user_means

In [49]:
# SVD-based recommender systems is the Alternating Least Squares
# (ALS) method.

class SVDRecommender :
  def __init__ (self , n_factors =20 , regularization =0.1) :
    """
    n_factors : Number of latent factors (k in R ~ P x Q^T).
    regularization : L2 regularization strength ( lambda ).
    """
    self . n_factors = n_factors
    self .reg = regularization

  def fit(self , ratings_matrix , n_epochs =10) :
    """
    Train the model using Alternating Least Squares ( ALS).
    ratings_matrix : 2D NumPy array of shape ( n_users , n_items ),
    with NaN for missing ratings .
    n_epochs : Number of ALS iterations .
    """
    self . ratings = ratings_matrix
    self . n_users , self . n_items = ratings_matrix . shape

    # Initialize user_factors and item_factors randomly
    self . user_factors = np. random . normal (0, 0.1 , ( self . n_users , self . n_factors ))
    self . item_factors = np. random . normal (0, 0.1 , ( self . n_items , self . n_factors ))

    for epoch in range ( n_epochs ):
    # 1) Update all user factors
      for u in range ( self . n_users ):
        rated_items = ~np. isnan ( self . ratings [u])
      if not np.any (rated_items ):
        continue

    # Solve (Q^T Q + lambda *I) p_u = Q^T r_u
    A = ( self . item_factors [ rated_items ].T @ self . item_factors [ rated_items ] + self .reg * np.eye( self . n_factors ))
    b = self . item_factors [ rated_items ].T @ self . ratings [u, rated_items ]
    self . user_factors [u] = np. linalg . solve (A, b)

    # 2) Update all item factors
    for i in range ( self . n_items ):
      rated_users = ~np. isnan ( self . ratings [:, i])
      if not np.any ( rated_users ):
        continue

      A = ( self . user_factors [ rated_users ].T @ self . user_factors [ rated_users ] + self .reg * np.eye( self . n_factors ))
      b = self . user_factors [ rated_users ].T @ self . ratings [rated_users , i]
      self . item_factors [i] = np. linalg . solve (A, b)

      # Print progress every 2 epochs (as an example )
      if ( epoch +1) % 2 == 0:
        rmse_val = self . compute_error ()
        print (f" Epoch { epoch +1}/{ n_epochs }, RMSE = { rmse_val:.4 f}")

  def compute_error ( self ):
    """
    Compute RMSE on known (non -NaN) ratings .
    """
    predicted_matrix = self . user_factors @ self . item_factors .T
    mask = ~np. isnan ( self . ratings )
    mse = np. mean (( self . ratings [ mask ] - predicted_matrix [ mask ])** 2)
    return np. sqrt (mse )

  def predict_rating (self , user_id , item_id ):
    """
    Predict a single rating using user and item factor dot product .
    """
    return np.dot ( self . user_factors [ user_id ], self . item_factors [item_id ])

  def recommend_items (self , user_id , n_recommendations =5):
    """
      Recommend top N items for a given user ,
    ignoring items already rated by that user .
    """
    user_vector = self . user_factors [ user_id ]
    predictions = user_vector @ self . item_factors .T
    already_rated = ~np. isnan ( self . ratings [ user_id ])
    predictions [ already_rated ] = -np.inf # to exclude rated items
    top_items = np. argsort ( predictions ) [:: -1][: n_recommendations]
    return top_items

In [50]:
# compute both metrics, given a trained model and a test ratings matrix

def evaluate_model (model , test_ratings ):
  """
  Compute RMSE and MAE on a test set with known (non -NaN) ratings .
  """
  predicted = model . user_factors @ model . item_factors .T
  mask = ~np. isnan ( test_ratings )

  errors = test_ratings [ mask ] - predicted [ mask ]
  rmse = np. sqrt (np. mean ( errors **2) )
  mae = np. mean (np.abs( errors ))

  return rmse , mae

In [56]:
# Stochastic Gradient Descent (SGD) version that updates biases and factor vectors together

class BiasedSVD :
  def __init__ (self , n_factors =20 , reg =0.1 , lr =0.005) :
    self . n_factors = n_factors
    self .reg = reg
    self .lr = lr

  def fit(self , ratings_matrix , n_epochs =10) :
    self . ratings = ratings_matrix
    self . n_users , self . n_items = ratings_matrix . shape

    self . global_mean = np. nanmean ( self . ratings )
    self . user_bias = np. zeros ( self . n_users )
    self . item_bias = np. zeros ( self . n_items )
    self . user_factors = np. random . normal (0, 0.1 , ( self . n_users , self . n_factors ))
    self . item_factors = np. random . normal (0, 0.1 , ( self . n_items , self . n_factors ))

    for epoch in range ( n_epochs ):
      user_ids , item_ids = np. where (~ np. isnan ( self . ratings ))
      indices = np. random . permutation (len( user_ids ))

    for idx in indices :
      u = user_ids [idx]

      i = item_ids [idx]
      rating = self . ratings [u, i]

      # Current prediction
      pred = ( self . global_mean +
      self . user_bias [u] +
      self . item_bias [i] +
      np. dot ( self . user_factors [u], self .item_factors [i]))

      # Error
      e_ui = rating - pred

      # Update bias terms
      self . user_bias [u] += self .lr * ( e_ui - self .reg * self . user_bias [u])
      self . item_bias [i] += self .lr * ( e_ui - self .reg * self . item_bias [i])

      # Update latent factors
      u_factors_old = self . user_factors [u]. copy ()
      self . user_factors [u] += self .lr * (e_ui * self . item_factors [i] - self .reg * self .user_factors [u])
      self . item_factors [i] += self .lr * ( e_ui * u_factors_old - self .reg * self .item_factors [i] )

      # Monitor training progress ( RMSE )
      rmse_val = self . _compute_rmse ()
      print (f" Epoch { epoch +1}/{ n_epochs }, RMSE = { rmse_val :.4f}")

  def _compute_rmse ( self ):
    predictions = self . _full_prediction_matrix ()
    mask = ~np. isnan ( self . ratings )
    mse = np. mean (( self . ratings [ mask ] - predictions [ mask ]) ** 2)
    return np. sqrt (mse )

  def _full_prediction_matrix ( self ):
    bias_term = ( self . global_mean +
    self . user_bias [:, None ] +
    self . item_bias [None , :])
    factor_term = self . user_factors @ self . item_factors .T
    return bias_term + factor_term

  def predict (self , user_id , item_id ):
   return self . _full_prediction_matrix ()[ user_id , item_id ]

  def recommend_items (self , user_id , n_recommendations =5):
    preds = self . _full_prediction_matrix ()[ user_id ]
    rated_mask = ~np. isnan ( self . ratings [ user_id ])
    preds [ rated_mask ] = -np.inf
    return np. argsort ( preds ) [:: -1][: n_recommendations ]

In [57]:
if __name__ == " __main__ ":
  # Choose our SVD model
  model = BiasedSVD ( n_factors =20 , reg =0.1 , lr =0.005)
  # Fit on training set
  model .fit( train_ratings , n_epochs =10)
  # Evaluate on test set
  rmse , mae = evaluate_model (model , test_ratings )
  print (f" Final RMSE on test = { rmse :.4f}, MAE = { mae :.4f}")

  # Make top -5 recommendations for user 0
  user_id = 0
  recommendations = model . recommend_items ( user_id , n_recommendations =5)
  print ("Top 5 Recommendations for user 0:", recommendations )

• Backpropagation applies the chain rule to compute how changes in parameters influence the final loss.

• The loss function measures the discrepancy between predictions and true labels.

• The chain rule in calculus is the mathematical linchpin enabling efficient gradient computation.

• Gradient descent moves parameters in the opposite direction of the gradient to minimize 𝐿(𝜃).

• Choices like activation function, loss function, and learning rate can vastly impact training efficacy and speed.

In [58]:
# Python snippet demonstrating the forward and backward pass for a single-neuron model with a Sigmoid activation and MSE loss:

import numpy as np

# Example values
x, w, b, y = 2.0, 0.5, 0.1, 1.0 # input, weight, bias, target

# Forward pass
z = w * x + b # linear combination
a = 1 / (1 + np.exp(-z)) # sigmoid activation
loss = (y - a)**2 # MSE loss
print("Forward pass results:")
print(f"z = {z}, a = {a}, loss = {loss}")

# Backward pass
dL_da = 2 * (a - y) # derivative of MSE w.r.t. a
da_dz = a * (1 - a) # derivative of sigmoid w.r.t. z
dz_dw = x # derivative of z w.r.t. w
dz_db = 1 # derivative of z w.r.t. b

# Combine derivatives to get the gradient
dL_dw = dL_da * da_dz * dz_dw
dL_db = dL_da * da_dz * dz_db
print("\nBackward pass (gradients):")
print(f"dL/da = {dL_da}")
print(f"da/dz = {da_dz}")
print(f"dz/dw = {dz_dw}")
print(f"dz/db = {dz_db}")
print(f"dL/dw = {dL_dw}")
print(f"dL/db = {dL_db}")

# Update parameters
eta = 0.1
w_new = w - eta * dL_dw
b_new = b - eta * dL_db
print("\nUpdated parameters:")
print(f"w_new = {w_new}")
print(f"b_new = {b_new}")

Forward pass results:
z = 1.1, a = 0.7502601055951177, loss = 0.062370014857361766

Backward pass (gradients):
dL/da = -0.4994797888097646
da/dz = 0.18736987954752055
dz/dw = 2.0
dz/db = 1
dL/dw = -0.1871749357314132
dL/db = -0.0935874678657066

Updated parameters:
w_new = 0.5187174935731413
b_new = 0.10935874678657066


Resources:

• Michael A. Nielsen: Neural Networks and Deep Learning (free online resource).

• Ian Goodfellow, Yoshua Bengio, Aaron Courville: Deep Learning (MIT Press).

• Andrew Ng’s Coursera course on Machine Learning for foundational gradient-based method insights.