<a href="https://colab.research.google.com/github/rahul0772/python-ml-ai-relearning/blob/main/AI%20and%20ML%20with%20PyTorch/day10_AI_ML_withPyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
print(torch.__version__)

2.9.0+cpu


In [10]:
# ==========================================
# MACHINE LEARNING BASICS ‚Äî SUMMARY NOTES
# ==========================================

# ------------------------------------------
# WHAT MACHINE LEARNING IS
# ------------------------------------------

# Machine Learning means:
# - We do NOT write rules by hand
# - We give the computer examples
# - The computer learns the pattern by itself

# ------------------------------------------
# DATA AND LABELS
# ------------------------------------------

# Data  = inputs (x values)
# Labels = correct answers (y values)

# Example:
# x ‚Üí input number
# y ‚Üí output number

# The goal of ML:
# Learn the relationship between x and y

# ------------------------------------------
# NEURAL NETWORKS
# ------------------------------------------

# A neural network is made of neurons.
# In our example, we will useg ONLY ONE neuron.

# That neuron learns this formula:
#     y = w*x + b
#
# w = weight  (how strongly x affects y)
# b = bias    (starting offset)

# ------------------------------------------
# TRAINING THE MODEL
# ------------------------------------------

# Training means:
# 1. Model makes a guess
# 2. We compare guess with correct answer
# 3. We calculate how wrong it is (loss)
# 4. The model updates itself
# 5. Repeat many times

# This is how the model "learns"

# ------------------------------------------
# LOSS FUNCTION
# ------------------------------------------

# Loss is a number that tells:
# "How wrong is the model?"

# Lower loss = better model
# Zero loss  = perfect model (rare)

# ------------------------------------------
# OPTIMIZER
# ------------------------------------------

# The optimizer:
# - Uses the loss
# - Adjusts weights and bias
# - Tries to reduce error each time

# ------------------------------------------
# PREDICTION
# ------------------------------------------

# After training:
# - We give a new input (x)
# - The model gives an output (y)
# - This is called a prediction

# Prediction = best guess
# Not exact, but very close

# ------------------------------------------
# WHAT WE PROVED
# ------------------------------------------

# We showed that:
# - A neural network can learn patterns
# - Even simple math relationships
# - Using the same process used in real ML systems

# ------------------------------------------
# BIG IDEA
# ------------------------------------------

# This simple example is NOT useless.
# It uses the SAME structure as:
# - Image recognition
# - Speech recognition
# - Self-driving cars
# - Medical AI

# Only difference:
# - More data
# - More neurons
# - More layers

In [6]:
# =========================
# SIMPLE PYTORCH SETUP NOTE
# =========================

'''# 1Ô∏è‚É£ Import required libraries
import torch                  # Main PyTorch library
import torch.nn as nn          # Tools to build neural networks
import torch.optim as optim    # Tools to train neural networks
import numpy as np             # Numerical library (not used yet, but often needed)

# -------------------------
# 2Ô∏è‚É£ Create the model
# -------------------------

# This model has:
# - 1 input value (x)
# - 1 output value (y)
# - One neuron
# It learns the formula: y = w*x + b
model = nn.Sequential(
    nn.Linear(1, 1)
)

# -------------------------
# 3Ô∏è‚É£ Define the loss function
# -------------------------

# Loss tells us how wrong the model is
# MSE = Mean Squared Error
# Bigger error ‚Üí bigger loss
criterion = nn.MSELoss()

# -------------------------
# 4Ô∏è‚É£ Define the optimizer
# -------------------------

# Optimizer updates the model to reduce loss
# SGD = Stochastic Gradient Descent
# lr = learning rate (how big each update step is)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# -------------------------
# 5Ô∏è‚É£ Training data (inputs)
# -------------------------

# xs are the input x values
# Shape: 6 rows, 1 column
xs = torch.tensor(
    [[-1.0],
     [ 0.0],
     [ 1.0],
     [ 2.0],
     [ 3.0],
     [ 4.0]],
    dtype=torch.float32
)

# -------------------------
# 6Ô∏è‚É£ Training data (outputs)
# -------------------------

# ys are the correct answers
# This follows the rule: y = 2x - 1
ys = torch.tensor(
    [[-3.0],
     [-1.0],
     [ 1.0],
     [ 3.0],
     [ 5.0],
     [ 7.0]],
    dtype=torch.float32
)

# -------------------------
# END OF SETUP
# -------------------------

# At this point:
# - Model exists
# - Data exists
# - Loss and optimizer exist
# Next step would be TRAINING'''


'# 1Ô∏è‚É£ Import required libraries\nimport torch                  # Main PyTorch library\nimport torch.nn as nn          # Tools to build neural networks\nimport torch.optim as optim    # Tools to train neural networks\nimport numpy as np             # Numerical library (not used yet, but often needed)\n\n# -------------------------\n# 2Ô∏è‚É£ Create the model\n# -------------------------\n\n# This model has:\n# - 1 input value (x)\n# - 1 output value (y)\n# - One neuron\n# It learns the formula: y = w*x + b\nmodel = nn.Sequential(\n    nn.Linear(1, 1)\n)\n\n# -------------------------\n# 3Ô∏è‚É£ Define the loss function\n# -------------------------\n\n# Loss tells us how wrong the model is\n# MSE = Mean Squared Error\n# Bigger error ‚Üí bigger loss\ncriterion = nn.MSELoss()\n\n# -------------------------\n# 4Ô∏è‚É£ Define the optimizer\n# -------------------------\n\n# Optimizer updates the model to reduce loss\n# SGD = Stochastic Gradient Descent\n# lr = learning rate (how big each

In [3]:
# =========================
# TRAINING LOOP
# =========================
'''
# We repeat the learning process many times so the model can improve
for _ in range(500):   # train 500 times

    # 1. Clear old gradients
    optimizer.zero_grad()
    # Gradients are like "directions for improvement".
    # If we don‚Äôt clear them, old directions will mix with new ones and confuse learning.

    # 2. Make predictions
    outputs = model(xs)
    # The model takes input data (xs)
    # and tries to guess the output (y values).
    # At first, these guesses are usually very bad.

    # 3. Measure how wrong the predictions are
    loss = criterion(outputs, ys)
    # We compare:
    #   - outputs ‚Üí model's guesses
    #   - ys ‚Üí correct answers
    # The loss is a number that says:
    # "How bad was the guess?"
    # Bigger loss = worse guess

    # 4. Backpropagation (learn from mistakes)
    loss.backward()
    # This is where learning really happens.
    # The computer figures out:
    # "Which direction should I change my weights
    #  to reduce the loss next time?"

    # 5. Update the model weights
    optimizer.step()
    # The optimizer uses the gradients
    # and slightly changes the model‚Äôs weights and bias
    # so the next guess is better than the last one
'''

'\n# We repeat the learning process many times so the model can improve\nfor _ in range(500):   # train 500 times\n\n    # 1. Clear old gradients\n    optimizer.zero_grad()\n    # Why?\n    # Gradients are like "directions for improvement".\n    # If we don‚Äôt clear them, old directions will mix with new ones and confuse learning.\n\n    # 2. Make predictions\n    outputs = model(xs)\n    # The model takes input data (xs)\n    # and tries to guess the output (y values).\n    # At first, these guesses are usually very bad.\n\n    # 3. Measure how wrong the predictions are\n    loss = criterion(outputs, ys)\n    # We compare:\n    #   - outputs ‚Üí model\'s guesses\n    #   - ys ‚Üí correct answers\n    # The loss is a number that says:\n    # "How bad was the guess?"\n    # Bigger loss = worse guess\n\n    # 4. Backpropagation (learn from mistakes)\n    loss.backward()\n    # This is where learning really happens.\n    # The computer figures out:\n    # "Which direction should I change

In [5]:
# ============================================
# USING A TRAINED MODEL TO MAKE A PREDICTION
# ============================================

"""# At this point:
# - The model has already been trained
# - It has learned a relationship between x and y
# - Now we want to ASK the model a question

# Question:
# "If x = 10, what should y be?"

# ------------------------------------------------
# IMPORTANT:
# We are NOT training here.
# We are ONLY asking for an answer.
# ------------------------------------------------

with torch.no_grad():
    # torch.no_grad() tells PyTorch:
    # "Do not calculate gradients"
    # "Do not update weights"
    # "Just give me the output"

    # --------------------------------------------
    # Create the input value
    # --------------------------------------------
    input_x = torch.tensor([[10.0]], dtype=torch.float32)

    # Why tensor?
    # PyTorch models only understand tensors,
    # not normal Python numbers.

    # Why [[10.0]] ?
    # - One row
    # - One value
    # - Matches the model‚Äôs expected input shape

    # --------------------------------------------
    # Ask the model to predict
    # --------------------------------------------
    prediction = model(input_x)

    # What happens inside the model?
    # The model uses this formula:
    #     y = w * x + b
    #
    # w = weight (learned during training)
    # b = bias   (learned during training)
    #
    # The model plugs in x = 10
    # and calculates y

    # --------------------------------------------
    # Show the result
    # --------------------------------------------
    print(prediction)"""

'# At this point:\n# - The model has already been trained\n# - It has learned a relationship between x and y\n# - Now we want to ASK the model a question\n\n# Question:\n# "If x = 10, what should y be?"\n\n# ------------------------------------------------\n# IMPORTANT:\n# We are NOT training here.\n# We are ONLY asking for an answer.\n# ------------------------------------------------\n\nwith torch.no_grad():\n    # torch.no_grad() tells PyTorch:\n    # "Do not calculate gradients"\n    # "Do not update weights"\n    # "Just give me the output"\n\n    # --------------------------------------------\n    # Create the input value\n    # --------------------------------------------\n    input_x = torch.tensor([[10.0]], dtype=torch.float32)\n\n    # Why tensor?\n    # PyTorch models only understand tensors,\n    # not normal Python numbers.\n\n    # Why [[10.0]] ?\n    # - One row\n    # - One value\n    # - Matches the model‚Äôs expected input shape\n\n    # ------------------------------

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
# Model
model = nn.Sequential(nn.Linear(1, 1))
# Loss and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
# Data
xs = torch.tensor([[-1.0], [0.0], [1.0], [2.0], [3.0], [4.0]],
 dtype=torch.float32)
ys = torch.tensor([[-3.0], [-1.0], [1.0], [3.0], [5.0], [7.0]],
 dtype=torch.float32)
# Train
for _ in range(500):
 optimizer.zero_grad()
 outputs = model(xs)
 loss = criterion(outputs, ys)
 loss.backward()
 optimizer.step()
# Predict
with torch.no_grad():
 print(model(torch.tensor([[10.0]], dtype=torch.float32)))

tensor([[18.9930]])


In [9]:
# ==========================================
# SEEING WHAT THE NETWORK ACTUALLY LEARNED
# ==========================================

# Our model has ONLY ONE neuron.
# A single neuron learns this formula:
#     y = w*x + b
#
# where:
# w = weight
# b = bias

# If the true rule is:
#     y = 2x - 1
# then ideally:
#     weight = 2
#     bias   = -1

# ------------------------------------------
# 1Ô∏è‚É£ Get the layer from the model
# ------------------------------------------

# Our model was created like this:
# model = nn.Sequential(nn.Linear(1, 1))
#
# That means:
# - model[0] is the FIRST (and only) layer
layer = model[0]

# ------------------------------------------
# 2Ô∏è‚É£ Get the learned weight
# ------------------------------------------

# layer.weight is a tensor
# .data ‚Üí get raw values
# .numpy() ‚Üí convert to NumPy array for easy viewing
weights = layer.weight.data.numpy()

# ------------------------------------------
# 3Ô∏è‚É£ Get the learned bias
# ------------------------------------------

bias = layer.bias.data.numpy()

# ------------------------------------------
# 4Ô∏è‚É£ Print what the model learned
# ------------------------------------------

print("Weights:", weights)
print("Bias:", bias)

# ------------------------------------------
# WHAT THIS MEANS
# ------------------------------------------

# If output is something like:
# Weights: [[1.998695]]
# Bias:    [-0.9959542]
#
# Then the model learned:
#     y = 1.998695*x - 0.9959542
#
# This is VERY close to:
#     y = 2x - 1

Weights: [[1.9989815]]
Bias: [-0.996842]


##
INTRODUCTION TO COMPUTER VISION
==========================================




In [12]:

# 1Ô∏è‚É£ What is Computer Vision?
# - Teaching computers to "see" and recognize objects in images
# - Goes beyond just storing pixels
# - Example: Shoes look different but we still know they are shoes

# 2Ô∏è‚É£ Why rules-based programming fails
# - Hard to describe all variations with fixed rules
# - Humans learn by seeing many examples
# - Computers can learn the same way using ML

# 3Ô∏è‚É£ Fashion MNIST Dataset
# - Drop-in replacement for MNIST digits dataset
# - Contains 70,000 grayscale images (28x28 pixels)
# - 10 clothing types: shirts, trousers, dresses, shoes, etc.
# - Each image: 28x28 pixels, values 0-255
# - Monochrome = simpler to manage than color images

# 4Ô∏è‚É£ Key Idea
# - Each pixel is a number
# - Models use pixel values as input to recognize clothing
# - ML allows computers to learn patterns from examples instead of rules

# 5Ô∏è‚É£ Summary
# - Computer vision = recognizing items in images
# - Fashion MNIST = simple benchmark dataset for learning
# - Pixels = input, labels = clothing type, model learns the mapping

In [13]:
# ==========================================
# NEURONS FOR COMPUTER VISION
# ==========================================

# 1Ô∏è‚É£ Recap from Chapter 1
# - Single neuron learned y = 2x - 1
# - Input = x, Output = y
# - Very simple, only 1 input and 1 output

# 2Ô∏è‚É£ Images as inputs
# - Each image = 28x28 pixels ‚Üí 784 values
# - Each pixel value = 0 to 255
# - These pixel values are our x values
# - Labels (0 to 9) are our y values (10 classes of clothing)

# 3Ô∏è‚É£ Why one neuron is not enough
# - y = mx + c works for a line ‚Üí not enough for 784 pixels
# - Need multiple neurons to handle complex patterns
# - Each neuron learns its own parameters (weight w and bias b)

# 4Ô∏è‚É£ How multiple neurons work together
# - Each input pixel goes into each neuron
# - Each neuron produces an output (a probability for a class)
# - Output layer has 10 neurons ‚Üí one for each label (0 to 9)
# - Neuron with highest value = predicted class

# 5Ô∏è‚É£ Training process
# - Start with random weights ‚Üí random guesses (~10% correct for 10 classes)
# - Loss function measures error
# - Optimizer updates weights and biases
# - Repeat over many epochs
# - Over time, the network learns which pixel patterns correspond to which label

# 6Ô∏è‚É£ Goal
# - The network learns to "see" differences between clothing items
# - Each neuron contributes to understanding patterns in the image
# - Eventually, it can classify shirts, shoes, dresses, etc., correctly

# üß† One-line summary:
# Multiple neurons process all pixels in an image, update weights using loss, and gradually learn to classify images.

In [14]:
# ==========================================
# DESIGNING A NEURAL NETWORK FOR COMPUTER VISION
# ==========================================

# 1Ô∏è‚É£ Overview
# We want to classify images from Fashion MNIST:
# - Input: 28x28 grayscale images ‚Üí 784 pixel values
# - Output: 10 classes of clothing (0-9)
# - Use a neural network with multiple layers to learn patterns

# 2Ô∏è‚É£ Defining the network in PyTorch
import torch
import torch.nn as nn

# Example network:
# This line builds a neural network with multiple layers
# nn.Sequential means: "do these layers in order, one after another"
self.linear_relu_stack = nn.Sequential(
    nn.Linear(28*28, 128),   # Hidden layer: 128 neurons
    nn.ReLU(),               # Activation function
    nn.Linear(128, 10),      # Output layer: 10 neurons (one per class)
    nn.LogSoftmax(dim=1)     # Converts outputs to log-probabilities
)

# ------------------------------------------
# 3Ô∏è‚É£ Layer 1: Linear(28*28, 128)
# ------------------------------------------
# - Input: 28x28 pixels flattened to 784 values
# - Output: 128 neurons in the hidden layer
# - Each neuron learns weights (w) and bias (b)
# - Why 128 neurons? Arbitrary, chosen to balance:
#     ‚Ä¢ Learning capacity
#     ‚Ä¢ Training speed
#     ‚Ä¢ Avoiding overfitting (too many neurons can memorize training data)
#     ‚Ä¢ Avoiding underfitting (too few neurons can't capture patterns)
# - "Hidden layer" = not directly exposed to input or output

# ------------------------------------------
# 4Ô∏è‚É£ Activation function: ReLU
# ------------------------------------------
# - ReLU = Rectified Linear Unit
# - Formula: f(x) = max(0, x)
# - Converts negative numbers to 0
# - Purpose:
#     ‚Ä¢ Remove negative values
#     ‚Ä¢ Introduce non-linearity ‚Üí allows network to learn complex patterns
# - Without activation functions, multiple layers would collapse into a single linear layer

# ------------------------------------------
# 5Ô∏è‚É£ Output/second layer: Linear(128, 10)
# ------------------------------------------
# - Input: 128 neurons from previous layer
# - Output: 10 neurons (one per clothing class)
# - Each neuron outputs a value representing the likelihood of the image belonging to that class
# - Each neuron will represent "probability" for a clothing class
# - During training:
#     ‚Ä¢ Provide correct label (ground truth) for each image
#     ‚Ä¢ Network learns which neurons should output higher probability for that label

# ------------------------------------------
# 6Ô∏è‚É£ LogSoftmax Activation
# ------------------------------------------
# - Converts raw outputs (logits) to log-probabilities
# - Ensures that the output values sum to 1 in probability space
# - Makes it easy to pick the class with the highest probability
# - Example: if output neuron 3 > others ‚Üí predicted class = 3
# - Converts the 10 output numbers into log-probabilities
# - Makes it easy to see which class is most likely
# - dim=1 means we do it across the columns (the 10 neurons)

# ------------------------------------------
# 7Ô∏è‚É£ Flattening the image
# ------------------------------------------
# - Images are 2D (28x28)
# - Linear layers expect 1D input ‚Üí flatten to 784
# - Each pixel becomes an input value to the hidden layer neurons

# ------------------------------------------
# 8Ô∏è‚É£ Hyperparameters
# ------------------------------------------
# - Number of neurons, learning rate, number of layers, etc.
# - These are NOT learned by the network ‚Üí set by developer
# - Choosing them requires experimentation (hyperparameter tuning)
# - Trade-offs:
#     ‚Ä¢ More neurons ‚Üí more capacity but slower and risk overfitting
#     ‚Ä¢ Fewer neurons ‚Üí faster but may underfit

# ------------------------------------------
# 9Ô∏è‚É£ How training works
# ------------------------------------------
# 1. Feed flattened image to hidden layer ‚Üí compute activations
# 2. Hidden layer outputs go to output layer ‚Üí 10 values
# 3. LogSoftmax converts outputs to probabilities
# 4. Compute loss against ground truth label
# 5. Backpropagation updates weights and biases (w and b) in all neurons
# 6. Repeat for many epochs ‚Üí network gradually learns patterns
# - Goal: network predicts correct class for each input image

# üß† Summary:
# - Input layer: 784 pixels ‚Üí hidden layer: 128 neurons
# - Hidden layer uses ReLU for non-linearity
# - Output layer: 10 neurons ‚Üí log probabilities
# - Training updates weights and biases so network learns to classify images


NameError: name 'self' is not defined

In [15]:
# ==========================================
# DETAILED & EASY EXPLANATION: NEURAL NETWORK FOR COMPUTER VISION
# ==========================================

# 1Ô∏è‚É£ What is our goal?
# We want a computer to "see" images and classify them as one of 10 clothing types.
# Example: T-shirt, dress, sneaker, etc.
# - Input: 28x28 grayscale image ‚Üí 784 pixel values (numbers from 0 to 255)
# - Output: 10 classes ‚Üí the network predicts which clothing type it is

# 2Ô∏è‚É£ How we build the network in PyTorch
# nn.Sequential connects layers one after another (like a conveyor belt)
self.linear_relu_stack = nn.Sequential(
    nn.Linear(28*28, 128),  # 1st layer: hidden layer with 128 neurons
    nn.ReLU(),               # Activation function: adds non-linearity
    nn.Linear(128, 10),      # Output layer: 10 neurons (one for each clothing class)
    nn.LogSoftmax(dim=1)     # Converts outputs to probabilities
)

# ------------------------------------------
# 3Ô∏è‚É£ Step by step: what happens inside the network
# ------------------------------------------

# Step 1: Flatten the image
# - The image is 28x28 pixels ‚Üí 2D array
# - Neural networks expect 1D arrays ‚Üí flatten to 784 numbers
# - Each pixel value becomes an input to the first layer

# Step 2: Hidden layer (128 neurons)
# - Each neuron has weights (w) and a bias (b)
# - Each neuron multiplies inputs by weights, adds bias ‚Üí produces an output
# - ReLU activation: replaces negative outputs with 0
#   Why? Helps the network learn complex patterns, not just straight lines
# - Think of this layer as "learning small patterns" in the image

# Step 3: Output layer (10 neurons)
# - Each neuron corresponds to a clothing class
# - Neurons take inputs from hidden layer ‚Üí combine using weights & bias
# - Raw output numbers (logits) are generated for each class

# Step 4: LogSoftmax
# - Converts raw outputs into probabilities (values between 0 and 1)
# - Sum of probabilities = 1
# - Highest probability = predicted class
# Example: if output neuron for "sneaker" = 0.8 ‚Üí network predicts sneaker

# ------------------------------------------
# 4Ô∏è‚É£ Hyperparameters (set by us, not learned)
# - Number of neurons: 128
# - Number of layers: 2 (hidden + output)
# - Learning rate, epochs, batch size, etc.
# - Trade-offs:
#   ‚Ä¢ More neurons ‚Üí network can learn more, but slower & may overfit
#   ‚Ä¢ Fewer neurons ‚Üí faster, but may not learn enough (underfit)

# ------------------------------------------
# 5Ô∏è‚É£ Training process
# This is how the network learns to classify images:

# 1. Feed an image (flattened) to the network
# 2. Hidden layer calculates outputs ‚Üí applies ReLU
# 3. Output layer produces 10 raw values (one per class)
# 4. LogSoftmax converts outputs to probabilities
# 5. Compare predicted probabilities with correct label ‚Üí compute loss
# 6. Backpropagation: adjust all weights and biases to reduce loss
# 7. Repeat many times (epochs) ‚Üí network improves

# ------------------------------------------
# 6Ô∏è‚É£ Analogy for understanding
# - Input pixels ‚Üí conveyor belt ‚Üí hidden neurons detect patterns ‚Üí output neurons "vote" for clothing type
# - Training = teaching the neurons to vote correctly over many examples
# - ReLU = makes neurons more flexible (can ignore negative patterns)
# - LogSoftmax = tells which vote is strongest (prediction)

# ------------------------------------------
# 7Ô∏è‚É£ Key points to remember
# - Input layer: 784 pixels
# - Hidden layer: 128 neurons with ReLU
# - Output layer: 10 neurons ‚Üí probabilities
# - Training updates weights and biases so network learns patterns
# - After training, network can classify new images it hasn't seen before

# üß† Summary:
# Pixels ‚Üí Hidden neurons (ReLU) ‚Üí Output neurons ‚Üí Probabilities ‚Üí Predicted class

NameError: name 'self' is not defined