### DL LAB 2A

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Load dataset
# `names=` supplies the header: the target column first, then the 16 feature
# columns, so pandas treats every row of the file as data.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
columns = [
    'letter',
    'x-box', 'y-box', 'width', 'height',
    'onpix', 'x-bar', 'y-bar', 'x2bar',
    'y2bar', 'xybar', 'x2ybr', 'xy2br',
    'x-ege', 'xegvy', 'y-ege', 'yegvx',
]
data = pd.read_csv(url, names=columns)

In [3]:
# 2. Split the frame into the label vector (y) and the feature matrix (X)
y = data['letter'].values
X = data.drop(columns='letter').values

In [4]:
# 3. Encode labels: letters -> integer class ids (A-Z -> 0-25) -> one-hot rows
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)
y_categorical = to_categorical(y_encoded)

In [5]:
# 4. Train-test split (20% held out, fixed seed for a repeatable split).
# The split is stratified on the 1-D integer class ids from the label encoder
# rather than on the one-hot matrix: both describe the same 26 classes, but a
# 1-D label vector is the form train_test_split documents for `stratify`.
# The one-hot labels are still what gets split and returned as y_train/y_test.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [6]:
# 5. Feature scaling
# The scaler is fitted on the training rows only and then applied, unchanged,
# to both splits.
# NOTE: X_train / X_test are rebound to their scaled versions, so re-running
# this cell without re-running the split fits the scaler on scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
# 6. Build the DNN model
# The input width and the number of output units are read from the prepared
# arrays (16 feature columns, 26 one-hot columns for this dataset) instead of
# being repeated as literals.
n_features = X_train.shape[1]
n_classes = y_train.shape[1]

model = Sequential([
    # An explicit Input layer replaces `input_shape=` on the first Dense
    # layer; passing `input_shape` to a layer inside a Sequential model makes
    # Keras emit a warning when the model is constructed.
    Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(n_classes, activation='softmax'),  # one probability per letter class
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',  # labels are one-hot rows from to_categorical
    metrics=['accuracy'],
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# 7. Train the model; the returned History object is kept in `history`
history = model.fit(
    X_train,
    y_train,
    validation_split=0.1,
    batch_size=32,
    epochs=100,
)

Epoch 1/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 4s 3ms/step - accuracy: 0.4298 - loss: 2.1062 - val_accuracy: 0.7644 - val_loss: 0.8268
Epoch 2/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7821 - loss: 0.7540 - val_accuracy: 0.8294 - val_loss: 0.5878
Epoch 3/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8427 - loss: 0.5478 - val_accuracy: 0.8525 - val_loss: 0.4802
Epoch 4/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step - accuracy: 0.8655 - loss: 0.4526 - val_accuracy: 0.8813 - val_loss: 0.3901
Epoch 5/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.8961 - loss: 0.3724 - val_accuracy: 0.8888 - val_loss: 0.3520
Epoch 6/100
450/450 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - accuracy: 0.9099 - loss: 0.3052 - val_accuracy: 0.9137 - val_loss: 0.2905
Epoch 7/100
450/45… [output truncated]

In [9]:
# 8. Evaluate the model on the held-out test split (progress output disabled)
test_metrics = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9657


In [10]:
# Persist the trained model to the relative path "DNN.h5".
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format —
# confirm against the installed Keras version before relying on the file.
model.save("DNN.h5")



In [11]:
# Evaluate model again on the same test split, this time with the default
# progress output enabled
eval_result = model.evaluate(X_test, y_test)
loss, accuracy = eval_result
print(f'Test Accuracy: {accuracy:.4f}')

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9639 - loss: 0.1981
Test Accuracy: 0.9657


In [12]:
# 9. Make predictions (optional)
y_pred = model.predict(X_test)                    # one row of class scores per sample
predicted_class_ids = np.argmax(y_pred, axis=1)   # highest-scoring class per row
predicted_labels = label_encoder.inverse_transform(predicted_class_ids)

[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [14]:
import random

def random_sample_predict(model, scaler, label_encoder, X_test, y_test):
    """Print the true and the predicted letter for one randomly drawn sample.

    Args:
        model: Object whose ``predict`` takes a one-row 2-D array and returns
            one row of class scores.
        scaler: Accepted for interface compatibility only; it is not used in
            the body, so rows of ``X_test`` reach the model unchanged.
        label_encoder: Object whose ``inverse_transform`` maps class indices
            back to letters.
        X_test: Sized, indexable collection of feature rows; each row must
            support ``reshape``.
        y_test: Per-sample label rows; the arg-max of a row is taken as the
            true class index.

    Returns:
        None. The comparison is written to stdout.
    """
    # randint is inclusive at both ends, hence the "- 1".
    idx = random.randint(0, len(X_test) - 1)

    # A single row becomes a batch of one for the model.
    features = X_test[idx].reshape(1, -1)
    true_idx = np.argmax(y_test[idx])
    true_letter = label_encoder.inverse_transform([true_idx])[0]

    scores = model.predict(features)
    predicted_idx = np.argmax(scores, axis=1)
    predicted_letter = label_encoder.inverse_transform(predicted_idx)[0]

    print(
        f"\n--- Random Sample Test ---",
        f"True Letter: {true_letter}",
        f"Predicted Letter: {predicted_letter}",
        sep="\n",
    )

# Call this function after model training.
# X_test at this point is the array already transformed by `scaler`, which is
# why the function can hand its rows to the model without scaling them again.
random_sample_predict(model, scaler, label_encoder, X_test, y_test)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step

--- Random Sample Test ---
True Letter: E
Predicted Letter: E


### DL LAB 2B

In [15]:
# 1. Import Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# 2. Create a small custom dataset (manually for simplicity)
# Each entry pairs a review with its label: 1 = Positive, 0 = Negative
labelled_reviews = [
    ("The movie was fantastic and thrilling", 1),
    ("I hated the movie, it was boring and bad", 0),
    ("An excellent movie with brilliant performances", 1),
    ("The film was dull and too long", 0),
    ("Loved the story and the acting was amazing", 1),
    ("Terrible movie, complete waste of time", 0),
    ("What a masterpiece, loved every moment", 1),
    ("Worst movie ever, so disappointed", 0),
    ("Absolutely stunning, a wonderful experience", 1),
    ("I regret watching this movie, very bad", 0),
]

texts = [review for review, _ in labelled_reviews]
labels = [sentiment for _, sentiment in labelled_reviews]

# 3. Tokenize the texts
max_words = 1000  # vocabulary cap handed to the tokenizer
max_len = 20      # target length of every padded sequence

tokenizer = Tokenizer(oov_token="<OOV>", num_words=max_words)
tokenizer.fit_on_texts(texts)

# Reviews -> lists of integer word ids -> equal-length rows padded at the end
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(
    sequences,
    padding='post',
    maxlen=max_len,
)

# 4. Build the Model
model = keras.Sequential([
    # The sequence length is declared with an explicit Input layer. The
    # Embedding `input_length` argument used previously is deprecated in
    # current Keras releases and only produces a warning there.
    keras.Input(shape=(max_len,), dtype="int32"),
    layers.Embedding(input_dim=max_words, output_dim=64),
    # NOTE(review): the sequences are padded with padding='post' and the
    # embedding does not set mask_zero, so the padding ids are presumably fed
    # through the LSTM like real tokens — consider mask_zero=True.
    layers.Bidirectional(layers.LSTM(32)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid'),  # single score; 1 = Positive, 0 = Negative
])

# 5. Compile Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 6. Train the Model
# verbose=2 prints one summary line per epoch instead of a progress bar.
model.fit(padded_sequences, np.array(labels), epochs=20, batch_size=2, verbose=2)

# 7. Real-time Prediction Function
def predict_sentiment(review):
    """Score one review string with the small custom model and print the verdict.

    The review is encoded with the notebook-level ``tokenizer``, padded at the
    end to ``max_len`` and passed to the notebook-level ``model``. A score of
    0.5 or more is reported as "Positive", anything lower as "Negative".

    NOTE: ``model`` is looked up when the function is called, and a later cell
    rebinds that name to the IMDB model, so call this before running that cell.

    Args:
        review: The raw review text.

    Returns:
        None. The sentiment and score are written to stdout.
    """
    encoded = pad_sequences(
        tokenizer.texts_to_sequences([review]),
        maxlen=max_len,
        padding='post',
    )
    # predict returns one row per input and one column per output unit.
    pred = model.predict(encoded, verbose=0)[0][0]
    if pred >= 0.5:
        sentiment = "Positive"
    else:
        sentiment = "Negative"
    print(f"\nReview Sentiment: {sentiment} (Score: {pred:.4f})")

# 8. Real-time Testing
sample_review1 = "The movie was fantastic! I really loved the performances."
sample_review2 = "The film was boring and too long. Not good at all."
sample_review3 = "I absolutely hated this movie. Worst experience ever."
sample_review4 = "An excellent masterpiece. Great story and acting."

# Score the four reviews in order.
for sample_review in (sample_review1, sample_review2, sample_review3, sample_review4):
    predict_sentiment(sample_review)

Epoch 1/20




5/5 - 4s - 891ms/step - accuracy: 0.3000 - loss: 0.6962
Epoch 2/20
5/5 - 1s - 115ms/step - accuracy: 0.6000 - loss: 0.6911
Epoch 3/20
5/5 - 0s - 11ms/step - accuracy: 0.8000 - loss: 0.6861
Epoch 4/20
5/5 - 0s - 12ms/step - accuracy: 0.9000 - loss: 0.6808
Epoch 5/20
5/5 - 0s - 11ms/step - accuracy: 1.0000 - loss: 0.6741
Epoch 6/20
5/5 - 0s - 12ms/step - accuracy: 1.0000 - loss: 0.6637
Epoch 7/20
5/5 - 0s - 12ms/step - accuracy: 1.0000 - loss: 0.6487
Epoch 8/20
5/5 - 0s - 28ms/step - accuracy: 1.0000 - loss: 0.6297
Epoch 9/20
5/5 - 0s - 12ms/step - accuracy: 1.0000 - loss: 0.6073
Epoch 10/20
5/5 - 0s - 12ms/step - accuracy: 1.0000 - loss: 0.5529
Epoch 11/20
5/5 - 0s - 11ms/step - accuracy: 1.0000 - loss: 0.4892
Epoch 12/20
5/5 - 0s - 12ms/step - accuracy: 1.0000 - loss: 0.4003
Epoch 13/20
5/5 - 0s - 11ms/step - accuracy: 1.0000 - loss: 0.2851
Epoch 14/20
5/5 - 0s - 11ms/step - accuracy: 1.0000 - loss: 0.1720
Epoch 15/20
5/5 - 0s - 11ms/step - accuracy: 1.0000 - loss: 0.1115
Epoch 16/20
5

In [16]:
# 1. Import libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# 2. Load the IMDB dataset (reviews arrive already encoded as integer ids)
imdb = keras.datasets.imdb

# Vocabulary cap handed to load_data
vocab_size = 10000

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

# 3. Lookup tables for moving between words and ids:
#    word_index maps word -> id, reverse_word_index maps id -> word
word_index = imdb.get_word_index()
reverse_word_index = dict((idx, word) for word, idx in word_index.items())

def decode_review(text_ints):
    """Turn a sequence of dataset token ids back into a space-separated string.

    Each id is shifted down by 3 before it is looked up in the notebook-level
    ``reverse_word_index``; ids without an entry are rendered as ``'?'``.

    Args:
        text_ints: Iterable of integer token ids.

    Returns:
        The decoded words joined by single spaces.
    """
    words = []
    for token_id in text_ints:
        words.append(reverse_word_index.get(token_id - 3, '?'))
    return ' '.join(words)

# 4. Prepare data (pad sequences)
# No `padding=` argument is given, so pad_sequences uses its default side.
maxlen = 200
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# 5. Build model
# NOTE: this rebinds `model`, replacing the small custom-dataset model.
model = keras.Sequential([
    # The sequence length is declared with an explicit Input layer. The
    # Embedding `input_length` argument used previously is deprecated in
    # current Keras releases and only produces a warning there.
    keras.Input(shape=(maxlen,), dtype="int32"),
    layers.Embedding(vocab_size, 64),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# 6. Compile model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# 7. Train model
# 20% of the training reviews are held out for per-epoch validation.
# NOTE(review): x_test / y_test are prepared above but not evaluated in the
# visible cells.
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

# 8. Real-time testing function
def predict_sentiment_text(model, review_text):
    """Print the sentiment the IMDB model assigns to a raw review string.

    The text is encoded the way ``imdb.load_data`` encodes the training
    reviews with its default settings, so the ids match what the model saw
    during training:

    * the text is lower-cased and punctuation is replaced by spaces, so
      "fantastic!" matches the "fantastic" entry (apostrophes are kept);
    * the sequence starts with the start-of-review id 1;
    * every id from ``word_index`` is shifted up by 3, because ids 0, 1 and 2
      are reserved for padding, start and unknown (``decode_review`` applies
      the inverse shift);
    * words missing from ``word_index``, and words whose shifted id is not
      below ``vocab_size``, become the unknown id 2, which keeps every id
      inside the embedding table.

    Uses the notebook-level ``word_index``, ``vocab_size`` and ``maxlen``.

    Args:
        model: Trained model whose ``predict`` returns one score per review.
        review_text: The raw review text.

    Returns:
        None. The sentiment and score are written to stdout.
    """
    start_id, unknown_id, id_offset = 1, 2, 3

    # 8.1 Preprocessing: normalise the text, then convert words to integers
    punctuation = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
    cleaned = review_text.lower().translate(
        str.maketrans(punctuation, " " * len(punctuation))
    )

    review_seq = [start_id]
    for word in cleaned.split():
        raw_id = word_index.get(word)
        token_id = unknown_id if raw_id is None else raw_id + id_offset
        if token_id >= vocab_size:
            # Outside the vocabulary the model was trained with.
            token_id = unknown_id
        review_seq.append(token_id)

    review_seq = pad_sequences([review_seq], maxlen=maxlen)

    pred = model.predict(review_seq, verbose=0)[0][0]
    sentiment = "Positive" if pred >= 0.5 else "Negative"
    print(f"\nReview Sentiment: {sentiment} (Score: {pred:.4f})")

# 9. Real examples
sample_review1 = "The movie was fantastic! I really loved the performances."
sample_review2 = "The film was boring and too long. Not good at all."
sample_review3 = "it is so disappointing."
sample_review4 = "An excellent movie. Great direction and amazing acting!"

# Score the four reviews in order with the IMDB model.
for sample_review in (sample_review1, sample_review2, sample_review3, sample_review4):
    predict_sentiment_text(model, sample_review)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
17464789/17464789 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
1641221/1641221 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Epoch 1/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 9s 19ms/step - accuracy: 0.6802 - loss: 0.5678 - val_accuracy: 0.8520 - val_loss: 0.3495
Epoch 2/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 10s 18ms/step - accuracy: 0.9083 - loss: 0.2372 - val_accuracy: 0.8472 - val_loss: 0.3714
Epoch 3/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 10s 18ms/step - accuracy: 0.9327 - loss: 0.1766 - val_accuracy: 0.8668 - val_loss: 0.3616
Epoch 4/5
313/313 ━━━━━━━━━━━━━━━━━━━━ 10s 18ms/step - accuracy: 0.9615 - loss: 0.1122 - val_accuracy: 0.8690 - val_… [output truncated]

Okay, let's break down your entire notebook, cell by cell, in simple terms. I'll explain the code and the important Deep Learning (DL) words you'll encounter. This will be great for your practical!

**Core Idea of Deep Learning:**
Imagine you're trying to teach a computer to recognize things (like letters or movie sentiment). Instead of writing exact rules, you show it lots of examples and let it learn the patterns itself. A "Deep Neural Network" (DNN) is like a computer's brain with many layers that help it learn these complex patterns.

---

**File: Ass_2.ipynb**

---

**PART 1: DL LAB 2A - Recognizing Letters from Image Features**
*(Goal: Teach the computer to identify which letter of the alphabet (A-Z) an image represents, based on some numerical features of that letter's image.)*

**Cell 0: Markdown**
```markdown
### DL LAB 2A
```
*   **What it is:** Just a title for this section.
*   **Terminology:**
    *   **Markdown:** A simple way to format text (like headings, bold, lists).

**Cell 1: Imports**
```python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
```
*   **What it does:** This cell brings in all the tools (libraries) needed for this part.
*   **Terminology:**
    *   `pandas` (`pd`): A library for working with data in tables (like Excel spreadsheets, called DataFrames).
    *   `numpy` (`np`): For heavy-duty math and working with lists of numbers (arrays).
    *   `sklearn` (Scikit-learn): A popular library for general Machine Learning tasks.
        *   `train_test_split`: A function to split your data into a "training set" (to teach the model) and a "test set" (to see how well it learned on new data).
        *   `LabelEncoder`: Converts text labels (like 'A', 'B', 'C') into numbers (0, 1, 2). Computers prefer numbers.
        *   `StandardScaler`: A tool to rescale your numerical features so they are all on a similar range (e.g., around 0 with a standard spread). This helps the DNN learn better.
    *   `tensorflow.keras`: TensorFlow is a big library for Deep Learning. Keras is a user-friendly way to build models with TensorFlow.
        *   `Sequential`: A type of model where you stack layers one after another, like building blocks.
        *   `Dense`: A basic type of layer in a neural network where every "neuron" (processing unit) in this layer is connected to every neuron in the previous layer.
        *   `to_categorical`: A function to convert number labels (like 0, 1, 2) into a special format called "one-hot encoding" (e.g., 0 becomes `[1,0,0]`, 1 becomes `[0,1,0]`). This is often needed for classifying into multiple categories.

**Cell 2: Load Dataset**
```python
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data"
columns = [...] # list of column names
data = pd.read_csv(url, names=columns)
```
*   **What it does:** Downloads the letter recognition dataset from the internet and loads it into a pandas DataFrame (table). It also assigns names to the columns.
*   **Terminology:**
    *   **Dataset:** A collection of data used for training and testing. Here, it's data about letter images.
    *   **URL:** A web address.
    *   `pd.read_csv()`: Pandas function to read data from a CSV (Comma Separated Values) file.

**Cell 3: Separate Features and Labels**
```python
X = data.drop('letter', axis=1).values
y = data['letter'].values
```
*   **What it does:** Splits the data into two parts:
    *   `X`: The **features** (the 16 numerical measurements for each letter image). These are the inputs the model will use to make predictions.
    *   `y`: The **labels** (the actual letter 'A' through 'Z'). This is what we want the model to predict.
*   **Terminology:**
    *   **Features (X):** The input variables or characteristics used for prediction.
    *   **Labels (y) / Target Variable:** The output variable you are trying to predict.

**Cell 4: Encode Labels**
```python
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)
```
*   **What it does:** Converts the letter labels into a format suitable for the DNN.
    1.  `LabelEncoder()`: Changes letters ('A', 'B', ...) to numbers (0, 1, ...).
    2.  `to_categorical()`: Changes these numbers into **one-hot encoded** vectors. For 26 letters:
        *   'A' (or 0) becomes `[1, 0, 0, ..., 0]` (a list of 26 numbers with 1 at the 0th position).
        *   'B' (or 1) becomes `[0, 1, 0, ..., 0]` (1 at the 1st position).
*   **Terminology:**
    *   **One-Hot Encoding:** A way to represent categorical data (like letters) as binary vectors. Each category gets its own position in the vector, which is 1 if the sample belongs to that category, and 0 otherwise. Important for `categorical_crossentropy` loss.

**Cell 5: Train-Test Split**
```python
X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42, stratify=y_categorical)
```
*   **What it does:** Divides the data:
    *   80% for **training** (`X_train`, `y_train`): The model learns from this.
    *   20% for **testing** (`X_test`, `y_test`): Used to evaluate how well the model performs on unseen data.
*   **Terminology:**
    *   `test_size=0.2`: 20% of data for testing.
    *   `random_state=42`: Ensures the split is the same every time you run the code (for reproducibility).
    *   `stratify=y_categorical`: Tries to keep the same proportion of each letter in both training and testing sets.

**Cell 6: Feature Scaling**
```python
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
```
*   **What it does:** Rescales the numerical features in `X_train` and `X_test`.
    *   `fit_transform` on `X_train`: Learns the scaling parameters (mean, standard deviation) from the training data and then applies the scaling.
    *   `transform` on `X_test`: Applies the scaling learned from the training data. (Important: Don't `fit` on test data to avoid data leakage).
*   **Terminology:**
    *   **Feature Scaling:** Making sure all input features have a similar range of values. This helps the DNN learn more efficiently. `StandardScaler` makes features have roughly a mean of 0 and a standard deviation of 1.

**Cell 7: Build the DNN Model**
```python
model = Sequential([
    Dense(128, activation='relu', input_shape=(16,)),
    Dense(64, activation='relu'),
    Dense(26, activation='softmax')
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
```
*   **What it does:** Defines the architecture (structure) of our Deep Neural Network.
    *   `Sequential([...])`: We're building a model layer by layer.
    *   `Dense(128, activation='relu', input_shape=(16,))`: The first **hidden layer**.
        *   `Dense`: A fully connected layer.
        *   `128`: It has 128 "neurons" or units.
        *   `activation='relu'`: **ReLU (Rectified Linear Unit)** is an **activation function**. It decides if a neuron should be "activated" or not. ReLU is simple: if input is positive, output is the input; if negative, output is 0.
        *   `input_shape=(16,)`: This layer expects 16 input features (our letter image features). Only needed for the first layer.
    *   `Dense(64, activation='relu')`: The second hidden layer with 64 neurons and ReLU activation.
    *   `Dense(26, activation='softmax')`: The **output layer**.
        *   `26`: It has 26 neurons, one for each letter of the alphabet.
        *   `activation='softmax'`: **Softmax** activation is used for multi-class classification. It converts the raw outputs of the neurons into probabilities, ensuring they all add up to 1. The neuron with the highest probability is the model's predicted letter.
    *   `model.compile(...)`: Configures how the model will learn.
        *   `optimizer='adam'`: **Adam** is an efficient **optimizer**. The optimizer's job is to adjust the model's internal "knobs" (weights) to reduce mistakes.
        *   `loss='categorical_crossentropy'`: The **loss function** measures how "wrong" the model's predictions are compared to the true labels (which are one-hot encoded). The model tries to minimize this loss.
        *   `metrics=['accuracy']`: We want to track **accuracy** (the percentage of correctly classified letters) during training.
*   **Terminology:**
    *   **Neural Network / DNN:** A model inspired by the human brain, with layers of interconnected nodes (neurons). "Deep" means it has multiple hidden layers.
    *   **Layer:** A stage of computation in the network.
    *   **Hidden Layer:** Layers between the input and output layers. They learn complex features.
    *   **Output Layer:** The final layer that gives the prediction.
    *   **Neuron/Unit:** A small computational unit within a layer.
    *   **Activation Function (ReLU, Softmax):** A function applied to the output of each neuron to introduce non-linearity, allowing the network to learn complex patterns.
    *   **Optimizer (Adam):** An algorithm that adjusts the model's parameters (weights) to minimize the loss function.
    *   **Loss Function (Categorical Crossentropy):** A way to measure the error or "badness" of the model's predictions for multi-class problems.
    *   **Metrics (Accuracy):** How we evaluate the model's performance.

**Cell 8: Train the Model**
```python
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1)
```
*   **What it does:** This is where the actual learning happens!
    *   `model.fit()`: Trains the model using the training data (`X_train`, `y_train`).
    *   `epochs=100`: An **epoch** is one complete pass through the entire training dataset. Here, it goes through the data 100 times.
    *   `batch_size=32`: The model processes the training data in small groups (batches) of 32 samples at a time before updating its weights.
    *   `validation_split=0.1`: It sets aside 10% of the training data to use as **validation data**. After each epoch, the model's performance is checked on this validation data. This helps monitor if the model is **overfitting** (learning the training data too well but not generalizing to new data).
*   **Terminology:**
    *   **Training:** The process of teaching the model by showing it examples and letting it adjust its weights.
    *   **Epoch:** One full iteration over the entire training dataset.
    *   **Batch Size:** The number of training samples processed before the model's weights are updated.
    *   **Validation Data:** A subset of training data used to tune hyperparameters and monitor for overfitting during training.
    *   **Overfitting:** When a model performs very well on training data but poorly on unseen (test or validation) data. It has "memorized" the training set instead of learning general patterns.

**Cell 9: Evaluate the Model**
```python
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
```
*   **What it does:** Tests the trained model on the `X_test` and `y_test` (data it has never seen before) to see how well it generalized.
*   **Terminology:**
    *   **Evaluation:** Assessing the model's performance on unseen data.
    *   `Test Accuracy`: The accuracy on the test set, a good indicator of how the model will perform in the real world.

**Cell 10: Save the Model**
```python
model.save("DNN.h5")
```
*   **What it does:** Saves the entire trained model (architecture, weights, optimizer state) to a file named `DNN.h5`. This allows you to load and use it later without retraining.
*   **Terminology:**
    *   **Model Saving:** Storing the trained model for later use. `.h5` is a common format.

**Cell 11: Evaluate Model (Again)**
```python
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')
```
*   **What it does:** Same as Cell 9, just showing evaluation again. The `verbose=0` in Cell 9 just meant "don't print progress bars during evaluation." This cell will print them.

**Cell 12: Make Predictions**
```python
y_pred = model.predict(X_test)
predicted_labels = label_encoder.inverse_transform(np.argmax(y_pred, axis=1))
```
*   **What it does:** Uses the trained model to predict letters for the test data.
    *   `model.predict(X_test)`: Gets the raw probability outputs from the softmax layer for each sample in `X_test`.
    *   `np.argmax(y_pred, axis=1)`: For each sample, finds the index (0-25) of the neuron with the highest probability. This is the predicted numerical label.
    *   `label_encoder.inverse_transform(...)`: Converts these predicted numerical labels back to actual letters ('A', 'B', ...).
*   **Terminology:**
    *   **Prediction/Inference:** Using the trained model to make predictions on new data.

**Cell 13: Random Sample Test**
```python
import random
# ... (function definition) ...
random_sample_predict(model, scaler, label_encoder, X_test, y_test)
```
*   **What it does:** A custom function that picks a random sample from the test set, shows its true letter, and what the model predicted for it. A nice way to see the model in action.

---

**PART 2: DL LAB 2B - Movie Review Sentiment Analysis**
*(Goal: Teach the computer to read a movie review and decide if it's "positive" or "negative".)*

**Cell 14: Markdown**
```markdown
### DL LAB 2B
```
*   Just a title for this new section.

**Cell 15: Sentiment Analysis with a Small Custom Dataset**
*   **What it does:** This cell builds a model to classify short text snippets as positive or negative using a very small, manually created dataset.
*   **Key Steps & Terminology:**
    *   `texts`: A list of example sentences (movie reviews).
    *   `labels`: 0 for negative, 1 for positive. This is **binary classification** (two classes).
    *   `Tokenizer(num_words=max_words, oov_token="<OOV>")`:
        *   **Tokenizer:** Tool to break text into individual words (tokens) and convert them into numbers (sequences).
        *   `num_words`: Only considers the `max_words` most frequent words (vocabulary size).
        *   `oov_token="<OOV>"`: A special token for "Out Of Vocabulary" words (words the tokenizer hasn't seen during training).
    *   `tokenizer.fit_on_texts(texts)`: Learns the vocabulary from your `texts`.
    *   `sequences = tokenizer.texts_to_sequences(texts)`: Converts each review into a sequence of numbers (each number representing a word).
    *   `padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')`:
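        *   **Padding:** Makes all word sequences the same length (`max_len`) by adding zeros. Here the zeros go at the end because of `padding='post'`; without that argument Keras pads at the beginning (`'pre'`), which is what Cell 16 does. DNNs need fixed-size inputs.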
    *   **Model Architecture:**
        *   `layers.Embedding(input_dim=max_words, output_dim=64, input_length=max_len)`:
            *   **Embedding Layer:** Crucial for text! It learns a dense vector (a list of numbers, here 64 numbers long) for each word in your vocabulary. These vectors capture the "meaning" or context of words (e.g., "good" and "great" might have similar vectors).
        *   `layers.Bidirectional(layers.LSTM(32))`:
            *   **LSTM (Long Short-Term Memory):** A special type of **Recurrent Neural Network (RNN)** layer. RNNs are good for sequence data (like text or time series) because they have a "memory" – they can consider previous words in a sentence when processing the current word.
            *   **Bidirectional:** The LSTM processes the sequence from left-to-right AND right-to-left, giving it more context from both directions.
        *   `layers.Dense(32, activation='relu')`: A standard hidden layer.
        *   `layers.Dense(1, activation='sigmoid')`: The output layer for binary classification.
            *   `1`: One neuron because there are only two outcomes (positive/negative).
            *   `activation='sigmoid'`: **Sigmoid** activation squashes the output to a probability between 0 and 1. In this notebook a score of 0.5 or higher is reported as positive and anything below 0.5 as negative.
    *   `model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])`:
        *   `loss='binary_crossentropy'`: The appropriate loss function for binary (0/1) classification problems.
    *   `model.fit(...)`: Trains the model on this small dataset.
    *   `predict_sentiment(review)`: A function to take a new review, preprocess it (tokenize, pad), and predict its sentiment.

**Cell 16: Sentiment Analysis with the IMDB Dataset**
*   **What it does:** This cell does the same task (sentiment analysis) but uses a much larger, standard dataset called IMDB, which Keras can load directly.
*   **Key Differences & Terminology:**
    *   `imdb = keras.datasets.imdb`: Loads the IMDB dataset module.
    *   `(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)`:
        *   Loads the IMDB data. The reviews (`x_train`, `x_test`) are already preprocessed into sequences of word IDs (integers). `y_train`, `y_test` are 0 or 1.
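    *   `word_index = imdb.get_word_index()`: Gets a dictionary mapping words to their integer IDs. These raw IDs are 3 lower than the IDs that appear in `x_train`/`x_test`, because `load_data` reserves 0, 1 and 2 for padding, start-of-review and unknown words; that is why `decode_review` subtracts 3.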
    *   `reverse_word_index`: Creates a dictionary to map IDs back to words.
    *   `decode_review(text_ints)`: A function to convert a sequence of word IDs back into readable text.
    *   `x_train = pad_sequences(x_train, maxlen=maxlen)`: Pads the IMDB review sequences to a fixed length (`maxlen`).
    *   **Model:** Similar architecture (Embedding, Bidirectional LSTM, Dense layers) but configured for the IMDB dataset's `vocab_size` and chosen `maxlen`.
    *   `model.fit(x_train, y_train, ...)`: Trains on the larger IMDB dataset.
    *   `predict_sentiment_text(model, review_text)`:
        *   This function is more complex because it takes raw text.
        *   It manually converts words to their IDs using `word_index`.
        *   Then pads the sequence and predicts.

**Cell 17: Empty Code Cell**
*   This cell is empty, nothing happens here.

---

**Key Takeaways for Your Practical:**

1.  **Understand the Goal:** Are you doing multi-class classification (like letters A-Z) or binary classification (like positive/negative)?
2.  **Data Preprocessing is Key:**
    *   **Numerical Data (Lab 2A):** Scaling (e.g., `StandardScaler`).
    *   **Categorical Labels (Lab 2A):** `LabelEncoder` then `to_categorical` (one-hot encoding).
    *   **Text Data (Lab 2B):** `Tokenizer`, then convert to sequences, then `pad_sequences`.
3.  **Model Building Blocks (Keras `Sequential` model):**
    *   **Input Layer:** Defined by `input_shape` in the first layer.
    *   **Hidden Layers:** `Dense` layers (for general patterns), `LSTM` (for sequences/text).
    *   **Output Layer:**
        *   `Dense(num_classes, activation='softmax')` for multi-class.
        *   `Dense(1, activation='sigmoid')` for binary.
    *   **Activation Functions:** `relu` (common for hidden layers), `softmax` (multi-class output), `sigmoid` (binary output).
4.  **Compilation (`model.compile`):**
    *   **Optimizer:** `adam` is a good default.
    *   **Loss Function:**
        *   `categorical_crossentropy` for multi-class (with one-hot encoded labels).
        *   `binary_crossentropy` for binary.
    *   **Metrics:** `accuracy` is common.
5.  **Training (`model.fit`):**
    *   `epochs`: How many times to go through the data.
    *   `batch_size`: How many samples to process before an update.
    *   `validation_split` or `validation_data`: To monitor for overfitting.
6.  **Evaluation (`model.evaluate`):** Use the test set to get a final, unbiased performance measure.
7.  **Prediction (`model.predict`):** Use the trained model on new, unseen data.

**Simple Analogies:**

*   **DNN:** A team of specialists. Each layer specializes in finding certain types of patterns. Early layers find simple patterns, later layers combine them into complex ones.
*   **Epoch:** Reading a textbook once. Multiple epochs mean re-reading it to understand better.
*   **Batch Size:** Studying in chunks. Instead of reading the whole textbook at once, you study a few pages (batch) then try to recall/update your understanding.
*   **Loss Function:** Your teacher telling you how many questions you got wrong on a quiz. You try to study so you get fewer wrong next time.
*   **Optimizer:** The study method you use to improve your quiz scores.
*   **Embedding Layer (for text):** A super-smart dictionary that knows not just what words mean, but how they relate to each other.

Good luck with your practical! Focus on understanding the *purpose* of each step and the main Keras components.