# Introduction to TensorFlow
TensorFlow is an end-to-end open-source platform for machine learning developed by Google. It provides a comprehensive ecosystem of tools, libraries, and community resources that lets researchers and developers easily build and deploy ML-powered applications.

## Why TensorFlow?
- Production Ready: Robust and scalable for production environments

- Flexibility: From research to production with the same framework

- Keras Integration: High-level API for fast prototyping

- Cross-Platform: Run on CPU, GPU, TPU, mobile, and web

- TensorBoard: Powerful visualization toolkit

- Large Community: Extensive documentation and support

## Key Components:
- Tensors: Multi-dimensional arrays with automatic differentiation

- Keras API: High-level neural networks API

- Eager Execution: Imperative programming environment

- Distribution Strategies: Training across multiple GPUs/devices

- SavedModel: Universal format for saving models

## TensorFlow vs PyTorch:
- TensorFlow: Better for production, more structured, excellent deployment tools

- PyTorch: More Pythonic, better for research, dynamic computation graphs

### Installation and Setup

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import seaborn as sns

In [None]:
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {tf.keras.__version__}")

In [None]:
# Check for GPU availability
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")
print(f"TensorFlow built with CUDA: {tf.test.is_built_with_cuda()}")

if tf.config.list_physical_devices('GPU'):
    print("GPU device name:", tf.test.gpu_device_name())
else:
    print("No GPU found, using CPU")

### TensorFlow Tensors: The Fundamental Data Structure
Tensors are the central unit of data in TensorFlow. A tensor is a generalization of vectors and matrices to potentially higher dimensions.

### Tensor Creation and Basic Properties

In [None]:
print("=== Tensor Creation and Properties ===")

In [None]:
# Creating tensors from Python lists
scalar = tf.constant(5)                    # 0-dimensional tensor (scalar)
print(f"Scalar: {scalar.numpy()}")

In [None]:
vector = tf.constant([1, 2, 3, 4])         # 1-dimensional tensor (vector)
print(f"Vector: {vector.numpy()}")

In [None]:
matrix = tf.constant([[1, 2], [3, 4]])     # 2-dimensional tensor (matrix)
print(f"Matrix:\n{matrix.numpy()}")

In [None]:
tensor_3d = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # 3D tensor
print(f"3D Tensor:\n{tensor_3d.numpy()}")

In [None]:
# Tensor properties
print(f"Shape: {matrix.shape}")

In [None]:
print(f"Rank: {tf.rank(matrix).numpy()}")  # Number of dimensions

In [None]:
print(f"Size: {tf.size(matrix).numpy()}")  # Total number of elements

In [None]:
print(f"Data type: {matrix.dtype}")

In [None]:
print(f"Device: {matrix.device}")

In [None]:
# Special tensors
zeros = tf.zeros([2, 3])                    # 2x3 matrix of zeros
print(f"\nZeros:\n{zeros.numpy()}")

In [None]:
ones = tf.ones([3, 2])                      # 3x2 matrix of ones
print(f"Ones:\n{ones.numpy()}")

In [None]:
eye = tf.eye(3)                             # 3x3 identity matrix
print(f"Identity:\n{eye.numpy()}")

In [None]:
random_normal = tf.random.normal(shape=[2, 2], mean=0.0, stddev=1.0)    # 2x2 matrix from normal distribution
print(f"Random Normal:\n{random_normal.numpy()}")

In [None]:
random_uniform = tf.random.uniform(shape=[2, 2], minval=0, maxval=10, dtype=tf.float32)  # 2x2 matrix from uniform distribution
print(f"Random Uniform:\n{random_uniform.numpy()}")

### Tensor Operations and Broadcasting

In [None]:
print("=== Tensor Operations ===")

# Basic arithmetic operations
a = tf.constant([1, 2, 3])
b = tf.constant([4, 5, 6])

In [None]:
print(f"a + b: {tf.add(a, b).numpy()}")        # Element-wise addition
print(f"a - b: {tf.subtract(a, b).numpy()}")   # Element-wise subtraction
print(f"a * b: {tf.multiply(a, b).numpy()}")   # Element-wise multiplication
print(f"a / b: {tf.divide(a, b).numpy()}")     # Element-wise division
print(f"a ** 2: {tf.pow(a, 2).numpy()}")       # Element-wise power

In [None]:
# Matrix operations
matrix_a = tf.constant([[1, 2], [3, 4]])
matrix_b = tf.constant([[5, 6], [7, 8]])

print(f"\nMatrix multiplication:\n{tf.matmul(matrix_a, matrix_b).numpy()}")

In [None]:
print(f"Element-wise multiplication:\n{tf.multiply(matrix_a, matrix_b).numpy()}")

In [None]:
# Reduction operations
tensor = tf.constant([[1, 2, 3], [4, 5, 6]])
print(f"\nSum of all elements: {tf.reduce_sum(tensor).numpy()}")
# reduce_mean keeps the input dtype, so on this int32 tensor the true mean
# (3.5) would be truncated to 3. Cast to float first to get the real mean.
print(f"Mean of all elements: {tf.reduce_mean(tf.cast(tensor, tf.float32)).numpy()}")
print(f"Max of all elements: {tf.reduce_max(tensor).numpy()}")
# axis=0 collapses the row dimension -> one sum per column: [5 7 9]
print(f"Sum over axis 0 (column sums): {tf.reduce_sum(tensor, axis=0).numpy()}")
# axis=1 collapses the column dimension -> one sum per row: [6 15]
print(f"Sum over axis 1 (row sums): {tf.reduce_sum(tensor, axis=1).numpy()}")

In [None]:
# Broadcasting
vector = tf.constant([1, 2])
matrix = tf.constant([[1, 2], [3, 4], [5, 6]])

print(f"\nBroadcasting example:")
print(f"Vector: {vector.numpy()}")
print(f"Matrix:\n{matrix.numpy()}")
print(f"Vector + Matrix:\n{tf.add(vector, matrix).numpy()}")

### Tensor Reshaping and Manipulation

In [None]:
# Create a 1D tensor
original = tf.range(12)
print(f"Original tensor: {original.numpy()}")
print(f"Original shape: {original.shape}")

In [None]:
# Reshape operations
reshaped_2d = tf.reshape(original, [3, 4])
print(f"\nReshaped to 3x4:\n{reshaped_2d.numpy()}")

In [None]:
reshaped_3d = tf.reshape(original, [2, 3, 2])
print(f"\nReshaped to 2x3x2:\n{reshaped_3d.numpy()}")
print(f"Shape: {reshaped_3d.shape}")

In [None]:
# Transpose
matrix = tf.constant([[1, 2, 3], [4, 5, 6]])
print(f"\nOriginal matrix:\n{matrix.numpy()}")
print(f"Transposed:\n{tf.transpose(matrix).numpy()}")

In [None]:
# Concatenation
t1 = tf.constant([[1, 2], [3, 4]])
t2 = tf.constant([[5, 6], [7, 8]])

print(f"\nVertical concatenation:\n{tf.concat([t1, t2], axis=0).numpy()}")
print(f"Horizontal concatenation:\n{tf.concat([t1, t2], axis=1).numpy()}")

In [None]:
# Slicing and indexing
tensor = tf.constant([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(f"\nOriginal tensor:\n{tensor.numpy()}")

In [None]:
print(f"First row: {tensor[0].numpy()}")

In [None]:
print(f"Element at (1,2): {tensor[1, 2].numpy()}")

In [None]:
print(f"First two rows:\n{tensor[:2].numpy()}")

In [None]:
print(f"Last column:\n{tensor[:, -1].numpy()}")

### Practice Question 1
Create and manipulate tensors:

1.    Create a 2x4 tensor with values from 1 to 8

2.    Reshape it to 4x2

3.    Extract the diagonal elements

4.    Calculate the mean of all elements

5.    Create a boolean mask for elements greater than 4

In [None]:
# Try your solution here


In [None]:
# Try your solution here


In [None]:
# Try your solution here


In [None]:
# Try your solution here


In [None]:
# Try your solution here


<details> <summary>Click to reveal solution</summary>

```python

# 1. Create 2x4 tensor
tensor_2x4 = tf.reshape(tf.range(1, 9), [2, 4])
print(f"1. 2x4 tensor:\n{tensor_2x4.numpy()}")

# 2. Reshape to 4x2
tensor_4x2 = tf.reshape(tensor_2x4, [4, 2])
print(f"\n2. Reshaped to 4x2:\n{tensor_4x2.numpy()}")

# 3. Extract diagonal (for square matrices, but we can get main diagonal elements)
diagonal = tf.linalg.diag_part(tensor_2x4) if tensor_2x4.shape[0] == tensor_2x4.shape[1] else "Not a square matrix"
print(f"\n3. Diagonal elements: {diagonal}")

# Alternative: Get elements where row == col
rows, cols = tensor_2x4.shape
diagonal_indices = tf.minimum(rows, cols)
diagonal_elements = [tensor_2x4[i, i].numpy() for i in range(diagonal_indices)]
print(f"   Main diagonal elements: {diagonal_elements}")

# 4. Calculate mean
mean_value = tf.reduce_mean(tf.cast(tensor_2x4, tf.float32))
print(f"\n4. Mean of all elements: {mean_value.numpy():.2f}")

# 5. Boolean mask for elements > 4
boolean_mask = tensor_2x4 > 4
print(f"\n5. Boolean mask (elements > 4):\n{boolean_mask.numpy()}")
print(f"   Elements greater than 4: {tensor_2x4[boolean_mask].numpy()}")

```
</details>

### Automatic Differentiation with GradientTape
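TensorFlow uses ``tf.GradientTape`` for automatic differentiation: the tape records the operations executed inside its context, and those recorded operations are then used to compute gradients of a result with respect to the watched variables.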

### Basic Gradient Computation


In [None]:
# Simple gradient computation
x = tf.Variable(3.0)  # Create a trainable variable

with tf.GradientTape() as tape: # context manager
    y = x ** 2 + 2 * x + 1

# Compute gradient of y with respect to x
dy_dx = tape.gradient(y, x)
print(f"y = x² + 2x + 1")
print(f"x = {x.numpy()}")
print(f"y = {y.numpy()}")
print(f"dy/dx = {dy_dx.numpy()}")  # Should be 2x + 2 = 8

In [None]:
# Multiple variables
print(f"\n=== Multiple Variables ===")
w = tf.Variable(2.0)
b = tf.Variable(1.0)

with tf.GradientTape() as tape:
    z = w * x + b

gradients = tape.gradient(z, [w, b])
print(f"z = w * x + b = {z.numpy()}")
print(f"dz/dw = {gradients[0].numpy()}")  # Should be x = 3
print(f"dz/db = {gradients[1].numpy()}")  # Should be 1

### Complex Computational Graphs

In [None]:
print("=== Complex Computational Graphs ===")

# More complex function
x = tf.Variable(2.0)
w = tf.Variable(3.0)
b = tf.Variable(1.0)

with tf.GradientTape(persistent=True) as tape: # context manager
    y = w * tf.sin(x ** 2) + b
    z = tf.exp(y) + y ** 2

# Compute gradients
dy_dx = tape.gradient(y, x)
dy_dw = tape.gradient(y, w)
dy_db = tape.gradient(y, b)
dz_dx = tape.gradient(z, x)

print(f"y = w * sin(x²) + b = {y.numpy():.4f}")
print(f"z = exp(y) + y² = {z.numpy():.4f}")
print(f"dy/dx = {dy_dx.numpy():.4f}")  # w * 2x * cos(x²)
print(f"dy/dw = {dy_dw.numpy():.4f}")  # sin(x²)
print(f"dy/db = {dy_db.numpy():.4f}")  # 1
print(f"dz/dx = {dz_dx.numpy():.4f}")  # dz/dy * dy/dx

del tape  # Important for persistent tapes

### GradientTape for Machine Learning

In [None]:
print("=== GradientTape for ML Training ===")

# Simple linear regression example
# True parameters
true_w = tf.constant(2.0)
true_b = tf.constant(1.0)

# Generate synthetic data
num_samples = 100
X = tf.random.normal([num_samples])
noise = tf.random.normal([num_samples], stddev=0.1)
y = true_w * X + true_b + noise

# Trainable variables
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Training parameters
learning_rate = 0.1
epochs = 50

In [None]:
print("Training linear regression...")
# Manual gradient descent: every epoch performs a single update computed
# from all samples in X at once (there is no mini-batching here).
for epoch in range(epochs):
    # Operations on the trainable variables w and b are recorded by the tape
    # so the loss can be differentiated with respect to them afterwards.
    with tf.GradientTape() as tape:
        # Forward pass
        y_pred = w * X + b
        # Compute loss (MSE)
        loss = tf.reduce_mean(tf.square(y_pred - y))

    # Compute gradients
    # Returned in the same order as the list passed in: [d(loss)/dw, d(loss)/db]
    gradients = tape.gradient(loss, [w, b])

    # Update parameters
    # assign_sub updates the variables in place: var <- var - learning_rate * grad
    w.assign_sub(learning_rate * gradients[0])
    b.assign_sub(learning_rate * gradients[1])

    # Report progress every 10th epoch; `loss` is the value from before this epoch's update
    if epoch % 10 == 0:
        print(f"Epoch {epoch}: w = {w.numpy():.3f}, b = {b.numpy():.3f}, loss = {loss.numpy():.4f}")

print(f"\nFinal parameters: w = {w.numpy():.3f}, b = {b.numpy():.3f}")
print(f"True parameters: w = {true_w.numpy():.3f}, b = {true_b.numpy():.3f}")

### Practice Question 2
Implement and compute gradients for:

1. Single variable: f(x) = x³ - 3x² + x, evaluated at x = 2

2. Multi-variable: L = (w₁x₁ + w₂x₂ + b - y)² where w₁=1, w₂=2, x₁=3, x₂=4, b=1, y=10

3. Chain rule: z = log(1 + exp(wx + b)) where w=2, x=3, b=1

In [None]:
# Try your solution here


In [None]:
# Try your solution here


In [None]:
# Try your solution here


<details> <summary>Click to reveal solution</summary>

```python
# Solution
print("=== Practice Question 2 Solution ===")

# 1. f(x) = x³ - 3x² + x
x1 = tf.Variable(2.0)
with tf.GradientTape() as tape:
    f = x1**3 - 3*x1**2 + x1
df_dx = tape.gradient(f, x1)
print(f"1. f(x) = x³ - 3x² + x = {f.numpy()}")
print(f"   df/dx = {df_dx.numpy()} (should be 3x² - 6x + 1 = {3*4 - 6*2 + 1})")

# 2. L = (w₁x₁ + w₂x₂ + b - y)²
w1 = tf.Variable(1.0)
w2 = tf.Variable(2.0)
x1_val = tf.constant(3.0)
x2_val = tf.constant(4.0)
b = tf.Variable(1.0)
y_true = tf.constant(10.0)

with tf.GradientTape() as tape:
    prediction = w1 * x1_val + w2 * x2_val + b
    L = (prediction - y_true) ** 2

gradients = tape.gradient(L, [w1, w2, b])
print(f"\n2. L = (w₁x₁ + w₂x₂ + b - y)² = {L.numpy()}")
print(f"   dL/dw₁ = {gradients[0].numpy()} (should be 2x₁(w₁x₁+w₂x₂+b-y) = {2*3*(1*3+2*4+1-10)} = 12)")
print(f"   dL/dw₂ = {gradients[1].numpy()} (should be 2x₂(w₁x₁+w₂x₂+b-y) = {2*4*(1*3+2*4+1-10)} = 16)")
print(f"   dL/db = {gradients[2].numpy()} (should be 2(w₁x₁+w₂x₂+b-y) = {2*(1*3+2*4+1-10)} = 4)")

# 3. z = log(1 + exp(wx + b))
w3 = tf.Variable(2.0)
x3 = tf.constant(3.0)
b3 = tf.Variable(1.0)

with tf.GradientTape() as tape:
    linear = w3 * x3 + b3
    z = tf.math.log(1 + tf.math.exp(linear))

dz_dw = tape.gradient(z, w3)

# Analytic check: d/dw log(1 + exp(wx + b)) = x * sigmoid(wx + b).
# Computed from the float tensors above; calling tf.math.exp on a Python int
# would create an int32 tensor, which exp does not accept.
expected_dz_dw = x3 * tf.sigmoid(linear)

print(f"\n3. z = log(1 + exp(wx + b)) = {z.numpy():.4f}")
print(f"   dz/dw = {dz_dw.numpy():.4f} (should be x * σ(wx+b) = {expected_dz_dw.numpy():.4f})")

```
</details>

### Building Neural Networks with Keras
Keras is TensorFlow's high-level API for building and training deep learning models. It provides a user-friendly interface while maintaining flexibility.

### Understanding the Sequential API

In [None]:
print("=== Building Neural Networks with Keras ===")

# Simplest way: Sequential API
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),  # Input layer
    tf.keras.layers.Dense(32, activation='relu'),                     # Hidden layer 1
    tf.keras.layers.Dense(16, activation='relu'),                     # Hidden layer 2
    tf.keras.layers.Dense(1, activation='sigmoid')                    # Output layer
])

In [None]:
print("Model Architecture:")
model.summary()

In [None]:
# Alternative way to build Sequential model
model_alt = tf.keras.Sequential()
model_alt.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)))
model_alt.add(tf.keras.layers.Dense(32, activation='relu'))
model_alt.add(tf.keras.layers.Dense(16, activation='relu'))
model_alt.add(tf.keras.layers.Dense(1, activation='sigmoid'))

In [None]:
print("\nAlternative Model Architecture:")
model_alt.summary()

### Functional API for Complex Architectures

In [None]:
print("=== Functional API for Complex Models ===")

# Functional API allows for more complex architectures
inputs = tf.keras.Input(shape=(10,))  # Define input

# Create layers
x = tf.keras.layers.Dense(64, activation='relu')(inputs)

# Branch 1
branch1 = tf.keras.layers.Dense(32, activation='relu')(x)
branch1 = tf.keras.layers.Dense(16, activation='relu')(branch1)

# Branch 2
branch2 = tf.keras.layers.Dense(32, activation='tanh')(x)
branch2 = tf.keras.layers.Dense(16, activation='tanh')(branch2)

# Concatenate branches
concatenated = tf.keras.layers.concatenate([branch1, branch2])

# Output layer
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(concatenated)

# Create model
functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
print("Functional Model Architecture:")
functional_model.summary()

In [None]:
# Plot model architecture
tf.keras.utils.plot_model(functional_model, show_shapes=True, show_layer_names=True)

### Practice Question 3
Build different neural network architectures with input of length 20 for classification:

1. Sequential model with 3 hidden layers (128, 64, 32 neurons) for binary classification

2. Functional API model with two parallel branches that merge before the output layer: one branch with 64, 32, and 16 neurons, and the other with 128 and 64 neurons

In [None]:
# Try your solution here


In [None]:
# Try your solution here


<details> <summary>Click to reveal solution for part-1</summary>

```python
# 1. Sequential Model
print("1. Sequential Model:")
sequential_model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(20,)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification
])

sequential_model.summary()
```
</details>

<details> <summary>Click to reveal solution for part-2</summary>

```python
# 2. Functional API with Parallel Branches
print("\n2. Functional API with Parallel Branches:")
inputs = tf.keras.Input(shape=(20,))

# Branch 1: Deeper network
branch1 = tf.keras.layers.Dense(64, activation='relu')(inputs)
branch1 = tf.keras.layers.Dense(32, activation='relu')(branch1)
branch1 = tf.keras.layers.Dense(16, activation='relu')(branch1)

# Branch 2: Wider but shallower network
branch2 = tf.keras.layers.Dense(128, activation='tanh')(inputs)
branch2 = tf.keras.layers.Dense(64, activation='tanh')(branch2)

# Merge branches
merged = tf.keras.layers.concatenate([branch1, branch2])
merged = tf.keras.layers.Dropout(0.2)(merged)

# Output
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(merged)

functional_model = tf.keras.Model(inputs=inputs, outputs=outputs)
functional_model.summary()
```
</details>

### Model Compilation and Training

### Understanding Loss Functions, Optimizers, and Metrics

In [None]:
print("=== Model Compilation ===")

# Create a sample model for demonstration
demo_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Different loss functions for different problems
print("Common Loss Functions:")
print("- BinaryCrossentropy: Binary classification")
print("- CategoricalCrossentropy: Multi-class classification")
print("- SparseCategoricalCrossentropy: Multi-class with integer labels")
print("- MeanSquaredError: Regression")
print("- MeanAbsoluteError: Regression")

In [None]:
# Different optimizers
print("\nCommon Optimizers:")
print("- SGD: Stochastic Gradient Descent")
print("- Adam: Adaptive Moment Estimation (most popular)")
print("- RMSprop: Root Mean Square Propagation")
print("- Adagrad: Adaptive Gradient Algorithm")

In [None]:
# Common metrics
print("\nCommon Metrics:")
print("- Accuracy: Classification accuracy")
print("- Precision: True positives / (True positives + False positives)")
print("- Recall: True positives / (True positives + False negatives)")
print("- AUC: Area Under ROC Curve")
print("- MAE: Mean Absolute Error (regression)")
print("- MSE: Mean Squared Error (regression)")

In [None]:
# Compile the model
# Precision and Recall are passed as metric objects with explicit names:
# the lowercase string shortcuts 'precision' / 'recall' are not resolvable
# on every Keras version, whereas the classes are, and the explicit names
# keep the reported metric keys fixed at 'precision' and 'recall'.
demo_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]
)

In [None]:
print(f"\nModel compiled with:")
print(f"Optimizer: {demo_model.optimizer.get_config()['name']}")
print(f"Loss: {demo_model.loss}")

### Advanced Optimizer Configuration

In [None]:
print("=== Advanced Optimizer Configuration ===")

# Custom optimizer configuration
custom_adam = tf.keras.optimizers.Adam(
    learning_rate=0.001,      # Learning rate
    beta_1=0.9,              # Exponential decay rate for 1st moment estimates
    beta_2=0.999,            # Exponential decay rate for 2nd moment estimates
    epsilon=1e-07,           # Small constant for numerical stability
    amsgrad=False            # Whether to apply AMSGrad variant
)

In [None]:
custom_sgd = tf.keras.optimizers.SGD(
    learning_rate=0.01,
    momentum=0.9,            # Accelerate SGD in relevant direction
    nesterov=True            # Nesterov accelerated gradient
)

In [None]:
# Learning rate scheduling
def learning_rate_schedule(epoch, lr):
    """Custom learning rate schedule.

    Keeps the learning rate unchanged for the first 10 epochs, then
    multiplies it by exp(-0.1) on every later epoch.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, always as a plain Python
        float (a schedule callback is expected to return a float, not a
        tensor).
    """
    import math  # local import: only this helper needs it

    if epoch < 10:
        return float(lr)
    # math.exp yields a Python float; tf.math.exp would yield a tf.Tensor.
    return float(lr) * math.exp(-0.1)

In [None]:
# Or use built-in schedulers
lr_scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True
)

optimizer_with_scheduler = tf.keras.optimizers.Adam(learning_rate=lr_scheduler)

### Complete Training Pipeline

In [None]:
print("=== Complete Training Pipeline ===")

# Generate synthetic dataset for demonstration
def generate_synthetic_data(num_samples=1000, input_dim=10):
    """Create a random binary-classification dataset.

    Features are drawn from a normal distribution with shape
    (num_samples, input_dim). A sample is labelled 1.0 when the sum of its
    features is greater than zero and 0.0 otherwise.

    Returns:
        A (features, labels) pair of NumPy arrays; labels are float32.
    """
    features = tf.random.normal([num_samples, input_dim])
    # Decision rule: positive class when the per-sample feature sum exceeds 0
    feature_sums = tf.reduce_sum(features, axis=1)
    is_positive = feature_sums > 0
    labels = is_positive.numpy().astype(np.float32)
    return features.numpy(), labels

# Generate data
X_train, y_train = generate_synthetic_data(1000, 10)
X_val, y_val = generate_synthetic_data(200, 10)
X_test, y_test = generate_synthetic_data(200, 10)

print(f"Training data shape: {X_train.shape}, {y_train.shape}")
print(f"Validation data shape: {X_val.shape}, {y_val.shape}")
print(f"Test data shape: {X_test.shape}, {y_test.shape}")

In [None]:
# Create and compile model
# Two hidden ReLU layers with dropout after each, and a sigmoid output that
# produces a probability for the positive class.
training_model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

# Precision and Recall are passed as metric objects with explicit names:
# the lowercase string shortcuts are not resolvable on every Keras version,
# and fixed names guarantee the history keys 'precision', 'val_precision',
# 'recall' and 'val_recall' that the plotting code reads. The metric order
# (accuracy, precision, recall) is also the order evaluate() returns them in.
training_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Precision(name='precision'),
        tf.keras.metrics.Recall(name='recall'),
    ]
)

In [None]:
# Callbacks for enhanced training
callbacks = [
    # Early stopping to prevent overfitting: stop after 10 epochs without
    # val_loss improvement and roll back to the best weights seen.
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=10,
        restore_best_weights=True
    ),

    # Reduce learning rate when plateau: halve it after 5 stagnant epochs,
    # never going below min_lr.
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=5,
        min_lr=1e-7
    ),

    # Model checkpointing: keep only the model with the highest val_accuracy.
    # The native `.keras` suffix is used because some Keras 3 releases reject
    # full-model checkpoint paths that end in `.h5`.
    tf.keras.callbacks.ModelCheckpoint(
        'best_model.keras',
        monitor='val_accuracy',
        save_best_only=True,
        mode='max'
    ),

    # TensorBoard for visualization (histograms written every epoch)
    tf.keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=1
    )
]

In [None]:
# Train the model
print("Starting training...")
history = training_model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1
)

print("Training completed!")

In [None]:
# Evaluate on test set
test_loss, test_accuracy, test_precision, test_recall = training_model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Results:")
print(f"Loss: {test_loss:.4f}")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Precision: {test_precision:.4f}")
print(f"Recall: {test_recall:.4f}")

### Visualization of Training History

In [None]:
print("=== Training History Visualization ===")

# Plot training history
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))

# Loss
ax1.plot(history.history['loss'], label='Training Loss')
ax1.plot(history.history['val_loss'], label='Validation Loss')
ax1.set_title('Training and Validation Loss')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss')
ax1.legend()
ax1.grid(True)

# Accuracy
ax2.plot(history.history['accuracy'], label='Training Accuracy')
ax2.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax2.set_title('Training and Validation Accuracy')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Accuracy')
ax2.legend()
ax2.grid(True)

# Precision
ax3.plot(history.history['precision'], label='Training Precision')
ax3.plot(history.history['val_precision'], label='Validation Precision')
ax3.set_title('Training and Validation Precision')
ax3.set_xlabel('Epoch')
ax3.set_ylabel('Precision')
ax3.legend()
ax3.grid(True)

# Recall
ax4.plot(history.history['recall'], label='Training Recall')
ax4.plot(history.history['val_recall'], label='Validation Recall')
ax4.set_title('Training and Validation Recall')
ax4.set_xlabel('Epoch')
ax4.set_ylabel('Recall')
ax4.legend()
ax4.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Calculate additional metrics
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

In [None]:
# Predictions
y_pred_proba = training_model.predict(X_test)
y_pred = (y_pred_proba > 0.5).astype(int).flatten()

print("\nDetailed Classification Report:")
print(classification_report(y_test, y_pred))

In [None]:
# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.show()

In [None]:
# ROC AUC Score
auc_score = roc_auc_score(y_test, y_pred_proba)
print(f"ROC AUC Score: {auc_score:.4f}")

### End-to-End Regression: California Housing Dataset

In [None]:
## Step 1: Load and Explore the Dataset

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Load the California Housing dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target
feature_names = housing.feature_names

In [None]:
print("Dataset Information:")
print(f"Number of samples: {X.shape[0]}")
print(f"Number of features: {X.shape[1]}")
print(f"Feature names: {feature_names}")
print(f"Target range: ${y.min():.2f} - ${y.max():.2f} hundred thousands")
print(f"Target mean: ${y.mean():.2f} hundred thousands")

In [None]:
# Create DataFrame for easier exploration
import pandas as pd
df = pd.DataFrame(X, columns=feature_names)
df['MedHouseVal'] = y

In [None]:
print("\nFirst 5 rows of the dataset:")
print(df.head())

In [None]:
print("\nBasic Statistics:")
print(df.describe())

In [None]:
## Step 2: Essential Data Visualization

print("\n=== Essential Data Exploration ===")

# The target (median house value) is stored in units of $100,000, not
# thousands of dollars. Multiply by this factor whenever a value is shown as
# a dollar amount; plot axes keep the raw units and are labelled accordingly.
TARGET_UNIT_USD = 100_000

# Create focused visualizations
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# 1. Target Distribution
axes[0, 0].hist(y, bins=50, alpha=0.7, color='skyblue', edgecolor='black')
axes[0, 0].axvline(y.mean(), color='red', linestyle='--', linewidth=2,
                   label=f'Mean: ${y.mean() * TARGET_UNIT_USD:,.0f}')
axes[0, 0].set_xlabel('Median House Value ($100,000s)')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Distribution of House Prices')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Correlation Matrix
axes[0, 1].set_title('Feature Correlation Matrix')
correlation_matrix = df.corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0,
            fmt='.2f', square=True, ax=axes[0, 1])

# 3. Most Important Feature vs Target
# Index 0 of the sorted correlations is the target itself (correlation 1.0),
# so index 1 is the most strongly correlated feature.
most_correlated = correlation_matrix['MedHouseVal'].abs().sort_values(ascending=False).index[1]
axes[0, 2].scatter(df[most_correlated], df['MedHouseVal'], alpha=0.5, s=10)
axes[0, 2].set_xlabel(most_correlated)
axes[0, 2].set_ylabel('Median House Value ($100,000s)')
axes[0, 2].set_title(f'Price vs {most_correlated}')
axes[0, 2].grid(True, alpha=0.3)

# 4. Geographical Distribution
scatter = axes[1, 0].scatter(df['Longitude'], df['Latitude'], c=df['MedHouseVal'],
                            cmap='viridis', alpha=0.6, s=10)
plt.colorbar(scatter, ax=axes[1, 0], label='Median House Value ($100,000s)')
axes[1, 0].set_xlabel('Longitude')
axes[1, 0].set_ylabel('Latitude')
axes[1, 0].set_title('Housing Prices by Location')

# 5. Feature Distributions
df[feature_names[:4]].boxplot(ax=axes[1, 1])
axes[1, 1].set_title('Key Feature Distributions')
axes[1, 1].tick_params(axis='x', rotation=45)

# 6. Data Summary
axes[1, 2].axis('off')
summary_text = f"""
Dataset Summary:
---------------
Samples: {len(df):,}
Features: {len(feature_names)}
Target: Median House Value

Key Statistics:
- Mean Price: ${y.mean() * TARGET_UNIT_USD:,.0f}
- Price Range: ${y.min() * TARGET_UNIT_USD:,.0f} - ${y.max() * TARGET_UNIT_USD:,.0f}

Top Correlated Features:
"""
# Skip position 0 (the target itself) and keep the next three features
top_features = correlation_matrix['MedHouseVal'].abs().sort_values(ascending=False)[1:4]
for feature, corr in top_features.items():
    summary_text += f"- {feature}: {corr:.3f}\n"

axes[1, 2].text(0.1, 0.9, summary_text, fontsize=10, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='lightgray', alpha=0.5))

plt.tight_layout()
plt.show()

# Basic Statistical Analysis
print("\n=== Statistical Analysis ===")
print("Top 3 features correlated with target:")
for feature, corr in top_features.items():
    print(f"  {feature:15}: {corr:.4f}")

print(f"\nData Quality Check:")
print(f"No missing values: {df.isnull().sum().sum() == 0}")
print(f"Reasonable price range: ${y.min() * TARGET_UNIT_USD:,.0f} - ${y.max() * TARGET_UNIT_USD:,.0f}")

In [None]:
## Step 3: Data Preprocessing and Splitting

print("\n=== Data Preprocessing ===")

# Replace NaN and +/- infinity entries with 0.0 so that scaling and training
# never see non-finite values
X = np.nan_to_num(X, nan=0.0, posinf=0.0, neginf=0.0)

# Split the data
# First hold out 20% of all samples as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ... then take 20% of the remaining 80% as the validation set, giving
# roughly a 64% / 16% / 20% train / validation / test split overall.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

print(f"Training set: {X_train.shape[0]:,} samples")
print(f"Validation set: {X_val.shape[0]:,} samples")
print(f"Test set: {X_test.shape[0]:,} samples")

# Scale the features
# The scaler is fitted on the training split only; validation and test data
# are transformed with the training statistics so nothing leaks from them.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Sanity check: these are computed over the whole scaled training array
# (all features together), not per feature
print(f"\nFeature scaling applied:")
print(f"Training mean after scaling: {X_train_scaled.mean():.4f}")
print(f"Training std after scaling: {X_train_scaled.std():.4f}")

In [None]:
## Step 4: Build Simple Neural Network Model

print("\n=== Building Simple Neural Network Model ===")

def create_simple_regression_model(input_dim):
    """Build an uncompiled feed-forward network for regression.

    The network stacks Dense layers of 64, 32 and 16 ReLU units, with 20%
    dropout after the first two, followed by a single-unit output layer that
    has no activation.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The assembled tf.keras.Sequential model.
    """
    layers = tf.keras.layers
    network = tf.keras.Sequential()

    network.add(layers.Dense(64, activation='relu', input_shape=(input_dim,)))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(32, activation='relu'))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(16, activation='relu'))
    # No activation on the output: the model predicts an unbounded real value
    network.add(layers.Dense(1))

    return network

# Create the model
model = create_simple_regression_model(X_train.shape[1])

print("Model Architecture:")
model.summary()

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',  # Mean Squared Error for regression
    metrics=['mae']  # Mean Absolute Error
)

print("Model compiled successfully!")

In [None]:
## Step 5: Model Training

print("\n=== Model Training ===")

# Define callbacks
callbacks = [
    tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.5,
        patience=10,
        min_lr=1e-7,
        verbose=1
    )
]

# Train the model
EPOCHS = 100

print("Starting model training...")
history = model.fit(
    X_train_scaled, y_train,
    batch_size=32,
    epochs=EPOCHS,
    validation_data=(X_val_scaled, y_val),
    callbacks=callbacks,
    verbose=1,
    shuffle=True
)

print("Training completed!")

In [None]:
## Step 6: Model Evaluation

print("\n=== Model Evaluation ===")

# The target is stored in units of $100,000; multiply by this factor to
# report a value as a dollar amount.
TARGET_UNIT_USD = 100_000

# Make predictions
# predict() returns shape (n_samples, 1); flatten to align with y_test
y_pred = model.predict(X_test_scaled, verbose=0).flatten()

# Calculate metrics (all in the target's own units, i.e. $100,000s)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

print("Model Performance Metrics:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

# Business interpretation
# avg_price stays in target units because later cells reuse it
avg_price = y_test.mean()
print(f"\nBusiness Interpretation:")
print(f"Average house price in test set: ${avg_price * TARGET_UNIT_USD:,.0f}")
print(f"Typical prediction error: ±${mae * TARGET_UNIT_USD:,.0f}")
print(f"Error as percentage of average price: {(mae/avg_price)*100:.1f}%")

In [None]:
## Step 7: Results Visualization

print("\n=== Results Visualization ===")

# The target is stored in units of $100,000. Plot axes keep the raw units
# (and say so); the text summary converts to dollars with this factor.
TARGET_UNIT_USD = 100_000

# Create comprehensive results visualization
fig, axes = plt.subplots(2, 3, figsize=(15, 10))

# 1. Training History
axes[0, 0].plot(history.history['loss'], label='Training Loss', linewidth=2)
axes[0, 0].plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
axes[0, 0].set_title('Training History - Loss')
axes[0, 0].set_xlabel('Epoch')
axes[0, 0].set_ylabel('MSE Loss')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Predictions vs Actual
axes[0, 1].scatter(y_test, y_pred, alpha=0.5, s=20)
# Perfect prediction line (y = x) spanning the full range of both series
min_val = min(y_test.min(), y_pred.min())
max_val = max(y_test.max(), y_pred.max())
axes[0, 1].plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label='Perfect Prediction')
axes[0, 1].set_xlabel('Actual Prices ($100,000s)')
axes[0, 1].set_ylabel('Predicted Prices ($100,000s)')
axes[0, 1].set_title('Predictions vs Actual Values')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Residual Analysis
residuals = y_test - y_pred
axes[0, 2].scatter(y_pred, residuals, alpha=0.5, s=20)
axes[0, 2].axhline(y=0, color='red', linestyle='--', linewidth=2)
axes[0, 2].set_xlabel('Predicted Prices ($100,000s)')
axes[0, 2].set_ylabel('Residuals (Actual - Predicted)')
axes[0, 2].set_title('Residual Plot')
axes[0, 2].grid(True, alpha=0.3)

# 4. Error Distribution
axes[1, 0].hist(residuals, bins=30, alpha=0.7, color='lightcoral', edgecolor='black')
axes[1, 0].axvline(residuals.mean(), color='red', linestyle='--', linewidth=2,
                  label=f'Mean: {residuals.mean():.3f}')
axes[1, 0].set_xlabel('Prediction Error')
axes[1, 0].set_ylabel('Frequency')
axes[1, 0].set_title('Error Distribution')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 5. Feature Importance
# Rough proxy only: mean absolute weight per input feature in the first
# Dense layer (its kernel has one row per input feature).
first_layer_weights = model.layers[0].get_weights()[0]
feature_importance = np.mean(np.abs(first_layer_weights), axis=1)

axes[1, 1].barh(feature_names, feature_importance, color='lightgreen', alpha=0.7)
axes[1, 1].set_xlabel('Average Absolute Weight')
axes[1, 1].set_title('Feature Importance (First Layer)')
axes[1, 1].grid(True, alpha=0.3)

# 6. Performance Summary
axes[1, 2].axis('off')
performance_text = f"""
Model Performance Summary:
------------------------
RMSE: ${rmse * TARGET_UNIT_USD:,.0f}
MAE:  ${mae * TARGET_UNIT_USD:,.0f}
R²:   {r2:.4f}
MAPE: {mape:.2f}%

Business Impact:
---------------
Average Price: ${avg_price * TARGET_UNIT_USD:,.0f}
Typical Error: ±${mae * TARGET_UNIT_USD:,.0f}
Accuracy: {100 - mape:.1f}%

Interpretation:
- Good for price estimation
- {100 - mape:.1f}% accuracy on average
- Useful for market analysis
"""
axes[1, 2].text(0.1, 0.9, performance_text, fontsize=10, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='lightblue', alpha=0.5))

plt.tight_layout()
plt.show()

In [None]:
# Save the model
# Writes the whole model to a single file in the working directory.
# NOTE(review): the `.h5` suffix presumably selects the HDF5 format; newer
# Keras documentation recommends the native `.keras` format — confirm whether
# this file name is relied on elsewhere before changing it.
model.save('california_housing_model.h5')
print("Model saved as 'california_housing_model.h5'")

In [None]:
# Create prediction function
def predict_house_price(features):
    """Predict house prices for new, unscaled feature rows.

    Relies on the module-level `scaler` (fitted on the training features)
    and the trained module-level `model`.

    Args:
        features: Original (unscaled) feature values, either a 2-D array-like
            of shape (n_samples, n_features) or a single 1-D sample of
            length n_features. The feature order must match the order used
            for training.

    Returns:
        1-D NumPy array with one prediction per sample, in the same units as
        the training target.
    """
    # Accept lists and a single 1-D sample by promoting the input to a 2-D
    # float array, which is what the scaler and the model expect.
    features = np.atleast_2d(np.asarray(features, dtype=float))

    # Apply the same scaling that was fitted on the training data
    features_scaled = scaler.transform(features)

    # The model outputs shape (n_samples, 1); flatten to (n_samples,)
    predictions = model.predict(features_scaled, verbose=0)

    return predictions.flatten()

# Test prediction with sample data
# Predictions and targets are in units of $100,000; multiply by this factor
# to print them as dollar amounts.
TARGET_UNIT_USD = 100_000

print("\nSample Prediction Test:")
sample_house = np.array([[8.3252, 41.0, 6.984127, 1.023810, 322.0, 2.555556, 37.88, -122.23]])
predicted_price = predict_house_price(sample_house)
actual_price = 4.526  # Reference value for these features, in units of $100,000

print(f"Predicted price: ${predicted_price[0] * TARGET_UNIT_USD:,.0f}")
print(f"Typical actual price: ${actual_price * TARGET_UNIT_USD:,.0f}")
print(f"Prediction error: ${abs(predicted_price[0] - actual_price) * TARGET_UNIT_USD:,.0f}")