***ML LAB CIA 2***

**Q1**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data  # Features (sepal/petal dimensions)
y = iris.target  # Labels (species: 0, 1, 2)
y = to_categorical(y)  # One-hot encode labels for SoftMax

# Split data into train/test sets BEFORE scaling, so that no statistics of
# the held-out test rows leak into the preprocessing step
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess data: fit the scaler on the training rows only, then apply the
# same (already fitted) transformation to the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Function to build and evaluate the model
def train_model(activation='softmax'):
    """Train a small classifier on the Iris split and print its test accuracy.

    The network has one 16-unit ReLU hidden layer and a 3-unit output layer.
    It is fitted for 50 epochs on the module-level ``X_train``/``y_train``
    (with ``validation_split=0.2``) and then evaluated on the module-level
    ``X_test``/``y_test``.

    Args:
        activation: Activation of the output layer. ``'softmax'`` is paired
            with categorical cross-entropy; any other value is paired with
            binary cross-entropy.

    Returns:
        None. The test accuracy is printed.
    """
    # Local import: the surrounding cell only imports Dense.
    from tensorflow.keras.layers import Input

    model = Sequential([
        # Explicit Input layer instead of the `input_shape=` layer argument
        # (newer Keras releases warn about the latter); the width is taken
        # from the training data rather than hard-coded.
        Input(shape=(X_train.shape[1],)),
        Dense(16, activation='relu'),  # Hidden layer
        Dense(3, activation=activation),  # Output layer (SoftMax or Sigmoid)
    ])

    # Compile with categorical crossentropy for SoftMax, binary for Sigmoid
    loss = 'categorical_crossentropy' if activation == 'softmax' else 'binary_crossentropy'
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])

    # Train (the returned History object is not needed here)
    model.fit(X_train, y_train, epochs=50, validation_split=0.2, verbose=0)

    # Evaluate: evaluate() yields [loss, accuracy] for this compile() setup
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    print(f"Activation: {activation}, Test Accuracy: {accuracy:.4f}")

# Compare SoftMax vs. Sigmoid output layers on the same train/test split.
# 'softmax' is the correct choice for this multi-class problem; 'sigmoid' is
# incorrect for multi-class and is run only for comparison.
for output_activation in ('softmax', 'sigmoid'):
    train_model(activation=output_activation)

**Q2**

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# Generate synthetic data (seeded so every run draws the same numbers)
np.random.seed(42)
n_students, n_inputs = 1000, 5
X = np.random.rand(n_students, n_inputs)  # 5 socio-economic features, uniform in [0, 1)
feature_weights = np.random.rand(n_inputs)  # one random weight per feature
grade_noise = np.random.rand(n_students) * 0.1  # uniform noise in [0, 0.1)
# Grades: weighted sum of the features plus noise (not clipped to any range)
y = np.dot(X, feature_weights) + grade_noise

# Hold out 20% of the rows as a test set (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: learn the scaling from the training rows only, then apply
# that same fitted transform to both splits
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Build model (with optional regularization)
def build_model(use_regularization=False):
    """Assemble and compile the regression network for the 5-feature data.

    The model is: a 5-wide input, two 128-unit ReLU layers, an optional
    Dropout(0.5), and a single linear output unit. It is compiled with the
    Adam optimizer, MSE loss and MAE as an extra metric.

    Args:
        use_regularization: When true, both hidden layers get an L2(0.01)
            kernel regularizer and the Dropout layer is inserted before the
            output layer. When false, neither is used.

    Returns:
        The compiled (untrained) Sequential model.
    """
    weight_penalty = None
    if use_regularization:
        weight_penalty = l2(0.01)

    # Collect the layers in order, then hand the whole stack to Sequential
    stack = [
        Input(shape=(5,)),  # Explicit input layer
        Dense(128, activation='relu', kernel_regularizer=weight_penalty),
        Dense(128, activation='relu', kernel_regularizer=weight_penalty),
    ]
    if use_regularization:
        stack.append(Dropout(0.5))  # Dropout only in the regularized variant
    stack.append(Dense(1))  # Output layer (linear for regression)

    model = Sequential(stack)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Both runs are fitted with identical settings
fit_options = dict(epochs=100, validation_split=0.2, verbose=0)

# Baseline run: no regularization (the variant expected to overfit)
model_no_reg = build_model(use_regularization=False)
history_no_reg = model_no_reg.fit(X_train, y_train, **fit_options)

# Regularized run: dropout + L2
model_with_reg = build_model(use_regularization=True)
history_with_reg = model_with_reg.fit(X_train, y_train, **fit_options)

# Plot the validation-loss curve of each run on one figure
plt.figure(figsize=(10, 5))
for run_history, run_label in (
    (history_no_reg, 'No Regularization'),
    (history_with_reg, 'With Dropout + L2'),
):
    plt.plot(run_history.history['val_loss'], label=run_label, linestyle='--')
plt.xlabel('Epochs')
plt.ylabel('Validation Loss (MSE)')
plt.legend()
plt.title('Overfitting Mitigation with Regularization')
plt.show()

# Test performance: index 1 of evaluate()'s result is the 'mae' metric
no_reg_scores = model_no_reg.evaluate(X_test, y_test, verbose=0)
print("Test MAE (No Regularization):", no_reg_scores[1])
with_reg_scores = model_with_reg.evaluate(X_test, y_test, verbose=0)
print("Test MAE (With Regularization):", with_reg_scores[1])

**Q3**

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt

# Generate synthetic insurance claim data (seeded for repeatability)
np.random.seed(42)
n_samples = 1000
n_inputs = 10
X = np.random.rand(n_samples, n_inputs)  # 10 features (e.g., age, BMI, medical history)
claim_weights = np.random.rand(n_inputs)  # one random weight per feature
claim_noise = np.random.randn(n_samples) * 500  # Gaussian noise, std 500
# Claim amounts ($): scaled weighted sum of the features plus noise
y = np.dot(X, claim_weights) * 10000 + claim_noise

# Split into train/test (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardize features: fit on the training split, then transform both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Function to build and train the model
def train_model(use_regularization=False):
    """Build, compile and fit the claim-amount regression network.

    The network stacks Dense layers of 256, 256, 128 and 64 ReLU units
    followed by a single linear output unit. It is compiled with Adam, MSE
    loss and an MAE metric, and fitted for 100 epochs on the module-level
    ``X_train``/``y_train`` with ``validation_split=0.2``.

    Args:
        use_regularization: When true, a Dropout(0.5) layer is inserted
            before the 64-unit layer and that layer gets an L2(0.01) kernel
            regularizer. When false, the 64-unit layer is a plain Dense.

    Returns:
        A ``(model, history)`` tuple: the fitted model and the object
        returned by ``model.fit``.
    """
    # Local import: the surrounding cell only imports Dense and Dropout.
    from tensorflow.keras.layers import Input

    model = Sequential()
    # Explicit Input layer instead of the `input_shape=` layer argument
    # (newer Keras releases warn about the latter).
    model.add(Input(shape=(X_train.shape[1],)))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(128, activation='relu'))

    if use_regularization:
        model.add(Dropout(0.5))
        model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
    else:
        model.add(Dense(64, activation='relu'))

    model.add(Dense(1))  # Output layer for regression

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    history = model.fit(X_train, y_train, epochs=100,
                       validation_split=0.2, verbose=0)
    return model, history

# Baseline run: no regularization, intended to overfit
model_overfit, history_overfit = train_model(use_regularization=False)

# Regularized run: dropout + L2
model_reg, history_reg = train_model(use_regularization=True)

# Plot training vs validation loss; each entry is (curve, plot keyword args)
plt.figure(figsize=(10, 5))
loss_curves = [
    (history_overfit.history['loss'],
     {'label': 'Train (Overfit)'}),
    (history_overfit.history['val_loss'],
     {'label': 'Validation (Overfit)', 'linestyle': '--'}),
    (history_reg.history['val_loss'],
     {'label': 'Validation (Regularized)', 'linestyle': '--'}),
]
for curve, line_options in loss_curves:
    plt.plot(curve, **line_options)
plt.xlabel('Epochs')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.title('Overfitting vs. Regularization')
plt.show()

# Evaluate on test data: index 1 of evaluate()'s result is the 'mae' metric
overfit_mae = model_overfit.evaluate(X_test, y_test, verbose=0)[1]
print("Test MAE (Overfit Model): ${:,.2f}".format(overfit_mae))
reg_mae = model_reg.evaluate(X_test, y_test, verbose=0)[1]
print("Test MAE (Regularized Model): ${:,.2f}".format(reg_mae))

**Q4**

In [None]:
import numpy as np
from hmmlearn import hmm
import matplotlib.pyplot as plt

# Hidden states (activities) and observable symbols (rooms)
states = ["Cooking", "Sleeping", "Watching TV"]
observations = ["kitchen", "bedroom", "living room"]

# Simulated sensor data: one list of room observations per day
room_sequences = [
    ['kitchen', 'bedroom', 'living room', 'kitchen', 'bedroom'],
    ['kitchen', 'living room', 'living room', 'bedroom', 'bedroom'],
    ['living room', 'kitchen', 'bedroom', 'kitchen', 'bedroom'],
    ['bedroom', 'bedroom', 'living room', 'kitchen', 'living room']
]

# Encode each room name as its position in `observations`
obs_map = dict(zip(observations, range(len(observations))))
num_sequences = len(room_sequences)
sequence_lengths = list(map(len, room_sequences))
# Flatten all days into a single (n_samples, 1) integer column; the day
# boundaries are kept separately in `sequence_lengths`
encoded_rooms = [obs_map[room] for day in room_sequences for room in day]
X = np.array(encoded_rooms).reshape(-1, 1)

# Build the HMM with one hidden component per entry in `states` and fit it on
# the concatenated observations; `lengths` marks where each sequence ends
model = hmm.CategoricalHMM(random_state=42, n_components=len(states))
model.fit(X, lengths=sequence_lengths)

# Print learned parameters
print("Start Probabilities:", model.startprob_)
for heading, learned_matrix in (
    ("\nTransition Matrix:", model.transmat_),
    ("\nEmission Probabilities:", model.emissionprob_),
):
    print(heading)
    print(learned_matrix)

# Predict activities for a new, unseen sequence of rooms
new_sequence = ['kitchen', 'living room', 'bedroom', 'kitchen']
encoded_new = [obs_map[room] for room in new_sequence]
numeric_seq = np.reshape(np.array(encoded_new), (-1, 1))
predicted_states = model.predict(numeric_seq)

# NOTE(review): the model is fitted on observations only, so pairing a
# predicted state index with a name in `states` is purely positional --
# confirm the learned states actually correspond to these activities.
print("\nPredicted Activities:")
for position, room in enumerate(new_sequence):
    activity = states[predicted_states[position]]
    print(f"{room} -> {activity}")

**Q5**

In [None]:
import numpy as np
from hmmlearn import hmm

# Hidden states and observable symbols
states = ["Genuine", "Intruder"]
observations = ["early", "mid", "late"]  # Login times

# Simulated login sequences (each sequence is a separate user's login pattern)
sequences = [
    ['early', 'early', 'mid', 'early', 'mid'],      # Genuine user 1
    ['late', 'late', 'early', 'late', 'late'],      # Intruder 1
    ['early', 'mid', 'early', 'mid', 'early'],      # Genuine user 2
    ['mid', 'late', 'late', 'mid', 'late'],         # Intruder 2
    ['early', 'early', 'early', 'mid', 'early'],    # Genuine user 3
    ['late', 'mid', 'late', 'late', 'mid']          # Intruder 3
]

# Encode each login time as its position in `observations`
obs_map = {label: code for code, label in enumerate(observations)}
# Per-sequence lengths (every sequence here has 5 entries)
lengths = list(map(len, sequences))
# Stack all sequences into one (n_samples, 1) integer column
X = np.array([[obs_map[login]] for user_logins in sequences for login in user_logins])

# Train HMM
model = hmm.CategoricalHMM(
    n_components=len(states),
    n_iter=100,  # Increased iterations (hmmlearn's default is 10) for better convergence
    random_state=42,  # Fixed seed so the random initialisation is repeatable
)
# `lengths` tells fit() where each user's sequence ends inside the stacked X
model.fit(X, lengths=lengths)

# Print learned parameters
print("Start Probabilities (Genuine vs Intruder):\n", model.startprob_)
print("\nTransition Matrix:\n", model.transmat_)
print("\nEmission Probabilities (Time of Day):\n", model.emissionprob_)

# Decode the most likely hidden-state path for two new sequences
test_sequences = [
    ['early', 'mid', 'early', 'mid', 'early'],    # Likely genuine
    ['late', 'late', 'mid', 'late', 'late'],      # Likely intruder
]

# NOTE(review): the model is fitted on observations only, so mapping a decoded
# state index to a name in `states` is purely positional -- confirm that
# index 0 really behaves like "Genuine" before trusting the labels.
for login_seq in test_sequences:
    encoded_seq = np.array([obs_map[login] for login in login_seq]).reshape(-1, 1)
    logprob, decoded_states = model.decode(encoded_seq)
    state_names = [states[index] for index in decoded_states]
    print(f"\nSequence: {login_seq}")
    print("Predicted States:", state_names)
    print("Log Probability:", logprob)