# In [None]:

# ================== OBJECTIVE ================== 
"""
Aim: To implement a sequence model using a custom GRU architecture
for both classification and regression tasks.
"""

# ================== MOUNT GOOGLE DRIVE ==================
# Mount Google Drive at /content/drive so files stored there can be read.
# The google.colab package is only importable inside a Colab runtime; when it
# is missing the mount is skipped (with a notice) instead of aborting, so the
# script can still run elsewhere against a locally reachable dataset path.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print("google.colab not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

# ================== IMPORTS ==================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, mean_squared_error, r2_score

# ================== USER CONFIGURATION ==================
task_type = "classification"  # Choose: "classification" or "regression"
data_path = '/content/drive/MyDrive/your_dataset.csv'  # Replace with your dataset path
target_col = 'target'  # Replace with the name of your target column
SEQ_LEN = 10  # Length of sequences for GRU

# Fail fast on invalid settings. The sections below branch on task_type with
# different comparisons (some test == "classification", others == "regression"),
# so a typo would build a softmax classifier but evaluate it as a regressor.
_VALID_TASK_TYPES = ("classification", "regression")
if task_type not in _VALID_TASK_TYPES:
    raise ValueError(
        f"task_type must be one of {_VALID_TASK_TYPES}, got {task_type!r}"
    )
# SEQ_LEN is used as a slice length and range bound, so it must be a positive int.
if not isinstance(SEQ_LEN, int) or SEQ_LEN < 1:
    raise ValueError(f"SEQ_LEN must be a positive integer, got {SEQ_LEN!r}")

# ================== LOAD DATA ==================
# Read the configured CSV and print a quick overview: its shape, the first
# rows, and the number of missing values per column.
df = pd.read_csv(data_path)
print("Dataset shape:", df.shape)
print("First few rows:\n", df.head())
print("Missing values:\n", df.isna().sum())

# ================== HANDLE MISSING VALUES (IF ANY) ==================
# Discard every row that has at least one missing value.
df = df.dropna(axis=0, how='any')

# ================== ENCODING & SCALING ==================
# For classification the target is always mapped to contiguous integer codes
# starting at 0. Encoding only object-dtype targets would leave numeric labels
# such as {1, 2, 3} untouched, and the models below size their softmax layer
# from the number of distinct labels, so label 3 would be out of range for
# sparse_categorical_crossentropy. Labels that are already 0..k-1 are unchanged.
if task_type == 'classification':
    le = LabelEncoder()
    df[target_col] = le.fit_transform(df[target_col])

# Every column except the target is treated as a model input.
# NOTE(review): MinMaxScaler is applied to all of them, which assumes they are
# numeric — TODO confirm for your dataset.
features = df.drop(columns=[target_col])

# NOTE: the scaler is fitted on all rows, i.e. before the train/test split
# performed later in this script, so test rows contribute to the min/max.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)
target = df[target_col].values

# ================== CREATE SEQUENCES ==================
def create_sequences(X, y, seq_len=10):
    """Slice X into overlapping windows and pair each with the following target.

    Window ``i`` is ``X[i:i + seq_len]`` and its label is ``y[i + seq_len]``,
    i.e. the target of the row immediately after the window.

    Args:
        X: Sequence of feature rows supporting ``len()`` and slicing.
        y: Sequence of targets indexed the same way as ``X``.
        seq_len: Number of consecutive rows in each window.

    Returns:
        A ``(windows, labels)`` tuple of NumPy arrays holding
        ``len(X) - seq_len`` entries each. Both are empty when ``X`` has
        ``seq_len`` rows or fewer.
    """
    starts = range(len(X) - seq_len)
    windows = [X[start:start + seq_len] for start in starts]
    labels = [y[start + seq_len] for start in starts]
    return np.array(windows), np.array(labels)

# Turn the scaled rows into (window, next-row target) pairs.
X, y = create_sequences(scaled_features, target, SEQ_LEN)

# With SEQ_LEN rows or fewer no window can be formed; stop here with a clear
# message instead of failing later on X.shape[1] or inside the split.
if len(X) == 0:
    raise ValueError(
        f"Need more than SEQ_LEN={SEQ_LEN} rows to build a sequence, "
        f"got {len(scaled_features)}"
    )

# ================== SPLIT DATA ==================
# Split chronologically (no shuffling): the last 20% of windows form the test
# set. Consecutive windows overlap in all but one row, so a shuffled split
# would place near-duplicates of test windows in the training set and inflate
# the test metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# ================== VISUALIZATION ==================
# Show how the sequence targets are distributed: a per-class count plot for
# classification, a 30-bin histogram otherwise.
plt.figure(figsize=(6, 3))
if task_type == 'classification':
    sns.countplot(x=y)
    plt.title("Target Class Distribution")
else:
    plt.hist(y, bins=30)
    plt.title("Target Value Distribution")
    plt.xlabel("Target")
    plt.ylabel("Frequency")
plt.show()

# ================== BASELINE MODEL ==================
# Single-layer GRU used as the reference point for the custom model.
if task_type == "regression":
    # One linear output unit for the predicted value.
    baseline_units = 1
    baseline_activation = None
else:
    # sparse_categorical_crossentropy needs every label to be a valid output
    # index. Counting distinct labels alone is too small when the labels are
    # not contiguous from 0 (e.g. {1, 2}, or a class missing from y), so the
    # width also covers the largest label. It is never narrower than the
    # distinct-label count.
    baseline_units = max(len(np.unique(y)), int(np.max(y)) + 1)
    baseline_activation = "softmax"

baseline_model = Sequential()
# Input is one window: (timesteps, features) taken from the sequence array.
baseline_model.add(GRU(32, input_shape=(X.shape[1], X.shape[2])))
baseline_model.add(Dense(baseline_units, activation=baseline_activation))

baseline_model.compile(
    loss='mse' if task_type == 'regression' else 'sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['mae'] if task_type == 'regression' else ['accuracy']
)

# Short, silent training run; 20% of the training data is held out for validation.
baseline_model.fit(X_train, y_train, epochs=5, validation_split=0.2, verbose=0)

# ================== CUSTOM GRU MODEL ==================
# Two stacked GRU layers with dropout, followed by a dense hidden layer and a
# task-specific output layer.
model = Sequential()
# return_sequences=True so the second GRU receives the full sequence of outputs.
model.add(GRU(128, return_sequences=True, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.3))
model.add(GRU(64))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))

if task_type == 'regression':
    # One linear output unit for the predicted value.
    model.add(Dense(1))
    loss_fn = 'mse'
    metrics = ['mae']
else:
    # sparse_categorical_crossentropy needs every label to be a valid output
    # index. Counting distinct labels alone is too small when the labels are
    # not contiguous from 0 (e.g. {1, 2}, or a class missing from y), so the
    # width also covers the largest label. It is never narrower than the
    # distinct-label count.
    n_classes = max(len(np.unique(y)), int(np.max(y)) + 1)
    model.add(Dense(n_classes, activation='softmax'))
    loss_fn = 'sparse_categorical_crossentropy'
    metrics = ['accuracy']

model.compile(loss=loss_fn, optimizer='adam', metrics=metrics)
model.summary()

# ================== TRAINING ==================
# Early stopping watches the validation loss with a patience of 5 epochs and
# is configured to restore the best weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train for up to 30 epochs in batches of 32, holding out 20% of the training
# data for validation; the returned history feeds the plots below.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1,
)

# ================== TRAINING PLOTS ==================
def _draw_history_panel(slot, key, train_label, val_label, heading, y_label):
    """Plot one training curve and its validation counterpart in subplot `slot`."""
    plt.subplot(1, 2, slot)
    plt.plot(history.history[key], label=train_label)
    plt.plot(history.history['val_' + key], label=val_label)
    plt.title(heading)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()


# The first compiled metric drives the left panel; the loss drives the right.
tracked_metric = metrics[0]
metric_label = tracked_metric.capitalize()

plt.figure(figsize=(12, 4))
_draw_history_panel(1, tracked_metric, 'Train', 'Validation',
                    f'Epochs vs {metric_label}', metric_label)
_draw_history_panel(2, 'loss', 'Train Loss', 'Validation Loss',
                    'Epochs vs Loss', 'Loss')
plt.show()

# ================== PERFORMANCE EVALUATION ==================
if task_type == "classification":
    # The predicted class is the index of the largest model output per sample.
    class_scores = model.predict(X_test)
    y_pred = class_scores.argmax(axis=1)
    report = classification_report(y_test, y_pred)
    print("\nClassification Report:\n", report)

    # Heatmap of actual vs predicted class counts.
    conf_matrix = confusion_matrix(y_test, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()
else:
    # Flatten the model output to one prediction per test sample.
    y_pred = model.predict(X_test).flatten()
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"\nMean Squared Error: {mse:.4f}")
    print(f"R2 Score: {r2:.4f}")

    # Scatter of true vs predicted values; the dashed red diagonal marks
    # where prediction equals truth.
    diagonal = [y_test.min(), y_test.max()]
    plt.figure(figsize=(6, 4))
    plt.scatter(y_test, y_pred, alpha=0.5)
    plt.plot(diagonal, diagonal, 'r--')
    plt.title("True vs Predicted")
    plt.xlabel("True Values")
    plt.ylabel("Predicted Values")
    plt.grid(True)
    plt.show()

# ================== RESULT TABLE ==================
# Score both models on the test set. Index 1 of evaluate()'s result is taken
# as the single metric each model was compiled with (index 0 being the loss).
baseline_metric, custom_metric = (
    net.evaluate(X_test, y_test, verbose=0)[1]
    for net in (baseline_model, model)
)

# Side-by-side comparison, with the column named after the tracked metric.
metric_column = metrics[0].capitalize()
result_df = pd.DataFrame({
    'Model': ['Baseline GRU', 'Custom GRU'],
    metric_column: [baseline_metric, custom_metric],
})
print("\nModel Comparison:\n", result_df)

# ================== CONCLUSION ==================
"""
Conclusion:
- The GRU-based framework handles both classification and regression tasks.
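- The custom model extends the baseline with a second GRU layer, dropout, a dense hidden layer, and early stopping; the model comparison table shows whether this actually improves the metric on your data.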
- For classification: evaluated using accuracy & classification report.
- For regression: evaluated using MAE, MSE, and R² Score.
- The same code can be reused for other time-sequential datasets by setting task_type, data_path, and target_col in the configuration section.
"""
