In [None]:
import kagglehub
import pandas as pd

In [None]:
# Download the Delhi daily-climate dataset through kagglehub. `path` holds the
# value returned by the download call; later cells join CSV file names onto it.
path = kagglehub.dataset_download("sumanthvrao/daily-climate-time-series-data")
print("Path to dataset files:", path)

In [None]:
import os
# Show what was downloaded so the CSV names used in the next cell can be verified.
print(os.listdir(path))

In [None]:
# File names of the train / test CSVs inside the downloaded dataset folder.
train_file_name = "DailyDelhiClimateTrain.csv"
test_file_name = "DailyDelhiClimateTest.csv"

train_file_path = os.path.join(path, train_file_name)
test_file_path = os.path.join(path, test_file_name)

# Raw load for a quick look only: no date parsing and no index is set here.
# The modelling cell re-reads both files through `load_data`.
train_df = pd.read_csv(train_file_path)
test_df = pd.read_csv(test_file_path)

print("Train DataFrame head:")
display(train_df.head())
print("\nTest DataFrame head:")
display(test_df.head())

In [None]:
import itertools
import math
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
np.random.seed(42)
tf.random.set_seed(42)


# Locations of the climate CSVs; `path` comes from the download cell above.
train_file_name = "DailyDelhiClimateTrain.csv"
test_file_name = "DailyDelhiClimateTest.csv"

TRAIN_CSV_PATH = os.path.join(path, train_file_name)
TEST_CSV_PATH = os.path.join(path, test_file_name)

# Column roles: the date column becomes the index, `meantemp` is predicted,
# and the remaining three columns are used as additional inputs.
TIME_COLUMN = "date"
TARGET_COLUMN = "meantemp"
FEATURE_COLUMNS = ["humidity", "wind_speed", "meanpressure"]


# Grid searched in `main()`: every lookback is combined with every hidden size.
HYPERPARAMETERS_TO_TEST = {
    'lookback': [14, 30],
    'hidden_units': [32],
}
N_EPOCHS = 50    # upper bound on training epochs
PATIENCE = 10    # early-stopping patience (epochs without val_loss improvement)
BATCH_SIZE = 32

# Cross-validation layout consumed by `get_cv_splits`.
N_CV_SPLITS = 3
VALIDATION_BLOCK_SIZE = 90  # each validation fold is 90 rows (days) long


def load_data(train_path, test_path, time_col):
    """Read the train and test CSVs and return them sorted by their time index.

    Args:
        train_path: Path of the training CSV.
        test_path: Path of the test CSV.
        time_col: Name of the column that is parsed as dates and used as index.

    Returns:
        Tuple ``(train_df, test_df)`` of DataFrames indexed by ``time_col`` and
        sorted in ascending index order.
    """
    def _read_sorted(csv_path):
        # Parse the time column while reading so it can serve as the index.
        frame = pd.read_csv(csv_path, parse_dates=[time_col], index_col=time_col)
        return frame.sort_index()

    train_sorted = _read_sorted(train_path)
    test_sorted = _read_sorted(test_path)
    return train_sorted, test_sorted

def create_windows(data, lookback, target_column_index):
    """Slice a 2-D array into overlapping input windows and next-step targets.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        lookback: Number of consecutive rows in each input window.
        target_column_index: Column whose value in the row right after the
            window is used as the target.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. With at least one window, ``X`` has
        shape ``(len(data) - lookback, lookback, n_columns)`` and ``y`` has one
        value per window. When ``len(data) <= lookback`` both arrays are empty.
    """
    n_windows = len(data) - lookback
    starts = range(n_windows)  # empty when there are not enough rows
    windows = [data[start:start + lookback] for start in starts]
    targets = [data[start + lookback, target_column_index] for start in starts]
    return np.array(windows), np.array(targets)

def get_cv_splits(data_length, n_splits, val_size):
    """Yield expanding-window (train, validation) index ranges for time series CV.

    The last ``n_splits * val_size`` positions are cut into consecutive
    validation blocks. Fold ``i`` trains on everything before its block, so the
    training range grows by ``val_size`` from one fold to the next.

    Args:
        data_length: Total number of rows available.
        n_splits: Number of folds to generate.
        val_size: Number of rows in every validation block.

    Yields:
        Tuples ``(train_indices, val_indices)`` of ``range`` objects.

    Raises:
        ValueError: If no row would be left for training in the first fold.
            Because this is a generator, the error surfaces on first iteration.
    """
    initial_train_size = data_length - (n_splits * val_size)
    # A non-positive size would produce an empty training range or negative
    # positions, which positional indexing silently wraps to the end of the data.
    if initial_train_size <= 0:
        raise ValueError(
            f"Not enough data for {n_splits} validation blocks of {val_size} rows: "
            f"data_length={data_length} leaves no rows for the first training fold."
        )

    for i in range(n_splits):
        train_end = initial_train_size + i * val_size
        val_end = train_end + val_size
        yield range(0, train_end), range(train_end, val_end)



def build_elman_model(input_shape, hidden_units):
    """Create an uncompiled single-layer SimpleRNN regressor.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units in the recurrent layer.

    Returns:
        A ``keras.Sequential`` model: Input -> SimpleRNN(tanh) -> Dense(1).
    """
    network = keras.Sequential()
    network.add(layers.Input(shape=input_shape))
    # Only the final hidden state is passed on to the linear output layer.
    network.add(layers.SimpleRNN(hidden_units, activation='tanh'))
    network.add(layers.Dense(1))
    return network

def build_multi_layer_model(input_shape, hidden_units):
    """Create an uncompiled regressor with two stacked SimpleRNN layers.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units used in each recurrent layer.

    Returns:
        A ``keras.Sequential`` model:
        Input -> SimpleRNN(return_sequences=True) -> SimpleRNN -> Dense(1).
    """
    network = keras.Sequential()
    network.add(layers.Input(shape=input_shape))
    # The first layer emits its full output sequence so the second recurrent
    # layer receives one vector per time step.
    network.add(layers.SimpleRNN(hidden_units, activation='tanh', return_sequences=True))
    network.add(layers.SimpleRNN(hidden_units, activation='tanh'))
    network.add(layers.Dense(1))
    return network

class JordanRNN(keras.Model):
    """Recurrent model whose previous *output* is fed back into the input.

    At every time step the current feature vector is concatenated with the
    model output of the previous step (zeros at the first step) before it is
    passed to a ``SimpleRNNCell``. The output of the last step is returned.
    """

    def __init__(self, hidden_units):
        """Create the recurrent cell and the one-unit output layer.

        Args:
            hidden_units: Number of units in the ``SimpleRNNCell``.
        """
        super().__init__()
        self.rnn_cell = layers.SimpleRNNCell(hidden_units, activation='tanh')
        # Maps the hidden state to the single predicted value.
        self.dense_output = layers.Dense(1)

    def call(self, inputs):
        """Run the sequence through the cell and return the final output.

        Args:
            inputs: Tensor indexed as ``inputs[:, t, :]``; the loop length is
                read from the static ``inputs.shape[1]``.

        Returns:
            The dense-layer output computed after the last time step.
        """
        n_samples = tf.shape(inputs)[0]
        state = tf.zeros((n_samples, self.rnn_cell.units))
        fed_back_output = tf.zeros((n_samples, 1))

        n_steps = inputs.shape[1]
        for step in range(n_steps):
            step_features = inputs[:, step, :]
            # Append the previous prediction to the features of this step.
            cell_input = tf.concat([step_features, fed_back_output], axis=1)
            _, new_states = self.rnn_cell(cell_input, [state])
            [state] = new_states

            # This prediction becomes the feedback signal for the next step.
            fed_back_output = self.dense_output(state)

        return fed_back_output


def main():
    """Select hyperparameters per architecture and report test RMSE on the climate data.

    For each model type (Elman, Multi-Layer, Jordan):
      1. Every (lookback, hidden_units) pair from ``HYPERPARAMETERS_TO_TEST`` is
         scored with expanding-window cross-validation on the training file.
         Scalers are fitted on the training part of each fold only.
      2. The pair with the lowest mean fold RMSE is retrained on the full
         training file for ``N_EPOCHS`` epochs and evaluated on the test file.

    RMSE values are computed after inverse-transforming predictions and targets
    back to the original target units. Results are printed; nothing is returned.

    Note: inside a fold, early stopping monitors the same validation block that
    is afterwards used to compute the fold RMSE.
    """
    train_df, test_df = load_data(TRAIN_CSV_PATH, TEST_CSV_PATH, TIME_COLUMN)
    train_df = train_df.ffill()
    test_df = test_df.ffill()

    models_to_evaluate = {
        'Elman': build_elman_model,
        'Multi-Layer': build_multi_layer_model,
        'Jordan': JordanRNN
    }

    final_results = {}

    for model_name, model_builder in models_to_evaluate.items():
        best_params = {}
        best_avg_rmse = float('inf')
        for lookback in HYPERPARAMETERS_TO_TEST['lookback']:
            for hidden_units in HYPERPARAMETERS_TO_TEST['hidden_units']:
                print(f"  Testing params: lookback={lookback}, hidden_units={hidden_units}")
                fold_rmses = []

                cv_splits = get_cv_splits(len(train_df), N_CV_SPLITS, VALIDATION_BLOCK_SIZE)

                for train_idx, val_idx in cv_splits:
                    train_fold = train_df.iloc[train_idx]
                    val_fold = train_df.iloc[val_idx]

                    # Fit the scalers on the training part only, then apply
                    # them unchanged to the validation block.
                    scaler_X = StandardScaler()
                    scaler_y = StandardScaler()

                    train_features_scaled = scaler_X.fit_transform(train_fold[FEATURE_COLUMNS])
                    train_target_scaled = scaler_y.fit_transform(train_fold[[TARGET_COLUMN]])

                    val_features_scaled = scaler_X.transform(val_fold[FEATURE_COLUMNS])
                    val_target_scaled = scaler_y.transform(val_fold[[TARGET_COLUMN]])

                    # The target is placed in column 0 so it is both an input
                    # of every window and the value that is predicted.
                    train_scaled_data = np.hstack([train_target_scaled, train_features_scaled])

                    # Prepend the last `lookback` training rows so that the
                    # first validation row already has a full input window.
                    validation_context = np.vstack([
                        train_scaled_data[-lookback:],
                        np.hstack([val_target_scaled, val_features_scaled])
                    ])

                    X_train, y_train = create_windows(train_scaled_data, lookback, target_column_index=0)
                    X_val, y_val = create_windows(validation_context, lookback, target_column_index=0)

                    if X_train.shape[0] == 0 or X_val.shape[0] == 0:
                        continue  # Skip the fold if there is not enough data

                    n_features = X_train.shape[2]
                    input_shape = (lookback, n_features)

                    # JordanRNN is a subclassed model and takes no input shape.
                    if model_name == 'Jordan':
                        model = model_builder(hidden_units=hidden_units)
                    else:
                        model = model_builder(input_shape=input_shape, hidden_units=hidden_units)

                    model.compile(optimizer='adam', loss='mse')

                    early_stopping = keras.callbacks.EarlyStopping(
                        monitor='val_loss', patience=PATIENCE, restore_best_weights=True
                    )

                    model.fit(X_train, y_train,
                              validation_data=(X_val, y_val),
                              epochs=N_EPOCHS,
                              batch_size=BATCH_SIZE,
                              callbacks=[early_stopping],
                              verbose=0)

                    predictions_scaled = model.predict(X_val, verbose=0)
                    predictions_unscaled = scaler_y.inverse_transform(predictions_scaled)
                    true_values_unscaled = scaler_y.inverse_transform(y_val.reshape(-1, 1))

                    fold_rmse = math.sqrt(mean_squared_error(true_values_unscaled, predictions_unscaled))
                    fold_rmses.append(fold_rmse)

                # Averaging an empty list would give NaN (with a warning) and
                # this configuration could never be selected anyway.
                if not fold_rmses:
                    print("    No usable CV fold for this configuration; skipping it.")
                    continue

                avg_rmse = np.mean(fold_rmses)
                print(f"Average CV RMSE: {avg_rmse:.4f}")
                if avg_rmse < best_avg_rmse:
                    best_avg_rmse = avg_rmse
                    best_params = {'lookback': lookback, 'hidden_units': hidden_units}

        # Without a selected configuration there is nothing to retrain.
        if not best_params:
            print(f"\n  No valid hyperparameter configuration for {model_name}; "
                  f"skipping final training and test evaluation.")
            continue

        print(f"\n  Best parameters found for {model_name}: {best_params} (RMSE: {best_avg_rmse:.4f})")

        final_lookback = best_params['lookback']
        final_hidden_units = best_params['hidden_units']

        # Refit the scalers on the complete training file for the final model.
        scaler_X_final = StandardScaler()
        scaler_y_final = StandardScaler()

        train_features_scaled_final = scaler_X_final.fit_transform(train_df[FEATURE_COLUMNS])
        train_target_scaled_final = scaler_y_final.fit_transform(train_df[[TARGET_COLUMN]])

        test_features_scaled_final = scaler_X_final.transform(test_df[FEATURE_COLUMNS])
        test_target_scaled_final = scaler_y_final.transform(test_df[[TARGET_COLUMN]])

        # Combine data for windowing (target in column 0)
        train_scaled_final = np.hstack([train_target_scaled_final, train_features_scaled_final])

        # Same context trick as in CV: every test row gets a full window.
        test_context_final = np.vstack([
            train_scaled_final[-final_lookback:],
            np.hstack([test_target_scaled_final, test_features_scaled_final])
        ])

        X_train_final, y_train_final = create_windows(train_scaled_final, final_lookback, 0)
        X_test_final, y_test_final = create_windows(test_context_final, final_lookback, 0)

        n_features_final = X_train_final.shape[2]
        input_shape_final = (final_lookback, n_features_final)

        if model_name == 'Jordan':
            final_model = model_builder(hidden_units=final_hidden_units)
        else:
            final_model = model_builder(input_shape=input_shape_final, hidden_units=final_hidden_units)

        final_model.compile(optimizer='adam', loss='mse')

        # The final fit uses no validation data and therefore no early stopping.
        final_model.fit(X_train_final, y_train_final,
                        epochs=N_EPOCHS,
                        batch_size=BATCH_SIZE,
                        verbose=0)

        final_predictions_scaled = final_model.predict(X_test_final, verbose=0)
        final_predictions_unscaled = scaler_y_final.inverse_transform(final_predictions_scaled)
        final_true_values_unscaled = scaler_y_final.inverse_transform(y_test_final.reshape(-1, 1))

        final_test_rmse = math.sqrt(mean_squared_error(final_true_values_unscaled, final_predictions_unscaled))
        print(f"  -> Final Test RMSE for {model_name}: {final_test_rmse:.4f}")

        final_results[model_name] = {
            'best_params': best_params,
            'test_rmse': final_test_rmse,
            'predictions': final_predictions_unscaled.flatten(),
            'true_values': final_true_values_unscaled.flatten()
        }

    print(f"\n{'='*30}\nFINAL RESULTS SUMMARY\n{'='*30}")
    best_overall_model = None
    lowest_rmse = float('inf')

    for model_name, result in final_results.items():
        print(f"Model: {model_name}")
        print(f"  - Best Hyperparameters: {result['best_params']}")
        print(f"  - Final Test RMSE: {result['test_rmse']:.4f}")
        if result['test_rmse'] < lowest_rmse:
            lowest_rmse = result['test_rmse']
            best_overall_model = model_name

    print(f"\n Overall Best Performing Model: {best_overall_model} (RMSE: {lowest_rmse:.4f})")

# Run the climate pipeline: CV-based model selection, final training and test RMSE.
main()

# Comment on the custom Jordan implementation
# Comment on the steps taken to prevent overfitting
# Describe the cross-validation approach used
# Describe data preprocessing and justifications
# Describe the optimiser and loss function used
# Carefully define the empirical process that you have followed, and describe this process in your report. The process has to include settings for all hyperparameters, neural network architecture, performance measures, and the process followed to determine which simple recurrent neural network performed best for each of the datasets.

In [None]:
import numpy as np
from statsmodels.tsa.stattools import adfuller, kpss

def adf_test(series, signif=0.05):
    """Run ``adfuller`` on a series and summarise the outcome in a dict.

    Args:
        series: pandas Series; missing values are dropped before testing.
        signif: Significance level compared against the p-value.

    Returns:
        Dict with keys ``stat``, ``pval``, ``usedlag``, ``nobs``, ``crit`` and
        ``stationary``. ``stationary`` is True when ``pval < signif``.
    """
    cleaned = series.dropna()
    stat, pval, usedlag, nobs, crit_values, _icbest = adfuller(cleaned, autolag='AIC')
    return {
        'stat': stat,
        'pval': pval,
        'usedlag': usedlag,
        'nobs': nobs,
        'crit': crit_values,
        # This test flags stationarity for small p-values.
        'stationary': pval < signif,
    }

def kpss_test(series, regression='c', signif=0.05):
    """Run ``kpss`` on a series and summarise the outcome in a dict.

    Args:
        series: pandas Series; missing values are dropped before testing.
        regression: Forwarded to ``kpss`` as its ``regression`` argument.
        signif: Significance level compared against the p-value.

    Returns:
        Dict with keys ``stat``, ``pval``, ``nlags``, ``crit`` and
        ``stationary``. ``stationary`` is True when ``pval > signif``.
    """
    cleaned = series.dropna()
    stat, pval, nlags, crit_values = kpss(cleaned, regression=regression, nlags="auto")
    return {
        'stat': stat,
        'pval': pval,
        'nlags': nlags,
        'crit': crit_values,
        # Opposite direction to adf_test: large p-values count as stationary.
        'stationary': pval > signif,
    }

def check_stationarity(df, columns=None, signif=0.05, verbose=True):
    """Apply the ADF and KPSS helpers to several columns and combine the verdicts.

    Args:
        df: Input DataFrame; it is copied and never modified.
        columns: Columns to test. ``None`` selects every numeric column.
        signif: Significance level forwarded to both tests.
        verbose: When True, print a short report for each column.

    Returns:
        Dict mapping column name to ``{'ADF': ..., 'KPSS': ..., 'conclusion': ...}``
        where ``conclusion`` is ``'Stationary'`` if both tests agree on
        stationarity, ``'Non-stationary'`` if both agree against it, and
        ``'Mixed/Borderline'`` otherwise.
    """
    frame = df.copy()

    # Without a DatetimeIndex, promote the first matching date-like column.
    if not isinstance(frame.index, pd.DatetimeIndex):
        candidates = [name for name in ['date', 'time', 'timestamp'] if name in frame.columns]
        if candidates:
            time_col = candidates[0]
            frame[time_col] = pd.to_datetime(frame[time_col], errors='coerce')
            frame = frame.set_index(time_col)

    if columns is None:
        columns = frame.select_dtypes(include=[np.number]).columns.tolist()

    # Verdict when both tests agree; any disagreement falls back to the default.
    agreed_verdicts = {(True, True): 'Stationary', (False, False): 'Non-stationary'}

    results = {}
    for col in columns:
        series = frame[col]
        adf_res = adf_test(series, signif)
        kpss_res = kpss_test(series, signif=signif)

        flags = (bool(adf_res['stationary']), bool(kpss_res['stationary']))
        conclusion = agreed_verdicts.get(flags, 'Mixed/Borderline')

        results[col] = {'ADF': adf_res, 'KPSS': kpss_res, 'conclusion': conclusion}
        if not verbose:
            continue
        print(f"\nColumn: {col}")
        print(f"  ADF: stat={adf_res['stat']:.4f}, p={adf_res['pval']:.4f}, stationary={adf_res['stationary']}")
        print(f"  KPSS: stat={kpss_res['stat']:.4f}, p={kpss_res['pval']:.4f}, stationary={kpss_res['stationary']}")
        print(f"  => Overall conclusion: {conclusion}")
    return results


# Test every numeric column of the notebook-level `train_df` (the raw frame read
# in the preview cell; its `date` column is promoted to the index internally).
results = check_stationarity(train_df)


In [None]:
# Download the airline baggage-complaints dataset and preview it.
# NOTE: this rebinds `path`, which earlier cells used for the climate dataset,
# so the climate cells must be re-run from the top before re-running them later.
path = kagglehub.dataset_download("gabrielsantello/airline-baggage-complaints-time-series-dataset")
file_name = "baggagecomplaints.csv"
file_path = os.path.join(path, file_name)
df = pd.read_csv(file_path)
df.head()

In [None]:
# Show the raw column names (they are stripped and lower-cased later on load).
print(df.columns.tolist())

In [None]:
# Re-seed so this experiment does not depend on how much randomness the
# previous cells consumed.
np.random.seed(42)
tf.random.set_seed(42)

# Input file and column roles (names as they appear after lower-casing).
FILE_PATH = os.path.join(path, file_name)
TARGET_COLUMN = "baggage"
NUMERIC_FEATURES = ["scheduled", "cancelled", "enplaned"]
CATEGORICAL_FEATURE = "airline"  # one-hot encoded inside the evaluation loop

# Hyperparameter grid; each list currently holds a single value, so exactly
# one configuration is evaluated per model.
HYPERPARAMETERS_TO_TEST = {
    'lookback': [24],
    'hidden_units': [32],
    'learning_rate': [0.001],
    'dropout': [0.2],
}

N_EPOCHS = 20    # upper bound on training epochs
PATIENCE = 10    # early-stopping patience used during cross-validation
BATCH_SIZE = 32

TEST_SET_FRACTION = 0.2       # share of the last rows held out as test set
N_CV_SPLITS = 3
VALIDATION_BLOCK_SIZE = 24    # rows per validation fold

def load_and_prepare_data(filepath):
    """Load the baggage CSV, normalise its column names and index it by date.

    Column names are stripped of surrounding whitespace and lower-cased, the
    ``date`` column is parsed with the ``%m/%Y`` format, and the result is
    indexed by that column in ascending order.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        DataFrame indexed by the parsed ``date`` column, sorted by index.
    """
    return (
        pd.read_csv(filepath)
        .rename(columns=lambda name: name.strip().lower())
        .assign(date=lambda frame: pd.to_datetime(frame['date'], format="%m/%Y"))
        .set_index('date')
        .sort_index()
    )

def add_time_features(df):
    """Return a copy of ``df`` with month-based and positional time features.

    Added columns, in this order:
      * ``month``     - month number read from the index,
      * ``month_sin`` / ``month_cos`` - cyclical encoding of ``month - 1``
        over a period of 12,
      * ``time_idx``  - running counter ``0 .. len(df) - 1``.

    Args:
        df: DataFrame whose index exposes a ``.month`` attribute.

    Returns:
        New DataFrame; the input is left untouched.
    """
    enriched = df.copy()
    enriched['month'] = enriched.index.month

    # Same angle for sine and cosine; computed inline to keep results identical.
    enriched['month_sin'] = np.sin(2 * np.pi * (enriched['month'] - 1) / 12)
    enriched['month_cos'] = np.cos(2 * np.pi * (enriched['month'] - 1) / 12)

    n_rows = len(enriched)
    enriched['time_idx'] = np.arange(n_rows)
    return enriched

def create_windows(data, lookback, target_column_index=0):
    """Slice a 2-D array into overlapping input windows and next-step targets.

    This cell redefines the helper from the climate section. The optional
    ``target_column_index`` keeps the earlier three-argument calls working
    after this cell has been run, while two-argument calls behave as before.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        lookback: Number of consecutive rows in each input window.
        target_column_index: Column whose value in the row right after the
            window is used as the target. Defaults to 0.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays; both are empty when
        ``len(data) <= lookback``.
    """
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:(i + lookback)])
        y.append(data[i + lookback, target_column_index])
    return np.array(X), np.array(y)

def get_cv_splits(data_length, n_splits, val_size):
    """Yield expanding-window (train, validation) index ranges for time series CV.

    The last ``n_splits * val_size`` positions form consecutive validation
    blocks; each fold trains on all positions before its block.

    Args:
        data_length: Total number of rows available.
        n_splits: Number of folds to generate.
        val_size: Number of rows in every validation block.

    Yields:
        Tuples ``(train_indices, val_indices)`` of ``range`` objects.

    Raises:
        ValueError: If no row would be left for training in the first fold
            (raised on first iteration because this is a generator).
    """
    initial_train_size = data_length - (n_splits * val_size)
    # Guard against empty training ranges and negative positions, which
    # positional indexing would silently wrap to the end of the data.
    if initial_train_size <= 0:
        raise ValueError(
            f"Not enough data for {n_splits} validation blocks of {val_size} rows: "
            f"data_length={data_length} leaves no rows for the first training fold."
        )
    for i in range(n_splits):
        train_end = initial_train_size + i * val_size
        val_end = train_end + val_size
        yield range(0, train_end), range(train_end, val_end)


def build_elman_model(input_shape, hidden_units, dropout=0.0):
    """Create an uncompiled single-layer SimpleRNN regressor with input dropout.

    This redefinition replaces the two-argument builder from the climate
    section. ``dropout`` defaults to 0.0 so those earlier call sites still
    work once this cell has been executed.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units in the recurrent layer.
        dropout: Value forwarded to ``SimpleRNN(dropout=...)``.

    Returns:
        A ``keras.Sequential`` model: Input -> SimpleRNN(tanh) -> Dense(1).
    """
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.SimpleRNN(hidden_units, activation='tanh', dropout=dropout),
        layers.Dense(1)
    ])
    return model

def build_multi_layer_model(input_shape, hidden_units, dropout=0.0):
    """Create an uncompiled regressor with two stacked SimpleRNN layers.

    This redefinition replaces the two-argument builder from the climate
    section. ``dropout`` defaults to 0.0 so those earlier call sites still
    work once this cell has been executed.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units used in each recurrent layer.
        dropout: Value forwarded to ``SimpleRNN(dropout=...)`` of both layers.

    Returns:
        A ``keras.Sequential`` model:
        Input -> SimpleRNN(return_sequences=True) -> SimpleRNN -> Dense(1).
    """
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        # Full sequence output so the second recurrent layer sees every step.
        layers.SimpleRNN(hidden_units, activation='tanh', return_sequences=True, dropout=dropout),
        layers.SimpleRNN(hidden_units, activation='tanh', dropout=dropout),
        layers.Dense(1)
    ])
    return model

class JordanRNN(keras.Model):
    """Output-feedback recurrent model (same definition as in the climate section).

    Each step feeds the current features together with the previous model
    output (zeros at the first step) into a ``SimpleRNNCell``; the output
    computed after the last step is the prediction.
    """

    def __init__(self, hidden_units):
        """Create the recurrent cell and the one-unit output layer.

        Args:
            hidden_units: Number of units in the ``SimpleRNNCell``.
        """
        super().__init__()
        self.rnn_cell = layers.SimpleRNNCell(hidden_units, activation='tanh')
        # Projects the hidden state to the single predicted value.
        self.dense_output = layers.Dense(1)

    def call(self, inputs):
        """Unroll the cell over ``inputs.shape[1]`` steps and return the last output.

        Args:
            inputs: Tensor indexed as ``inputs[:, t, :]``.

        Returns:
            The dense-layer output computed after the last time step.
        """
        rows = tf.shape(inputs)[0]
        h = tf.zeros((rows, self.rnn_cell.units))
        y_prev = tf.zeros((rows, 1))

        for t in range(inputs.shape[1]):
            # Features of step t plus the prediction made at step t - 1.
            x_t = tf.concat([inputs[:, t, :], y_prev], axis=1)
            _, states = self.rnn_cell(x_t, [h])
            [h] = states
            # Fed back at the next step; returned after the final one.
            y_prev = self.dense_output(h)

        return y_prev


# Load the baggage data and fill gaps forward, then backward for leading NaNs.
full_df = load_and_prepare_data(FILE_PATH)
full_df = full_df.ffill().bfill()

# Hold out the chronologically last rows as the test set.
test_size = int(len(full_df) * TEST_SET_FRACTION)
if test_size == 0:
    # `iloc[:-0]` would return an empty training frame instead of all rows.
    raise ValueError(
        f"TEST_SET_FRACTION={TEST_SET_FRACTION} yields an empty test set "
        f"for {len(full_df)} rows."
    )
trainval_df = full_df.iloc[:-test_size].copy()
test_df = full_df.iloc[-test_size:].copy()
# NOTE(review): the frame has an `airline` column, so several rows may share a
# date; the tests below then run on interleaved series. Confirm this is intended.
results = check_stationarity(trainval_df)

# Elman and Multi-Layer are builder functions, Jordan is a model class.
models_to_evaluate = {
    'Elman': build_elman_model,
    'Multi-Layer': build_multi_layer_model,
    'Jordan': JordanRNN
}
# Cartesian product of all hyperparameter lists (needs `import itertools`).
param_grid = HYPERPARAMETERS_TO_TEST
param_combos = list(itertools.product(
    param_grid['lookback'], param_grid['hidden_units'],
    param_grid['learning_rate'], param_grid['dropout']
))

final_results = {}

# Time features are derived once from the whole train/validation frame so that
# `time_idx` keeps counting across the train -> validation boundary of a fold
# instead of restarting at 0 inside every validation block.
trainval_timed = add_time_features(trainval_df)
time_features = ['month_sin', 'month_cos', 'time_idx']

for model_name, model_builder in models_to_evaluate.items():
    print(f"\n{'='*40}\nStarting evaluation for: {model_name}\n{'='*40}")

    best_params = {}
    best_avg_rmse = float('inf')

    # ---- Hyperparameter search with expanding-window cross-validation ----
    for lookback, hidden_units, lr, dropout in param_combos:
        print(f"  Testing params: lookback={lookback}, hidden={hidden_units}, lr={lr}, dropout={dropout}")
        fold_rmses = []
        cv_splits = get_cv_splits(len(trainval_df), N_CV_SPLITS, VALIDATION_BLOCK_SIZE)

        for train_idx, val_idx in cv_splits:
            train_fold = trainval_timed.iloc[train_idx].copy()
            val_fold = trainval_timed.iloc[val_idx].copy()

            # One-hot encode the airline; validation columns are aligned to
            # the training columns (categories unseen in training are dropped).
            train_dummies = pd.get_dummies(train_fold[CATEGORICAL_FEATURE], prefix='air')
            val_dummies = pd.get_dummies(val_fold[CATEGORICAL_FEATURE], prefix='air')
            val_dummies = val_dummies.reindex(columns=train_dummies.columns, fill_value=0)
            train_fold = pd.concat([train_fold, train_dummies], axis=1)
            val_fold = pd.concat([val_fold, val_dummies], axis=1)

            all_features = NUMERIC_FEATURES + time_features + list(train_dummies.columns)

            # Scalers are fitted on the training part of the fold only.
            scaler_X = StandardScaler()
            scaler_y = StandardScaler()

            train_fold[all_features] = scaler_X.fit_transform(train_fold[all_features])
            train_fold[[TARGET_COLUMN]] = scaler_y.fit_transform(train_fold[[TARGET_COLUMN]])
            val_fold[all_features] = scaler_X.transform(val_fold[all_features])
            val_fold[[TARGET_COLUMN]] = scaler_y.transform(val_fold[[TARGET_COLUMN]])

            # Target first: create_windows reads the target from column 0.
            train_data_for_windows = train_fold[[TARGET_COLUMN] + all_features].values
            # Prepend the last `lookback` training rows so the first validation
            # row already has a complete input window.
            val_context = pd.concat([train_fold.tail(lookback), val_fold])
            val_data_for_windows = val_context[[TARGET_COLUMN] + all_features].values

            X_train, y_train = create_windows(train_data_for_windows, lookback)
            X_val, y_val = create_windows(val_data_for_windows, lookback)

            if X_train.shape[0] == 0 or X_val.shape[0] == 0:
                continue

            input_shape = (lookback, X_train.shape[2])
            if model_name == 'Jordan':
                # Subclassed model: its weights are created on the first call
                # inside fit(). No explicit build() is issued because
                # `input_shape` carries no batch dimension.
                model = model_builder(hidden_units=hidden_units)
            else:
                model = model_builder(input_shape=input_shape, hidden_units=hidden_units, dropout=dropout)

            model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse')
            early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=N_EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping], verbose=0)

            # Score the fold in original target units.
            preds_scaled = model.predict(X_val, verbose=0)
            preds_unscaled = scaler_y.inverse_transform(preds_scaled)
            true_unscaled = scaler_y.inverse_transform(y_val.reshape(-1, 1))
            fold_rmse = math.sqrt(mean_squared_error(true_unscaled, preds_unscaled))
            fold_rmses.append(fold_rmse)

        avg_rmse = np.mean(fold_rmses) if fold_rmses else float('inf')
        if avg_rmse < best_avg_rmse:
            best_avg_rmse = avg_rmse
            best_params = {'lookback': lookback, 'hidden_units': hidden_units, 'lr': lr, 'dropout': dropout}

    # No configuration produced a usable fold: nothing to retrain or evaluate.
    if not best_params:
        print(f"\n  No usable CV fold for {model_name}; skipping final training and test evaluation.")
        continue

    print(f"\n  Best parameters for {model_name}: {best_params} (Avg. CV RMSE: {best_avg_rmse:.2f})")

    # ---- Retrain on all train/validation rows and evaluate on the test set ----
    train_final = trainval_timed.copy()
    test_final = add_time_features(test_df)
    # Continue the running time index after the last training row.
    test_final['time_idx'] += len(train_final)

    train_dummies_final = pd.get_dummies(train_final[CATEGORICAL_FEATURE], prefix='air')
    test_dummies_final = pd.get_dummies(test_final[CATEGORICAL_FEATURE], prefix='air')
    test_dummies_final = test_dummies_final.reindex(columns=train_dummies_final.columns, fill_value=0)
    train_final = pd.concat([train_final, train_dummies_final], axis=1)
    test_final = pd.concat([test_final, test_dummies_final], axis=1)

    time_features_final = ['month_sin', 'month_cos', 'time_idx']
    all_features_final = NUMERIC_FEATURES + time_features_final + list(train_dummies_final.columns)

    scaler_X_final = StandardScaler()
    scaler_y_final = StandardScaler()

    train_final[all_features_final] = scaler_X_final.fit_transform(train_final[all_features_final])
    train_final[[TARGET_COLUMN]] = scaler_y_final.fit_transform(train_final[[TARGET_COLUMN]])
    test_final[all_features_final] = scaler_X_final.transform(test_final[all_features_final])
    test_final[[TARGET_COLUMN]] = scaler_y_final.transform(test_final[[TARGET_COLUMN]])

    train_data_final = train_final[[TARGET_COLUMN] + all_features_final].values
    # Same context trick as in CV: every test row gets a full input window.
    test_context_final = pd.concat([train_final.tail(best_params['lookback']), test_final])
    test_data_final = test_context_final[[TARGET_COLUMN] + all_features_final].values

    X_train_final, y_train_final = create_windows(train_data_final, best_params['lookback'])
    X_test_final, y_test_final = create_windows(test_data_final, best_params['lookback'])

    final_input_shape = (best_params['lookback'], X_train_final.shape[2])
    if model_name == 'Jordan':
        final_model = model_builder(hidden_units=best_params['hidden_units'])
    else:
        final_model = model_builder(input_shape=final_input_shape, hidden_units=best_params['hidden_units'], dropout=best_params['dropout'])

    final_model.compile(optimizer=keras.optimizers.Adam(learning_rate=best_params['lr']), loss='mse')

    # The final fit uses no validation data and therefore no early stopping.
    final_model.fit(X_train_final, y_train_final, epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)

    final_preds_scaled = final_model.predict(X_test_final, verbose=0)
    final_preds_unscaled = scaler_y_final.inverse_transform(final_preds_scaled)
    final_true_unscaled = scaler_y_final.inverse_transform(y_test_final.reshape(-1, 1))

    final_test_rmse = math.sqrt(mean_squared_error(final_true_unscaled, final_preds_unscaled))
    print(f"  -> Final Test Set RMSE for {model_name}: {final_test_rmse:.2f}")

    final_results[model_name] = {
        'best_params': best_params,
        'test_rmse': final_test_rmse,
        'predictions': final_preds_unscaled.flatten(),
        'true_values': final_true_unscaled.flatten(),
        # The prepended context yields one prediction per test row, so the
        # dates are the full test index (not shortened by `lookback`).
        'test_dates': test_df.index
    }

# Print one summary entry per evaluated model and track the lowest test RMSE.
print(f"\n{'='*40}\nFINAL RESULTS SUMMARY\n{'='*40}")
best_overall_model_name = None
lowest_rmse = float('inf')

for model_name, result in final_results.items():
    print(f"Model: {model_name}")
    print(f"  - Best Hyperparameters: {result['best_params']}")
    print(f"  - Final Test RMSE: {result['test_rmse']:.2f}")
    # Strict comparison: on ties the model listed first is kept.
    if result['test_rmse'] < lowest_rmse:
        lowest_rmse = result['test_rmse']
        best_overall_model_name = model_name

print(f"\n Overall Best Performing Model: {best_overall_model_name} (RMSE: {lowest_rmse:.2f})")


In [None]:
# Download the cleaned Walmart sales dataset, list its files and preview it.
# NOTE: `path`, `file_name`, `file_path` and `df` from the baggage section are
# rebound here.
path = kagglehub.dataset_download("ujjwalchowdhury/walmartcleaned")
print(os.listdir(path))
file_name = "walmart_cleaned.csv"
file_path = os.path.join(path, file_name)
df = pd.read_csv(file_path)
df.head()

In [None]:
# (rows, columns) of the raw Walmart frame.
df.shape

In [None]:
FILE_PATH = os.path.join(path, file_name)

# A single (store, department) series is selected from the full table.
STORE_ID = 1
DEPT_ID = 1

# Column roles (names as they appear after lower-casing on load).
TARGET_COLUMN = "weekly_sales"
NUMERIC_FEATURES = ["temperature", "fuel_price", "markdown1", "markdown2", "markdown3", "markdown4", "markdown5", "cpi", "unemployment", "size"]
CATEGORICAL_FEATURES = ["isholiday", "type"]


# Hyperparameter grid; each list currently holds a single value.
HYPERPARAMETERS_TO_TEST = {
    'lookback': [52],
    'hidden_units': [32],
    'learning_rate': [0.001],
    'dropout': [0.2],
}
N_EPOCHS = 20    # upper bound on training epochs
PATIENCE = 8     # early-stopping patience used during cross-validation
BATCH_SIZE = 32

TEST_SET_FRACTION = 0.20      # share of the last rows held out as test set
N_CV_SPLITS = 3
VALIDATION_BLOCK_SIZE = 20    # rows per validation fold


def load_and_prepare_data(filepath):
    """Load the Walmart CSV, normalise its column names and index it by date.

    Column names are stripped of surrounding whitespace and lower-cased, the
    ``date`` column is parsed with ``pd.to_datetime`` (format inferred), and
    the result is indexed by that column in ascending order.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        DataFrame indexed by the parsed ``date`` column, sorted by index.
    """
    return (
        pd.read_csv(filepath)
        .rename(columns=lambda name: name.strip().lower())
        .assign(date=lambda frame: pd.to_datetime(frame['date']))
        .set_index('date')
        .sort_index()
    )

def add_time_features(df):
    """Return a copy of ``df`` with ISO-week-based and positional time features.

    Added columns, in this order:
      * ``week``     - ISO calendar week read from the index,
      * ``week_sin`` / ``week_cos`` - cyclical encoding of ``week - 1`` over a
        period of 52,
      * ``time_idx`` - running counter ``0 .. len(df) - 1``.

    Args:
        df: DataFrame whose index provides ``isocalendar()``.

    Returns:
        New DataFrame; the input is left untouched.
    """
    enriched = df.copy()
    iso_week = enriched.index.isocalendar().week.astype(int)
    enriched['week'] = iso_week

    # Same angle for sine and cosine; computed inline to keep results identical.
    enriched['week_sin'] = np.sin(2 * np.pi * (enriched['week'] - 1) / 52)
    enriched['week_cos'] = np.cos(2 * np.pi * (enriched['week'] - 1) / 52)

    n_rows = len(enriched)
    enriched['time_idx'] = np.arange(n_rows)
    return enriched

def create_windows(data, lookback, target_column_index=0):
    """Slice a 2-D array into overlapping input windows and next-step targets.

    This cell redefines the helper again. The optional ``target_column_index``
    keeps three-argument calls from the climate section working after this
    cell has been run, while two-argument calls behave as before.

    Args:
        data: 2-D array indexed as ``data[row, column]``.
        lookback: Number of consecutive rows in each input window.
        target_column_index: Column whose value in the row right after the
            window is used as the target. Defaults to 0.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays; both are empty when
        ``len(data) <= lookback``.
    """
    X, y = [], []
    for i in range(len(data) - lookback):
        X.append(data[i:(i + lookback)])
        y.append(data[i + lookback, target_column_index])
    return np.array(X), np.array(y)

def get_cv_splits(data_length, n_splits, val_size):
    """Yield expanding-window (train, validation) index ranges for time series CV.

    The last ``n_splits * val_size`` positions form consecutive validation
    blocks; each fold trains on all positions before its block.

    Args:
        data_length: Total number of rows available.
        n_splits: Number of folds to generate.
        val_size: Number of rows in every validation block.

    Yields:
        Tuples ``(train_indices, val_indices)`` of ``range`` objects.

    Raises:
        ValueError: If no row would be left for training in the first fold
            (raised on first iteration because this is a generator).
    """
    initial_train_size = data_length - (n_splits * val_size)
    # Guard against empty training ranges and negative positions, which
    # positional indexing would silently wrap to the end of the data.
    if initial_train_size <= 0:
        raise ValueError(
            f"Not enough data for {n_splits} validation blocks of {val_size} rows: "
            f"data_length={data_length} leaves no rows for the first training fold."
        )
    for i in range(n_splits):
        train_end = initial_train_size + i * val_size
        val_end = train_end + val_size
        yield range(0, train_end), range(train_end, val_end)

def build_elman_model(input_shape, hidden_units, dropout=0.0):
    """Create an uncompiled single-layer SimpleRNN regressor with input dropout.

    ``dropout`` defaults to 0.0 so that call sites written for the
    two-argument builder of the climate section keep working after this
    redefinition has been executed.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units in the recurrent layer.
        dropout: Value forwarded to ``SimpleRNN(dropout=...)``.

    Returns:
        A ``keras.Sequential`` model: Input -> SimpleRNN(tanh) -> Dense(1).
    """
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        layers.SimpleRNN(hidden_units, activation='tanh', dropout=dropout),
        layers.Dense(1)
    ])
    return model

def build_multi_layer_model(input_shape, hidden_units, dropout=0.0):
    """Create an uncompiled regressor with two stacked SimpleRNN layers.

    ``dropout`` defaults to 0.0 so that call sites written for the
    two-argument builder of the climate section keep working after this
    redefinition has been executed.

    Args:
        input_shape: Shape passed to the ``Input`` layer; callers in this file
            pass ``(lookback, n_features)``.
        hidden_units: Number of units used in each recurrent layer.
        dropout: Value forwarded to ``SimpleRNN(dropout=...)`` of both layers.

    Returns:
        A ``keras.Sequential`` model:
        Input -> SimpleRNN(return_sequences=True) -> SimpleRNN -> Dense(1).
    """
    model = keras.Sequential([
        layers.Input(shape=input_shape),
        # Full sequence output so the second recurrent layer sees every step.
        layers.SimpleRNN(hidden_units, activation='tanh', return_sequences=True, dropout=dropout),
        layers.SimpleRNN(hidden_units, activation='tanh', dropout=dropout),
        layers.Dense(1)
    ])
    return model

class JordanRNN(keras.Model):
    """Output-feedback recurrent model (same definition as in the earlier sections).

    The prediction of the previous time step (zeros at the first step) is
    appended to the current features before they enter a ``SimpleRNNCell``;
    the prediction made after the last step is returned.
    """

    def __init__(self, hidden_units):
        """Create the recurrent cell and the one-unit output layer.

        Args:
            hidden_units: Number of units in the ``SimpleRNNCell``.
        """
        super().__init__()
        self.rnn_cell = layers.SimpleRNNCell(hidden_units, activation='tanh')
        # Turns the hidden state into the single predicted value.
        self.dense_output = layers.Dense(1)

    def call(self, inputs):
        """Process the sequence step by step and return the final prediction.

        Args:
            inputs: Tensor indexed as ``inputs[:, t, :]``; the number of steps
                is taken from the static ``inputs.shape[1]``.

        Returns:
            The dense-layer output computed after the last time step.
        """
        batch = tf.shape(inputs)[0]
        hidden = tf.zeros((batch, self.rnn_cell.units))
        last_prediction = tf.zeros((batch, 1))

        sequence_length = inputs.shape[1]
        for position in range(sequence_length):
            features_now = inputs[:, position, :]
            # Jordan-style feedback: previous prediction joins the features.
            joined = tf.concat([features_now, last_prediction], axis=1)
            _, updated_states = self.rnn_cell(joined, [hidden])
            [hidden] = updated_states

            last_prediction = self.dense_output(hidden)

        return last_prediction


print("--- Starting Walmart Sales Prediction Pipeline ---")

# Keep only the rows of the configured store and department.
full_df = load_and_prepare_data(FILE_PATH)
series_df = full_df[(full_df['store'] == STORE_ID) & (full_df['dept'] == DEPT_ID)].copy()
if series_df.empty:
    raise ValueError(f"No rows found for store={STORE_ID}, dept={DEPT_ID}.")

# Fill gaps forward, then backward for any leading missing values.
series_df = series_df.ffill().bfill()

# Hold out the chronologically last rows as the test set.
test_size = int(len(series_df) * TEST_SET_FRACTION)
if test_size == 0:
    # `iloc[:-0]` would return an empty training frame instead of all rows.
    raise ValueError(
        f"TEST_SET_FRACTION={TEST_SET_FRACTION} yields an empty test set "
        f"for {len(series_df)} rows."
    )
trainval_df = series_df.iloc[:-test_size].copy()

test_df = series_df.iloc[-test_size:].copy()

# Elman and Multi-Layer are builder functions, Jordan is a model class.
models_to_evaluate = {
    'Elman': build_elman_model,
    'Multi-Layer': build_multi_layer_model,
    'Jordan': JordanRNN
}

# Cartesian product of all hyperparameter lists (needs `import itertools`).
param_grid = HYPERPARAMETERS_TO_TEST
param_combos = list(itertools.product(
    param_grid['lookback'], param_grid['hidden_units'],
    param_grid['learning_rate'], param_grid['dropout']
))

final_results = {}

for model_name, model_builder in models_to_evaluate.items():
    print(f"\n{'='*40}\nStarting evaluation for: {model_name}\n{'='*40}")

    best_params = {}
    best_avg_rmse = float('inf')

    for lookback, hidden_units, lr, dropout in param_combos:
        print(f"  Testing params: lookback={lookback}, hidden={hidden_units}, lr={lr}, dropout={dropout}")
        fold_rmses = []
        cv_splits = get_cv_splits(len(trainval_df), N_CV_SPLITS, VALIDATION_BLOCK_SIZE)

        for train_idx, val_idx in cv_splits:
            train_fold = trainval_df.iloc[train_idx].copy()
            val_fold = trainval_df.iloc[val_idx].copy()

            train_fold = add_time_features(train_fold)
            val_fold = add_time_features(val_fold)

            train_dummies = pd.get_dummies(train_fold['type'], prefix='type')
            val_dummies = pd.get_dummies(val_fold['type'], prefix='type').reindex(columns=train_dummies.columns, fill_value=0)
            train_fold = pd.concat([train_fold, train_dummies], axis=1)
            val_fold = pd.concat([val_fold, val_dummies], axis=1)

            time_features = ['week_sin', 'week_cos', 'time_idx']
            all_features = NUMERIC_FEATURES + time_features + ['isholiday'] + list(train_dummies.columns)
            all_features = [f for f in all_features if f in train_fold.columns]

            scaler_X = StandardScaler()
            scaler_y = StandardScaler()

            train_fold[all_features] = scaler_X.fit_transform(train_fold[all_features])
            train_fold[[TARGET_COLUMN]] = scaler_y.fit_transform(train_fold[[TARGET_COLUMN]])
            val_fold[all_features] = scaler_X.transform(val_fold[all_features])
            val_fold[[TARGET_COLUMN]] = scaler_y.transform(val_fold[[TARGET_COLUMN]])

            train_data_for_windows = train_fold[[TARGET_COLUMN] + all_features].values
            val_context = pd.concat([train_fold.tail(lookback), val_fold])
            val_data_for_windows = val_context[[TARGET_COLUMN] + all_features].values

            X_train, y_train = create_windows(train_data_for_windows, lookback)
            X_val, y_val = create_windows(val_data_for_windows, lookback)

            if X_train.shape[0] == 0 or X_val.shape[0] == 0: continue

            input_shape = (lookback, X_train.shape[2])


            if model_name == 'Elman':
                model = model_builder(input_shape=input_shape, hidden_units=hidden_units, dropout=dropout)
            elif model_name == 'Multi-Layer':
                model = model_builder(input_shape=input_shape, hidden_units=hidden_units, dropout=dropout)
            elif model_name == 'Jordan':
                model = model_builder(hidden_units=hidden_units)
                model.build(input_shape)

            model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr), loss='mse')
            early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=PATIENCE, restore_best_weights=True)

            model.fit(X_train, y_train, validation_data=(X_val, y_val),
                      epochs=N_EPOCHS, batch_size=BATCH_SIZE, callbacks=[early_stopping], verbose=0)

            preds_scaled = model.predict(X_val, verbose=0)
            preds_unscaled = scaler_y.inverse_transform(preds_scaled)
            true_unscaled = scaler_y.inverse_transform(y_val.reshape(-1, 1))
            fold_rmse = math.sqrt(mean_squared_error(true_unscaled, preds_unscaled))
            fold_rmses.append(fold_rmse)

        avg_rmse = np.mean(fold_rmses) if fold_rmses else float('inf')
        if avg_rmse < best_avg_rmse:
            best_avg_rmse = avg_rmse
            best_params = {'lookback': lookback, 'hidden_units': hidden_units, 'lr': lr, 'dropout': dropout}

    print(f"\n  Best parameters for {model_name}: {best_params} (Avg. CV RMSE: ${best_avg_rmse:,.2f})")


    train_final = add_time_features(trainval_df)
    test_final = add_time_features(test_df)

    train_dummies_final = pd.get_dummies(train_final['type'], prefix='type')
    test_dummies_final = pd.get_dummies(test_final['type'], prefix='type').reindex(columns=train_dummies_final.columns, fill_value=0)
    train_final = pd.concat([train_final, train_dummies_final], axis=1)
    test_final = pd.concat([test_final, test_dummies_final], axis=1)

    all_features_final = NUMERIC_FEATURES + time_features + ['isholiday'] + list(train_dummies_final.columns)
    all_features_final = [f for f in all_features_final if f in train_final.columns]

    scaler_X_final = StandardScaler()
    scaler_y_final = StandardScaler()

    train_final[all_features_final] = scaler_X_final.fit_transform(train_final[all_features_final])
    train_final[[TARGET_COLUMN]] = scaler_y_final.fit_transform(train_final[[TARGET_COLUMN]])
    test_final[all_features_final] = scaler_X_final.transform(test_final[all_features_final])
    test_final[[TARGET_COLUMN]] = scaler_y_final.transform(test_final[[TARGET_COLUMN]])

    train_data_final = train_final[[TARGET_COLUMN] + all_features_final].values
    test_context_final = pd.concat([train_final.tail(best_params['lookback']), test_final])
    test_data_final = test_context_final[[TARGET_COLUMN] + all_features_final].values

    X_train_final, y_train_final = create_windows(train_data_final, best_params['lookback'])
    X_test_final, y_test_final = create_windows(test_data_final, best_params['lookback'])

    final_input_shape = (best_params['lookback'], X_train_final.shape[2])
    if model_name == 'Elman':
        final_model = model_builder(input_shape=final_input_shape, hidden_units=best_params['hidden_units'], dropout=best_params['dropout'])
    elif model_name == 'Multi-Layer':
        final_model = model_builder(input_shape=final_input_shape, hidden_units=best_params['hidden_units'], dropout=best_params['dropout'])
    elif model_name == 'Jordan':
        final_model = model_builder(hidden_units=best_params['hidden_units'])
        final_model.build(final_input_shape) # Build explicitly
    final_model.compile(optimizer=keras.optimizers.Adam(learning_rate=best_params['lr']), loss='mse')

    final_model.fit(X_train_final, y_train_final, epochs=N_EPOCHS, batch_size=BATCH_SIZE, verbose=0)

    final_preds_scaled = final_model.predict(X_test_final, verbose=0)
    final_preds_unscaled = scaler_y_final.inverse_transform(final_preds_scaled)
    final_true_unscaled = scaler_y_final.inverse_transform(y_test_final.reshape(-1, 1))

    final_test_rmse = math.sqrt(mean_squared_error(final_true_unscaled, final_preds_unscaled))
    print(f"Final Test Set RMSE for {model_name}: ${final_test_rmse:,.2f}")

    final_results[model_name] = {
        'best_params': best_params,
        'test_rmse': final_test_rmse,
        'predictions': final_preds_unscaled.flatten(),
        'true_values': final_true_unscaled.flatten(),
        'test_dates': test_df.index[best_params['lookback']:]
    }


# Print a per-model recap (tuned hyperparameters and test RMSE) from
# `final_results`, and track which model reached the lowest test RMSE.
print(f"\n{'='*40}\nFINAL RESULTS SUMMARY (Store {STORE_ID}, Dept {DEPT_ID})\n{'='*40}")
best_overall_model_name = None
lowest_rmse = float('inf')  # sentinel: any finite RMSE compares lower

for model_name, result in final_results.items():
    print(f"Model: {model_name}")
    print(f"  - Best Hyperparameters: {result['best_params']}")
    print(f"  - Final Test RMSE: ${result['test_rmse']:,.2f}")
    # Strict '<' keeps the first model encountered when two RMSEs tie.
    if result['test_rmse'] < lowest_rmse:
        lowest_rmse = result['test_rmse']
        best_overall_model_name = model_name

# If `final_results` is empty this prints None with an RMSE of inf.
print(f"\n Overall Best Performing Model: {best_overall_model_name} (RMSE: ${lowest_rmse:,.2f})")

In [None]:
# Download the human-activity-recognition dataset through kagglehub and preview
# the raw CSV. `path` is rebound here; the HAR configuration cell below picks it
# up via globals().get("path", ".").
path = kagglehub.dataset_download("die9origephit/human-activity-recognition")
file_name = "time_series_data_human_activities.csv"
file_path = os.path.join(path, file_name)
df = pd.read_csv(file_path)
df.head()

In [None]:
import os, time, math, itertools, copy, pickle, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from collections import Counter
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from statsmodels.tsa.stattools import adfuller, kpss


# --- HAR experiment configuration ---

# Directory of the downloaded dataset; falls back to the current directory when
# no global `path` has been defined by an earlier cell.
DATASET_DIR = globals().get("path", ".")
FILE_NAME = "time_series_data_human_activities.csv"
FILE_PATH = os.path.join(DATASET_DIR, FILE_NAME)

# Output directory, created eagerly when this cell runs.
RESULTS_DIR = "har_rnn_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Column names expected in the CSV (compared case-insensitively by read_har_csv).
USER_COL = "user"
ACTIVITY_COL = "activity"
TIMESTAMP_COL = "timestamp"
SENSOR_COLS = ["x-axis", "y-axis", "z-axis"]

LOOKBACK_CANDIDATES = [64]   # window lengths (in rows) tried by the grid search
HORIZON = 0                  # NOTE(review): not referenced by the HAR code visible here; confirm usage
STRIDE = 8                   # step between consecutive window start rows
MAX_SAMPLES = None           # row cap passed to read_har_csv as max_rows; None keeps every row
BATCH_SIZE = 128
N_EPOCHS = 10
PATIENCE = 6                 # EarlyStopping patience, in epochs
N_CV_SPLITS = 3              # number of rolling-origin folds
VAL_BLOCK_SIZE = 100000      # rows per validation block
GAP = 0                      # rows skipped between the end of train and the start of validation
VERBOSE = True               # NOTE(review): not referenced by the HAR code visible here; confirm usage

# Cartesian product of these lists is evaluated for every model type.
HYPERPARAM_GRID = {
    'lookback': LOOKBACK_CANDIDATES,
    'hidden': [64],
    'lr': [1e-3],
    'dropout': [0.2]
}

print("FILE_PATH:", FILE_PATH)

def try_parse_timestamp(s):
    """Guess how to turn a timestamp column into datetimes.

    The values are first treated as numeric epoch offsets and tried with the
    units 'ns', 'us', 'ms' and 's' (in that order); the first unit whose median
    parsed year falls in [1980, 2035] wins. If no unit qualifies, a generic
    `pd.to_datetime` parse of the raw values is tried under the same year check.

    Returns:
        (datetimes, label) where label is the matching unit or 'parsed', or
        (None, None) when no interpretation yields a plausible median year.
    """
    def _median_year_is_plausible(parsed):
        # Median (not min/max) so a few corrupt rows cannot veto a good unit.
        year = pd.Series(parsed).dt.year.dropna().median()
        return (not np.isnan(year)) and 1980 <= year <= 2035

    numeric = pd.to_numeric(s, errors='coerce')
    for unit in ('ns', 'us', 'ms', 's'):
        try:
            as_datetime = pd.to_datetime(numeric, unit=unit, errors='coerce')
            if _median_year_is_plausible(as_datetime):
                return as_datetime, unit
        except Exception:
            # Best-effort probing: a unit that cannot be applied is simply skipped.
            continue

    try:
        as_datetime = pd.to_datetime(s, errors='coerce')
        if _median_year_is_plausible(as_datetime):
            return as_datetime, 'parsed'
    except Exception:
        pass
    return None, None

def read_har_csv(filepath, max_rows=None):
    """Load the HAR CSV, normalise column names and return rows in time order.

    Column names are stripped and lower-cased, the configured user / activity /
    timestamp columns are renamed to 'user', 'activity' and 'timestamp', and the
    timestamp column is converted to datetimes.

    Args:
        filepath: path of the CSV file.
        max_rows: if not None, keep only the first `max_rows` rows after sorting.

    Returns:
        DataFrame sorted by 'timestamp' with a fresh 0..n-1 index.

    Raises:
        ValueError: if a required column is missing, or if the timestamp column
            cannot be interpreted at all.
    """
    df = pd.read_csv(filepath)
    df.columns = df.columns.str.strip().str.lower()
    renames = {
        USER_COL.lower(): 'user',
        ACTIVITY_COL.lower(): 'activity',
        TIMESTAMP_COL.lower(): 'timestamp',
    }
    for c in renames:
        if c not in df.columns:
            raise ValueError(f"Expected column '{c}' in CSV. Found: {df.columns.tolist()}")
    df = df.rename(columns=renames)

    dt, unit = try_parse_timestamp(df['timestamp'])
    if dt is None:
        # No unit produced a plausible calendar year (e.g. device-relative
        # clocks). Previously the column was set to None here and every row was
        # silently dropped. Fall back to treating the raw numbers as nanosecond
        # offsets: absolute dates are meaningless but ordering and relative
        # spacing are preserved.
        raw = pd.to_numeric(df['timestamp'], errors='coerce')
        dt = pd.to_datetime(raw, unit='ns', errors='coerce')
        if pd.Series(dt).isna().all():
            raise ValueError("Could not interpret any value of the 'timestamp' column as a time.")
        print("Warning: timestamps have no plausible calendar unit; treating them as relative nanosecond offsets.")

    df['timestamp'] = dt
    df = df.dropna(subset=['timestamp']).copy()
    df = df.sort_values('timestamp').reset_index(drop=True)
    if max_rows is not None:
        df = df.iloc[:max_rows].copy()
    return df


def stationarity_report_numeric(series, name='series'):
    """Print the ADF and KPSS p-values for one numeric series.

    Args:
        series: pandas Series; missing values are dropped before testing.
        name: label included in the printed line so that reports for several
            series can be told apart (it was previously accepted but ignored).

    Any exception raised while testing is reported on stdout instead of being
    propagated, so a failing series does not stop the notebook.
    """
    try:
        clean = series.dropna()
        adf_result = adfuller(clean, autolag='AIC')
        kpss_result = kpss(clean, regression='c', nlags="auto")
        # Index 1 of both result tuples is the p-value.
        print(f"[{name}] ADF p={adf_result[1]:.4f}; KPSS p={kpss_result[1]:.4f}")
    except Exception as e:
        print(f"Stationarity test failed for {name}:", e)

def window_label_mode(labels):
    """Return the most frequent label in `labels`.

    When several labels share the top count, the one encountered first wins.
    An empty `labels` raises IndexError.
    """
    top = Counter(labels).most_common(1)
    return top[0][0]

def make_windows_classification(df, feature_cols, label_col, lookback, stride=1):
    """Cut a frame into fixed-length windows labelled by their majority class.

    Args:
        df: source frame, read in its current row order.
        feature_cols: columns that form each window's feature matrix.
        label_col: column holding the per-row class label.
        lookback: number of rows per window.
        stride: distance in rows between consecutive window starts.

    Returns:
        (X, y): X is float32 with shape (n_windows, lookback, n_features); y holds
        the most frequent label of each window. With fewer than `lookback` rows
        both arrays are empty.
    """
    features = df[feature_cols].values
    labels = df[label_col].values
    n_rows = len(df)

    windows, window_labels = [], []
    for begin in range(0, n_rows - lookback + 1, stride):
        end = begin + lookback
        windows.append(features[begin:end])
        window_labels.append(window_label_mode(labels[begin:end]))

    if not windows:
        return np.empty((0, lookback, len(feature_cols))), np.empty((0,), dtype=np.int32)
    return np.array(windows, dtype=np.float32), np.array(window_labels)

def rolling_origin_splits_by_index(n_time, n_splits=N_CV_SPLITS, val_block_size=VAL_BLOCK_SIZE, gap=GAP, initial_train_size=None):
    """Yield (train_idx, val_idx) positional index arrays for rolling-origin CV.

    Fold i trains on rows [0, train_end) and validates on the following
    `val_block_size` rows after skipping `gap` rows, where
    train_end = initial_train_size + i * val_block_size.

    Args:
        n_time: total number of rows available.
        n_splits: number of folds to attempt.
        val_block_size: rows per validation block.
        gap: rows left unused between train and validation.
        initial_train_size: training rows of the first fold; by default whatever
            remains after reserving room for all validation blocks and gaps.

    Yields:
        Tuples of numpy integer arrays (train_idx, val_idx).
    """
    if initial_train_size is None:
        initial_train_size = n_time - n_splits * val_block_size - gap * n_splits
    for i in range(n_splits):
        train_end = initial_train_size + i * val_block_size
        if train_end <= 0:
            # Data too short for this fold: there would be no training rows and
            # the validation range could start at a negative position, which
            # .iloc would interpret as counting from the end of the frame.
            continue
        val_start = train_end + gap
        val_end = val_start + val_block_size
        if val_end > n_time:
            break
        train_idx = np.arange(0, train_end)
        val_idx = np.arange(val_start, val_end)
        yield train_idx, val_idx

def build_elman_classifier(input_shape, n_classes, hidden_size=64, dropout=0.0):
    """Build a single-layer SimpleRNN classifier with a softmax output.

    Args:
        input_shape: per-sample shape passed to keras.Input.
        n_classes: width of the softmax output layer.
        hidden_size: number of recurrent units.
        dropout: dropout rate passed to the SimpleRNN layer.

    Returns:
        An uncompiled keras.Model.
    """
    sequence_in = keras.Input(shape=input_shape)
    recurrent = layers.SimpleRNN(
        hidden_size,
        activation='tanh',
        dropout=dropout,
        return_sequences=False,  # only the last hidden state feeds the classifier
    )
    encoded = recurrent(sequence_in)
    class_probs = layers.Dense(n_classes, activation='softmax')(encoded)
    return keras.Model(sequence_in, class_probs)

def build_multi_classifier(input_shape, n_classes, hidden_size=64, dropout=0.2):
    """Build a two-layer stacked SimpleRNN classifier with a softmax output.

    Args:
        input_shape: per-sample shape passed to keras.Input.
        n_classes: width of the softmax output layer.
        hidden_size: number of units in each recurrent layer.
        dropout: dropout rate passed to both SimpleRNN layers.

    Returns:
        An uncompiled keras.Model.
    """
    sequence_in = keras.Input(shape=input_shape)
    # First layer emits the whole sequence so the second layer can recur over it.
    lower = layers.SimpleRNN(
        hidden_size, activation='tanh', dropout=dropout, return_sequences=True
    )(sequence_in)
    upper = layers.SimpleRNN(
        hidden_size, activation='tanh', dropout=dropout, return_sequences=False
    )(lower)
    class_probs = layers.Dense(n_classes, activation='softmax')(upper)
    return keras.Model(sequence_in, class_probs)

# This is the big man's Jordan implementation. If you are reading this: I couldn't get mine to work, to be honest. My bad :)
class JordanClassifier(keras.Model):
    """Jordan-style recurrent classifier.

    At every time step the cell receives the current features concatenated with
    the class probabilities the network produced at the previous step (zeros at
    the first step); the probabilities from the last step are returned.

    The earlier version fed back tanh(mean(softmax output)). Because softmax
    probabilities always sum to 1, that mean is the constant 1/n_classes, so the
    feedback carried no information. Feeding back the full probability vector
    restores a meaningful output-to-input recurrence.

    Args:
        input_size: number of input features per time step (stored for
            reference; the cell infers its input width on first call).
        n_classes: number of output classes.
        hidden_size: number of recurrent units.
        dropout: dropout rate applied to the hidden state while training.
    """

    def __init__(self, input_size, n_classes, hidden_size=64, dropout=0.0, **kwargs):
        super().__init__(**kwargs)
        self.rnn_cell = layers.SimpleRNNCell(hidden_size, activation='tanh')
        self.dropout = layers.Dropout(dropout)
        self.fc = layers.Dense(n_classes, activation='softmax')
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.n_classes = n_classes

    def call(self, x, training=False):
        """Run the recurrence over axis 1 of `x` and return (batch, n_classes) probabilities."""
        batch = tf.shape(x)[0]
        h = tf.zeros((batch, self.hidden_size))
        # No previous prediction exists at the first step, so start from zeros.
        y_prev = tf.zeros((batch, self.n_classes))
        # The sequence length must be statically known for this Python loop.
        for t in range(x.shape[1]):
            xt = x[:, t, :]
            inp_t = tf.concat([xt, y_prev], axis=1)
            out_cell, [h] = self.rnn_cell(inp_t, [h])
            if training:
                h = self.dropout(h, training=training)
            # Class probabilities of this step become the context for the next one.
            y_prev = self.fc(h)
        return y_prev

def compile_and_train(model, X_train, y_train, X_val, y_val, cfg):
    """Compile a classifier and fit it with early stopping.

    Args:
        model: Keras model producing class probabilities.
        X_train, y_train: training windows and integer class labels.
        X_val, y_val: validation windows and labels. May be None or empty, in
            which case no validation pass is run and early stopping watches the
            training loss instead.
        cfg: dict with keys 'lr', 'patience', 'epochs', 'batch_size' and
            optionally 'verbose'.

    Returns:
        (model, history) where history is the object returned by model.fit.
    """
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=cfg['lr']),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # With zero validation samples 'val_loss' is never produced, so early
    # stopping (and restore_best_weights) would have nothing to monitor.
    has_val = X_val is not None and len(X_val) > 0
    es = keras.callbacks.EarlyStopping(monitor='val_loss' if has_val else 'loss',
                                       patience=cfg['patience'], restore_best_weights=True, verbose=0)
    history = model.fit(X_train, y_train,
                        validation_data=(X_val, y_val) if has_val else None,
                        epochs=cfg['epochs'], batch_size=cfg['batch_size'],
                        callbacks=[es], verbose=cfg.get('verbose', 0))
    return model, history

def evaluate_classification(model, X, y, label_encoder, n_classes):
    """Score a classifier on (X, y).

    Args:
        model: fitted model whose predict() returns per-class scores.
        X, y: windows and their integer class labels.
        label_encoder: fitted encoder whose classes_ supply the report's names.
        n_classes: number of classes; labels 0..n_classes-1 are all reported.

    Returns:
        (accuracy, text classification report, predicted class indices).
    """
    class_scores = model.predict(X, batch_size=256)
    predicted = np.argmax(class_scores, axis=1)
    accuracy = accuracy_score(y, predicted)
    text_report = classification_report(
        y,
        predicted,
        target_names=label_encoder.classes_,
        labels=np.arange(n_classes),
        zero_division=0,
    )
    return accuracy, text_report, predicted


def _build_har_model(model_type, lookback, n_features, n_classes, hidden_size, dropout):
    """Instantiate one of the three HAR classifiers ('Elman', 'Multi', 'Jordan')."""
    input_shape = (lookback, n_features)
    if model_type == 'Elman':
        return build_elman_classifier(input_shape, n_classes, hidden_size=hidden_size, dropout=dropout)
    if model_type == 'Multi':
        return build_multi_classifier(input_shape, n_classes, hidden_size=hidden_size, dropout=dropout)
    if model_type == 'Jordan':
        model = JordanClassifier(input_size=n_features, n_classes=n_classes, hidden_size=hidden_size, dropout=dropout)
        model.build((None, lookback, n_features))
        return model
    raise ValueError("Unknown model type")


def pipeline_har_classification(filepath):
    """Run the HAR experiment end to end for the Elman, Jordan and Multi models.

    Steps: load the CSV, encode activities and users, hold out the last 10% of
    rows as the test set, grid-search hyperparameters with rolling-origin
    cross-validation, refit each model on all train+validation rows with its
    best combination, and report test accuracy.

    Args:
        filepath: path of the HAR CSV.

    Returns:
        dict keyed by model type. Each value holds 'best_combo', 'combo_scores',
        'final_model', 'scaler', 'test_acc', 'test_report', 'y_test' and
        'y_pred'. Model types for which no fold could be evaluated are absent.
    """
    df = read_har_csv(filepath, max_rows=MAX_SAMPLES)
    print("Loaded rows:", len(df))
    sensor_cols = [c for c in df.columns if c in [s.lower() for s in SENSOR_COLS]]
    if len(sensor_cols) == 0:
        sensor_cols = SENSOR_COLS
    le = LabelEncoder()
    df['activity_enc'] = le.fit_transform(df['activity'].astype(str))
    n_classes = len(le.classes_)
    users = sorted(df['user'].unique())
    user_map = {u: i for i, u in enumerate(users)}
    df['user_enc'] = df['user'].map(user_map)
    # Default to no user features; previously `user_cols` was undefined (and
    # raised NameError below) whenever there were more than 128 users.
    user_cols = []
    if len(users) <= 128:
        user_dummies = pd.get_dummies(df['user_enc'], prefix='usr')
        df = pd.concat([df.reset_index(drop=True), user_dummies.reset_index(drop=True)], axis=1)
        user_cols = [c for c in df.columns if c.startswith('usr_')]
    feature_cols = sensor_cols + user_cols

    df = df.set_index('timestamp').sort_index()

    df[feature_cols] = df[feature_cols].interpolate(method='time').ffill().bfill()
    n = len(df)
    test_n = max(1, int(0.10 * n))
    trainval_df = df.iloc[:-test_n].copy()
    test_df = df.iloc[-test_n:].copy()

    n_time = len(trainval_df)
    combos = list(itertools.product(HYPERPARAM_GRID['lookback'], HYPERPARAM_GRID['hidden'],
                                    HYPERPARAM_GRID['lr'], HYPERPARAM_GRID['dropout']))
    results = {}

    for model_type in ['Elman', 'Jordan', 'Multi']:
        print("\n=== MODEL:", model_type, "===")
        combo_scores = []
        for (lb, hid, lr, drop) in combos:
            fold_accs = []
            for (train_idx, val_idx) in rolling_origin_splits_by_index(n_time, n_splits=N_CV_SPLITS, val_block_size=VAL_BLOCK_SIZE, gap=GAP):
                train_fold = trainval_df.iloc[train_idx].copy()
                val_fold = trainval_df.iloc[val_idx].copy()
                # Fit the scaler on the training fold only to avoid leaking
                # validation statistics.
                scaler = StandardScaler()
                scaler.fit(train_fold[sensor_cols])
                train_fold[sensor_cols] = scaler.transform(train_fold[sensor_cols])
                val_fold[sensor_cols] = scaler.transform(val_fold[sensor_cols])
                X_train, y_train = make_windows_classification(train_fold, feature_cols, 'activity_enc', lookback=lb, stride=STRIDE)
                # Prepend the last `lb` training rows so the first validation
                # windows have a full history.
                val_context = pd.concat([train_fold.tail(lb), val_fold])
                X_val, y_val = make_windows_classification(val_context, feature_cols, 'activity_enc', lookback=lb, stride=STRIDE)
                if X_train.shape[0] == 0 or X_val.shape[0] == 0:
                    fold_accs.append(np.nan)
                    continue

                model = _build_har_model(model_type, lb, X_train.shape[2], n_classes, hid, drop)

                cfg = {'epochs': N_EPOCHS, 'lr': lr, 'batch_size': BATCH_SIZE, 'patience': PATIENCE, 'verbose': 0}
                model, history = compile_and_train(model, X_train, y_train, X_val, y_val, cfg)
                acc, _, _ = evaluate_classification(model, X_val, y_val, le, n_classes)
                fold_accs.append(acc)
            fold_accs = [v for v in fold_accs if not np.isnan(v)]
            mean_acc = np.mean(fold_accs) if len(fold_accs) > 0 else np.nan
            combo_scores.append({'lookback': lb, 'hidden': hid, 'lr': lr, 'dropout': drop, 'mean_cv_acc': mean_acc})

        combo_scores_valid = [c for c in combo_scores if not np.isnan(c['mean_cv_acc'])]
        if not combo_scores_valid:
            # Every fold was skipped (too little data for the configured
            # splits); previously this surfaced as an opaque IndexError.
            print(f"No hyperparameter combination could be evaluated for {model_type}; skipping this model.")
            continue

        best_combo = sorted(combo_scores_valid, key=lambda x: -x['mean_cv_acc'])[0]
        print("Best combo:", best_combo)
        results[model_type] = {'best_combo': best_combo, 'combo_scores': combo_scores}

        train_full = trainval_df.copy()
        test_full = test_df.copy()
        scaler = StandardScaler()
        scaler.fit(train_full[sensor_cols])
        train_full[sensor_cols] = scaler.transform(train_full[sensor_cols])
        test_full[sensor_cols] = scaler.transform(test_full[sensor_cols])
        lb = best_combo['lookback']
        hid = best_combo['hidden']
        lr = best_combo['lr']
        drop = best_combo['dropout']
        X_train_all, y_train_all = make_windows_classification(train_full, feature_cols, 'activity_enc', lookback=lb, stride=STRIDE)
        test_context = pd.concat([train_full.tail(lb), test_full])
        X_test, y_test = make_windows_classification(test_context, feature_cols, 'activity_enc', lookback=lb, stride=STRIDE)

        final_model = _build_har_model(model_type, lb, X_train_all.shape[2], n_classes, hid, drop)

        cfg = {'epochs': N_EPOCHS, 'lr': lr, 'batch_size': BATCH_SIZE, 'patience': PATIENCE, 'verbose': 1}
        # NOTE(review): the final fit passes an empty validation set, so there
        # is no val_loss for early stopping here; confirm compile_and_train
        # tolerates empty validation data.
        final_model, history = compile_and_train(final_model, X_train_all, y_train_all, X_train_all[:0], y_train_all[:0], cfg)
        acc_test, report_test, preds_test = evaluate_classification(final_model, X_test, y_test, le, n_classes)
        print(f" Final TEST accuracy for {model_type}: {acc_test:.4f}")
        print("Classification report (test):")
        print(report_test)

        results[model_type].update({
            'final_model': final_model,
            'scaler': scaler,
            'test_acc': acc_test,
            'test_report': report_test,
            'y_test': y_test,
            'y_pred': preds_test
        })

    # summary
    summary = {m: (results[m].get('test_acc', None)) for m in results}
    print("Summary test accuracies:", summary)
    # Hand the collected results back; previously the function returned None,
    # so the caller's `res` was useless.
    return results

# Record the wall-clock start time, then run the whole HAR pipeline on the
# configured CSV and keep whatever it returns in `res`.
# NOTE(review): `start` is not read again in this cell — presumably intended for
# an elapsed-time report; confirm whether a later cell uses it.
start = time.time()
res = pipeline_har_classification(FILE_PATH)

In [None]:
# Download the Microsoft stock dataset through kagglehub and preview the raw CSV.
# `path` is rebound here; the MSFT configuration cell below picks it up via
# globals().get('path', '.').
path = kagglehub.dataset_download("krupalpatel07/microsoft-stock-data")
file_name = "MSFT.csv"
file_path = os.path.join(path, file_name)
df = pd.read_csv(file_path)
df.head()

In [None]:
import os, time, math, itertools, copy, pickle, warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.stattools import adfuller, kpss


# --- MSFT experiment configuration ---

# Directory of the downloaded dataset; falls back to the current directory when
# no global `path` has been defined by an earlier cell.
DATASET_DIR = globals().get('path', '.')
FILE_NAME = "MSFT.csv"
FILE_PATH = os.path.join(DATASET_DIR, FILE_NAME)

# Output directory, created eagerly when this cell runs.
RESULTS_DIR = "msft_rnn_results"
os.makedirs(RESULTS_DIR, exist_ok=True)

# Column names are lower-case because read_msft_csv lower-cases the CSV header.
TIME_COL = "date"
TARGET_COL = "close"
FEATURE_COLS_BASE = ["open","high","low","volume"]

LOOKBACK_CANDIDATES = [30]   # window lengths (in rows) tried by the grid search
HORIZON = 1                  # presumably steps ahead of the window end to predict; confirm against the pipeline
BATCH_SIZE = 128
N_EPOCHS = 10
PATIENCE = 3                 # presumably the EarlyStopping patience in epochs; confirm against the pipeline
N_CV_SPLITS = 3              # number of rolling-origin folds
VAL_BLOCK_SIZE = 252         # rows per validation block
GAP = 0                      # rows skipped between the end of train and the start of validation
TEST_FRAC = 0.10             # fraction of the most recent rows held out as the test set
VERBOSE = True               # NOTE(review): usage not visible in this part of the notebook; confirm

# Cartesian product of these lists is evaluated for every model type.
HYPERPARAM_GRID = {
    'lookback': LOOKBACK_CANDIDATES,
    'hidden': [32],
    'lr': [1e-3],
    'dropout': [0.2]
}


def read_msft_csv(filepath):
    """Load the MSFT CSV with lower-cased columns, sorted by date.

    If the configured time column is missing, the first of 'date', 'day' or
    'trade_date' that exists is renamed to it. Rows whose date cannot be parsed
    are dropped.

    Returns:
        DataFrame ordered by the time column with a fresh 0..n-1 index.
    """
    frame = pd.read_csv(filepath)
    frame.columns = frame.columns.str.strip().str.lower()

    if TIME_COL not in frame.columns:
        substitute = next(
            (name for name in ('date', 'day', 'trade_date') if name in frame.columns),
            None,
        )
        if substitute is not None:
            frame = frame.rename(columns={substitute: TIME_COL})

    frame[TIME_COL] = pd.to_datetime(frame[TIME_COL], errors='coerce')
    has_unparsed = frame[TIME_COL].isna().any()
    if has_unparsed:
        frame = frame.dropna(subset=[TIME_COL]).copy()

    ordered = frame.sort_values(TIME_COL)
    return ordered.reset_index(drop=True)

import pandas as pd
from statsmodels.tsa.stattools import adfuller, kpss

def adf_test(series):
    """Run the Augmented Dickey-Fuller test on the non-missing values of `series`.

    Returns:
        dict with keys 'stat', 'pval', 'usedlag', 'nobs' and 'crit', taken from
        the first five elements of the adfuller result.
    """
    outcome = adfuller(series.dropna(), autolag='AIC')
    field_names = ('stat', 'pval', 'usedlag', 'nobs', 'crit')
    return {field: outcome[position] for position, field in enumerate(field_names)}

def kpss_test(series, regression='c'):
    """Run the KPSS test on the non-missing values of `series`.

    Args:
        series: pandas Series to test.
        regression: deterministic component passed through to kpss.

    Returns:
        dict with keys 'stat', 'pval', 'nlags' and 'crit'.
    """
    cleaned = series.dropna()
    stat, pval, nlags, crit = kpss(cleaned, regression=regression, nlags="auto")
    return dict(stat=stat, pval=pval, nlags=nlags, crit=crit)

def stationarity_report_df(df, exclude_cols=None):
    """Tabulate ADF and KPSS verdicts for every numeric column of `df`.

    A column counts as stationary under ADF when p < 0.05 and under KPSS when
    p > 0.05; agreement gives "Stationary" / "Non-stationary", disagreement
    gives "Mixed/Borderline".

    Args:
        df: frame whose numeric columns are tested.
        exclude_cols: column names to leave out (default: none).

    Returns:
        DataFrame with one row per tested column, or None if an unexpected
        error occurred (the error is printed). Columns whose tests fail are
        reported on stdout and skipped.
    """
    skipped_names = [] if exclude_cols is None else exclude_cols
    try:
        rows = []
        for col in df.select_dtypes(include='number').columns:
            if col in skipped_names:
                continue
            values = df[col].dropna()

            try:
                adf = adf_test(values)
                kp = kpss_test(values)
            except Exception as e:
                print(f"Skipping {col}: {e}")
                continue

            adf_ok = adf['pval'] < 0.05
            kpss_ok = kp['pval'] > 0.05

            # The two tests have opposite null hypotheses, so they must agree
            # for a clear-cut verdict.
            if adf_ok != kpss_ok:
                verdict = "Mixed/Borderline"
            elif adf_ok:
                verdict = "Stationary"
            else:
                verdict = "Non-stationary"

            rows.append({
                "Column": col,
                "ADF stat": adf['stat'],
                "ADF p": adf['pval'],
                "ADF result": "Stationary" if adf_ok else "Non-stationary",
                "KPSS stat": kp['stat'],
                "KPSS p": kp['pval'],
                "KPSS result": "Stationary" if kpss_ok else "Non-stationary",
                "Overall conclusion": verdict
            })
        return pd.DataFrame(rows)
    except Exception as e:
        print(f"Error: {e}")
        return None

def add_time_features_daily(df):
    """Return a copy of `df` indexed by date with calendar features appended.

    If a 'date' column exists it is parsed and becomes the index; otherwise the
    existing index is used as the source of dates. Added columns, in order:
    dayofweek, month, dayofyear, dow_sin, dow_cos, month_sin, month_cos and
    time_idx (0..n-1 row counter). The input frame is not modified.
    """
    out = df.copy()
    if 'date' in out.columns:
        out['date'] = pd.to_datetime(out['date'])
        out = out.set_index('date')

    stamps = out.index
    for calendar_attr in ('dayofweek', 'month', 'dayofyear'):
        out[calendar_attr] = getattr(stamps, calendar_attr)

    # Cyclical encodings: day of week has period 7, month (shifted to 0..11) period 12.
    dow_angle = 2*np.pi*out['dayofweek']/7
    out['dow_sin'] = np.sin(dow_angle)
    out['dow_cos'] = np.cos(dow_angle)
    month_angle = 2*np.pi*(out['month']-1)/12
    out['month_sin'] = np.sin(month_angle)
    out['month_cos'] = np.cos(month_angle)

    out['time_idx'] = np.arange(len(out))
    return out

def winsorize_series(series, lower_q=0.01, upper_q=0.99):
    """Clip `series` to the values at its `lower_q` and `upper_q` quantiles.

    Returns:
        A new Series; values below the lower quantile or above the upper
        quantile are replaced by the respective quantile value.
    """
    floor_value = series.quantile(lower_q)
    ceiling_value = series.quantile(upper_q)
    return series.clip(lower=floor_value, upper=ceiling_value)

def make_windows_from_df(df, feature_cols, target_col, lookback, horizon=1):
    """Build supervised (window, target) pairs from consecutive rows of `df`.

    Window s covers rows [s, s + lookback) of the feature columns; its target is
    the target column at row s + lookback + horizon - 1.

    Returns:
        (X, y): X is float32 of shape (n_windows, lookback, n_features) and y is
        float32 of shape (n_windows, 1). When the frame has fewer than
        lookback + horizon rows, empty arrays of shape (0, lookback, n_features)
        and (0, 1) are returned.
    """
    table = df[feature_cols + [target_col]].values
    n_feat = len(feature_cols)
    span = lookback + horizon
    n_windows = table.shape[0] - span + 1
    if n_windows <= 0:
        return np.empty((0, lookback, n_feat)), np.empty((0, 1))

    # Features occupy the first n_feat columns; the target is the last column.
    inputs = [table[start:start + lookback, :n_feat] for start in range(n_windows)]
    targets = [table[start + span - 1, n_feat] for start in range(n_windows)]
    X = np.array(inputs, dtype=np.float32)
    y = np.array(targets, dtype=np.float32).reshape(-1, 1)
    return X, y

def rolling_origin_splits(n_time, n_splits=N_CV_SPLITS, val_block_size=VAL_BLOCK_SIZE, gap=GAP, initial_train_size=None):
    """Yield (train_idx, val_idx) positional index arrays for rolling-origin CV.

    Fold i trains on rows [0, train_end) and validates on the following
    `val_block_size` rows after skipping `gap` rows, where
    train_end = initial_train_size + i * val_block_size.

    Args:
        n_time: total number of rows available.
        n_splits: number of folds to attempt.
        val_block_size: rows per validation block.
        gap: rows left unused between train and validation.
        initial_train_size: training rows of the first fold; by default whatever
            remains after reserving room for all validation blocks and gaps.

    Yields:
        Tuples of numpy integer arrays (train_idx, val_idx).
    """
    if initial_train_size is None:
        initial_train_size = n_time - n_splits * val_block_size - gap * n_splits
    for i in range(n_splits):
        train_end = initial_train_size + i * val_block_size
        if train_end <= 0:
            # Data too short for this fold: there would be no training rows and
            # the validation range could start at a negative position, which
            # .iloc would interpret as counting from the end of the frame.
            continue
        val_start = train_end + gap
        val_end = val_start + val_block_size
        if val_end > n_time:
            break
        train_idx = np.arange(0, train_end)
        val_idx = np.arange(val_start, val_end)
        yield train_idx, val_idx

def rmse(y_true, y_pred):
    """Return the root of the mean squared error between `y_true` and `y_pred`."""
    squared_error = mean_squared_error(y_true, y_pred)
    return math.sqrt(squared_error)

def build_elman(input_shape, hidden_size=32, dropout=0.0):
    """Build a single-layer SimpleRNN regressor with one linear output unit.

    Args:
        input_shape: per-sample shape passed to keras.Input.
        hidden_size: number of recurrent units.
        dropout: dropout rate passed to the SimpleRNN layer.

    Returns:
        An uncompiled keras.Model.
    """
    sequence_in = keras.Input(shape=input_shape)
    recurrent = layers.SimpleRNN(
        hidden_size,
        activation='tanh',
        dropout=dropout,
        return_sequences=False,  # only the last hidden state feeds the output
    )
    encoded = recurrent(sequence_in)
    prediction = layers.Dense(1)(encoded)
    return keras.Model(sequence_in, prediction)

def build_multi(input_shape, hidden_size=32, dropout=0.2):
    """Build a two-layer stacked SimpleRNN regressor with one linear output unit.

    Args:
        input_shape: per-sample shape passed to keras.Input.
        hidden_size: number of units in each recurrent layer.
        dropout: dropout rate passed to both SimpleRNN layers.

    Returns:
        An uncompiled keras.Model.
    """
    sequence_in = keras.Input(shape=input_shape)
    # First layer emits the whole sequence so the second layer can recur over it.
    lower = layers.SimpleRNN(
        hidden_size, activation='tanh', dropout=dropout, return_sequences=True
    )(sequence_in)
    upper = layers.SimpleRNN(
        hidden_size, activation='tanh', dropout=dropout, return_sequences=False
    )(lower)
    prediction = layers.Dense(1)(upper)
    return keras.Model(sequence_in, prediction)

class JordanRNN(keras.Model):
    """Jordan-style recurrent regressor.

    At each time step the cell receives the current features concatenated with
    the scalar output produced at the previous step (zero at the first step);
    the output of the last step is returned.
    """

    def __init__(self, input_size, hidden_size, dropout=0.0, **kwargs):
        """Create the recurrent cell, the dropout layer and the linear output head."""
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.rnn_cell = layers.SimpleRNNCell(hidden_size, activation='tanh')
        self.dropout = layers.Dropout(dropout)
        self.fc = layers.Dense(1)

    def call(self, x, training=False):
        """Run the recurrence over axis 1 of `x` and return the final (batch, 1) output."""
        n_batch = tf.shape(x)[0]
        state = tf.zeros((n_batch, self.hidden_size))
        feedback = tf.zeros((n_batch, 1))
        # The sequence length must be statically known for this Python loop.
        n_steps = x.shape[1]
        for step in range(n_steps):
            step_input = tf.concat([x[:, step, :], feedback], axis=1)
            _, (state,) = self.rnn_cell(step_input, [state])
            if training:
                state = self.dropout(state, training=training)
            # The fresh output becomes the context input of the next step.
            feedback = self.fc(state)
        return feedback

def train_model_keras(model, X_train, y_train, X_val, y_val, cfg):
    """Compile a regressor (MSE loss, MAE metric) and fit it with early stopping.

    Args:
        model: Keras model to train.
        X_train, y_train: training windows and targets.
        X_val, y_val: validation windows and targets.
        cfg: dict with keys 'lr', 'patience', 'epochs', 'batch_size' and
            optionally 'verbose'.

    Returns:
        (model, history) where history is the object returned by model.fit.
    """
    optimizer = keras.optimizers.Adam(learning_rate=cfg['lr'])
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    stopper = keras.callbacks.EarlyStopping(
        patience=cfg['patience'],
        restore_best_weights=True,
        verbose=0,
    )
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=cfg['epochs'],
        batch_size=cfg['batch_size'],
        callbacks=[stopper],
        verbose=cfg.get('verbose', 0),
    )
    return model, history

def evaluate_model_keras(model, X, y, scaler_y=None):
    """Predict on X and score against y, optionally in the target's original units.

    Args:
        model: fitted model exposing predict().
        X, y: input windows and true targets.
        scaler_y: if given, both predictions and targets are passed through its
            inverse_transform before scoring.

    Returns:
        (targets, predictions, rmse, mae), with targets and predictions in the
        units used for scoring.
    """
    predicted = model.predict(X, batch_size=256)
    if scaler_y is None:
        pred_out, y_out = predicted, y
    else:
        pred_out = scaler_y.inverse_transform(predicted)
        y_out = scaler_y.inverse_transform(y)
    return y_out, pred_out, rmse(y_out, pred_out), mean_absolute_error(y_out, pred_out)

# Engineered columns considered as model inputs (in this order) in addition to
# the base features; only those actually present in the frame are used.
_ENGINEERED_FEATURES = ['ret', 'logret', 'ma_5', 'ma_10', 'time_idx',
                        'dow_sin', 'dow_cos', 'month_sin', 'month_cos']


def _prepare_scaled_frames(fit_df, eval_df, target, base_feats):
    """Engineer features on both frames and scale them with stats from ``fit_df``.

    Both frames are sorted by ``'date'`` and processed independently, so the
    inputs are never mutated. Clip bounds and scaler statistics come from
    ``fit_df`` only and are then applied to ``eval_df``.

    Returns:
        ``(fit_scaled, eval_scaled, feats, scaler_X, scaler_y)`` where
        ``feats`` is the list of input columns that were scaled by
        ``scaler_X`` and the target column was scaled by ``scaler_y``.
    """
    fit_df = fit_df.sort_values('date').reset_index(drop=True)
    eval_df = eval_df.sort_values('date').reset_index(drop=True)

    # Return / moving-average features are derived from the raw (unclipped)
    # target, separately per frame; the leading NaNs are zero-filled below.
    for frame in (fit_df, eval_df):
        frame['ret'] = frame[target].pct_change()
        frame['logret'] = np.log(frame[target]).diff()
        frame['ma_5'] = frame[target].rolling(window=5, min_periods=1).mean()
        frame['ma_10'] = frame[target].rolling(window=10, min_periods=1).mean()

    # Winsorize the target at the 1% / 99% quantiles of the fitting frame.
    # NOTE(review): the same bounds are applied to the evaluation frame, so
    # validation/test metrics are computed against a clipped ground truth.
    # On a trending series this may flatter the scores - confirm intent.
    low = fit_df[target].quantile(0.01)
    high = fit_df[target].quantile(0.99)
    fit_df[target] = fit_df[target].clip(low, high)
    eval_df[target] = eval_df[target].clip(low, high)

    fit_df = add_time_features_daily(fit_df)
    eval_df = add_time_features_daily(eval_df)

    feats = [c for c in base_feats if c in fit_df.columns]
    feats += [c for c in _ENGINEERED_FEATURES if c in fit_df.columns]

    model_cols = feats + [target]
    fit_df[model_cols] = fit_df[model_cols].fillna(0)
    eval_df[model_cols] = eval_df[model_cols].fillna(0)

    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    scaler_X.fit(fit_df[feats])
    scaler_y.fit(fit_df[[target]])

    fit_scaled = fit_df.copy()
    eval_scaled = eval_df.copy()
    fit_scaled[feats] = scaler_X.transform(fit_df[feats])
    eval_scaled[feats] = scaler_X.transform(eval_df[feats])
    # The target is scaled last so that it ends up on scaler_y's scale even
    # if it is also listed among the input features.
    fit_scaled[[target]] = scaler_y.transform(fit_df[[target]])
    eval_scaled[[target]] = scaler_y.transform(eval_df[[target]])
    return fit_scaled, eval_scaled, feats, scaler_X, scaler_y


def _build_rnn(model_type, lookback, n_features, hidden, dropout):
    """Instantiate the 'Elman', 'Multi' or (any other value) Jordan network."""
    if model_type == 'Elman':
        return build_elman((lookback, n_features), hidden_size=hidden, dropout=dropout)
    if model_type == 'Multi':
        return build_multi((lookback, n_features), hidden_size=hidden, dropout=dropout)
    model = JordanRNN(input_size=n_features, hidden_size=hidden, dropout=dropout)
    model.build((None, lookback, n_features))
    return model


def pipeline_msft(filepath):
    """Grid-search, refit and test the Elman / Jordan / Multi RNNs on one CSV.

    Steps:
      1. Load the data and hold out the last ``TEST_FRAC`` of rows as test.
      2. For every model type and every combination in ``HYPERPARAM_GRID``,
         score the model with rolling-origin cross-validation (mean RMSE over
         the folds that produced windows).
      3. Refit the best combination on the train+val rows, using the most
         recent windows as an early-stopping hold-out, and evaluate on test.

    Args:
        filepath: Path handed to ``read_msft_csv``.

    Returns:
        Dict keyed by model type. Each entry holds ``'best_combo'`` and
        ``'combo_scores'`` and, when the final fit ran, ``'final_model'``,
        ``'scaler_X'``, ``'scaler_y'``, ``'y_true_test'``, ``'y_pred_test'``,
        ``'test_rmse'``, ``'test_mae'`` and ``'history'``. Model types with no
        valid CV score are absent from the dict.
    """
    df = read_msft_csv(filepath)

    target = TARGET_COL.lower()
    base_feats = [c for c in FEATURE_COLS_BASE if c in df.columns]

    n = len(df)
    test_n = max(1, int(np.floor(n * TEST_FRAC)))
    trainval_df = df.iloc[:-test_n].copy()
    test_df = df.iloc[-test_n:].copy()

    # Report on the train+val rows only, as the label says, so the test
    # period is not inspected before the final evaluation.
    print("\nStationarity on raw Close (train+val):")
    stationarity_summary = stationarity_report_df(trainval_df, exclude_cols=['Date'])
    print(stationarity_summary)

    n_time = len(trainval_df)
    combos = list(itertools.product(HYPERPARAM_GRID['lookback'], HYPERPARAM_GRID['hidden'], HYPERPARAM_GRID['lr'], HYPERPARAM_GRID['dropout']))
    results = {}

    # Fold preprocessing does not depend on the model type or on the
    # hyper-parameters, so it is done once and reused by every combination.
    cv_folds = []
    for train_idx, val_idx in rolling_origin_splits(n_time, n_splits=N_CV_SPLITS, val_block_size=VAL_BLOCK_SIZE, gap=GAP):
        fold_train, fold_val, fold_feats, _, fold_scaler_y = _prepare_scaled_frames(
            trainval_df.iloc[train_idx], trainval_df.iloc[val_idx], target, base_feats)
        cv_folds.append((fold_train, fold_val, fold_feats, fold_scaler_y))

    for model_type in ['Elman', 'Jordan', 'Multi']:
        print(f"\n=== Model: {model_type} ===")
        combo_scores = []
        for (lb, hid, lr, drop) in combos:
            fold_rmse_vals = []
            for train_scaled, val_scaled, feats, scaler_y in cv_folds:
                X_train, y_train = make_windows_from_df(train_scaled, feats, target, lookback=lb, horizon=HORIZON)
                # Prepend the last `lb` training rows so the first validation
                # target already has a full lookback window.
                val_context = pd.concat([train_scaled.tail(lb), val_scaled])
                X_val, y_val = make_windows_from_df(val_context, feats, target, lookback=lb, horizon=HORIZON)

                if X_train.shape[0] == 0 or X_val.shape[0] == 0:
                    fold_rmse_vals.append(np.nan)
                    continue

                model = _build_rnn(model_type, lb, X_train.shape[2], hid, drop)
                cfg = {'epochs': N_EPOCHS, 'lr': lr, 'batch_size': BATCH_SIZE, 'patience': PATIENCE, 'verbose': 0}
                model, _ = train_model_keras(model, X_train, y_train, X_val, y_val, cfg)

                _, _, val_rmse, _ = evaluate_model_keras(model, X_val, y_val, scaler_y=scaler_y)
                fold_rmse_vals.append(val_rmse)

            valid_rmse = [v for v in fold_rmse_vals if not np.isnan(v)]
            mean_cv_rmse = np.mean(valid_rmse) if len(valid_rmse) > 0 else np.nan
            combo_scores.append({'lookback': lb, 'hidden': hid, 'lr': lr, 'dropout': drop, 'mean_cv_rmse': mean_cv_rmse})

        combo_scores = [c for c in combo_scores if not np.isnan(c['mean_cv_rmse'])]
        if not combo_scores:
            # Nothing to select from; the summary below reports 'no result'.
            print(f"\nNo valid CV score for {model_type}; skipping final fit.")
            continue
        best_combo = min(combo_scores, key=lambda c: c['mean_cv_rmse'])
        print(f"\nBest hyperparams for {model_type}: {best_combo}")
        results[model_type] = {'best_combo': best_combo, 'combo_scores': combo_scores}

        train_scaled, test_scaled, used_feats, scaler_X, scaler_y = _prepare_scaled_frames(
            trainval_df, test_df, target, base_feats)

        lb = best_combo['lookback']; hid = best_combo['hidden']; lr = best_combo['lr']; drop = best_combo['dropout']
        X_train_all, y_train_all = make_windows_from_df(train_scaled, used_feats, target, lookback=lb, horizon=HORIZON)
        test_context = pd.concat([train_scaled.tail(lb), test_scaled])
        X_test, y_test = make_windows_from_df(test_context, used_feats, target, lookback=lb, horizon=HORIZON)

        if X_train_all.shape[0] < 2 or X_test.shape[0] == 0:
            print(f"\nNot enough windows for the final fit of {model_type}; skipping.")
            continue

        # Early stopping needs a non-empty validation set: with zero-length
        # validation arrays there is no usable `val_loss` to monitor. Hold out
        # the most recent windows (at most one CV block, at most ~20% of the
        # data, at least one window) for that purpose.
        n_es = max(1, min(VAL_BLOCK_SIZE, X_train_all.shape[0] // 5))
        X_fit, y_fit = X_train_all[:-n_es], y_train_all[:-n_es]
        X_es, y_es = X_train_all[-n_es:], y_train_all[-n_es:]

        final_model = _build_rnn(model_type, lb, X_train_all.shape[2], hid, drop)
        cfg = {'epochs': N_EPOCHS, 'lr': lr, 'batch_size': BATCH_SIZE, 'patience': PATIENCE, 'verbose': 1}
        final_model, history = train_model_keras(final_model, X_fit, y_fit, X_es, y_es, cfg)

        y_true_test, y_pred_test, final_rmse, final_mae = evaluate_model_keras(final_model, X_test, y_test, scaler_y=scaler_y)
        print(f" Final TEST for {model_type}: RMSE={final_rmse:.4f}, MAE={final_mae:.4f}")

        results[model_type].update({
            'final_model': final_model,
            'scaler_X': scaler_X, 'scaler_y': scaler_y,
            'y_true_test': y_true_test, 'y_pred_test': y_pred_test,
            'test_rmse': final_rmse, 'test_mae': final_mae,
            'history': history.history
        })

    summary = {}
    for m in ['Elman', 'Jordan', 'Multi']:
        if m in results and 'test_rmse' in results[m]:
            summary[m] = {'best_combo': results[m]['best_combo'], 'test_rmse': results[m]['test_rmse'], 'test_mae': results[m]['test_mae']}
        else:
            summary[m] = {'note': 'no result'}
    print("\nSummary of final test metrics:")
    print(summary)
    return results

# Run the whole pipeline (CV grid search, final fit, test evaluation) on the
# file at FILE_PATH (defined elsewhere in the notebook) and keep the returned
# per-model results dict for later cells.
results = pipeline_msft(FILE_PATH)