# 3 Base Model

In [14]:
import sys
import numpy as np
sys.path.append('./the .py files')  # relative path from notebook to .py directory

import data_loader
import pre_processing as pre
import evaluator

In [12]:
# Helper functions for the baseline linear-regression pipeline
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

def load_feature_data(npz_path):
    """
    Load training and validation feature arrays from a compressed .npz file.

    The archive is opened in a ``with`` block so its file handle is released
    as soon as the four arrays have been read, rather than staying open until
    the NpzFile object happens to be garbage-collected.

    Parameters:
        npz_path (str): Path to the saved .npz file.

    Returns:
        Tuple: X_train, y_train, X_val, y_val

    Raises:
        KeyError: If one of the four expected arrays is not in the archive.
        OSError: If the file cannot be opened (propagated from ``np.load``).
    """
    with np.load(npz_path) as data:
        # Indexing the archive reads each member into memory, so the returned
        # arrays remain usable after the archive is closed on block exit.
        return data['X_train'], data['y_train'], data['X_val'], data['y_val']


def split_data(X, y, test_size=0.2, random_state=42):
    """
    Randomly partition features and targets into training and validation parts.

    This is a thin wrapper around ``train_test_split``; whatever that call
    returns is handed back unchanged.

    Parameters:
        X (np.ndarray): Feature matrix (2D or 3D).
        y (np.ndarray): Target vector.
        test_size (float): Fraction of data to use for validation.
        random_state (int): Seed for reproducibility.

    Returns:
        The result of ``train_test_split``, ordered as
        (X_train, X_val, y_train, y_val).
    """
    split_options = {"test_size": test_size, "random_state": random_state}
    partitions = train_test_split(X, y, **split_options)
    return partitions

def train_linear_model(X_train, y_train):
    """
    Fit an ordinary least-squares baseline on the given training data.

    Parameters:
        X_train (np.ndarray): Training feature matrix.
        y_train (np.ndarray): Training target values.

    Returns:
        The fitted LinearRegression estimator.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X_train, y_train)

def predict(model, X):
    """
    Generate predictions by delegating to the model's own ``predict`` method.

    Parameters:
        model: A trained regression model with a `.predict()` method.
        X (np.ndarray): Feature matrix to predict on.

    Returns:
        np.ndarray: Predicted RUL values, exactly as returned by the model.
    """
    predictions = model.predict(X)
    return predictions

def run_base_model_pipeline(npz_path, strategy='last', test_size=0.2,
                            random_state=42, use_saved_split=False):
    """
    Full pipeline to train and run a linear model on feature data.

    By default the validation arrays stored in the .npz file are ignored: the
    stored training arrays are split again with ``split_data`` and the model
    is fitted on the training part of that new split. Pass
    ``use_saved_split=True`` to fit on the full stored training arrays and
    predict on the stored validation arrays instead.

    Parameters:
        npz_path (str): Path to .npz file with preprocessed data.
        strategy (str): Feature strategy used ('last', 'mean', 'flat').
            Placeholder for consistency; it is not read by this function.
        test_size (float): Validation fraction for the re-split. Only used
            when ``use_saved_split`` is False.
        random_state (int): Seed for the re-split. Only used when
            ``use_saved_split`` is False.
        use_saved_split (bool): If True, use the train/validation arrays
            exactly as stored in the file and skip the re-split.

    Returns:
        Tuple: (model, X_val, y_val, y_pred)
    """
    # Step 1: Load data
    X_saved_train, y_saved_train, X_saved_val, y_saved_val = load_feature_data(npz_path)

    # Step 2: Choose the train/val arrays
    if use_saved_split:
        X_train, y_train = X_saved_train, y_saved_train
        X_val, y_val = X_saved_val, y_saved_val
    else:
        # Original behaviour: carve a validation set out of the stored
        # training arrays, leaving the stored validation arrays untouched.
        X_train, X_val, y_train, y_val = split_data(
            X_saved_train, y_saved_train,
            test_size=test_size, random_state=random_state,
        )

    # Step 3: Train model
    model = train_linear_model(X_train, y_train)

    # Step 4: Predict
    y_pred = predict(model, X_val)

    return model, X_val, y_val, y_pred

In [3]:
# load_feature_data function test
# Round trip: build feature vectors from the raw FD001 training file, save
# them to an .npz archive, then read them back with load_feature_data().


# Step 1: Load raw data
df = data_loader.load_raw_data("CMaps/train_FD001.txt")

# Step 2: Preprocess
df = pre.drop_flat_sensors(df)
df = pre.calculate_rul(df)
df = pre.standardise_per_condition(df)  # Standardise by operational settings

# Step 3: Create sliding windows
X_3d, y = pre.generate_sliding_windows(df, seq_len=30)

# Step 4: Convert to 2D features
X_2d = pre.make_feature_vectors_from_windows(X_3d, strategy='last')

# Step 5: Split into train/val
# Use the notebook's own split_data helper (same test_size and seed as before)
# instead of re-importing train_test_split in the middle of the cell.
# NOTE(review): this is a random split over windows; presumably windows from
# the same engine can land on both sides, which would inflate validation
# scores. Confirm against generate_sliding_windows.
X_train, X_val, y_train, y_val = split_data(X_2d, y, test_size=0.2, random_state=42)

# Step 6: Save as .npz
pre.save_preprocessed_data(X_train, y_train, X_val, y_val, X_test=None, y_test=None, filename="fd001_last.npz")

# Step 7: Test your function
X_tr, y_tr, X_vl, y_vl = load_feature_data("fd001_last.npz")
print("Train shapes:", X_tr.shape, y_tr.shape)
print("Val shapes  :", X_vl.shape, y_vl.shape)

Data saved to fd001_last.npz
Train shapes: (14184, 15) (14184,)
Val shapes  : (3547, 15) (3547,)


In [5]:
# split_data test
# Splits the training arrays read back from the .npz file (X_tr, y_tr) using
# a non-default test_size and random_state, then prints the resulting shapes.
# Note: this rebinds X_train / X_val / y_train / y_val, which the earlier
# feature-building cell also assigned.

X_train, X_val, y_train, y_val = split_data(X_tr, y_tr, test_size=0.25, random_state=123)

print("X_train shape:", X_train.shape)
print("X_val shape  :", X_val.shape)
print("y_train shape:", y_train.shape)
print("y_val shape  :", y_val.shape)

X_train shape: (10638, 15)
X_val shape  : (3546, 15)
y_train shape: (10638,)
y_val shape  : (3546,)


In [8]:
# train_linear_model test: fit the baseline on whatever X_train / y_train are
# currently bound in the kernel.
# NOTE(review): execution counts in this notebook are not sequential, so which
# split is used here depends on run order. Confirm with Restart & Run All.
model = train_linear_model(X_train, y_train)
print("Model trained:", model)

Model trained: LinearRegression()


In [10]:
# predict test: run the current `model` on the current `X_val` and print the
# output shape plus the first five values as a quick sanity check.
y_pred = predict(model, X_val)
print("Predictions shape:", y_pred.shape)
print("First 5 predictions:", y_pred[:5])

Predictions shape: (3546,)
First 5 predictions: [102.0559    84.11614   85.425026  91.34407   66.115036]


In [13]:
# Full pipeline test. This rebinds model, X_val, y_val and y_pred with the
# pipeline's own outputs. With default arguments the pipeline re-splits the
# training arrays stored in the file, so X_val / y_val here are not the
# validation arrays saved in "fd001_last.npz".
model, X_val, y_val, y_pred = run_base_model_pipeline("fd001_last.npz")
print("Pipeline completed. Sample predictions:", y_pred[:5])

Pipeline completed. Sample predictions: [ 91.10553  106.80296  124.87522  116.865814  93.38303 ]


In [15]:
# Evaluate model performance using evaluator module
# y_val / y_pred are whatever the kernel currently holds, presumably the
# pipeline outputs from the previous cell. Confirm run order.
# The call is the cell's last expression, so its return value is also shown
# as the cell output.
evaluator.evaluate_model(y_val, y_pred, model_name="Linear Regression")

Linear Regression Evaluation:
  RMSE: 24.4143
  MAE : 20.0134


{'model': 'Linear Regression', 'RMSE': 24.414293, 'MAE': 20.01343}