# Predictive Maintenance with Azure Dataset

## Project imports

In [3]:
import numpy as np
import pandas as pd

## Data Imports

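Run `pre-processing.ipynb` before this notebook: the cell below reads CSV files from `data/preprocessing/`, which that notebook is expected to produce.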

In [4]:
def read(name: str, parse_dates: list[str] | None = ["datetime"]) -> pd.DataFrame:
    path = "data/"
    ext = ".csv"
    file = path + name + ext
    return pd.read_csv(file, parse_dates=parse_dates, na_values="NaN")

# Columns that must all be present for a raw row to be kept.
VARIABLES = ["volt", "rotate", "pressure", "vibration"]

# Raw dataset, minus every row that lacks a value in any of VARIABLES.
DATA = read("raw_data").dropna(subset=VARIABLES)

# Presumably written by pre-processing.ipynb (see the note above this cell).
normal_behavior_data, abnormal_data = map(
    read, ("preprocessing/expected_behavior", "preprocessing/failures_only")
)

## Full dataset regression

### Pre-processing

In [5]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder, StandardScaler

# Feature frame: everything except the timestamp, the machine id and the target.
independent_data = DATA.drop(columns=["datetime", "machineID", "failure"])

numeric_features = ["volt", "rotate", "pressure", "vibration", "age"]
categorical_features = ["model", "errorID", "comp"]

# Numeric columns: median imputation, then standardisation.
numeric_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical columns: missing values become the literal category "None",
# then every category is mapped to an ordinal code.
categorical_transformer = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="constant", fill_value="None")),
        ("ordinal", OrdinalEncoder()),
    ]
)

preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

# NOTE(review): the transformer is fitted on every row, i.e. before the
# train/test split performed later, so its statistics also see the rows that
# end up in the test split - confirm this is acceptable.
preprocessed_data = preprocessor.fit_transform(independent_data)
print(preprocessed_data.shape)

(877209, 8)


In [6]:
# Regression target: for every row, the number of rows until the next failure
# (0 on a failure row itself).
max_interval = 48  # rows with a target <= this form the "short" dataset

# A row is a failure when its "failure" cell is neither missing nor "normal".
is_failure = (DATA["failure"].fillna("normal") != "normal").to_numpy()
row_count = len(is_failure)
positions = np.arange(row_count)

# Position of the nearest failure at or after each row (row_count when no
# failure follows), obtained with a running minimum taken from the end of the
# data. This replaces a Python-level loop over every row and yields an integer
# column instead of an object one.
next_failure = np.where(is_failure, positions, row_count)
next_failure = np.minimum.accumulate(next_failure[::-1])[::-1]
rows_to_failure = next_failure - positions

# NOTE(review): the countdown runs over the whole frame in row order and does
# not restart at machineID boundaries - confirm that this is intended.

# One past the final failure row; None (keep everything) when there is none.
failure_positions = np.flatnonzero(is_failure)
last_failure = int(failure_positions[-1]) + 1 if failure_positions.size else None

# Kept for backward compatibility: the countdown value of the first row.
time_before_failure = int(rows_to_failure[0]) if row_count else 0

# Rows after the final failure have no failure ahead of them, so drop them
# from both the labels and the feature matrix.
labels = pd.DataFrame({"failure": rows_to_failure}).iloc[:last_failure]
short_labels = labels[labels["failure"] <= max_interval]
preprocessed_data = preprocessed_data[:last_failure]
short_data = preprocessed_data[short_labels.index]

# Validate row alignment before anything is written to disk.
assert labels.shape[0] == preprocessed_data.shape[0]
assert short_labels.shape[0] == short_data.shape[0]

labels.to_csv("data/preprocessing/regression_labels.csv", index=False)

labels = labels.to_numpy()
short_labels = short_labels.to_numpy()

print(labels.shape)
print(short_labels.shape)

(876657, 1)
(35585, 1)


### Data splitting

In [7]:
from sklearn.model_selection import train_test_split

STATE = 42  # seed shared by the randomised steps in this notebook

X, y = preprocessed_data, labels
short_X, short_y = short_data, short_labels

# Identical 80/20 random split settings for the full and the short dataset.
split_options = {"test_size": 0.2, "random_state": STATE}

X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
short_X_train, short_X_test, short_y_train, short_y_test = train_test_split(
    short_X, short_y, **split_options
)

# One line of shapes per dataset: train X, test X, train y, test y.
for parts in (
    (X_train, X_test, y_train, y_test),
    (short_X_train, short_X_test, short_y_train, short_y_test),
):
    print(*(part.shape for part in parts))

(701325, 8) (175332, 8) (701325, 1) (175332, 1)
(28468, 8) (7117, 8) (28468, 1) (7117, 1)


### Models

#### Finding the best-fitting regression model

- Model function

In [8]:
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error, r2_score


def evaluate(
    model: BaseEstimator,
    X_train: np.ndarray,
    y_train: np.ndarray,
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> None:
    """Fit ``model`` on the training split and print its test-split scores.

    The estimator is fitted in place; nothing is returned. Two lines are
    printed: the mean squared error and the R2 score of the predictions for
    ``X_test`` against ``y_test``, both rounded to two decimals.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    # Both scores are computed on the held-out split only.
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)

    print(f"MSE: {mse:.2f}")
    print(f"R2: {r2:.2f}")

- Linear regression

In [7]:
from sklearn.linear_model import LinearRegression

# One ordinary least-squares model per dataset (full and short).
model, short_model = LinearRegression(), LinearRegression()

for heading, estimator, split in (
    ("Linear Regression", model, (X_train, y_train, X_test, y_test)),
    (
        "\nShort Linear Regression",
        short_model,
        (short_X_train, short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Linear Regression
MSE: 1669969.90
R2: 0.07

Short Linear Regression
MSE: 188.63
R2: 0.08


- Stochastic Gradient Descent (SGD)

In [8]:
from sklearn.linear_model import SGDRegressor

# Flatten the (n, 1) training targets to 1-D once; later model cells reuse
# these two arrays.
reshaped_y_train, reshaped_short_y_train = y_train.ravel(), short_y_train.ravel()

# Both regressors share exactly the same hyper-parameters.
sgd_options = {
    "max_iter": 1000,
    "tol": 1e-3,
    "penalty": "l2",
    "alpha": 0.0001,
    "learning_rate": "invscaling",
    "random_state": STATE,
}
model, short_model = SGDRegressor(**sgd_options), SGDRegressor(**sgd_options)

for heading, estimator, split in (
    ("SGD Regressor", model, (X_train, reshaped_y_train, X_test, y_test)),
    (
        "\nShort SGD Regressor",
        short_model,
        (short_X_train, reshaped_short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

SGD Regressor
MSE: 1674739.81
R2: 0.06

Short SGD Regressor
MSE: 188.97
R2: 0.07


- Ridge Regression

In [9]:
from sklearn.linear_model import Ridge

# L2-regularised linear model, same settings for both datasets.
ridge_options = {"alpha": 1.0, "random_state": STATE}
model, short_model = Ridge(**ridge_options), Ridge(**ridge_options)

for heading, estimator, split in (
    ("Ridge", model, (X_train, y_train, X_test, y_test)),
    (
        "\nShort Ridge",
        short_model,
        (short_X_train, short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Ridge
MSE: 1669969.91
R2: 0.07

Short Ridge
MSE: 188.63
R2: 0.08


- Lasso Regression

In [10]:
from sklearn.linear_model import Lasso

# L1-regularised linear model, same settings for both datasets.
lasso_options = {"alpha": 0.1, "random_state": STATE}
model, short_model = Lasso(**lasso_options), Lasso(**lasso_options)

for heading, estimator, split in (
    ("Lasso", model, (X_train, y_train, X_test, y_test)),
    (
        "\nShort Lasso",
        short_model,
        (short_X_train, short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Lasso
MSE: 1669971.57
R2: 0.07

Short Lasso
MSE: 188.71
R2: 0.08


- Elastic Net

In [11]:
from sklearn.linear_model import ElasticNet

# Combined L1/L2 penalty with an even mix (l1_ratio=0.5) for both datasets.
elastic_net_options = {"alpha": 0.1, "l1_ratio": 0.5, "random_state": STATE}
model = ElasticNet(**elastic_net_options)
short_model = ElasticNet(**elastic_net_options)

for heading, estimator, split in (
    ("ElasticNet", model, (X_train, y_train, X_test, y_test)),
    (
        "\nShort ElasticNet",
        short_model,
        (short_X_train, short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

ElasticNet
MSE: 1670393.48
R2: 0.07

Short ElasticNet
MSE: 189.45
R2: 0.07


- Decision Tree Regressor

In [12]:
from sklearn.tree import DecisionTreeRegressor

# Decision trees with default settings; only the seed is fixed.
model, short_model = (
    DecisionTreeRegressor(random_state=STATE),
    DecisionTreeRegressor(random_state=STATE),
)

for heading, estimator, split in (
    ("Decision Tree", model, (X_train, y_train, X_test, y_test)),
    (
        "\nShort Decision Tree",
        short_model,
        (short_X_train, short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Decision Tree
MSE: 2703625.33
R2: -0.51

Short Decision Tree
MSE: 377.83
R2: -0.85


- Random Forest Regressor

In [13]:
from sklearn.ensemble import RandomForestRegressor

# 100-tree forests trained on all available cores. The flattened 1-D targets
# come from the SGD cell above.
forest_options = {"n_estimators": 100, "n_jobs": -1, "random_state": STATE}
model = RandomForestRegressor(**forest_options)
short_model = RandomForestRegressor(**forest_options)

for heading, estimator, split in (
    ("Random Forest", model, (X_train, reshaped_y_train, X_test, y_test)),
    (
        "\nShort Random Forest",
        short_model,
        (short_X_train, reshaped_short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Random Forest
MSE: 1398867.68
R2: 0.22

Short Random Forest
MSE: 191.62
R2: 0.06


- Gradient Boosting Regressor

In [14]:
from sklearn.ensemble import GradientBoostingRegressor

# 100 boosting stages at learning rate 0.1. The flattened 1-D targets come
# from the SGD cell above.
boosting_options = {"n_estimators": 100, "learning_rate": 0.1, "random_state": STATE}
model = GradientBoostingRegressor(**boosting_options)
short_model = GradientBoostingRegressor(**boosting_options)

for heading, estimator, split in (
    ("Gradient Boosting", model, (X_train, reshaped_y_train, X_test, y_test)),
    (
        "\nShort Gradient Boosting",
        short_model,
        (short_X_train, reshaped_short_y_train, short_X_test, short_y_test),
    ),
):
    print(heading)
    evaluate(estimator, *split)

Gradient Boosting
MSE: 1375759.40
R2: 0.23

Short Gradient Boosting
MSE: 185.45
R2: 0.09


- Support Vector Regression (SVR)

In [15]:
from sklearn.svm import SVR

# RBF-kernel SVR; this cell fits the short dataset only.
svr_options = {"kernel": "rbf", "C": 100, "gamma": 0.1, "epsilon": 0.1}
short_model = SVR(**svr_options)

short_split = (short_X_train, reshaped_short_y_train, short_X_test, short_y_test)
print("Short SVR")
evaluate(short_model, *short_split)

Short SVR
MSE: 194.53
R2: 0.05


#### Multi-layer Perceptron Regressor (MLP)

- Model

In [9]:
import torch
import torch.nn as nn
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import DataLoader, TensorDataset

num_epochs = 10000
batch_size = 256


class RegressionModel(nn.Module):
    PATH = "data/models/"
    EXT = ".pth"

    def __init__(self, input_dim: int, state_file: str = None):
        super().__init__()
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.output = nn.Linear(128, 1)
        self.criterion = nn.MSELoss()
        self.state_loaded = False

        if state_file:
            self.load_state_dict(torch.load(state_file))
            self.state_loaded = True

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.output(x)
        return x

    def train_model(
        self, X_train: np.ndarray, y_train: np.ndarray, file_name: str
    ) -> None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Training on device: {device}")
        self.to(device)

        self.state_loaded = False
        optimizer = torch.optim.Adam(self.parameters(), lr=0.01)
        dataset = TensorDataset(
            torch.tensor(X_train, dtype=torch.float32).to(device),
            torch.tensor(y_train, dtype=torch.float32).to(device),
        )
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        scaler = GradScaler()

        best_loss = float("inf")
        patience = 10
        patience_counter = 0
        min_delta = 1e-6

        for epoch in range(num_epochs):
            self.train()
            total_loss = 0

            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                with autocast():
                    outputs = self(batch_X)
                    loss = self.criterion(outputs, batch_y)
                scaler.scale(loss).backward()
                scaler.step(optimizer)
                scaler.update()

                total_loss += loss.item()

            avg_loss = total_loss / len(train_loader)

            if epoch % 100 == 0:
                print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}")

            if epoch % 1000 == 0:
                checkpoint_path = f"{self.PATH}{file_name}_epoch_{epoch}{self.EXT}"
                torch.save(self.state_dict(), checkpoint_path)
                print(f"Checkpoint saved at epoch {epoch} to {checkpoint_path}")

            if best_loss - min_delta < avg_loss < best_loss:
                patience_counter += 1
            else:
                patience_counter = 0
                best_loss = min(best_loss, avg_loss)

            if patience_counter > patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        self.state_loaded = True
        print(f"Epoch {num_epochs}, Loss: {avg_loss}")
        torch.save(self.state_dict(), self.PATH + file_name + self.EXT)

    def test_model(
        self, X_test: np.ndarray, y_test: np.ndarray, file_name: str | None = None
    ) -> None:
        device = torch.device("cpu")
        print(f"Testing on device: {device}")
        self.to(device)

        if not self.state_loaded and file_name:
            self.load_state_dict(torch.load(self.PATH + file_name + self.EXT))
            self.state_loaded = True

        dataset = TensorDataset(
            torch.tensor(X_test, dtype=torch.float32),
            torch.tensor(y_test, dtype=torch.float32),
        )
        test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        predictions = []

        self.eval()
        with torch.no_grad():
            total_loss = 0
            total_var = 0
            y_mean = torch.mean(torch.tensor(y_test, dtype=torch.float32))
            for inputs, targets in test_loader:
                outputs = self(inputs)
                loss = self.criterion(outputs, targets)
                total_loss += loss.item()
                total_var += ((targets - y_mean) ** 2).sum().item()
                predictions.extend(outputs.tolist())
            r2_score = 1 - (total_loss / total_var)
            print(f"Test Loss: {total_loss / len(test_loader)}")
            print(f"R2 Score: {r2_score}")
        return predictions

- Training

In [17]:
# Seed torch before the model is built so the initial weights and the batch
# shuffling start from the same random state on every run, matching the
# seeded scikit-learn models above.
torch.manual_seed(STATE)
short_mlp = RegressionModel(short_X_train.shape[1])

# The label array is converted to float32 before being handed to the network.
short_y_train_numeric = short_y_train.astype(np.float32)

print("\nShort MLP")
short_mlp.train_model(short_X_train, short_y_train_numeric, "short_mlp")


Short MLP
Training on device: cuda
Epoch 1/10000, Loss: 224.83201270680792
Checkpoint saved at epoch 0 to data/models/short_mlp_epoch_0.pth
Epoch 101/10000, Loss: 185.89996898976142
Epoch 201/10000, Loss: 181.1987089832802
Epoch 301/10000, Loss: 178.86003434390764
Epoch 401/10000, Loss: 177.38424887892376
Epoch 501/10000, Loss: 176.57157391603752
Epoch 601/10000, Loss: 175.73977531125192
Epoch 701/10000, Loss: 175.41269420401397
Epoch 801/10000, Loss: 174.79321295905007
Epoch 901/10000, Loss: 174.5734983709361
Epoch 1001/10000, Loss: 174.05061169269374
Checkpoint saved at epoch 1000 to data/models/short_mlp_epoch_1000.pth
Epoch 1101/10000, Loss: 174.25412048887245
Epoch 1201/10000, Loss: 173.82308662311914
Epoch 1301/10000, Loss: 173.73604005548452
Epoch 1401/10000, Loss: 173.3585079176009
Epoch 1501/10000, Loss: 173.62924358556089
Epoch 1601/10000, Loss: 172.8890862571819
Epoch 1701/10000, Loss: 173.60251620852893
Epoch 1801/10000, Loss: 173.27438641877453
Epoch 1901/10000, Loss: 173

In [14]:
# Seed torch before the model is built so the initial weights and the batch
# shuffling start from the same random state on every run, matching the
# seeded scikit-learn models above.
torch.manual_seed(STATE)
mlp = RegressionModel(X_train.shape[1])

# The label array is converted to float32 before being handed to the network.
y_train_numeric = y_train.astype(np.float32)

print("MLP")
mlp.train_model(X_train, y_train_numeric, "regression_mlp")

MLP
Training on device: cuda
Epoch 1/10000, Loss: 1549989.9737397353
Checkpoint saved at epoch 0 to data/models/regression_mlp_epoch_0.pth


KeyboardInterrupt: 

- Testing

In [28]:
print("\nShort MLP")

# float32 copy of the held-out targets for the network.
short_y_test_numeric = short_y_test.astype(dtype=np.float32)

# The file name is only consulted when the model has no weights loaded yet.
predictions = short_mlp.test_model(
    short_X_test, short_y_test_numeric, file_name="short_mlp"
)


Short MLP
Testing on device: cpu
Test Loss: 219.92981229509627
R2 Score: 0.9915258297723392


In [None]:
y_test_numeric = y_test.astype(np.float32)

print("MLP")
# Bind the result to a name: test_model returns one prediction per test row,
# and leaving the call as the cell's last expression would echo that whole
# list as the cell output.
full_predictions = mlp.test_model(X_test, y_test_numeric, "regression_mlp")

In [10]:
# Rebuild the short MLP from the weights saved on disk, so no retraining is
# needed in this session.
short_y_test_numeric = short_y_test.astype(np.float32)
model = RegressionModel(
    input_dim=short_X_train.shape[1], state_file="data/models/short_mlp.pth"
)
# Weights are already loaded by the constructor, so file_name is not used here.
predictions = model.test_model(
    short_X_test, short_y_test_numeric, file_name="short_mlp"
)

Testing on device: cpu
Test Loss: 219.9199641091483
R2 Score: 0.9915262093491144


In [22]:
# Pairing is positional: row i of "predicted" sits next to row i of
# short_y_test, so this relies on the predictions being in test-set row order.
comparison = {
    "actual": short_y_test.flatten(),
    "predicted": np.array(predictions).flatten(),
}
df = pd.DataFrame(comparison)
print(df.head())

df.to_csv("data/predictions/regression_mlp.csv", index=False)

  actual  predicted
0     26  23.924051
1     48  -0.223848
2     47  22.442184
3     38  26.791649
4     17   1.720243


In [10]:
# Score the weights saved as "short_mlp" against the full test split.
y_test_numeric = y_test.astype(np.float32)
full_model = RegressionModel(
    input_dim=short_X_train.shape[1], state_file="data/models/short_mlp.pth"
)
# Weights are already loaded by the constructor, so file_name is not used here.
predictions = full_model.test_model(X_test, y_test_numeric, file_name="short_mlp")

Testing on device: cpu
Test Loss: 3014959.9076642334
R2 Score: 0.9868281109962662


In [12]:
# Pairing is positional: row i of "predicted" sits next to row i of y_test,
# so this relies on the predictions being in test-set row order.
comparison = {
    "actual": y_test.flatten(),
    "predicted": np.array(predictions).flatten(),
}
df = pd.DataFrame(comparison)
print(df.head())

df.to_csv("data/predictions/full_regression_mlp.csv", index=False)

  actual  predicted
0   4308  25.021648
1   1777  24.802710
2   1438  27.992088
3    284  30.588078
4    471  23.378174
