# Loading the CWRU dataset as a pandas DataFrame

In [None]:
import scipy.io
import pandas as pd
from pathlib import Path
import numpy as np

def load_cwru_file(file_path, label, sensor='DE'):
    """Load one CWRU ``.mat`` recording into a labelled DataFrame.

    Args:
        file_path: Path of the MATLAB file, handed to ``scipy.io.loadmat``.
        label: Value written to the ``label`` column of every row.
        sensor: Channel tag; the variable whose name ends with
            ``_<sensor>_time`` is read (default ``'DE'``).

    Returns:
        A DataFrame with two columns: ``value`` (the selected signal,
        flattened to 1-D) and ``label`` (``label`` repeated on every row).

    Raises:
        KeyError: If the file contains no variable ending in
            ``_<sensor>_time``.
    """
    mat_data = scipy.io.loadmat(file_path)

    suffix = f'_{sensor}_time'
    data_keys = [key for key in mat_data if key.endswith(suffix)]

    if not data_keys:
        # Fail loudly here: silently returning None would only surface later
        # as an unrelated error wherever the missing DataFrame is first used.
        available = sorted(key for key in mat_data if not key.startswith('__'))
        raise KeyError(
            f"No variable ending in '{suffix}' found in {file_path}; "
            f"available variables: {available}"
        )

    # When several variables match, the first one in loadmat's key order wins.
    vibration = mat_data[data_keys[0]].flatten()
    return pd.DataFrame({
        'value': vibration,
        'label': label,
    })

In [None]:
# Load the nine faulty-bearing recordings (label=1), grouped by file-name
# prefix (B, IR, OR), then the single normal recording (label=0).
# Files are read in the order listed; each result is bound to its own name.
df_B007, df_B014, df_B021 = (
    load_cwru_file(mat_path, label=1)
    for mat_path in (
        '/kaggle/input/cwru-bearing-datasets/raw/B007_1_123.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/B014_1_190.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/B021_1_227.mat',
    )
)
df_IR007, df_IR014, df_IR021 = (
    load_cwru_file(mat_path, label=1)
    for mat_path in (
        '/kaggle/input/cwru-bearing-datasets/raw/IR007_1_110.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/IR014_1_175.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/IR021_1_214.mat',
    )
)
df_OR007, df_OR014, df_OR021 = (
    load_cwru_file(mat_path, label=1)
    for mat_path in (
        '/kaggle/input/cwru-bearing-datasets/raw/OR007_6_1_136.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/OR014_6_1_202.mat',
        '/kaggle/input/cwru-bearing-datasets/raw/OR021_6_1_239.mat',
    )
)
df_time_normal = load_cwru_file(
    '/kaggle/input/cwru-bearing-datasets/raw/Time_Normal_1_098.mat',
    label=0,
)


# Converting the CWRU data into X_CWRU_test and y_CWRU_test arrays

In [None]:
def create_windows(df, window_size=2048, overlap=0):
    """Cut a labelled signal into fixed-length windows.

    Args:
        df: DataFrame with a ``value`` column (the signal) and a ``label``
            column; the label of the first row is applied to every window.
        window_size: Number of samples per window; must be positive.
        overlap: Fraction of ``window_size`` shared by consecutive windows;
            must be below 1. The stride between window starts is
            ``int(window_size * (1 - overlap))``, but never less than one
            sample.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n_windows, window_size, 1)`` and
        ``y`` has shape ``(n_windows,)``. Trailing samples that do not fill a
        whole window are dropped. A signal shorter than ``window_size`` gives
        ``n_windows == 0`` while keeping the trailing dimensions, so the
        result can still be concatenated with other outputs.

    Raises:
        ValueError: If ``window_size`` is not positive or ``overlap >= 1``.
        IndexError: If ``df`` has no rows (there is no label to read).
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")
    if overlap >= 1:
        raise ValueError(f"overlap must be < 1, got {overlap}")

    # Float rounding can push the product just below 1 (e.g. 10 * (1 - 0.9)),
    # which int() truncates to 0; a zero stride would never advance.
    step = max(1, int(window_size * (1 - overlap)))

    values = df['value'].to_numpy()
    label = df['label'].iloc[0]

    # One row of sample indices per window: start offset + 0..window_size-1.
    starts = np.arange(0, len(values) - window_size + 1, step)
    sample_idx = starts[:, np.newaxis] + np.arange(window_size)

    # Fancy indexing keeps shape (0, window_size) when there are no windows.
    X = values[sample_idx][..., np.newaxis]
    y = np.full(len(starts), label)
    return X, y

# Window each recording on its own so that every window takes the label of
# the recording it came from, then stack the per-recording results.
X_list, y_list = [], []

# (display name, DataFrame) pairs; the name is only used in progress output.
dataframes = [
    ('B007', df_B007),
    ('B014', df_B014),
    ('B021', df_B021),
    ('IR007', df_IR007),
    ('IR014', df_IR014),
    ('IR021', df_IR021),
    ('OR007', df_OR007),
    ('OR014', df_OR014),
    ('OR021', df_OR021),
    ('Normal', df_time_normal)
]

for name, df in dataframes:
    # end='' keeps the window count on the same line as the "Processing" text.
    print(f"Processing {name}... ", end='')
    # Non-overlapping windows of 2048 samples each.
    X, y = create_windows(df, window_size=2048, overlap=0)
    X_list.append(X)
    y_list.append(y)
    print(f"Created {X.shape[0]} windows")

# Combine all windows
# NOTE(review): despite the "_train" suffix, these arrays are later aliased
# to X_test / y_test and saved as the CWRU test set.
X_train = np.concatenate(X_list, axis=0)
y_train = np.concatenate(y_list, axis=0)

print(f"\nFinal combined shape: X={X_train.shape}, y={y_train.shape}")
print(f"Total windows: {len(X_train)}")
# np.unique(..., return_counts=True) yields (distinct labels, count of each).
print(f"Label distribution: {np.unique(y_train, return_counts=True)}")

In [None]:
# The CWRU windows and labels serve as the test set. These are aliases of
# X_train / y_train (same array objects), not copies.
X_test, y_test = X_train, y_train

print(f"Test set shape: X={X_test.shape}, y={y_test.shape}")
print(f"Test label distribution: {np.unique(y_test, return_counts=True)}")

In [None]:
# Persist the CWRU windows and labels as .npy files; the relative file names
# resolve against the current working directory.
np.save('X_CWRU_test_no_overlap.npy', X_train)
np.save('y_CWRU_test_no_overlap.npy', y_train)