In [1]:
import pandas as pd
import numpy as np
import pywt
from scipy.signal import savgol_filter, medfilt
from scipy.ndimage import gaussian_filter
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from skimage.restoration import denoise_wavelet
from cv2 import bilateralFilter
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# Load dataset
data = pd.read_csv('PrinterSimulatorDataset.csv', low_memory=False)
data.columns = data.columns.str.strip()
print(data.head())


# 👣 FEATURE ENGINEERING
def map_size(x):
    if x == 220:
        return 'Small'
    elif x == 330:
        return 'Medium'
    elif x == 560:
        return 'Large'
    else:
        return 'Unknown'

if 'Size X' in data.columns:
    data['Size'] = data['Size X'].apply(map_size)
    data.drop(columns=['Size X', 'Size Y', 'AR', 'ASR','PadID'], inplace=True, errors='ignore')
else:
    print("Warning: 'Size X' not found.")
    data['Size'] = 'Unknown'

size_map = {'Small': 0, 'Medium': 1, 'Large': 2, 'Unknown': -1}
data['Size'] = data['Size'].map(size_map)

# Encode categorical features
categorical_cols = ['Cleaning Type', 'Direction', 'Size']
le = LabelEncoder()
for col in categorical_cols:
    if col in data.columns:
        data[col] = le.fit_transform(data[col].astype(str))

print(data.head())

# Define features and targets
targets = ['Volume', 'OffsetX', 'OffsetY']
features = list(data.columns.difference(targets))

X = data[features]
y = data[targets]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


   PadID  Size X  Size Y  Volume  OffsetX  OffsetY  PCB ID  Printing Speed  \
0      1     220     220  74.447   -9.990    5.766     241              30   
1      2     220     220  84.224  -15.069   17.116     241              30   
2      3     220     220  77.646   -5.895    5.528     241              30   
3      4     220     220  79.544  -11.337    8.486     241              30   
4      5     220     220  79.548  -13.730    8.963     241              30   

   Printing Pressure  Separation Speed Cleaning Type  Cleaning Age Direction  \
0                 60               3.0           Wet             1         F   
1                 60               3.0           Wet             1         F   
2                 60               3.0           Wet             1         F   
3                 60               3.0           Wet             1         F   
4                 60               3.0           Wet             1         F   

   Pos X    Pos Y  Rotation      AR   ASR  
0  312

***Decision Tree Regressor***

In [11]:
# Define individual filter functions
def apply_dtcwt_filter(signal):
    coeffs = pywt.wavedec(signal, 'db4', level=1)
    coeffs[1:] = [np.zeros_like(i) for i in coeffs[1:]]
    return pywt.waverec(coeffs, 'db4')[:len(signal)]

def apply_savgol_filter(signal):
    return savgol_filter(signal, window_length=min(len(signal), 21), polyorder=2)

def apply_bilateral_filter(signal):
    signal_np = signal.to_numpy().reshape(-1, 1).astype(np.float32)
    filtered_2d = bilateralFilter(signal_np, 9, 75, 75)
    return filtered_2d.flatten()

def apply_median_filter(signal):
    return medfilt(signal, kernel_size=3)

def apply_multivariate_filter(signal):
    return np.convolve(signal, np.ones(5)/5, mode='same')

def apply_gaussian_filter(signal):
    return gaussian_filter(signal, sigma=1)

# Filter dictionary (single filters only)
filters = {
    "DTCWT": apply_dtcwt_filter,
    "Savitzky-Golay": apply_savgol_filter,
    "Bilateral": apply_bilateral_filter,
    "Median": apply_median_filter,
    "Multivariate": apply_multivariate_filter,
    "Gaussian": apply_gaussian_filter,
}

# Standardize features
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Collect results
results = []

for filter_name, filter_func in filters.items():
    filtered_y = y.copy()
    for col in targets:
        filtered_y[col] = filter_func(y[col])

    # Train and evaluate for each target variable
    for target in targets:
        model = DecisionTreeRegressor(random_state=42)
        model.fit(X_train_scaled, filtered_y.loc[y_train.index, target])

        y_pred = model.predict(X_test_scaled)
        rmse = np.sqrt(mean_squared_error(y_test[target], y_pred))
        mae = mean_absolute_error(y_test[target], y_pred)

        results.append([filter_name, target, rmse, mae])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['Filter', 'Target Variable', 'RMSE', 'MAE'])

# Display structured results
print(results_df.pivot(index='Filter', columns='Target Variable', values=['RMSE', 'MAE']))

                     RMSE                           MAE                    
Target Variable   OffsetX   OffsetY    Volume   OffsetX   OffsetY    Volume
Filter                                                                     
Bilateral        3.563994  3.884629  3.683270  2.754936  2.972059  2.787246
DTCWT            4.445730  4.790891  4.357190  3.453846  3.688746  3.279638
Gaussian         3.747688  4.054581  3.772378  2.887433  3.099094  2.828547
Median           4.094550  4.431664  4.389477  2.922401  3.126886  3.016812
Multivariate     3.465095  3.775503  3.547996  2.671814  2.881394  2.677728
Savitzky-Golay   3.744296  4.111538  3.936428  2.898121  3.139850  2.995958


***SVR***

In [13]:
from sklearn.svm import LinearSVR

# Define individual filter functions
def apply_dtcwt_filter(signal):
    coeffs = pywt.wavedec(signal, 'db4', level=1)
    coeffs[1:] = [np.zeros_like(i) for i in coeffs[1:]]
    return pywt.waverec(coeffs, 'db4')[:len(signal)]

def apply_savgol_filter(signal):
    return savgol_filter(signal, window_length=min(len(signal), 21), polyorder=2)

def apply_bilateral_filter(signal):
    signal_np = signal.to_numpy().reshape(-1, 1).astype(np.float32)
    filtered_2d = bilateralFilter(signal_np, 9, 75, 75)
    return filtered_2d.flatten()

def apply_median_filter(signal):
    return medfilt(signal, kernel_size=3)

def apply_multivariate_filter(signal):
    return np.convolve(signal, np.ones(5)/5, mode='same')

def apply_gaussian_filter(signal):
    return gaussian_filter(signal, sigma=1)

# Filter dictionary (single filters only)
filters = {
    "DTCWT": apply_dtcwt_filter,
    "Savitzky-Golay": apply_savgol_filter,
    "Bilateral": apply_bilateral_filter,
    "Median": apply_median_filter,
    "Multivariate": apply_multivariate_filter,
    "Gaussian": apply_gaussian_filter,
}

# Standardize features
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Initialize results list
results = []

# Apply each filter and fit LinearSVR
for filter_name, filter_func in filters.items():
    filtered_y = y.copy()
    for col in targets:
        filtered_y[col] = filter_func(y[col])

    # Train and evaluate for each target variable
    for target in targets:
        svr_model = LinearSVR(max_iter=1000, random_state=42)
        svr_model.fit(X_train_scaled, filtered_y.loc[y_train.index, target])

        y_pred = svr_model.predict(X_test_scaled)
        rmse = np.sqrt(mean_squared_error(y_test[target], y_pred))
        mae = mean_absolute_error(y_test[target], y_pred)

        results.append([filter_name, target, rmse, mae])

# Convert results to DataFrame
results_df = pd.DataFrame(results, columns=['Filter', 'Target Variable', 'RMSE', 'MAE'])

# Display results pivoted by filter and target variable
print(results_df.pivot(index='Filter', columns='Target Variable', values=['RMSE', 'MAE']))

                     RMSE                           MAE                    
Target Variable   OffsetX   OffsetY    Volume   OffsetX   OffsetY    Volume
Filter                                                                     
Bilateral        7.906325  9.397972  6.889674  6.308800  7.293141  5.188623
DTCWT            7.900701  9.385869  6.881083  6.306641  7.290908  5.187302
Gaussian         7.903881  9.391273  6.887531  6.307451  7.291991  5.188205
Median           7.904043  9.388779  6.883599  6.306952  7.290978  5.187852
Multivariate     7.905178  9.389759  6.888225  6.307528  7.291977  5.188089
Savitzky-Golay   7.908503  9.396227  6.890569  6.308602  7.293285  5.188594


***ANN***

In [3]:
from sklearn.neural_network import MLPRegressor


# Define individual filter functions
def apply_dtcwt_filter(signal):
    coeffs = pywt.wavedec(signal, 'db4', level=1)
    coeffs[1:] = [np.zeros_like(i) for i in coeffs[1:]]
    return pywt.waverec(coeffs, 'db4')[:len(signal)]

def apply_savgol_filter(signal):
    return savgol_filter(signal, window_length=min(len(signal), 21), polyorder=2)

def apply_bilateral_filter(signal):
    signal_np = signal.to_numpy().reshape(-1, 1).astype(np.float32)
    filtered_2d = bilateralFilter(signal_np, 9, 75, 75)
    return filtered_2d.flatten()

def apply_median_filter(signal):
    return medfilt(signal, kernel_size=3)

def apply_multivariate_filter(signal):
    return np.convolve(signal, np.ones(5)/5, mode='same')

def apply_gaussian_filter(signal):
    return gaussian_filter(signal, sigma=1)

# Filter dictionary
filters = {
    "DTCWT": apply_dtcwt_filter,
    "Savitzky-Golay": apply_savgol_filter,
    "Bilateral": apply_bilateral_filter,
    "Median": apply_median_filter,
    "Multivariate": apply_multivariate_filter,
    "Gaussian": apply_gaussian_filter,
}

# Standardize features
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# ANN model setup (lightweight + early stopping)
def get_fast_ann():
    return MLPRegressor(
        hidden_layer_sizes=(32,),  # Single smaller layer
        activation='relu',
        solver='adam',
        max_iter=100,              # Faster convergence
        early_stopping=True,       # Stops if no improvement
        random_state=42,
        verbose=True              # Set True if you want training logs
    )

# Collect results
results = []

for filter_name, filter_func in filters.items():
    filtered_y = y.copy()
    for col in targets:
        filtered_y[col] = filter_func(y[col])

    for target in targets:
        ann_model = get_fast_ann()
        ann_model.fit(X_train_scaled, filtered_y.loc[y_train.index, target])
        y_pred = ann_model.predict(X_test_scaled)

        rmse = np.sqrt(mean_squared_error(y_test[target], y_pred))
        mae = mean_absolute_error(y_test[target], y_pred)

        results.append([filter_name, target, rmse, mae])

# Results DataFrame
results_df = pd.DataFrame(results, columns=['Filter', 'Target Variable', 'RMSE', 'MAE'])

# Pivoted output
print(results_df.pivot(index='Filter', columns='Target Variable', values=['RMSE', 'MAE']))

Iteration 1, loss = 590.32778484
Validation score: -0.084253
Iteration 2, loss = 21.88123220
Validation score: 0.330713
Iteration 3, loss = 16.60864036
Validation score: 0.394729
Iteration 4, loss = 15.61636082
Validation score: 0.416994
Iteration 5, loss = 15.20011307
Validation score: 0.427471
Iteration 6, loss = 14.95587281
Validation score: 0.434611
Iteration 7, loss = 14.77738457
Validation score: 0.439882
Iteration 8, loss = 14.67091205
Validation score: 0.444182
Iteration 9, loss = 14.57282492
Validation score: 0.448801
Iteration 10, loss = 14.48345801
Validation score: 0.452057
Iteration 11, loss = 14.39918405
Validation score: 0.454911
Iteration 12, loss = 14.33482449
Validation score: 0.456342
Iteration 13, loss = 14.28147753
Validation score: 0.458586
Iteration 14, loss = 14.21975216
Validation score: 0.460078
Iteration 15, loss = 14.15849121
Validation score: 0.459833
Iteration 16, loss = 14.08878436
Validation score: 0.465104
Iteration 17, loss = 14.03321662
Validation sco