In [45]:
import pandas as pd
import numpy as np
import pywt
from scipy.signal import savgol_filter, medfilt
from scipy.ndimage import gaussian_filter
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from skimage.restoration import denoise_wavelet
from cv2 import bilateralFilter
from sklearn.preprocessing import LabelEncoder
import warnings
warnings.filterwarnings('ignore')

# Read the printer-simulator CSV into `data`.
DATASET_PATH = 'PrinterSimulatorDataset.csv'
data = pd.read_csv(DATASET_PATH, low_memory=False)
# Trim surrounding whitespace from the header names so that later lookups by
# exact column name (e.g. 'Size X') are reliable.
data.columns = [label.strip() for label in data.columns]
print(data.head())


# 👣 FEATURE ENGINEERING
def map_size(x):
    """Translate a 'Size X' value into a coarse size label.

    Returns 'Small' for 220, 'Medium' for 330, 'Large' for 560 and
    'Unknown' for every other value.
    """
    known_sizes = ((220, 'Small'), (330, 'Medium'), (560, 'Large'))
    for dimension, label in known_sizes:
        if x == dimension:
            return label
    return 'Unknown'

# Derive the 'Size' class from 'Size X'; when that column is missing, every row
# is labelled 'Unknown' and a warning is printed.
if 'Size X' not in data.columns:
    print("Warning: 'Size X' not found.")
    data['Size'] = 'Unknown'
else:
    data['Size'] = data['Size X'].apply(map_size)
    # These columns are dropped once 'Size' exists; errors='ignore' skips any
    # that are not present in the frame.
    redundant_cols = ['Size X', 'Size Y', 'AR', 'ASR', 'PadID']
    data.drop(columns=redundant_cols, inplace=True, errors='ignore')

# Swap the text labels for integer codes (-1 marks an unrecognised size).
size_map = dict(Small=0, Medium=1, Large=2, Unknown=-1)
data['Size'] = data['Size'].map(size_map)

# Encode categorical features.
# Each column gets its own LabelEncoder, stored in `label_encoders`, so the
# fitted classes of every column remain available (e.g. for inverse_transform).
# A single shared encoder only remembers the last column it was fitted on.
categorical_cols = ['Cleaning Type', 'Direction', 'Size']
label_encoders = {}
le = LabelEncoder()  # keeps `le` defined even if no categorical column is present
for col in categorical_cols:
    if col not in data.columns:
        continue
    le = LabelEncoder()
    # Values are cast to str before encoding, so the integer codes follow the
    # sorted order of the string forms. This also re-codes 'Size', which was
    # already mapped to integers before this point.
    data[col] = le.fit_transform(data[col].astype(str))
    label_encoders[col] = le

print(data.head())

# Targets are the three measured print outcomes; every remaining column of
# `data` is used as a feature.
# NOTE(review): this includes identifier-like columns such as 'PCB ID' if they
# are still present in the frame; confirm that is intended.
targets = ['Volume', 'OffsetX', 'OffsetY']
features = data.columns.difference(targets).tolist()

X = data.loc[:, features]
y = data.loc[:, targets]

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


   PadID  Size X  Size Y  Volume  OffsetX  OffsetY  PCB ID  Printing Speed  \
0      1     220     220  74.447   -9.990    5.766     241              30   
1      2     220     220  84.224  -15.069   17.116     241              30   
2      3     220     220  77.646   -5.895    5.528     241              30   
3      4     220     220  79.544  -11.337    8.486     241              30   
4      5     220     220  79.548  -13.730    8.963     241              30   

   Printing Pressure  Separation Speed Cleaning Type  Cleaning Age Direction  \
0                 60               3.0           Wet             1         F   
1                 60               3.0           Wet             1         F   
2                 60               3.0           Wet             1         F   
3                 60               3.0           Wet             1         F   
4                 60               3.0           Wet             1         F   

   Pos X    Pos Y  Rotation      AR   ASR  
0  312

In [46]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import pywt
from scipy.signal import savgol_filter, medfilt
from scipy.ndimage import gaussian_filter
from cv2 import bilateralFilter

# Define filters (including your new ones)
def apply_dtcwt_filter(signal):
    """Low-pass a 1-D signal by discarding its level-1 wavelet detail band.

    Despite the name, this is a single-level discrete wavelet transform with
    the 'db4' wavelet (via PyWavelets), not a dual-tree complex wavelet
    transform. The detail coefficients are zeroed, the signal is rebuilt from
    the approximation band alone, and the result is trimmed to the input length.
    """
    approximation, *detail_bands = pywt.wavedec(signal, 'db4', level=1)
    kept = [approximation] + [np.zeros_like(band) for band in detail_bands]
    reconstructed = pywt.waverec(kept, 'db4')
    # The reconstruction can be longer than the input, so cut it back.
    return reconstructed[:len(signal)]

def apply_median_filter(signal):
    """Return `signal` after a sliding 3-sample median filter (scipy `medfilt`)."""
    window = 3
    return medfilt(signal, window)

def apply_gaussian_filter(signal):
    """Return `signal` smoothed by a Gaussian kernel with sigma = 1 sample."""
    smoothing_sigma = 1
    return gaussian_filter(signal, sigma=smoothing_sigma)

def apply_savgol_filter(signal, max_window=21, polyorder=2):
    """Smooth a 1-D signal with a Savitzky-Golay filter.

    The window is the largest odd length that fits inside the signal, capped
    at `max_window`.

    Parameters
    ----------
    signal : array-like
        1-D sequence of samples.
    max_window : int, default 21
        Upper bound on the filter window length.
    polyorder : int, default 2
        Order of the polynomial fitted inside each window.

    Returns
    -------
    numpy.ndarray
        The smoothed signal. When the signal is too short for any window
        longer than `polyorder`, a float copy of the input is returned
        unchanged instead of raising.
    """
    n_samples = len(signal)
    largest_odd = n_samples if n_samples % 2 == 1 else n_samples - 1
    window_length = min(largest_odd, max_window)
    if window_length % 2 == 0:
        # Only reachable with an even `max_window`; keep the window odd.
        window_length -= 1
    if window_length <= polyorder:
        # savgol_filter requires polyorder < window_length; with so few
        # samples there is nothing to smooth, so pass the data through.
        return np.array(signal, dtype=float)
    return savgol_filter(signal, window_length=window_length, polyorder=polyorder)

def apply_bilateral_filter(signal):
    """Apply OpenCV's bilateral filter to a 1-D signal and return a 1-D array.

    The signal is cast to float32 and filtered with neighbourhood diameter 9
    and sigmaColor = sigmaSpace = 75.

    OpenCV works on images, so a 1-D input may come back as an (N, 1) array.
    The result is flattened so that this filter returns the same 1-D shape as
    the other filters and can be chained with them in any order.
    """
    smoothed = bilateralFilter(signal.astype(np.float32), 9, 75, 75)
    return np.asarray(smoothed).reshape(-1)

def apply_multivariate_filter(signal):
    """Smooth a 1-D signal with a 5-point moving average.

    The signal is convolved with a uniform kernel (five weights of 1/5) using
    numpy's 'same' mode.
    """
    width = 5
    box_kernel = np.full(width, 1.0 / width)
    return np.convolve(signal, box_kernel, mode='same')

# Registry: filter display name -> function that takes and returns a signal.
filter_functions = dict((
    ('DTCWT', apply_dtcwt_filter),
    ('Gaussian', apply_gaussian_filter),
    ('Median', apply_median_filter),
    ('Savitzky-Golay', apply_savgol_filter),
    ('Bilateral', apply_bilateral_filter),
    ('Multivariate', apply_multivariate_filter),
))

# Filter chains to evaluate. Filters run left to right, so order matters.
# Every chain starts with DTCWT -> Savitzky-Golay and differs only in its
# final stage.
filter_orders = [
    ('DTCWT', 'Savitzky-Golay', final_stage)
    for final_stage in ('Median', 'Gaussian', 'Bilateral', 'Multivariate')
]

# Standard scalers: one for the features, one for the targets.
scaler_X, scaler_y = StandardScaler(), StandardScaler()

# Training and evaluation function
def train_evaluate_model(model, model_name, X_train, X_test, y_train, y_test, target_names=None):
    """Fit `model` separately on each target column and score it on the test set.

    Parameters
    ----------
    model : estimator with `fit(X, y)` and `predict(X)`
        Refitted in place once per target, so after the call it holds the fit
        for the last target only.
    model_name : str
        Label stored in each result row.
    X_train, X_test : array-like
        Feature matrices.
    y_train, y_test : array-like, 2-D
        Target matrices with one column per entry of `target_names`.
        DataFrames are accepted and converted to arrays.
    target_names : sequence of str, optional
        Names of the target columns, in column order. Defaults to the
        notebook-level `targets` list.

    Returns
    -------
    list of [model_name, target, rmse, mae]
        One row per target; errors are in the units of the `y` arrays passed in.
    """
    if target_names is None:
        target_names = targets  # fall back to the notebook-level target list
    # Positional column access below needs plain arrays, not DataFrames.
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)

    results = []
    for i, target in enumerate(target_names):
        model.fit(X_train, y_train[:, i])
        y_pred = model.predict(X_test)

        rmse = np.sqrt(mean_squared_error(y_test[:, i], y_pred))
        mae = mean_absolute_error(y_test[:, i], y_pred)

        results.append([model_name, target, rmse, mae])
    return results

# Initialize model
dt_model = DecisionTreeRegressor(random_state=42)

# The feature scaling does not depend on the filter chain, so fit and apply
# it once instead of repeating it in every loop iteration.
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Collect results: one row per (filter chain, target).
all_results = []
for order in filter_orders:
    # NOTE(review): each chain is run over the full target column (train and
    # test rows together, in row order) before the split indices are applied,
    # so smoothed training targets can contain information from test rows.
    # Confirm this is acceptable for the comparison being made.
    filtered_data = y.copy()

    for col in targets:
        signal = y[col].values.copy()
        for filter_name in order:
            signal = filter_functions[filter_name](signal)
        # Flatten in case a filter hands back a column-shaped (N, 1) array.
        filtered_data[col] = np.asarray(signal).reshape(-1)

    # Scale the filtered targets; the scaler is fitted on the training rows only.
    y_train_scaled = scaler_y.fit_transform(filtered_data.loc[y_train.index])
    y_test_scaled = scaler_y.transform(filtered_data.loc[y_test.index])

    results_dt = train_evaluate_model(dt_model, "Decision Tree", X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled)

    # Row layout: (filter1, filter2, filter3, model, target, rmse, mae)
    all_results.extend(order + tuple(row) for row in results_dt)

# Create results dataframe
columns = ['Filter 1', 'Filter 2', 'Filter 3', 'Model', 'Target Variable', 'RMSE', 'MAE']
results_df = pd.DataFrame(all_results, columns=columns)

# Print pivot table.
# RMSE/MAE are measured against the filtered, standardized targets, so they
# are in standard-deviation units rather than the original target units.
print(results_df.pivot_table(index=['Filter 1', 'Filter 2', 'Filter 3'],
                             columns='Target Variable',
                             values=['RMSE', 'MAE']).round(4))

                                         MAE                    RMSE          \
Target Variable                      OffsetX OffsetY  Volume OffsetX OffsetY   
Filter 1 Filter 2       Filter 3                                               
DTCWT    Savitzky-Golay Bilateral     0.0467  0.0202  0.0589  0.0866  0.0407   
                        Gaussian      0.0552  0.0237  0.0690  0.0960  0.0431   
                        Median        0.0595  0.0256  0.0739  0.0999  0.0451   
                        Multivariate  0.0510  0.0219  0.0642  0.0922  0.0414   

                                              
Target Variable                       Volume  
Filter 1 Filter 2       Filter 3              
DTCWT    Savitzky-Golay Bilateral     0.1027  
                        Gaussian      0.1144  
                        Median        0.1209  
                        Multivariate  0.1096  
