In [None]:
# Cell 1: Imports
import os

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Notebook-wide plotting config: switch seaborn to its "whitegrid" style so
# figures drawn in later cells share the same look.
sns.set(style="whitegrid")


In [None]:
# Cell 2: Load and preprocess data
from src.data_preprocessing import load_data, preprocess_data

# Name of the label column handed to preprocess_data.
target_column = "Printable"

# Read the dataset through the project helper (relative path, so this assumes
# the kernel's working directory sits one level below the repo root).
dataset_path = "../data/extended_printability_dataset_with_gelatin_silk.csv"
df = load_data(dataset_path)

# preprocess_data returns five objects; judging by the names they are unpacked
# into, these are train/test feature and label splits plus the fitted
# preprocessor -- NOTE(review): confirm against src.data_preprocessing.
prepared = preprocess_data(df, target_column)
X_train, X_test, y_train, y_test, preprocessor = prepared


In [None]:
# Cell 3: Train model
# Build a 100-tree random forest with a fixed seed (reproducible runs) and fit
# it in a single chained call; fit() returns the estimator itself, so `model`
# is the fitted classifier.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
print("✅ Model trained successfully!")


In [None]:
# Cell 4: Evaluate model
# Predict on the held-out features with the fitted classifier.
y_pred = model.predict(X_test)

print("📋 Classification Report:\n")
print(classification_report(y_test, y_pred))

# Confusion matrix
# Pin the row/column order to the classes the model was fitted on. This keeps
# the matrix shape stable even when a class happens to be absent from
# y_test / y_pred, and lets the heatmap ticks show the real class labels
# instead of positional indices 0..n-1.
class_labels = model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Explicit figure/axes objects instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")  # columns of cm index the predicted class
ax.set_ylabel("Actual")     # rows of cm index the true class
fig.tight_layout()
plt.show()


In [None]:
# Cell 5: Save model and preprocessor
# Single definition of the output folder, reused for both files and the message.
MODEL_DIR = "../outputs/models"

# joblib.dump does not create missing parent folders; make sure the target
# directory exists so a fresh checkout does not fail with FileNotFoundError.
os.makedirs(MODEL_DIR, exist_ok=True)

joblib.dump(model, f"{MODEL_DIR}/printability_model.pkl")  # ✅ Renamed to match main.py
joblib.dump(preprocessor, f"{MODEL_DIR}/preprocessor.pkl")
print(f"💾 Model and preprocessor saved to {MODEL_DIR}/")
