In [None]:
# Cell 1: Imports
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Apply seaborn's "whitegrid" theme globally so every figure below shares it.
# `set_theme` is the current name for the same call as `sns.set`.
sns.set_theme(style="whitegrid")


In [None]:
# Cell 2: Load Dataset
# The path is relative to the notebook's working directory.
dataset_path = "../data/degradation_dataset.csv"
df = pd.read_csv(dataset_path)

# Last expression of the cell: preview the first rows.
df.head()


In [None]:
# Cell 3: Basic Info
# Quick structural overview: dimensions, per-column dtypes, per-column null counts.
print("Shape:", df.shape)

overview_sections = (
    ("\nData Types:\n", df.dtypes),
    ("\nMissing Values:\n", df.isnull().sum()),
)
for heading, summary in overview_sections:
    print(heading, summary)


In [None]:
# Cell 4: Distribution of Input Features
# Two histograms side by side; each panel is described by
# (axes, column, histplot options, title) and drawn in one loop.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

panels = (
    (axes[0], "Porosity_Percentage", {"kde": True}, "Porosity Percentage Distribution"),
    (axes[1], "Immersion_Time_Days", {"kde": False, "bins": 10}, "Immersion Time (Days)"),
)
for panel_ax, column, hist_options, panel_title in panels:
    sns.histplot(df[column], ax=panel_ax, **hist_options)
    panel_ax.set_title(panel_title)

fig.tight_layout()
plt.show()


In [None]:
# Cell 5: Target Correlations
# `target_cols` is reused by later cells (boxplots, y definition), so it is
# defined here at notebook scope.
target_cols = [
    "Compressive_Stiffness_MPa",
    "Weight_Loss_Percentage",
    "Water_Absorption_Percentage",
]

# Pairwise correlation between the three output variables, drawn as an annotated heatmap.
target_corr = df[target_cols].corr()
heatmap_ax = sns.heatmap(target_corr, annot=True, cmap="coolwarm")
heatmap_ax.set_title("Correlation Matrix - Output Variables")
plt.show()


In [None]:
# Cell 6: Boxplots for Scaffold_Geometry
# One figure per output variable, grouped by scaffold geometry.
for target in target_cols:
    box_fig, box_ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(data=df, x="Scaffold_Geometry", y=target, ax=box_ax)
    box_ax.set_title(f"{target} vs Scaffold Geometry")
    # Tilt the category labels so they do not overlap.
    plt.xticks(rotation=45)
    box_fig.tight_layout()
    plt.show()


In [None]:
# Cell 7: Encode Categorical Column
# Keep the fitted encoder in a variable instead of discarding it: without it
# the integer codes written below cannot be translated back to geometry names
# (e.g. via geometry_encoder.inverse_transform) when interpreting results.
geometry_encoder = LabelEncoder()

# NOTE: the column is overwritten in place (Cell 8 reads it by this name).
# Re-running this cell after it has already run would re-fit the encoder on
# the integer codes; restart from Cell 2 to recover the original labels.
df["Scaffold_Geometry"] = geometry_encoder.fit_transform(df["Scaffold_Geometry"])

# Show which integer code stands for which original category.
geometry_labels = dict(enumerate(geometry_encoder.classes_))
print("Scaffold_Geometry codes:", geometry_labels)

df.head()


In [None]:
# Cell 8: Define Inputs and Outputs
# Model inputs: the encoded geometry plus three further predictor columns.
feature_cols = [
    "Scaffold_Geometry",
    "Porosity_Percentage",
    "Immersion_Time_Days",
    "Mechanical_Loading",
]
X = df[feature_cols]
# Model outputs: the three target columns listed in `target_cols` (Cell 5).
y = df[target_cols]

for label, frame in (("X shape:", X), ("y shape:", y)):
    print(label, frame.shape)


In [None]:
# Cell 9: Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [None]:
# Cell 10: Initial Model Training
# MultiOutputRegressor fits one RandomForestRegressor per target column.
model = MultiOutputRegressor(RandomForestRegressor(random_state=42))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Per-target metrics. `mean_squared_error(..., squared=False)` was deprecated
# in scikit-learn 1.4 and removed in 1.6, so RMSE is computed here as the
# square root of each target's MSE; averaging those values reproduces what
# `squared=False` reported with the default uniform multi-output averaging.
r2_per_target = r2_score(y_test, y_pred, multioutput="raw_values")
rmse_per_target = mean_squared_error(y_test, y_pred, multioutput="raw_values") ** 0.5

print("R² Score:", r2_score(y_test, y_pred, multioutput='uniform_average'))
print("RMSE:", rmse_per_target.mean())

# The aggregate figures average over all target columns together; also show
# each target on its own so a weak fit on one output is not hidden.
for target_name, target_r2, target_rmse in zip(y_test.columns, r2_per_target, rmse_per_target):
    print(f"  {target_name}: R² = {target_r2:.3f}, RMSE = {target_rmse:.3f}")


In [None]:
# Cell 11: Compare Predictions
# Put each prediction column next to the true targets. `assign` returns a new
# frame, so `y_test` itself is left untouched.
prediction_names = ["Pred_Stiffness", "Pred_WeightLoss", "Pred_WaterAbsorp"]
# y_pred has one column per target; transposing yields one 1-D array per name.
prediction_columns = dict(zip(prediction_names, y_pred.T))
results = y_test.assign(**prediction_columns)
results.head()
