In [None]:
# Cell 1: Imports
# pandas handles the tabular data; matplotlib and seaborn draw the plots.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Cosmetic only: apply seaborn's "whitegrid" style globally so the plots in the
# following cells share one look. Removing this line changes appearance, not data.
# NOTE(review): seaborn documents set() as an alias of set_theme(); consider
# switching once the minimum seaborn version for this project is confirmed.
sns.set(style="whitegrid")


In [None]:
# Cell 2: Load the dataset
# Read the printability CSV into `df`, the DataFrame the following cells inspect
# and plot. The path is relative, so it resolves against the kernel's working
# directory. If the file cannot be read, read_csv raises and the confirmation
# line below is never printed.
csv_path = "../data/extended_printability_dataset_with_gelatin_silk.csv"
df = pd.read_csv(csv_path)
print("Dataset loaded successfully!")


In [None]:
# Cell 3: Basic info
# Print the frame's structural summary: column names, non-null counts, dtypes
# and memory usage. info() writes to stdout and returns None, so nothing else
# is displayed by this cell.
df.info()


In [None]:
# Cell 4: First few rows
# head() is left as the cell's last expression so the notebook renders the
# leading rows (pandas' default is 5) as a rich table instead of plain text.
df.head()


In [None]:
# Cell 5: Summary statistics
# include='all' makes describe() cover every column rather than only the
# numeric ones; statistics that do not apply to a column's dtype show as NaN.
df.describe(include='all')


In [None]:
# Cell 6: Check for missing values
# Render the boolean null-mask of `df` as a heatmap (rows = records,
# columns = features) so gaps in the data are visible at a glance.
# The colour bar is suppressed because the mask only has two values.
null_mask = df.isnull()
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(null_mask, cbar=False, cmap='viridis', ax=ax)
ax.set_title("Missing Values Heatmap")
plt.show()


In [None]:
# Cell 7: Target distribution
# Show how many rows fall into each class of the target column. The column is
# looked up first so a dataset without it yields a message instead of a
# seaborn error.
target_column = 'Printable'
if target_column not in df.columns:
    print(f"Target column '{target_column}' not found.")
else:
    ax = sns.countplot(data=df, x=target_column)
    ax.set_title(f"Distribution of {target_column}")
    plt.show()


In [None]:
# Cell 8: Explore the new column: Used_crosslinker
# Count plot of Used_crosslinker, split by the Printable label when it exists.
#
# Both column names are validated before plotting. seaborn raises when `x` or
# `hue` names a column that is not present in `data`, so passing the label as
# an unchecked hue would crash this cell on a dataset that lacks it. When the
# label is missing the cell falls back to a plain (un-split) count plot.
crosslinker_column = 'Used_crosslinker'
label_column = 'Printable'

if crosslinker_column not in df.columns:
    print(f"Column '{crosslinker_column}' not found.")
else:
    has_label = label_column in df.columns
    if not has_label:
        print(f"Column '{label_column}' not found; plotting without hue.")

    fig, ax = plt.subplots()
    sns.countplot(
        data=df,
        x=crosslinker_column,
        hue=label_column if has_label else None,
        ax=ax,
    )
    if has_label:
        ax.set_title(f"{crosslinker_column} vs. {label_column}")
    else:
        ax.set_title(f"Distribution of {crosslinker_column}")
    # Tilt the category labels so longer names do not overlap.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()


In [None]:
# Cell 9: Explore categorical features
# One count plot per categorical column, split by the Printable label when it
# exists.
#
# - The label column is checked once up front: seaborn raises if `hue` names a
#   column missing from `data`, so without the check a dataset lacking the
#   label would abort the loop on its first plot.
# - Columns absent from `df` are reported instead of being skipped silently,
#   matching the "not found" messages printed by the earlier cells.
# - 'Used_crosslinker' is also plotted on its own in Cell 8; it is kept here so
#   this cell still shows all categorical features side by side.
# NOTE(review): 'Remarks' may be free text with many distinct values, which
# would make its count plot hard to read — confirm against the data.
cat_cols = ['Needle', 'Remarks', 'Used_crosslinker']
label_column = 'Printable'

has_label = label_column in df.columns
if not has_label:
    print(f"Column '{label_column}' not found; plotting without hue.")

for col in cat_cols:
    if col not in df.columns:
        print(f"Column '{col}' not found.")
        continue

    fig, ax = plt.subplots(figsize=(8, 4))
    sns.countplot(
        data=df,
        x=col,
        hue=label_column if has_label else None,
        ax=ax,
    )
    if has_label:
        ax.set_title(f'{col} vs. {label_column}')
    else:
        ax.set_title(f'Distribution of {col}')
    # Tilt the category labels so longer names do not overlap.
    ax.tick_params(axis='x', labelrotation=45)
    fig.tight_layout()
    plt.show()
    # Release the figure explicitly so figures do not accumulate across
    # iterations on backends where show() does not close them.
    plt.close(fig)
