In [1]:
# Cell 1: Imports
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import os

# Cell 2: Load raw Iris data
# as_frame=True makes the loader expose pandas objects; the "frame" entry
# is the single DataFrame holding the feature columns plus "target".
iris = load_iris(as_frame=True)
df = iris["frame"]
print("Raw Iris data shape:", df.shape)

# Cell 3: Save raw data
# NOTE: the directory is a relative path, so it is resolved against the
# current working directory of the running kernel/process.
raw_dir = "../data/raw/"
os.makedirs(raw_dir, exist_ok=True)  # exist_ok: re-running the cell is safe

# Build the destination once and use it for both the write and the status
# message. Re-assembling the path by string concatenation in the message
# is only correct when raw_dir happens to end with a separator.
raw_path = os.path.join(raw_dir, "raw.csv")
df.to_csv(raw_path, index=False)  # index=False: do not write the row index
print(f"✅ Raw data saved to {raw_path}")

# Cell 4: Preprocessing (e.g., rename target)
# rename() returns a new frame; rebinding df means every later cell sees
# the column as "label" instead of "target".
df = df.rename(columns={"target": "label"})

# Cell 5: Split train/test
# Hold out 20% of the rows for testing. The fixed random_state makes the
# split repeatable, and stratifying on "label" keeps the class proportions
# the same in both partitions.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["label"],
)

# Cell 6: Save processed data
processed_dir = "../data/processed/"
os.makedirs(processed_dir, exist_ok=True)

# Write the full frame first, then the train and test splits. Each file is
# written into processed_dir without the row index column.
outputs = (
    ("processed.csv", df),
    ("train.csv", train_df),
    ("test.csv", test_df),
)
for out_name, out_frame in outputs:
    out_frame.to_csv(os.path.join(processed_dir, out_name), index=False)

print(f"✅ Processed data saved to {processed_dir}")


Raw Iris data shape: (150, 5)
✅ Raw data saved to ../data/raw/raw.csv
✅ Processed data saved to ../data/processed/
