# Download Iris Dataset

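This notebook demonstrates several ways to download the famous Iris dataset. Run the "Prepare Directory" cell first; after that, each method is independent and saves its result to the same file, `../data/raw/iris.csv`, so running more than one method simply overwrites the previous output.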

The Iris dataset contains 150 samples of iris flowers with 4 features (sepal length, sepal width, petal length, petal width) and 3 species (setosa, versicolor, virginica).

## Prepare Directory

In [None]:
import os

# Folder that receives the downloaded CSV; the method cells below build their
# output path from this variable.
output_dir = '../data/raw'

# Create the folder together with any missing parents. exist_ok=True keeps
# this cell safe to re-run when the folder is already there.
os.makedirs(name=output_dir, exist_ok=True)

## Method 1: Using scikit-learn

In [None]:
from sklearn.datasets import load_iris
import pandas as pd

# Fetch the Iris bunch from scikit-learn; it exposes the measurements (.data),
# the integer class codes (.target) and the class names (.target_names).
iris_sklearn = load_iris()

# Lookup table from integer class code to species name, built by position
# from target_names.
species_by_code = dict(enumerate(iris_sklearn.target_names))

# Assemble the frame in one expression: four measurement columns with
# snake_case names, plus a species column translated from the class codes.
df_sklearn = pd.DataFrame(
    iris_sklearn.data,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
).assign(species=pd.Series(iris_sklearn.target).map(species_by_code))

# Persist without the index column and report the destination.
csv_path = f'{output_dir}/iris.csv'
df_sklearn.to_csv(csv_path, index=False)
print(f"Saved to {csv_path}")

print(f"Shape: {df_sklearn.shape}")
df_sklearn.head()

## Method 2: Using seaborn

In [None]:
import seaborn as sns

# Ask seaborn's sample-data loader for the dataset named 'iris'.
df_seaborn = sns.load_dataset('iris')

# Persist without the index column and report the destination.
csv_path = f'{output_dir}/iris.csv'
df_seaborn.to_csv(csv_path, index=False)
print("Saved to", csv_path)

print(f"Shape: {df_seaborn.shape}")
df_seaborn.head()

## Method 3: Using pandas from the UCI Machine Learning Repository

In [None]:
import pandas as pd

# Location of the raw data file in the UCI ML Repository. The file is read
# with header=None, so the column names are supplied here.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# Read the file and, in the same chain, strip the literal 'Iris-' prefix from
# the species labels (regex=False: plain substring replacement).
df_uci = pd.read_csv(url, header=None, names=column_names).assign(
    species=lambda frame: frame['species'].str.replace('Iris-', '', regex=False)
)

# Persist without the index column and report the destination.
csv_path = f'{output_dir}/iris.csv'
df_uci.to_csv(csv_path, index=False)
print("Saved to", csv_path)

print(f"Shape: {df_uci.shape}")
df_uci.head()

## Method 4: Using TensorFlow Datasets

In [None]:
import tensorflow_datasets as tfds
import pandas as pd

# Load the 'train' split of the TFDS iris dataset along with its metadata.
ds, info = tfds.load('iris', split='train', with_info=True, as_supervised=True)

# Each element is unpacked as a (feature, label) pair; materialise the pairs
# once, then separate them into two parallel lists.
pairs = list(tfds.as_numpy(ds))
features = [feature for feature, _ in pairs]
labels = [label for _, label in pairs]

# Integer class code -> species name.
species_by_code = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}

# One row per sample: four measurement columns plus the translated label.
df_tfds = pd.DataFrame(
    features,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)
df_tfds['species'] = pd.Series(labels).map(species_by_code)

# Persist without the index column and report the destination.
csv_path = f'{output_dir}/iris.csv'
df_tfds.to_csv(csv_path, index=False)
print("Saved to", csv_path)

print(f"Shape: {df_tfds.shape}")
df_tfds.head()