# Notebook: Experiment template — Iris E2E

This notebook is a reproducible, end-to-end template demonstrating:
- data loading and inspection,
- preprocessing and training a pipeline,
- evaluation and model persistence,
- programmatic execution and tests.

Follow the cells sequentially. Replace or adapt to your domain as needed.

In [1]:
# 1) Importar librerías y configurar semilla

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

import time
import os

np.random.seed(42)
pd.options.display.width = 120
pd.options.display.max_columns = 200


In [2]:
# 2) Cargar y examinar Iris
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target

print(df.shape)
df.head()

df.to_csv('data/iris_sample.csv', index=False)


(150, 5)


OSError: Cannot save file into a non-existent directory: 'data'

In [None]:
# 3) Análisis exploratorio rápido

df.describe()

sns.pairplot(df, hue='target', corner=True)
plt.show()


In [None]:
# 4) Preprocesamiento y pipeline

X = df[iris.feature_names]
y = df['target']

pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('clf', LogisticRegression(max_iter=200, random_state=42))
])

X_sample = pipeline.named_steps['scaler'].fit_transform(X[:5])
print('Sample transformed shape:', X_sample.shape)


In [None]:
# 5) Separar datos y entrenar
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
start = time.time()
pipeline.fit(X_train, y_train)
end = time.time()
print('Train time (s):', end - start)
print('Coefficients:', pipeline.named_steps['clf'].coef_)


In [None]:
# 6) Validación cruzada y búsqueda de hiperparámetros
param_grid = {'clf__C': [0.01, 0.1, 1.0, 10.0]}
cv = GridSearchCV(pipeline, param_grid, cv=5)
cv.fit(X_train, y_train)
print('Best params:', cv.best_params_)
print('CV best score:', cv.best_score_)


In [None]:
# 7) Evaluación final y métricas
from sklearn.metrics import classification_report, ConfusionMatrixDisplay

preds = cv.predict(X_test)
print(classification_report(y_test, preds))

disp = ConfusionMatrixDisplay(confusion_matrix(y_test, preds))
disp.plot()
plt.show()


In [None]:
# 8) Guardar y cargar modelo
joblib.dump(cv, 'model_iris.joblib')
model_loaded = joblib.load('model_iris.joblib')
print('Loaded model score on test:', model_loaded.score(X_test, y_test))


In [None]:
# 9) Inferencia End-to-End con ejemplo realista
sample_record = X_test.iloc[0:1]
print('Input sample:')
print(sample_record)
print('Predicted:', model_loaded.predict(sample_record))
print('Probabilities:', model_loaded.predict_proba(sample_record))


# 10) Pruebas unitarias para funciones clave (pytest)
# Example simple tests inline using pytest - these are illustrative

def test_preprocess_sample():
    assert X.shape[1] == 4

# To run pytest in the notebook environment use: !pytest -q -k test_preprocess_sample


In [None]:
# 11) Ejecutar el notebook programáticamente (nbclient)

from nbclient import NotebookClient
from nbformat import read, write

nb_path = 'notebooks/experiment_template.ipynb'
nb = read(open(nb_path, 'r', encoding='utf8'), as_version=4)
client = NotebookClient(nb)
res = client.execute()
print('Execution finished; check cell outputs for errors')


In [None]:
# 12) Celdas con ejemplos de ejecución y salidas esperadas
print('Expected outputs:')
print('- DataFrame head showing 150 rows total and 5 columns (features + target)')
print('- Pairplot: visible scatter matrix per class')
print('- Best params from GridSearch and CV scores printed above')
print('- classification_report printed and non-zero accuracy')
print('- model_iris.joblib saved and loaded with same test score')
