# Iris Dataset


## Importar bibliotecas

In [5]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_iris
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

## Importar dataset

In [6]:
# Load the bundled Iris dataset. The returned object exposes the `data`,
# `feature_names` and `target` attributes consumed by the next cell.
# `load_iris` is imported in the notebook's import cell at the top, so that
# every dependency is visible in one place.
iris = load_iris()
 

### Observações


In [7]:
# Build a feature table from the raw measurements and attach the class label
# as an extra `target` column.
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Quick sanity check: table dimensions and the first rows.
print("Shape:", df.shape)
print(df.head())

Shape: (150, 5)
   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


#### Verificação de valores faltantes (não é o caso, mas é bom conferir)


In [8]:
# Count null entries in each column (`isna` is the pandas alias of `isnull`).
print("Missing per column:\n", df.isna().sum())

Missing per column:
 sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
target               0
dtype: int64


In [9]:
# Separate the label column from the feature columns.
y = df['target']
X = df.drop(columns='target')

## Divisão de grupo de treino e teste

In [11]:
# Hold out 20% of the rows for testing. The split is stratified on the labels
# and uses a fixed seed so it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

### Pipeline de pré-processamento: imputar valores faltantes (média) e padronizar

In [13]:
# Numeric preprocessing: fill missing values with the column mean, then
# standardize each feature.
numeric_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler()),
    ]
)

### Aplicação do transformer nas colunas de X

In [14]:
# Route every feature column of X through the numeric preprocessing pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, X.columns),
    ]
)

### Ajuste no treino e transformação de treino e teste

In [15]:
# Fit the preprocessor on the training split only, then reuse the fitted
# preprocessor to transform the test split (no refitting on test data).
X_train_processed = preprocessor.fit_transform(X_train) 
X_test_processed = preprocessor.transform(X_test) 

In [16]:
# Report the dimensions of the preprocessed training matrix.
print("X_train_processed shape:", X_train_processed.shape)

X_train_processed shape: (120, 4)
