# Ingeniería de Características

In [None]:
import pandas as pd
import numpy as np
import pickle

## 1. Cargamos dataset

In [2]:
df = pd.read_csv('../data/processed/df_test.csv')
df.head(10)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,98,58,33,190,34.0,0.43,43,0
1,2,112,75,32,0,35.7,0.148,21,0
2,2,108,64,0,0,30.8,0.158,21,0
3,8,107,80,0,0,24.6,0.856,34,0
4,7,136,90,0,0,29.9,0.21,50,0
5,6,103,72,32,190,37.7,0.324,55,0
6,1,71,48,18,76,20.4,0.323,22,0
7,0,117,0,0,0,33.8,0.932,44,0
8,4,154,72,29,126,31.3,0.338,37,0
9,5,147,78,0,0,33.7,0.218,65,0


## 2. Eliminamos variables con muchos faltantes

In [3]:
df.drop(['SkinThickness', 'Insulin'], axis=1, inplace=True)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,98,58,34.0,0.43,43,0
1,2,112,75,35.7,0.148,21,0
2,2,108,64,30.8,0.158,21,0
3,8,107,80,24.6,0.856,34,0
4,7,136,90,29.9,0.21,50,0


## 3. Ingeniería de características

### 3.1 Imputación de variables

In [4]:
proporcion_ceros = (df == 0).mean()
proporcion_ceros

Pregnancies                 0.129870
Glucose                     0.000000
BloodPressure               0.071429
BMI                         0.025974
DiabetesPedigreeFunction    0.000000
Age                         0.000000
Outcome                     0.642857
dtype: float64

In [5]:
cols_imputacion = ["Glucose", "BloodPressure", "BMI"]

feature_eng_configs = {}

for col in cols_imputacion:
    media = int(df[col].mean())
    imputed_key = f"{col}_imputed_value"  
    feature_eng_configs[imputed_key] = media
    df[col] = df[col].replace(0, media).astype(int)

### Guardamos valores imputados como artefacto

In [6]:
with open("../artifacts/feature_eng_configs.pkl", "wb") as f:
    pickle.dump(feature_eng_configs, f)

## 5. Guardamos dataset procesado

In [8]:
df.to_csv('../data/processed/features_for_model.csv', index=False)

In [7]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,98,58,34,0.430,43,0
1,2,112,75,35,0.148,21,0
2,2,108,64,30,0.158,21,0
3,8,107,80,24,0.856,34,0
4,7,136,90,29,0.210,50,0
...,...,...,...,...,...,...,...
149,9,165,88,30,0.302,49,1
150,1,77,56,33,1.251,24,0
151,8,95,72,36,0.485,57,0
152,2,146,70,28,0.337,29,1
