# Imputing

In [16]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer

# Example frame used by every section below: each column has exactly one
# missing entry (a ticker in "stock", a number in "price").
original = pd.DataFrame(
    dict(
        stock=["AAPL", "GOOG", "TSLA", np.nan],
        price=[20, 30, np.nan, 50],
    )
)
display(original)

Unnamed: 0,stock,price
0,AAPL,20.0
1,GOOG,30.0
2,TSLA,
3,,50.0


## Mean Imputing

In [17]:
# Work on a copy so the `original` frame stays untouched for the other sections.
df = original.copy()

# Learn the mean of the observed prices, then fill the missing price with it.
mean_imputer = SimpleImputer(strategy="mean")
mean_imputer.fit(df[["price"]])
df[["price"]] = mean_imputer.transform(df[["price"]])

display(df)

Unnamed: 0,stock,price
0,AAPL,20.0
1,GOOG,30.0
2,TSLA,33.333333
3,,50.0


## Constant Value Imputing

In [13]:
# Work on a copy so the `original` frame stays untouched for the other sections.
df = original.copy()

# Fill the missing ticker with a fixed placeholder string rather than a
# statistic derived from the column.
constant_imputer = SimpleImputer(fill_value="MISSING", strategy="constant")
df[["stock"]] = constant_imputer.fit_transform(df[["stock"]])

display(df)

Unnamed: 0,stock,price
0,AAPL,20.0
1,GOOG,30.0
2,TSLA,
3,MISSING,50.0


## Imputing categorical values

In [18]:
# Fill the missing ticker with the most frequent value of the column. The
# imputer is named after its strategy so it does not shadow the constant-value
# imputer defined in the previous section.
most_frequent_imputer = SimpleImputer(strategy="most_frequent")
df = original.copy()
df[["stock"]] = most_frequent_imputer.fit_transform(df[["stock"]])

# Every ticker appears exactly once, so the mode is a tie. On ties
# SimpleImputer returns the smallest value, which here is "AAPL" (it also
# happens to be the first row, but row order is not what decides it).
display(df)

Unnamed: 0,stock,price
0,AAPL,20.0
1,GOOG,30.0
2,TSLA,
3,AAPL,50.0
