In [None]:
import numpy as np
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler

# Demo: KNN-impute a small matrix, standardize it, and append a synthetic
# target column.

# Column labels used for the final printout.
# NOTE(review): only two feature names are listed, while the sample matrix
# below has three feature columns (four columns once the target is appended).
# Confirm the intended labels before relying on this list.
target_name = "outcome"  # synthetic target, created purely for demonstration
feature_names = ["chicken", "egg"]
feature_names_with_target = [*feature_names, target_name]

# Toy 3x3 input with exactly one missing entry (NaN) per row.
data = np.array(
    [
        [1.0, np.nan, 3.0],
        [4.0, 5.0, np.nan],
        [np.nan, 8.0, 9.0],
    ]
)

# Fill the gaps with a k-nearest-neighbours imputer (k=2; tune as needed).
knn_imputer = KNNImputer(n_neighbors=2)
data_imputed = knn_imputer.fit_transform(data)

# Standardize the completed matrix column by column.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_imputed)

# The demo target is the row-wise sum of the imputed (unscaled) values; it is
# appended as the last column next to the scaled features.
data_with_names = np.column_stack([data_scaled, np.sum(data_imputed, axis=1)])

# Report the raw input, the processed matrix, and the label list.
print("Original Data:", data, sep="\n")
print("\nImputed and Scaled Data:", data_with_names, sep="\n")
print(
    "\nFeature Names (Including 'Chicken' and 'Egg'):",
    feature_names_with_target,
    sep="\n",
)


Original Data:
[[ 1. nan  3.]
 [ 4.  5. nan]
 [nan  8.  9.]]

Imputed and Scaled Data:
[[-1.22474487  0.         -1.22474487 10.5       ]
 [ 1.22474487 -1.22474487  0.         15.        ]
 [ 0.          1.22474487  1.22474487 19.5       ]]

Feature Names (Including 'Chicken' and 'Egg'):
['chicken', 'egg', 'outcome']
