### Imputing Missing Data
**Description**: Impute missing data using various strategies like mean, median, or mode.

In [1]:
# Write your code from here
import pandas as pd
from sklearn.impute import SimpleImputer

# Create a sample DataFrame with missing values
data = {'A': [1, 2, None, 4, 5],
        'B': [None, 6, 7, None, 9],
        'C': ['x', 'y', 'x', None, 'y'],
        'D': [1.1, 2.2, 3.3, None, 5.5]}
df = pd.DataFrame(data)

print("Original DataFrame:")
print(df)

# 1. Imputing numerical missing values with the mean
mean_imputer = SimpleImputer(strategy='mean')
df['A_mean_imputed'] = mean_imputer.fit_transform(df[['A']])
df['B_mean_imputed'] = mean_imputer.fit_transform(df[['B']])

print("\nDataFrame after mean imputation:")
print(df)

# 2. Imputing numerical missing values with the median
median_imputer = SimpleImputer(strategy='median')
df['A_median_imputed'] = median_imputer.fit_transform(df[['A']])
df['B_median_imputed'] = median_imputer.fit_transform(df[['B']])

print("\nDataFrame after median imputation:")
print(df)

# 3. Imputing categorical missing values with the most frequent value (mode)
mode_imputer = SimpleImputer(strategy='most_frequent')
df['C_mode_imputed'] = mode_imputer.fit_transform(df[['C']])

print("\nDataFrame after mode imputation:")
print(df)

# 4. Imputing numerical missing values with a constant value
constant_imputer = SimpleImputer(strategy='constant', fill_value=-99)
df['D_constant_imputed'] = constant_imputer.fit_transform(df[['D']])

print("\nDataFrame after constant imputation:")
print(df)

# Note: It's generally good practice to apply imputation to a copy of the DataFrame
# to keep the original data intact. For example:
df_imputed = df.copy()
mean_imputer_copy = SimpleImputer(strategy='mean')
df_imputed[['A', 'B']] = mean_imputer_copy.fit_transform(df_imputed[['A', 'B']])
print("\nImputed DataFrame (copy):")
print(df_imputed)

Original DataFrame:
     A    B     C    D
0  1.0  NaN     x  1.1
1  2.0  6.0     y  2.2
2  NaN  7.0     x  3.3
3  4.0  NaN  None  NaN
4  5.0  9.0     y  5.5

DataFrame after mean imputation:
     A    B     C    D  A_mean_imputed  B_mean_imputed
0  1.0  NaN     x  1.1             1.0        7.333333
1  2.0  6.0     y  2.2             2.0        6.000000
2  NaN  7.0     x  3.3             3.0        7.000000
3  4.0  NaN  None  NaN             4.0        7.333333
4  5.0  9.0     y  5.5             5.0        9.000000

DataFrame after median imputation:
     A    B     C    D  A_mean_imputed  B_mean_imputed  A_median_imputed  \
0  1.0  NaN     x  1.1             1.0        7.333333               1.0   
1  2.0  6.0     y  2.2             2.0        6.000000               2.0   
2  NaN  7.0     x  3.3             3.0        7.000000               3.0   
3  4.0  NaN  None  NaN             4.0        7.333333               4.0   
4  5.0  9.0     y  5.5             5.0        9.000000        

ValueError: 2