In [1]:
# Importing necessary libraries
import pandas as pd  # Importing pandas to work with DataFrames
from sklearn.impute import SimpleImputer  # Importing SimpleImputer to handle missing data


In [2]:
# Toy dataset with two numeric columns; None marks a missing entry.
data = {
    'Age': [25, None, 30, 22, None],
    'Salary': [50000, 60000, None, 40000, 35000],
}


In [3]:
# Build the DataFrame from the dictionary and show it before any imputation.
df = pd.DataFrame(data=data)
print(df)


    Age   Salary
0  25.0  50000.0
1   NaN  60000.0
2  30.0      NaN
3  22.0  40000.0
4   NaN  35000.0


In [4]:
# Mean imputation: each missing value is replaced by the mean of its column.
imputer = SimpleImputer(strategy='mean')  # strategy='mean' selects the column mean as the fill value


In [5]:
# Fit the imputer on the 'Age' column alone and fill its gaps with that mean.
# The double brackets select a one-column DataFrame, so the input is 2-D.
age_filled = imputer.fit_transform(df[['Age']])
df['Age'] = age_filled



In [6]:
# Same procedure for 'Salary': fit_transform() re-fits the imputer, so the
# fill value is the mean of 'Salary' itself, not the one learned for 'Age'.
salary_filled = imputer.fit_transform(df[['Salary']])
df['Salary'] = salary_filled


In [7]:
# Display the DataFrame again to confirm that no missing values remain.
print(df)


         Age   Salary
0  25.000000  50000.0
1  25.666667  60000.0
2  30.000000  46250.0
3  22.000000  40000.0
4  25.666667  35000.0
