In [None]:
# Title: Data Cleaning using Pandas
# Description: Check for missing values and handle them by imputing the median.

In [1]:
import pandas as pd
import numpy as np

# Create a sample DataFrame with missing values
data = {'A': [1, 2, np.nan, 4, 5, 6, 7, 8, 9, 10],
        'B': [11, 12, 13, 14, np.nan, 16, 17, 18, 19, 20],
        'C': [21, 22, 23, 24, 25, 26, 27, 28, 29, np.nan]}
df = pd.DataFrame(data)

# Check for missing values
print("Original DataFrame with Missing Values:")
print(df)

print("\nMissing values per column:")
print(df.isnull().sum())

# Handle missing values by imputing the median
for column in df.columns:
    median_val = df[column].median()
    df[column].fillna(median_val, inplace=True)

# Verify that there are no more missing values
print("\nDataFrame after imputing missing values with the median:")
print(df)

print("\nMissing values per column after imputation:")
print(df.isnull().sum())


Original DataFrame with Missing Values:
      A     B     C
0   1.0  11.0  21.0
1   2.0  12.0  22.0
2   NaN  13.0  23.0
3   4.0  14.0  24.0
4   5.0   NaN  25.0
5   6.0  16.0  26.0
6   7.0  17.0  27.0
7   8.0  18.0  28.0
8   9.0  19.0  29.0
9  10.0  20.0   NaN

Missing values per column:
A    1
B    1
C    1
dtype: int64

DataFrame after imputing missing values with the median:
      A     B     C
0   1.0  11.0  21.0
1   2.0  12.0  22.0
2   6.0  13.0  23.0
3   4.0  14.0  24.0
4   5.0  16.0  25.0
5   6.0  16.0  26.0
6   7.0  17.0  27.0
7   8.0  18.0  28.0
8   9.0  19.0  29.0
9  10.0  20.0  25.0

Missing values per column after imputation:
A    0
B    0
C    0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[column].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values alway