# Missing values
Using isnull(): this method returns a DataFrame of the same shape as the original, with boolean values indicating whether each entry is missing (True) or not (False).

In [1]:
import pandas as pd
import numpy as np
# Build a small example frame; columns A and B contain NaN gaps, C is complete
data = dict(
    A=[1, 2, np.nan, 4],
    B=[5, np.nan, np.nan, 8],
    C=[9, 10, 11, 12],
)
df = pd.DataFrame(data)
# Boolean mask with the same shape as df: True marks a missing entry
missing_values = df.isna()
print("Missing values in the DataFrame:\n", missing_values)

Missing values in the DataFrame:
        A      B      C
0  False  False  False
1  False   True  False
2   True   True  False
3  False  False  False


In [2]:
# Complement of the missing-value mask: True where a value is present
not_missing_values = df.notna()
print("Non-missing values in the DataFrame:\n", not_missing_values)

Non-missing values in the DataFrame:
        A      B     C
0   True   True  True
1   True  False  True
2  False  False  True
3   True   True  True


In [3]:
# Summing the boolean mask down each column counts that column's NaN entries
missing_count_per_column = df.isna().sum(axis=0)
print("Number of missing values in each column:\n", missing_count_per_column)

Number of missing values in each column:
 A    1
B    2
C    0
dtype: int64


In [4]:
# First sum gives per-column NaN counts; second sum collapses them to one total
total_missing_count = df.isna().sum(axis=0).sum()
print("Total number of missing values in the DataFrame:\n", total_missing_count)

Total number of missing values in the DataFrame:
 3


In [5]:
# True as soon as at least one entry of column 'A' is NaN
missing_in_column_A = df.loc[:, 'A'].isna().any()
print(f"Are there any missing values in column 'A'? {missing_in_column_A}")

Are there any missing values in column 'A'? True


In [6]:
# Row-wise any() flags every row holding at least one NaN; keep only those rows
rows_with_missing = df.loc[df.isna().any(axis="columns")]
print("Rows with any missing values:\n", rows_with_missing)

Rows with any missing values:
      A   B   C
1  2.0 NaN  10
2  NaN NaN  11


You can drop rows that contain any missing values using dropna(axis=0):


In [8]:
import pandas as pd
import numpy as np
# Rebuild the example frame so this section runs on its own
data = dict(
    A=[1, 2, np.nan, 4],
    B=[5, np.nan, np.nan, 8],
    C=[9, 10, 11, 12],
)
df = pd.DataFrame(data)
# Discard every row that has a NaN in any column; df itself is left untouched
df_dropped_rows = df.dropna(axis="index")
print("DataFrame after dropping rows with any missing values:\n", df_dropped_rows)

DataFrame after dropping rows with any missing values:
      A    B   C
0  1.0  5.0   9
3  4.0  8.0  12


In [9]:
# Discard every column that has a NaN in any row; only complete columns remain
df_dropped_columns = df.dropna(axis="columns")
print("DataFrame after dropping columns with any missing values:\n", df_dropped_columns)

DataFrame after dropping columns with any missing values:
     C
0   9
1  10
2  11
3  12


# Filling Missing Values
a. Filling with a Specific Value
You can fill missing values with a specific value using fillna(value):

In [10]:
# Replace every NaN with the constant 0, returning a new frame
df_filled_value = df.fillna(value=0)
print("DataFrame after filling missing values with 0:\n", df_filled_value)

DataFrame after filling missing values with 0:
      A    B   C
0  1.0  5.0   9
1  2.0  0.0  10
2  0.0  0.0  11
3  4.0  8.0  12


In [11]:
# Forward fill: each NaN takes the last valid value above it in the same column.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling,
# which emits a FutureWarning on recent pandas releases.
df_filled_ffill = df.ffill()
print("DataFrame after forward filling missing values:\n", df_filled_ffill)

DataFrame after forward filling missing values:
      A    B   C
0  1.0  5.0   9
1  2.0  5.0  10
2  2.0  5.0  11
3  4.0  8.0  12


  df_filled_ffill = df.fillna(method='ffill')


In [12]:
# Backward fill: each NaN takes the next valid value below it in the same column.
# DataFrame.bfill() replaces the deprecated fillna(method='bfill') spelling,
# which emits a FutureWarning on recent pandas releases.
df_filled_bfill = df.bfill()
print("DataFrame after backward filling missing values:\n", df_filled_bfill)

DataFrame after backward filling missing values:
      A    B   C
0  1.0  5.0   9
1  2.0  8.0  10
2  4.0  8.0  11
3  4.0  8.0  12


  df_filled_bfill = df.fillna(method='bfill')


In [13]:
# df.mean() yields one mean per column (NaN entries are skipped by default);
# fillna aligns that Series on column labels, so each gap gets its own column's mean
df_filled_mean = df.fillna(value=df.mean(axis=0))
print("DataFrame after filling missing values with the mean:\n", df_filled_mean)

DataFrame after filling missing values with the mean:
           A    B   C
0  1.000000  5.0   9
1  2.000000  6.5  10
2  2.333333  6.5  11
3  4.000000  8.0  12


In [14]:
# df.median() yields one median per column (NaN entries are skipped by default);
# fillna aligns that Series on column labels, so each gap gets its own column's median
df_filled_median = df.fillna(value=df.median(axis=0))
print("DataFrame after filling missing values with the median:\n", df_filled_median)

DataFrame after filling missing values with the median:
      A    B   C
0  1.0  5.0   9
1  2.0  6.5  10
2  2.0  6.5  11
3  4.0  8.0  12


In [15]:
def _fill_with_mode(column):
    """Return `column` with its NaN entries replaced by the column's mode.

    The mode is computed once. When several values tie, the first entry of
    `column.mode()` is used. A column with no mode at all (for example, one
    that is entirely NaN) is returned unchanged.
    """
    modes = column.mode()
    if modes.empty:
        return column
    # Positional lookup: take the first mode regardless of index labels
    return column.fillna(modes.iloc[0])


# Filling missing values with the mode of each column
df_filled_mode = df.apply(_fill_with_mode)
print("DataFrame after filling missing values with the mode:\n", df_filled_mode)

DataFrame after filling missing values with the mode:
      A    B   C
0  1.0  5.0   9
1  2.0  5.0  10
2  1.0  5.0  11
3  4.0  8.0  12
