In [1]:
!pip install pandas seaborn numpy

import pandas as pd
import seaborn as sns
import numpy as np




In [2]:
# Pull the iris sample data through seaborn's dataset loader
df = sns.load_dataset('iris')

# Blank out a few numeric entries (label slices, end label included) so that
# the later cells have missing values to work with
nan_targets = {'sepal_width': slice(2, 3), 'petal_length': slice(5, 6)}
for col_name, row_labels in nan_targets.items():
    df.loc[row_labels, col_name] = np.nan

# 1. Show the first 10 rows of the frame, including the injected NaNs
print("Original Data (First 10 rows):", df.head(10), sep="\n")


Original Data (First 10 rows):
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          NaN           1.3          0.2  setosa
3           4.6          NaN           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
5           5.4          3.9           NaN          0.4  setosa
6           4.6          3.4           NaN          0.3  setosa
7           5.0          3.4           1.5          0.2  setosa
8           4.4          2.9           1.4          0.2  setosa
9           4.9          3.1           1.5          0.1  setosa


In [3]:
# Count the missing entries in each column
null_counts = df.isnull().sum()
print("\nNull Values in Dataset:", null_counts, sep="\n")



Null Values in Dataset:
sepal_length    0
sepal_width     2
petal_length    2
petal_width     0
species         0
dtype: int64


In [4]:
# Select the numeric columns; the statistics below are computed on these only
numeric_cols = df.select_dtypes(include=['number']).columns
numeric_data = df[numeric_cols]

print("\nMean of Numeric Columns:", numeric_data.mean(), sep="\n")

print("\nMedian of Numeric Columns:", numeric_data.median(), sep="\n")

# mode() returns a frame that can have several rows; only its first row is shown
first_mode_row = numeric_data.mode().iloc[0]
print("\nMode of Numeric Columns:", first_mode_row, sep="\n")



Mean of Numeric Columns:
sepal_length    5.843333
sepal_width     3.056081
petal_length    3.787838
petal_width     1.199333
dtype: float64

Median of Numeric Columns:
sepal_length    5.8
sepal_width     3.0
petal_length    4.4
petal_width     1.3
dtype: float64

Mode of Numeric Columns:
sepal_length    5.0
sepal_width     3.0
petal_length    1.5
petal_width     0.2
Name: 0, dtype: float64


In [5]:
# Mean imputation: fill each numeric column's NaNs with that column's mean.
# fillna() with a Series keyed by column name returns a new frame and leaves
# columns that are absent from the Series untouched.
column_means = df[numeric_cols].mean()
df_mean = df.fillna(column_means)

print("\nData after replacing NaNs with Mean:", df_mean.head(10), sep="\n")



Data after replacing NaNs with Mean:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1     3.500000      1.400000          0.2  setosa
1           4.9     3.000000      1.400000          0.2  setosa
2           4.7     3.056081      1.300000          0.2  setosa
3           4.6     3.056081      1.500000          0.2  setosa
4           5.0     3.600000      1.400000          0.2  setosa
5           5.4     3.900000      3.787838          0.4  setosa
6           4.6     3.400000      3.787838          0.3  setosa
7           5.0     3.400000      1.500000          0.2  setosa
8           4.4     2.900000      1.400000          0.2  setosa
9           4.9     3.100000      1.500000          0.1  setosa


In [6]:
# Median imputation: fill each numeric column's NaNs with that column's median.
# fillna() with a Series keyed by column name returns a new frame and leaves
# columns that are absent from the Series untouched.
column_medians = df[numeric_cols].median()
df_median = df.fillna(column_medians)

print("\nData after replacing NaNs with Median:", df_median.head(10), sep="\n")



Data after replacing NaNs with Median:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.0           1.3          0.2  setosa
3           4.6          3.0           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
5           5.4          3.9           4.4          0.4  setosa
6           4.6          3.4           4.4          0.3  setosa
7           5.0          3.4           1.5          0.2  setosa
8           4.4          2.9           1.4          0.2  setosa
9           4.9          3.1           1.5          0.1  setosa


In [7]:
# Mode imputation, restricted to the numeric columns.
# mode() returns a frame that can have several rows; its first row supplies
# the fill value for each column.
mode_values = df[numeric_cols].mode().iloc[0]

# fillna() with a Series keyed by column name returns a new frame and leaves
# columns that are absent from the Series untouched.
df_mode = df.fillna(mode_values)

print("\nData after replacing NaNs with Mode:", df_mode.head(10), sep="\n")



Data after replacing NaNs with Mode:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.0           1.3          0.2  setosa
3           4.6          3.0           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
5           5.4          3.9           1.5          0.4  setosa
6           4.6          3.4           1.5          0.3  setosa
7           5.0          3.4           1.5          0.2  setosa
8           4.4          2.9           1.4          0.2  setosa
9           4.9          3.1           1.5          0.1  setosa
