### Handling Missing Data

In [1]:
import numpy as np

# 1. Identifying Missing Values
arr = np.array([1, 2, np.nan, 4, np.nan, 6])
print("Array with NaN values:\n", arr)  # Output: [ 1.  2. nan  4. nan  6.]

print("np.isnan(arr):\n", np.isnan(arr))  # Output: [False False  True False  True False]

print("np.nan_to_num(arr): \n", np.nan_to_num(arr))  # Output: [1. 2. 0. 4. 0. 6.]

print("np.isnan(arr).any():\n", np.isnan(arr).any())  # Output: True

print("np.isnan(arr).sum():\n", np.isnan(arr).sum())  # Output: 2

# Checking for NaT (Not a Time) values
arr_datetime = np.array([np.datetime64('2024-01-01'), np.datetime64('NaT')])
print("np.isnat(arr_datetime):\n", np.isnat(arr_datetime))  # Output: [False  True]

# 2. Removing Missing Data
arr2d = np.array([[1, 2, np.nan], [4, 5, 6], [7, np.nan, 9]])
rows_to_keep = ~np.isnan(arr2d).any(axis=1)
cols_to_keep = ~np.isnan(arr2d).any(axis=0)
arr_cleaned = arr2d[np.ix_(rows_to_keep, cols_to_keep)]
print("Cleaned array:\n", arr_cleaned)  # Output: [[4. 5.]]

# 3. Imputing Missing Data
arr = np.array([[1, 2, np.nan], [4, 5, 6], [7, np.nan, 9]])
mean_of_each_column = np.nanmean(arr, axis=0)
nan_mask = np.isnan(arr)
arr[nan_mask] = np.take(mean_of_each_column, np.where(nan_mask)[1])
print("Array after mean imputation (per column):\n", arr)  # Output: [[1.  2.  7.5] [4.  5.  6. ] [7.  3.5 9. ]]

mean_of_all_elements = np.nanmean(arr)
arr_all_mean = np.array([[1, 2, np.nan], [4, 5, 6], [7, np.nan, 9]])
arr_all_mean[np.isnan(arr_all_mean)] = mean_of_all_elements
print("Array after mean imputation (all elements):\n", arr_all_mean)  # Output: [[1.  2.  5.25] [4.  5.  6.  ] [7.  5.25 9.  ]]


Array with NaN values:
 [ 1.  2. nan  4. nan  6.]
np.isnan(arr):
 [False False  True False  True False]
np.nan_to_num(arr): 
 [1. 2. 0. 4. 0. 6.]
np.isnan(arr).any():
 True
np.isnan(arr).sum():
 2
np.isnat(arr_datetime):
 [False  True]
Cleaned array:
 [[4.]]
Array after mean imputation (per column):
 [[1.  2.  7.5]
 [4.  5.  6. ]
 [7.  3.5 9. ]]
Array after mean imputation (all elements):
 [[1. 2. 5.]
 [4. 5. 6.]
 [7. 5. 9.]]
