<a href="https://colab.research.google.com/github/plus2net/numpy/blob/main/numpy_10_nan_dtypes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

![alt text](https://www.plus2net.com/images/top2.jpg)        Read more on [NaN and dtypes](https://www.plus2net.com/python/numpy-nan-dtypes.php) | [NumPy](https://www.plus2net.com/python/numpy.php)

In [1]:
import numpy as np

# Sample vector mixing ordinary floats with NaN and both infinities.
special_values = [1.0, np.nan, np.inf, -np.inf, 5.0]
x = np.array(special_values)

# isnan flags only the NaN slot; +/-inf are not NaN.
nan_flags = np.isnan(x)
print(nan_flags)      # [False  True False False False]

# isfinite rejects NaN as well as both infinities.
finite_flags = np.isfinite(x)
print(finite_flags)   # [ True False False False  True]

[False  True False False False]
[ True False False False  True]


In [2]:
# One missing observation among three real values.
arr = np.array([1.0, np.nan, 3.0, 4.0])

# The nan* reductions skip NaN entries instead of propagating them.
# Printed in order: mean (2.666...), median (3.0), standard deviation.
for nan_safe_stat in (np.nanmean, np.nanmedian, np.nanstd):
    print(nan_safe_stat(arr))

2.6666666666666665
3.0
1.247219128924647


In [3]:
# 3x3 matrix with exactly one NaN in every column.
A = np.array([
    [1.0, np.nan, 3.0],
    [4.0, 5.0, np.nan],
    [np.nan, 7.0, 9.0],
])

# --- Impute: overwrite each NaN with the NaN-aware mean of its column ---
col_means = np.nanmean(A, axis=0)
inds = np.nonzero(np.isnan(A))      # (row_indices, col_indices) of the NaNs
A[inds] = col_means[inds[1]]        # pick the mean matching each NaN's column

# --- Filter: keep only rows whose entries are all finite ---
# isfinite rejects +/-inf as well as NaN, so such rows would be dropped too.
B = np.array([
    [1.0, 2.0],
    [np.nan, 3.0],
    [4.0, np.nan],
    [5.0, 6.0],
])
mask_rows_no_nan = np.isfinite(B).all(axis=1)
clean = B[mask_rows_no_nan]

In [5]:
# An integer array has no NaN representation, so convert to float
# before marking an element as missing.
z = np.array([1, 2, 3])
zf = z.astype(np.float64)
zf[1] = np.nan

# Narrowing float64 -> float32 keeps fewer significant digits.
f64 = np.array([1.123456789], dtype=np.float64)
f32 = f64.astype(np.float32)
print(f32)  # rounded

# casting='safe' refuses float -> int because the conversion may lose
# information; NumPy signals the refusal with a TypeError.
floats = np.asarray([1.2, 3.4])
try:
    floats.astype(np.int64, casting='safe')
except TypeError as err:
    print('Safe cast blocked:', err)

[1.1234568]
Safe cast blocked: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'


In [6]:
# Mixed-dtype arithmetic: NumPy promotes both operands to a common dtype.
a = np.array([1, 2, 3], dtype=np.int32)
b = np.array([0.5, 1.5, np.nan], dtype=np.float32)
c = a + b

# int32 + float32 arrays always promote to float64: float32 cannot hold every
# int32 value exactly, so the result is not platform-dependent.
assert c.dtype == np.float64
print(c.dtype)    # float64

float64


In [7]:
# Sentinel that the raw integer data uses to mark a missing reading.
MISSING = -999

data = np.array([1, MISSING, 3, MISSING, 5])

# Hide every sentinel entry behind a mask so reductions skip it.
m = np.ma.masked_equal(data, MISSING)

masked_mean = m.mean()
print(masked_mean)   # averages only the unmasked values
print(m.mask)        # True marks the hidden (missing) positions

3.0
[False  True False  True False]


In [9]:
# Session-wide floating-point policy: warn on divide-by-zero and invalid
# operations, stay silent on overflow/underflow. This persists for every
# cell executed afterwards.
np.seterr(divide='warn', invalid='warn', over='ignore', under='ignore')

y = np.array([1.0, 0.0, np.nan])
trapped = False

# errstate overrides the policy only inside the block and restores it on exit.
with np.errstate(divide='ignore', invalid='raise'):
    # 1/0 is a divide-by-zero (ignored here) and 1/nan merely propagates a
    # quiet NaN, so this line does not raise: out == [1., inf, nan].
    out = 1.0 / y
    try:
        # 0/0 is a genuine invalid operation, so invalid='raise' turns it
        # into a FloatingPointError.
        np.divide(0.0, y)
    except FloatingPointError:
        trapped = True
        print('Invalid operation trapped')

In [11]:
from io import StringIO

# In-memory CSV: one header row, then three data rows each with one empty field.
csv_text = "a,b,c\n1,,3\n4,5,\n,7,9\n"
csv = StringIO(csv_text)

# skip_header=1 discards the "a,b,c" line before parsing the numbers.
arr = np.genfromtxt(csv, delimiter=",", skip_header=1)

print(arr)         # empty fields are read as nan
print(arr.dtype)   # float64

[[ 1. nan  3.]
 [ 4.  5. nan]
 [nan  7.  9.]]
float64


In [12]:
# 1) Column-wise median imputation
X = np.array([
    [1.0, np.nan, 3.0],
    [np.nan, 5.0, 6.0],
    [7.0, 8.0, np.nan],
])
med = np.nanmedian(X, axis=0)       # per-column median, NaNs skipped
idx = np.nonzero(np.isnan(X))       # (row_indices, col_indices) of the NaNs
X[idx] = med[idx[1]]                # each NaN takes the median of its own column
print(X)

# 2) Integers 0..9 -> float copy, then blank out every odd position with NaN
a = np.arange(10, dtype=np.int32)
b = a.astype(np.float64)            # float copy; `a` itself stays int32

odd_positions = slice(1, None, 2)   # indices 1, 3, 5, 7, 9
b[odd_positions] = np.nan

nan_total = np.isnan(b).sum()
print(nan_total)

# 3) Mean of integer data in which -1 stands for a missing value
y = np.array([5, -1, 7, 9, -1, 2])
ym = np.ma.masked_equal(y, -1)      # hide the -1 entries behind a mask

valid_mean = ym.mean()              # computed over unmasked entries only
print(float(valid_mean))

# 4) Unsafe vs. safe float -> int casting
f = np.array([1.9, 2.1])

# 'unsafe' permits the conversion; the fractional parts are discarded.
truncated = f.astype(np.int64, casting='unsafe')
print(truncated)

# 'safe' rejects any cast that could lose information, so float64 -> int64
# is expected to raise TypeError before anything is printed.
try:
    safe_result = f.astype(np.int64, casting='safe')
except TypeError as err:
    print('Safe cast blocked:', err)
else:
    print(safe_result)

[[1.  6.5 3. ]
 [4.  5.  6. ]
 [7.  8.  4.5]]
5
5.75
[1 2]
Safe cast blocked: Cannot cast array data from dtype('float64') to dtype('int64') according to the rule 'safe'
