In [1]:
import pandas as pd, numpy as np

counting null values: \
.isna().sum()

In [3]:
# Build a small employee table; the None entries in 'Salary' and 'Experience'
# are stored by pandas as NaN (see the printed frame).
data = dict(
    Name=['John', 'Doe', 'Alice', 'Bob', 'Chris'],
    Age=[25, 30, 22, 28, 35],
    Salary=[50000, 60000, None, 75000, 90000],
    Experience=[2, 5, 1, None, 10],
)
df = pd.DataFrame(data)
print(df, end='\n\n')

# Per-column NaN totals for the whole frame: isna() marks the missing cells,
# sum() counts the marks in each column.
nan_count = df.isna().sum()

# The single-column counts are just entries of that summary.
salary_nan_count = nan_count['Salary']
exp_nan_count = nan_count['Experience']
print(f"{salary_nan_count}, {exp_nan_count}", end='\n\n')

print(nan_count, end='\n\n')


    Name  Age   Salary  Experience
0   John   25  50000.0         2.0
1    Doe   30  60000.0         5.0
2  Alice   22      NaN         1.0
3    Bob   28  75000.0         NaN
4  Chris   35  90000.0        10.0

1, 1

Name          0
Age           0
Salary        1
Experience    1
dtype: int64



working with null values: \
.dropna()

In [4]:
# Labelled Series with one missing value (at 'white').
# Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
ser = pd.Series(
    [0, 1, 2, np.nan, 9],
    index=['red', 'blue', 'yellow', 'white', 'green']
)
print(ser)
print()

print("a few ways of deleting NaN: can be done manually, but it's tricky and doesn't guarantee that you'll \
delete them all, thus dropna() function is used")
# dropna() returns a new Series and leaves the original unchanged,
# so the result must be assigned back for `ser` to lose its NaN entries.
ser = ser.dropna()
print(ser)
print()

red       0.0
blue      1.0
yellow    2.0
white     NaN
green     9.0
dtype: float64

a few ways of deleting NaN: can be done manually, but it's tricky and doesn't guarantee that you'll delete them all, thus dropna() function is used
red       0.0
blue      1.0
yellow    2.0
green     9.0
dtype: float64



An alternative is to filter the Series with a boolean mask of its non-null entries: \
.notnull()

In [5]:
# Same Series as before, with one missing value at 'white'.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises AttributeError there.
ser_2 = pd.Series(
    [0, 1, 2, np.nan, 9],
    index=['red', 'blue', 'yellow', 'white', 'green']
)
print(ser_2)
print()

# notnull() yields a boolean mask (True where a value is present);
# indexing with it keeps only the non-missing entries.
ser_2 = ser_2[ser_2.notnull()]
print(ser_2)
print()

red       0.0
blue      1.0
yellow    2.0
white     NaN
green     9.0
dtype: float64

red       0.0
blue      1.0
yellow    2.0
green     9.0
dtype: float64



With a DataFrame, however, dropna() by default deletes every row that contains at least one NaN. \
This is dangerous: in the example below every row has a NaN, so all the data is lost.

In [6]:
# 3x3 frame described column by column; every row holds at least one NaN
# because the whole 'mug' column is missing.
frame3 = pd.DataFrame(
    {
        'ball': [6, np.nan, 2],
        'mug': [np.nan, np.nan, np.nan],
        'pen': [6, np.nan, 5],
    },
    index=['blue', 'green', 'red'],
)
print(frame3, end='\n\n')

# Default dropna behaviour spelled out: drop any row containing a NaN.
# Here that removes all three rows, leaving an empty frame.
frame3 = frame3.dropna(axis=0, how='any')
print(frame3, end='\n\n')

       ball  mug  pen
blue    6.0  NaN  6.0
green   NaN  NaN  NaN
red     2.0  NaN  5.0

Empty DataFrame
Columns: [ball, mug, pen]
Index: []



To avoid this, use how='all', which drops only the rows in which every value is NaN: \
.dropna(how='all')

In [7]:
# Same 3x3 frame, built from a 2-D float array; the 'green' row is entirely NaN.
frame4 = pd.DataFrame(
    np.array([
        [6, np.nan, 6],
        [np.nan, np.nan, np.nan],
        [2, np.nan, 5],
    ]),
    columns=['ball', 'mug', 'pen'],
    index=['blue', 'green', 'red'],
)
print(frame4, end='\n\n')

# how='all' removes only the rows whose values are all NaN ('green' here);
# rows with partial data are kept.
frame4 = frame4.dropna(axis=0, how='all')
print(frame4, end='\n\n')

       ball  mug  pen
blue    6.0  NaN  6.0
green   NaN  NaN  NaN
red     2.0  NaN  5.0

      ball  mug  pen
blue   6.0  NaN  6.0
red    2.0  NaN  5.0



instead of deleting the column / row, \
use .fillna()

In [9]:
# Rebuild the 3x3 frame row by row, keyed by the row label.
frame3 = pd.DataFrame.from_dict(
    {
        'blue': [6, np.nan, 6],
        'green': [np.nan, np.nan, np.nan],
        'red': [2, np.nan, 5],
    },
    orient='index',
    columns=['ball', 'mug', 'pen'],
)
print(frame3, end='\n\n')

# Replace every missing value with 0 instead of dropping rows.
frame3 = frame3.fillna(value=0)
print(frame3, end='\n\n')

       ball  mug  pen
blue    6.0  NaN  6.0
green   NaN  NaN  NaN
red     2.0  NaN  5.0

       ball  mug  pen
blue    6.0  0.0  6.0
green   0.0  0.0  0.0
red     2.0  0.0  5.0



Or pass a dict to .fillna() to give each column its own fill value

In [10]:
# 3x3 frame with missing values; 'mug' is entirely NaN.
frame7 = pd.DataFrame(
    data=[
        [6, np.nan, 6],
        [np.nan, np.nan, np.nan],
        [2, np.nan, 5],
    ],
    columns=['ball', 'mug', 'pen'],
    index=['blue', 'green', 'red'],
)
print(frame7, end='\n\n')

# Column-specific replacements: each key names a column, each value is what
# that column's NaN entries become.
frame7 = frame7.fillna(value=dict(ball=1, mug=0, pen=5))
print(frame7, end='\n\n')

       ball  mug  pen
blue    6.0  NaN  6.0
green   NaN  NaN  NaN
red     2.0  NaN  5.0

       ball  mug  pen
blue    6.0  0.0  6.0
green   1.0  0.0  5.0
red     2.0  0.0  5.0

