In [0]:
# Sample rows as (id, name, salary) tuples. `None` marks a missing value, so the
# set covers: salary missing, name missing, and both missing.
data = [
    (1, "Alice", None),
    (2, None, 2000),
    (3, "Charlie", None),
    (4, None, None),
]

# Only column names are supplied; column types are inferred from the values.
df = spark.createDataFrame(data, schema=["id", "name", "salary"])

# Render the starting DataFrame so later cells can be compared against it.
df.display()

In [0]:
# how="any" (the default): drop a row as soon as one of its columns is null.
# Every sample row is missing a name or a salary, so this result is empty.
rows_without_any_null = df.na.drop(how="any")
rows_without_any_null.display()

# how="all": drop a row only when every column is null.
# `id` is always populated in the sample data, so no row is removed here.
rows_not_entirely_null = df.na.drop(how="all")
rows_not_entirely_null.display()

# subset=[...]: only the listed columns are inspected for nulls.
rows_with_name = df.na.drop(subset=["name"])
rows_with_name.display()


In [0]:
# Fill nulls with a single value. The replacement only applies to columns whose
# type matches the value: "Unknown" is a string, so only `name` is filled and
# nulls in the numeric `salary` column are left untouched.
df.na.fill("Unknown").display()

# Fill nulls with a different value per column (dict keys are column names),
# which covers both the string column and the numeric column in one call.
df.na.fill({"name": "No Name", "salary": 0}).display()


In [0]:
# Swap one concrete value for another; `subset` restricts the replacement to
# the listed columns, so only matches in `name` are rewritten.
df.na.replace(
    to_replace="Alice",
    value="Alicia",
    subset=["name"],
).display()


In [0]:
# Build a DataFrame with the same schema as `df` but no rows at all.
empty_df = spark.createDataFrame([], df.schema)

# isEmpty() returns a plain Python bool:
#   df       -> False (it has rows)
#   empty_df -> True  (no rows)
# NOTE(review): isEmpty() appears to need PySpark 3.3+ — confirm against the cluster runtime.
for frame in (df, empty_df):
    print(frame.isEmpty())


### Common Use Cases
Data validation: Check if filtering returned no results.

Avoid writing empty DataFrames to storage.

Prevent downstream job failures when no data exists.

In [0]:
# Keep only rows whose salary exceeds 5000. The only salary in the sample data
# is 2000, so this filter is expected to match nothing.
filtered_df = df.filter(df["salary"] > 5000)

# Check for emptiness first so an empty result produces a message rather than
# a blank table.
if not filtered_df.isEmpty():
    filtered_df.display()
else:
    print("No employees with salary > 5000")


### Summary Table
| Function          | Purpose                                  | Example                       |
| ----------------- | ---------------------------------------- | ----------------------------- |
| `df.na.drop()`    | Remove rows with null values             | `df.na.drop(subset=["col1"])` |
| `df.na.fill()`    | Replace nulls with specified value(s)    | `df.na.fill({"col1":0})`      |
| `df.na.replace()` | Replace specific non-null values         | `df.na.replace("old","new")`  |
| `df.isEmpty()`    | Check if DataFrame has zero rows         | `if df.isEmpty(): ...`        |

