In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Obtain the active Spark session. On platforms that pre-inject a global
# `spark` this returns that same session; on a plain kernel it creates one,
# so the notebook also runs after "Restart Kernel -> Run All".
spark = SparkSession.builder.getOrCreate()

# Schema for the employee DataFrame. Every column is declared nullable so the
# sample rows below may contain missing values.
schema = StructType([
    StructField("id", IntegerType(), nullable=True),
    StructField("name", StringType(), nullable=True),
    StructField("age", IntegerType(), nullable=True),
    StructField("salary", IntegerType(), nullable=True)
])

# Sample rows: rows 2, 3 and 4 each have exactly one missing (None) field,
# in `age`, `name` and `salary` respectively.
employee_data = [
    (1, "John Doe", 30, 50000),
    (2, "Jane Smith", None, 60000),
    (3, None, 35, 55000),
    (4, "Mike Johnson", 40, None)
]

employee_df = spark.createDataFrame(employee_data, schema)

employee_df.show()

+---+------------+----+------+
| id|        name| age|salary|
+---+------------+----+------+
|  1|    John Doe|  30| 50000|
|  2|  Jane Smith|NULL| 60000|
|  3|        NULL|  35| 55000|
|  4|Mike Johnson|  40|  NULL|
+---+------------+----+------+



### filter 'not null' values

In [0]:
from pyspark.sql.functions import col

# Boolean column that is true for rows whose `age` is populated.
age_present = col("age").isNotNull()
employee_df.filter(age_present).show()

+---+------------+---+------+
| id|        name|age|salary|
+---+------------+---+------+
|  1|    John Doe| 30| 50000|
|  3|        NULL| 35| 55000|
|  4|Mike Johnson| 40|  NULL|
+---+------------+---+------+



### filter 'null' values

In [0]:
# Boolean column that is true for rows whose `age` is missing.
age_missing = col("age").isNull()
employee_df.filter(age_missing).show()

+---+----------+----+------+
| id|      name| age|salary|
+---+----------+----+------+
|  2|Jane Smith|NULL| 60000|
+---+----------+----+------+



### using SQL style syntax

In [0]:
# The same not-null filter, written as a SQL expression string
# instead of a Column object.
age_present_sql = "age IS NOT NULL"
employee_df.filter(age_present_sql).show()

+---+------------+---+------+
| id|        name|age|salary|
+---+------------+---+------+
|  1|    John Doe| 30| 50000|
|  3|        NULL| 35| 55000|
|  4|Mike Johnson| 40|  NULL|
+---+------------+---+------+



### can combine conditions with boolean operators

In [0]:
# Keep rows where at least one of `age` / `salary` is missing.
# `|` is the element-wise OR on Column objects.
age_missing = col("age").isNull()
salary_missing = col("salary").isNull()
employee_df.filter(age_missing | salary_missing).show()

+---+------------+----+------+
| id|        name| age|salary|
+---+------------+----+------+
|  2|  Jane Smith|NULL| 60000|
|  4|Mike Johnson|  40|  NULL|
+---+------------+----+------+

