# Introduction

In [None]:
!python -m pip install polars

In [None]:
import polars as pl

# Build a lazy query over the tips dataset stored in "tips.parquet".
tips = pl.scan_parquet(source="tips.parquet")

# Execute the query and display the resulting DataFrame.
tips.collect()

In [None]:
# Count the null values in each column of the tips data.
null_counts = tips.null_count()
null_counts.collect()

# How to Work With Missing Data in Polars

In [None]:
import polars as pl

tips = pl.scan_parquet(source="tips.parquet")

# Keep only the rows in which both "total" and "tip" are null.
total_and_tip_missing = pl.col("total").is_null() & pl.col("tip").is_null()
tips.filter(total_and_tip_missing).collect()

In [None]:
# Drop every row whose "total" is null, then re-apply the "both null" filter
# to the remaining rows. The column is passed by name, which is the form
# documented for the `subset` argument of `drop_nulls()`.
(
    tips.drop_nulls("total").filter(
        pl.col("total").is_null() & pl.col("tip").is_null()
    )
).collect()

In [None]:
# 1. Drop the rows that have no "total" (column passed by name, the form
#    documented for the `subset` argument of `drop_nulls()`).
# 2. Replace any remaining null "tip" with 0.
# 3. Filter for rows whose "tip" is still null, to show what is left.
(
    tips.drop_nulls("total")
    .with_columns(pl.col("tip").fill_null(0))
    .filter(pl.col("tip").is_null())
).collect()

# Using a More Strategic Approach

In [None]:
import polars as pl

tips = pl.scan_parquet(source="tips.parquet")

# Show the rows whose "time" value is null.
time_missing = pl.col("time").is_null()
tips.filter(time_missing).collect()

In [None]:
# Display a hand-picked set of records, selected by record_id.
# NOTE(review): presumably these surround the rows with a null "time" -
# confirm against the data.
record_ids = [2, 3, 4, 14, 15, 16]
tips.filter(pl.col("record_id").is_in(record_ids)).collect()

In [None]:
# Clean the data: drop rows without a "total", replace null tips with 0 and
# forward-fill null "time" values. The two fills touch different columns, so
# they are expressed in a single with_columns() call.
cleaned_tips = tips.drop_nulls("total").with_columns(
    pl.col("tip").fill_null(0),
    pl.col("time").fill_null(strategy="forward"),
)

# Inspect records 3 and 15 after cleaning.
cleaned_tips.filter(pl.col("record_id").is_in([3, 15])).collect()

# Dealing With Nulls Across Multiple Columns

In [None]:
import polars as pl

tips = pl.scan_parquet(source="tips.parquet")

# True for a row only when "total" and "tip" are both null.
both_missing = pl.all_horizontal(pl.col("total", "tip").is_null())
tips.filter(both_missing).collect()

In [None]:
tips = pl.scan_parquet(source="tips.parquet")

# Keep every row except those where "total" and "tip" are both null.
both_missing = pl.all_horizontal(pl.col("total", "tip").is_null())
tips.filter(~both_missing).collect()

In [None]:
import polars as pl

tips = pl.scan_parquet(source="tips.parquet")

# Remove rows where "total" and "tip" are both null, then fill the remaining
# gaps: null tips become 0 and null times are forward-filled.
both_missing = pl.all_horizontal(pl.col("total", "tip").is_null())
cleaned_tips = tips.filter(~both_missing).with_columns(
    pl.col("tip").fill_null(0),
    pl.col("time").fill_null(strategy="forward"),
)

# Count the nulls left in each column after cleaning.
cleaned_tips.null_count().collect()

# Dealing With Nulls by Column Data Type

In [None]:
import polars as pl

# Sample data in which every column except scientist_id contains one null.
scientist_records = {
    "scientist_id": [1, 2, 3, 4, 5],
    "first_name": ["Isaac", "Louis", None, "Charles", "Marie"],
    "last_name": [None, "Pasteur", "Einstein", "Darwin", "Curie"],
    "birth_year": [1642, 1822, None, 1809, 1867],
    "death_year": [1726, 1895, 1955, None, 1934],
}
scientists = pl.LazyFrame(scientist_records)

scientists.collect()

In [None]:
import polars.selectors as cs

# Fill nulls according to column data type: string columns get "Unknown",
# integer columns get 0. The selectors match different columns, so both
# fills are passed to a single with_columns() call.
scientists.with_columns(
    cs.string().fill_null("Unknown"),
    cs.integer().fill_null(0),
).collect()

# Dealing With Those Pesky NaNs and infs

In [None]:
import polars as pl

# Build a lazy query over the sales trends CSV file.
sales_trends = pl.scan_csv(source="sales_trends.csv")

# Execute the query and display the resulting DataFrame.
sales_trends.collect()

In [None]:
# Values in "next_year" that should be treated as missing.
non_finite_values = [float("inf"), -float("inf"), float("NaN")]

# Replace each of those values with null.
sales_trends.with_columns(
    pl.col("next_year").replace(non_finite_values, None)
).collect()

In [None]:
# Values in "next_year" that should be treated as missing.
non_finite_values = [float("inf"), -float("inf"), float("NaN")]

# Projection used to fill the gaps: the current year's figure plus the
# change from last year to the current year.
projected_next_year = pl.col("current_year") + (
    pl.col("current_year") - pl.col("last_year")
)

# Turn the non-finite values into nulls, then fill every null in
# "next_year" with the projection.
sales_trends.with_columns(
    pl.col("next_year")
    .replace(non_finite_values, None)
    .fill_null(projected_next_year)
).collect()

# Practicing Your Skills - Solution

In [None]:
import polars as pl

# Build a lazy query over the exercise dataset.
episodes = pl.scan_parquet(source="ft_exercise.parquet")

# Count the null values in each column.
episode_null_counts = episodes.null_count()
episode_null_counts.collect()

In [None]:
import polars as pl

episodes = pl.scan_parquet(source="ft_exercise.parquet")

episode = pl.col("episode")
series = pl.col("series")
title = pl.col("title")

# "series": forward-fill on the row for episode 6, backward-fill elsewhere.
series_filled = (
    pl.when(episode == 6)
    .then(series.fill_null(strategy="forward"))
    .otherwise(series.fill_null(strategy="backward"))
)

# "title": a null title on episode 4 becomes "The Hotel Inspectors"; any
# other null title becomes "Waldorf Salad".
title_filled = (
    pl.when(episode == 4)
    .then(title.fill_null("The Hotel Inspectors"))
    .otherwise(title.fill_null("Waldorf Salad"))
)

# Apply the fills, interpolate the null "original_date" values, then count
# the nulls left in each column.
(
    episodes.with_columns(series_filled)
    .with_columns(title_filled)
    .with_columns(pl.col("original_date").interpolate())
).null_count().collect()