# IDE demo

This notebook contains the code accompanying the IDE demo. You can watch the demo video [here](https://github.com/kaiko-ai/typedspark/assets/47976799/e6f7fa9c-6d14-4f68-baba-fe3c22f75b67).

In [1]:
from pyspark.sql.types import DateType, LongType, StringType
from typedspark import DataSet, Column, Schema


# Typed schema for the pets dataset. Each annotated attribute declares one
# column together with its Spark data type.
class Pets(Schema):
    pet_id: Column[LongType]  # matched against Vaccinations.pet_id in the reminder join
    owner_id: Column[LongType]  # matched against Owners.owner_id in the reminder join
    pet_name: Column[StringType]
    species: Column[StringType]  # compared with the literal "dog" by get_dogs
    breed: Column[StringType]
    age: Column[LongType]  # unit is not stated in this notebook (presumably years; confirm)
    birthdate: Column[DateType]
    gender: Column[StringType]


# Typed schema for the vaccinations dataset. Each annotated attribute declares
# one column together with its Spark data type.
class Vaccinations(Schema):
    vaccination_id: Column[LongType]
    pet_id: Column[LongType]  # matched against Pets.pet_id in the reminder join
    vaccine_name: Column[StringType]  # surfaced as Reminder.vaccine
    vaccine_date: Column[DateType]
    next_due_date: Column[DateType]  # surfaced as Reminder.due


# Typed schema for the owners dataset. Each annotated attribute declares one
# column together with its Spark data type.
class Owners(Schema):
    owner_id: Column[LongType]  # matched against Pets.owner_id in the reminder join
    first_name: Column[StringType]  # first half of Reminder.full_name
    last_name: Column[StringType]  # second half of Reminder.full_name
    email: Column[StringType]  # surfaced as Reminder.email_address
    phone_number: Column[StringType]
    address: Column[StringType]

In [2]:
def get_dogs(pets: DataSet[Pets]) -> DataSet[Pets]:
    """Return the rows of ``pets`` whose ``species`` column equals ``"dog"``."""
    is_dog = Pets.species == "dog"
    return pets.filter(is_dog)

In [3]:
from chispa.dataframe_comparer import assert_df_equality
from pyspark.sql import SparkSession
from typedspark import create_partially_filled_dataset


def test_get_dogs(spark: SparkSession):
    """Check that ``get_dogs`` keeps the dog rows and drops everything else.

    Only ``pet_id`` and ``species`` are specified for the fixtures; the
    comparison ignores row order and column nullability.
    """
    input_columns = {
        Pets.pet_id: [1, 2, 3],
        Pets.species: ["dog", "cat", "dog"],
    }
    dog_columns = {
        Pets.pet_id: [1, 3],
        Pets.species: ["dog", "dog"],
    }

    all_pets = create_partially_filled_dataset(spark, Pets, input_columns)
    actual = get_dogs(all_pets)
    wanted = create_partially_filled_dataset(spark, Pets, dog_columns)

    assert_df_equality(actual, wanted, ignore_row_order=True, ignore_nullable=True)

In [4]:
from pyspark.sql.functions import concat_ws
from typedspark import (
    register_schema_to_dataset,
    transform_to_schema,
)


# Typed schema of the output produced by
# find_owners_who_need_to_renew_their_pets_vaccinations. The trailing comments
# name the source column each field is populated from in that function.
class Reminder(Schema):
    owner_id: Column[LongType]  # Owners.owner_id
    pet_id: Column[LongType]  # Pets.pet_id
    vaccination_id: Column[LongType]  # Vaccinations.vaccination_id
    full_name: Column[StringType]  # Owners.first_name and Owners.last_name joined by a space
    email_address: Column[StringType]  # Owners.email
    pet_name: Column[StringType]  # Pets.pet_name
    vaccine: Column[StringType]  # Vaccinations.vaccine_name
    due: Column[DateType]  # Vaccinations.next_due_date


def find_owners_who_need_to_renew_their_pets_vaccinations(
    owners: DataSet[Owners],
    pets: DataSet[Pets],
    vaccinations: DataSet[Vaccinations],
) -> DataSet[Reminder]:
    """Join owners, pets and vaccinations and reshape the result into ``Reminder``.

    Owners are inner-joined to pets on ``owner_id``, and the result is
    inner-joined to vaccinations on ``pet_id``. Each schema is registered to
    its dataset first so that the identically named columns (``owner_id``,
    ``pet_id``) can be referenced unambiguously after the joins.

    Note: no filter on ``next_due_date`` is applied here; every matching
    owner/pet/vaccination combination yields one output row.

    Args:
        owners: Dataset following the ``Owners`` schema.
        pets: Dataset following the ``Pets`` schema.
        vaccinations: Dataset following the ``Vaccinations`` schema.

    Returns:
        A dataset following the ``Reminder`` schema.
    """
    # Dataset-bound column references, needed to tell apart same-named columns.
    owner_cols = register_schema_to_dataset(owners, Owners)
    pet_cols = register_schema_to_dataset(pets, Pets)
    vaccination_cols = register_schema_to_dataset(vaccinations, Vaccinations)

    owners_with_pets = owners.join(
        pets,
        owner_cols.owner_id == pet_cols.owner_id,
        "inner",
    )
    owners_pets_vaccinations = owners_with_pets.join(
        vaccinations,
        pet_cols.pet_id == vaccination_cols.pet_id,
        "inner",
    )

    owner_full_name = concat_ws(" ", owner_cols.first_name, owner_cols.last_name)
    reminder_columns = {
        Reminder.owner_id: owner_cols.owner_id,
        Reminder.pet_id: pet_cols.pet_id,
        Reminder.vaccination_id: vaccination_cols.vaccination_id,
        Reminder.full_name: owner_full_name,
        Reminder.email_address: owner_cols.email,
        Reminder.pet_name: pet_cols.pet_name,
        Reminder.vaccine: vaccination_cols.vaccine_name,
        Reminder.due: vaccination_cols.next_due_date,
    }

    return transform_to_schema(owners_pets_vaccinations, Reminder, reminder_columns)