In [1]:
from typing import Protocol, Type, TypeVar

from pyspark.sql import SparkSession
from pyspark.sql.types import LongType, StringType
from typedspark import Schema, DataSet, Column, PartialDataSet, transform_to_schema, create_empty_dataset

class Person(Schema):
    """Schema with a string ``name`` column and a long ``age`` column."""

    # Field order matters: it is the column order seen in the recorded
    # `res.show()` output (name, then age).
    name: Column[StringType]
    age: Column[LongType]

class Age(Schema, Protocol):
    # Structural (Protocol) schema naming only the column that get_age() and
    # birthday() work with.
    #
    # The field has to be `age` with LongType: birthday() evaluates
    # `Age.age + 1`, Person declares `age: Column[LongType]`, and the recorded
    # get_age() output consists of a single `age` column. The previous
    # declaration (`type: Column[StringType]`) matched none of these.
    #
    # NOTE(review): documented with comments rather than a docstring because a
    # later cell displays Age.get_docstring(); a docstring here would
    # presumably change that recorded output -- confirm before adding one.
    age: Column[LongType]

def get_age(df: PartialDataSet[Age]) -> DataSet[Age]:
    """Project ``df`` onto the ``Age`` schema.

    Hands ``df`` and the ``Age`` schema class to ``transform_to_schema`` and
    returns whatever that call produces.
    """
    age_only = transform_to_schema(df, Age)
    return age_only

# Reuse the active Spark session if there is one, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Build a Person-typed dataset to experiment with; judging by the recorded
# output below it holds three rows whose values are all null.
df = create_empty_dataset(spark, Person)
# Reduce it to the Age schema and print the result.
get_age(df).show()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/07/03 18:53:46 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


+----+
| age|
+----+
|null|
|null|
|null|
+----+



                                                                                

In [2]:
# Any concrete Schema subclass; ties birthday()'s `schema` argument to its
# return type.
T = TypeVar("T", bound=Schema)


def birthday(df: PartialDataSet[Age], schema: Type[T]) -> DataSet[T]:
    """Transform ``df`` to ``schema`` with the ``age`` column incremented by one.

    Args:
        df: Input dataset, annotated as ``PartialDataSet[Age]``
            (presumably: any dataset that has at least Age's columns --
            confirm against the typedspark version in use).
        schema: The target schema *class* (e.g. ``Person``), not an instance.
            Annotated as ``Type[T]`` so that passing ``Person`` yields a
            ``DataSet[Person]`` return type for static checkers.

    Returns:
        The result of ``transform_to_schema`` for ``schema``, with the
        ``Age.age`` column replaced by ``Age.age + 1``.
    """
    return transform_to_schema(
        df,
        schema,  # type: ignore
        {Age.age: Age.age + 1},
    )

# Apply birthday() to the Person dataset from the first cell, asking for the
# result in the Person schema, then print it.
res: DataSet[Person] = birthday(df, Person)
res.show()

+----+----+
|name| age|
+----+----+
|null|null|
|null|null|
|null|null|
+----+----+



In [4]:
# Age declares no docstring of its own; the recorded output below is the
# generic typing.Protocol docstring rather than anything specific to Age.
Age.get_docstring()

'Base class for protocol classes.\n\nProtocol classes are defined as::\n\n    class Proto(Protocol):\n        def meth(self) -> int:\n            ...\n\nSuch classes are primarily used with static type checkers that recognize\nstructural subtyping (static duck-typing), for example::\n\n    class C:\n        def meth(self) -> int:\n            return 0\n\n    def func(x: Proto) -> int:\n        return x.meth()\n\n    func(C())  # Passes static type check\n\nSee PEP 544 for details. Protocol classes decorated with\n@typing.runtime_checkable act as simple-minded runtime protocols that check\nonly the presence of given attributes, ignoring their type signatures.\nProtocol classes can be generic, they are defined as::\n\n    class GenProto(Protocol[T]):\n        def meth(self) -> T:\n            ...'