In [1]:
from dataclasses import dataclass
from typing import Annotated, Any, Optional, get_origin, get_type_hints

from typedspark._core.column import Column
from typedspark._core.column_meta import ColumnMeta
from typedspark._schema.schema import MetaSchema, Schema

import pyspark.sql.types as T


@dataclass
class PartitionedColumnMeta(ColumnMeta):
    """Column metadata that additionally records whether a column is a partition column.

    Extends ``ColumnMeta`` with a single optional flag, ``is_partition_column``.
    """

    # True marks the column as a partition column; the default None is falsy,
    # so a column is treated as "not a partition column" unless explicitly flagged.
    is_partition_column: Optional[bool] = None


class PartitionedSchema(MetaSchema):
    """``MetaSchema`` extension that can list the columns flagged as partition columns."""

    def get_partitions(self) -> list[str | Any]:
        """Return the names of all columns whose metadata marks them as partition columns.

        The type hints of ``self`` are resolved with ``include_extras=True`` so that
        ``Annotated`` wrappers, and the metadata they carry, are preserved. A column
        counts as a partition column when any of its ``Annotated`` metadata entries
        has a truthy ``is_partition_column`` attribute.

        Returns:
            The matching column names, in the order ``get_type_hints`` yields them.
            The list is empty when no column is flagged.
        """
        return [
            column_name
            for column_name, column_type in get_type_hints(self, include_extras=True).items()
            # Columns declared without ``Annotated`` carry no metadata and are skipped.
            if get_origin(column_type) is Annotated
            # ``__metadata__`` is a tuple holding every extra argument passed to
            # ``Annotated``. All entries are inspected, and entries without the
            # attribute (e.g. a plain ``ColumnMeta``) count as "not flagged"
            # instead of raising ``AttributeError``.
            and any(
                getattr(meta, "is_partition_column", False)
                for meta in column_type.__metadata__
            )
        ]


class PartitionedSubSchema(Schema):
    """Schema for the lineage table."""

    # Three string columns. Each is wrapped in ``Annotated`` with a
    # ``PartitionedColumnMeta`` that supplies a human-readable comment and flags
    # the column as a partition column.
    # NOTE(review): this class derives from ``Schema`` rather than from a class
    # built on ``PartitionedSchema``, so ``get_partitions()`` is presumably not
    # reachable from here — confirm.

    # Free-text description of the action.
    description: Annotated[
        Column[T.StringType],
        PartitionedColumnMeta(
            comment="the description of the action",
            is_partition_column=True,
        ),
    ]
    # Name of the app.
    app: Annotated[
        Column[T.StringType],
        PartitionedColumnMeta(
            comment="the name of the app",
            is_partition_column=True,
        ),
    ]
    # Version of the app (note: the comment string below ends with a trailing space).
    appVersion: Annotated[
        Column[T.StringType],
        PartitionedColumnMeta(
            comment="the app version ",
            is_partition_column=True,
        ),
    ]

In [2]:
from dataclasses import asdict


# Build a metadata object with only the partition flag set (``comment`` keeps its
# default) and display the dict that ``get_metadata()`` produces for it.
data = PartitionedColumnMeta(is_partition_column=True)
data.get_metadata()

{'comment': None, 'is_partition_column': True}

In [3]:
# Show the metadata attached to the first field of the generated StructType.
PartitionedSubSchema.get_structtype().fields[0].metadata

{'comment': 'the description of the action', 'is_partition_column': True}

In [4]:
# Map every column name of the generated StructType to its metadata dict.
{
    struct_field.name: struct_field.metadata
    for struct_field in PartitionedSubSchema.get_structtype().fields
}

{'description': {'comment': 'the description of the action',
  'is_partition_column': True},
 'app': {'comment': 'the name of the app', 'is_partition_column': True},
 'appVersion': {'comment': 'the app version ', 'is_partition_column': True}}

In [26]:
# Map every column name of the generated StructType to its metadata dict.
# NOTE(review): this cell repeats the previous cell's code verbatim, yet its
# recorded output differs and its execution count (26) is out of sequence — the
# output presumably stems from a different kernel state; re-run or remove.
{field.name: field.metadata for field in PartitionedSubSchema.get_structtype().fields}

{'description': {'comment': None},
 'app': {'comment': None},
 'appVersion': {'comment': None}}

In [23]:
@dataclass
class A:
    """Field-less dataclass used to check what ``asdict`` yields when there is nothing to serialise."""


# With no fields declared, ``asdict`` produces an empty dict, hence a length of 0.
len(asdict(A()))

0

In [37]:
@dataclass
class B:
    """Minimal stand-in for a column-metadata class with a single optional ``comment``."""

    comment: Optional[str] = None

    def get_metadata(self):
        """Return all dataclass fields of this instance as a dict.

        Fields left at ``None`` are kept in the result. ``None`` is returned only
        when the resulting dict has no entries at all.
        """
        fields_as_dict = asdict(self)
        if not fields_as_dict:
            return None
        return fields_as_dict


@dataclass
class C(B):
    """Subclass of ``B`` that adds one extra optional field, ``dog``."""

    # Extra field declared only on this subclass.
    dog: Optional[bool] = None


# ``get_metadata()`` is inherited from ``B``; the recorded output shows that the
# subclass field is included too, with its unset value kept as ``None``.
C(comment="hi").get_metadata()

{'comment': 'hi', 'dog': None}