## Define data model



In [23]:
from datetime import date
import json
from typing import Annotated, Literal, Union
from pydantic import BaseModel, Field, field_serializer, model_serializer

class SemanticVersion(BaseModel):
    """Three-part version number that serializes to ``"major.minor.patch"``.

    Each component is validated as a non-negative integer. Serialization
    replaces the usual field mapping with a single dotted string.
    """

    # Semantic Versioning permits 0 in every position (e.g. "1.0.0"), so the
    # lower bound is 0; a bound of 1 would reject such versions.
    major: int = Field(ge=0)
    minor: int = Field(ge=0)
    patch: int = Field(ge=0)

    @model_serializer
    def serialize_version(self) -> str:
        """Return the version as a dotted string, e.g. ``"3.0.1"``."""
        return f"{self.major}.{self.minor}.{self.patch}"

class ExampleData(BaseModel):
    """One record of example data, with per-field validation constraints.

    The constraints are declared through ``Field`` and ``Literal`` so that
    pydantic enforces them when a record is validated.
    """

    adcid: int = Field(ge=0, lt=100)  # integer in the range [0, 100)
    ptid: str = Field(min_length=1, max_length=10)  # 1 to 10 characters
    visitnum: str = Field(min_length=1, max_length=3)  # 1 to 3 characters
    visitdate: date
    formver: SemanticVersion
    # Either an integer from 1 through 6, or the single additional value 88.
    variable1: Union[Annotated[int, Field(ge=1, le=6)], Literal[88]]
    variable2: Literal[1, 2]
    variable3: Literal[1, 2, 4, 9]

    # The serializer must be a plain method decorated only with
    # @field_serializer. Wrapping the decorated function in @classmethod hides
    # pydantic's decorator marker, so the serializer would not be registered.
    @field_serializer('visitdate')
    def serialize_visitdate(self, visitdate: date) -> str:
        """Serialize ``visitdate`` as an ISO 8601 date string (YYYY-MM-DD)."""
        return visitdate.isoformat()

## Write schema

In [24]:
# Export the JSON Schema of ExampleData as 2-space-indented JSON text.
with open('data/example-data-schema.json', mode='w', encoding='utf-8') as schema_file:
    schema_file.write(json.dumps(ExampleData.model_json_schema(), indent=2))

## Write data file

In [25]:
import random
from csv import DictWriter
from polyfactory.factories.pydantic_factory import ModelFactory

class ExampleFactory(ModelFactory[ExampleData]):
    """Factory for ``ExampleData`` records; declares no overrides of its own."""

# Pick how many records to generate: randrange excludes the stop value, so
# the count is between 20 and 999 inclusive.
count = random.randrange(start=20, stop=1000)

# Build one record per iteration; `count` keeps the number generated.
example_list = [ExampleFactory.build() for _ in range(count)]

# Write every generated record to CSV, one column per ExampleData field.
# newline='' leaves line endings to the csv module; without it, text-mode
# newline translation can rewrite the '\n' terminator of the 'unix' dialect
# on platforms whose native line separator differs.
with open('data/example-data.csv', mode='w', encoding='utf-8', newline='') as outfile:
    # Column order follows the field declaration order of the model.
    writer = DictWriter(outfile, fieldnames=list(ExampleData.model_fields), dialect='unix')
    writer.writeheader()
    for example in example_list:
        writer.writerow(example.model_dump())