# Install required Python packages

In [None]:
# The leading "!" is IPython syntax: the rest of the line runs as a shell command.
! pip install -r requirements.txt

In [2]:
import os
import json

import jsonlines
from mimesis import Generic

# Create dummy data

In [3]:
def generate_person_initial_schema(seed):
    """
    Build one fake person record that follows the initial schema.

    Args:
        seed (int): Seed handed to the mimesis ``Generic`` provider.

    Returns:
        dict: Person record with the keys ``id``, ``first_name``, ``age``,
            ``address``, ``favourite_colurs`` and ``friends``.
    """
    fake = Generic('en', seed=seed)

    # Values are drawn in the same order as the keys of the returned record;
    # with a seeded provider, reordering these calls would change the values.
    person_id = fake.person.identifier(mask='######')
    first_name = fake.person.first_name()
    age = fake.random.randint(18, 60)

    address = {}
    address["street_number"] = fake.address.street_number()
    address["street_name"] = fake.address.street_name()
    address["city"] = fake.address.city()
    address["state"] = fake.address.state()
    address["zipcode"] = fake.address.zip_code()

    # Between one and five colours per person.
    colour_count = fake.random.randint(1, 5)
    colours = []
    for _ in range(colour_count):
        colours.append(fake.text.color())

    # Between one and three friends per person.
    friend_count = fake.random.randint(1, 3)
    friends = []
    for _ in range(friend_count):
        friend_name = fake.person.name()
        friend_age = fake.random.randint(18, 60)
        friends.append({"name": friend_name, "age": friend_age})

    return {
        "id": person_id,
        "first_name": first_name,
        "age": age,
        "address": address,
        "favourite_colurs": colours,
        "friends": friends,
    }

def generate_person_evolved_schema(seed):
    """
    Build one fake person record that follows the evolved schema.

    The evolved schema has the same fields as the initial schema plus a
    ``last_name`` field placed right after ``first_name``.

    Args:
        seed (int): Seed handed to the mimesis ``Generic`` provider.

    Returns:
        dict: Person record with the keys ``id``, ``first_name``,
            ``last_name``, ``age``, ``address``, ``favourite_colurs`` and
            ``friends``.
    """
    fake = Generic('en', seed=seed)

    # Values are drawn in the same order as the keys of the returned record;
    # with a seeded provider, reordering these calls would change the values.
    person_id = fake.person.identifier(mask='######')
    first_name = fake.person.first_name()
    last_name = fake.person.last_name()
    age = fake.random.randint(18, 60)

    address = {}
    address["street_number"] = fake.address.street_number()
    address["street_name"] = fake.address.street_name()
    address["city"] = fake.address.city()
    address["state"] = fake.address.state()
    address["zipcode"] = fake.address.zip_code()

    # Between one and five colours per person.
    colour_count = fake.random.randint(1, 5)
    colours = []
    for _ in range(colour_count):
        colours.append(fake.text.color())

    # Between one and three friends per person.
    friend_count = fake.random.randint(1, 3)
    friends = []
    for _ in range(friend_count):
        friend_name = fake.person.name()
        friend_age = fake.random.randint(18, 60)
        friends.append({"name": friend_name, "age": friend_age})

    return {
        "id": person_id,
        "first_name": first_name,
        "last_name": last_name,
        "age": age,
        "address": address,
        "favourite_colurs": colours,
        "friends": friends,
    }

def generate_data_initial_schema(filename, num_records, seed):
    """
    Generate data with initial schema and write it to a JSON lines file.

    Args:
        filename (str): Name of the output JSON lines file. It is opened in
            write mode, so existing content is replaced.
        num_records (int): Number of records to generate.
        seed (int): Base seed value; record ``i`` is generated with
            ``seed + i``.

    Returns:
        None: The generated records are only written to ``filename``.
    """
    # All records are built before the output file is opened.
    data = [generate_person_initial_schema(seed + i) for i in range(num_records)]

    with jsonlines.open(filename, mode='w') as writer:
        writer.write_all(data)

def generate_data_evolved_schema(filename, num_records, seed):
    """
    Generate data with evolved schema and write it to a JSON lines file.

    Args:
        filename (str): Name of the output JSON lines file. It is opened in
            write mode, so existing content is replaced.
        num_records (int): Number of records to generate.
        seed (int): Base seed value; record ``i`` is generated with
            ``seed + i``.

    Returns:
        None: The generated records are only written to ``filename``.
    """
    # All records are built before the output file is opened.
    data = [generate_person_evolved_schema(seed + i) for i in range(num_records)]

    with jsonlines.open(filename, mode='w') as writer:
        writer.write_all(data)

In [4]:
# Write three JSON lines datasets: persons-1 uses the initial schema, while
# persons-2 and persons-3 use the evolved schema (which adds "last_name").
generate_data_initial_schema(filename="persons-1.json", num_records=10, seed=123)
generate_data_evolved_schema(filename="persons-2.json", num_records=20, seed=1234)
# Same seed and record count as persons-1, but generated with the evolved schema.
generate_data_evolved_schema(filename="persons-3.json", num_records=10, seed=123)

# Create schema for dummy data

In [5]:
# Write a single initial-schema record to serve as the sample input for codegen.
generate_data_initial_schema(filename="sample-persons.json", num_records=1, seed=123)

# Generate model1.py from the initial-schema sample.
! datamodel-codegen --input "sample-persons.json" --input-file-type json --output "model1.py"

# Overwrite the sample file with a single evolved-schema record.
generate_data_evolved_schema(filename="sample-persons.json", num_records=1, seed=123)

# Generate model2.py from the evolved-schema sample.
! datamodel-codegen --input "sample-persons.json" --input-file-type json --output "model2.py"

In [6]:
# model1 is the module that datamodel-codegen wrote to "model1.py".
import model1

# Parse the model's JSON schema string into a dict so it can be printed and saved.
schema = json.loads(model1.Model.schema_json())
print(schema)

# Save the initial-schema JSON schema to persons-1.schema.json.
with open("persons-1.schema.json", "w") as f:
    json.dump(schema, f)

{'title': 'Model', 'type': 'object', 'properties': {'id': {'title': 'Id', 'type': 'string'}, 'first_name': {'title': 'First Name', 'type': 'string'}, 'age': {'title': 'Age', 'type': 'integer'}, 'address': {'$ref': '#/definitions/Address'}, 'favourite_colurs': {'title': 'Favourite Colurs', 'type': 'array', 'items': {'type': 'string'}}, 'friends': {'title': 'Friends', 'type': 'array', 'items': {'$ref': '#/definitions/Friend'}}}, 'required': ['id', 'first_name', 'age', 'address', 'favourite_colurs', 'friends'], 'definitions': {'Address': {'title': 'Address', 'type': 'object', 'properties': {'street_number': {'title': 'Street Number', 'type': 'string'}, 'street_name': {'title': 'Street Name', 'type': 'string'}, 'city': {'title': 'City', 'type': 'string'}, 'state': {'title': 'State', 'type': 'string'}, 'zipcode': {'title': 'Zipcode', 'type': 'string'}}, 'required': ['street_number', 'street_name', 'city', 'state', 'zipcode']}, 'Friend': {'title': 'Friend', 'type': 'object', 'properties': {'

In [7]:
# model2 is the module that datamodel-codegen wrote to "model2.py".
import model2

# Parse the model's JSON schema string into a dict so it can be printed and saved.
schema = json.loads(model2.Model.schema_json())
print(schema)

# Save the evolved-schema JSON schema to persons-2.schema.json.
with open("persons-2.schema.json", "w") as f:
    json.dump(schema, f)

{'title': 'Model', 'type': 'object', 'properties': {'id': {'title': 'Id', 'type': 'string'}, 'first_name': {'title': 'First Name', 'type': 'string'}, 'last_name': {'title': 'Last Name', 'type': 'string'}, 'age': {'title': 'Age', 'type': 'integer'}, 'address': {'$ref': '#/definitions/Address'}, 'favourite_colurs': {'title': 'Favourite Colurs', 'type': 'array', 'items': {'type': 'string'}}, 'friends': {'title': 'Friends', 'type': 'array', 'items': {'$ref': '#/definitions/Friend'}}}, 'required': ['id', 'first_name', 'last_name', 'age', 'address', 'favourite_colurs', 'friends'], 'definitions': {'Address': {'title': 'Address', 'type': 'object', 'properties': {'street_number': {'title': 'Street Number', 'type': 'string'}, 'street_name': {'title': 'Street Name', 'type': 'string'}, 'city': {'title': 'City', 'type': 'string'}, 'state': {'title': 'State', 'type': 'string'}, 'zipcode': {'title': 'Zipcode', 'type': 'string'}}, 'required': ['street_number', 'street_name', 'city', 'state', 'zipcode'

In [8]:
# Remove the temporary sample file and the generated model modules;
# "-f" keeps rm quiet if a file is already gone.
! rm -f "sample-persons.json"
! rm -f "model1.py"
! rm -f "model2.py"