In [127]:
import os
import csv
import io
import json
from uuid import uuid4
from flatten_json import flatten, unflatten
from datamodel_code_generator import DataModelType, PythonVersion
from datamodel_code_generator.model import get_data_model_types
from datamodel_code_generator.parser.jsonschema import JsonSchemaParser


In [30]:
# Sample JSON schema used by the cells below: a top-level string array, a
# nested object holding two arrays (one of them without an "items" entry),
# and a plain string property.
test_schema = {
    "$schema": "http://json-schema.org/schema#",
    "type": "object",
    "properties": {
        "hello": {"type": "array", "items": {"type": "string"}},
        "goodbye": {
            "type": "object",
            "properties": {
                # "lovers" has no "items", so its element type is unspecified.
                "lovers": {"type": "array"},
                "friends": {"type": "array", "items": {"type": "string"}},
            },
            "required": ["friends"],
        },
        "fu": {"type": "string"},
    },
    "required": ["fu", "goodbye", "hello"],
}

In [None]:

def unflatten_schema(json_schema, separator="_"):
    """Rebuild a nested dict from a flattened schema via ``flatten_json.unflatten``.

    Args:
        json_schema: Flat mapping whose keys encode nesting with ``separator``.
        separator: Delimiter that was used when the mapping was flattened.
            Defaults to ``"_"`` (the value the original single-argument call
            relied on). Pass ``"."`` for data produced by
            ``schema_to_flat_csv``'s flattening step, which uses ``"."``.

    Returns:
        Whatever ``unflatten`` returns for the given mapping and separator.

    NOTE(review): numeric segments such as ``required.0`` presumably come back
    as dict keys rather than list items with plain ``unflatten`` -- confirm
    whether ``flatten_json.unflatten_list`` is what is wanted here.
    """
    return unflatten(json_schema, separator=separator)





In [59]:
# Flatten the sample schema with the same "." separator that
# schema_to_flat_csv uses. Assigning it here means the cell works on a fresh
# kernel instead of relying on a variable left over from an earlier session.
flattened = flatten(test_schema, separator=".")
flattened

{'$schema': 'http://json-schema.org/schema#',
 'type': 'object',
 'properties.hello.type': 'array',
 'properties.hello.items.type': 'string',
 'properties.goodbye.type': 'object',
 'properties.goodbye.properties.lovers.type': 'array',
 'properties.goodbye.properties.friends.type': 'array',
 'properties.goodbye.properties.friends.items.type': 'string',
 'properties.goodbye.required.0': 'friends',
 'properties.fu.type': 'string',
 'required.0': 'fu',
 'required.1': 'goodbye',
 'required.2': 'hello'}

In [133]:

def key_cleaner(key):
    """Reduce a flattened JSON-schema key to the dotted path of its field.

    The key is processed one dot-separated segment at a time, so only whole
    schema keywords are removed. Property names that merely contain
    ``properties``, ``items`` or ``type`` (e.g. ``line_items``) are kept
    intact, and nested arrays of objects no longer leave a double dot.

    Args:
        key: A "."-separated key from a flattened schema, e.g.
            ``"properties.goodbye.properties.friends.items.type"``.

    Returns:
        The dotted field path (``"goodbye.friends"`` for the example above).
        Segments that are not one of the removed keywords, such as
        ``required.0``, are kept as they are. Returns ``""`` when nothing is
        left (for instance for the bare key ``"properties"``) instead of
        raising ``IndexError``.
    """
    # Schema keywords that add nesting but are not part of the field path.
    structural_keywords = {"properties", "items", "type"}

    kept = []
    expecting_name = False
    for segment in key.split("."):
        if expecting_name:
            # The segment right after "properties" is always a property name,
            # even when it happens to be spelled like a schema keyword.
            kept.append(segment)
            expecting_name = False
        elif segment == "properties":
            expecting_name = True
        elif segment not in structural_keywords:
            kept.append(segment)
    return ".".join(kept)

def flat_schema_adjuster(flattened: dict):
    """Turn a flattened JSON schema into one record per non-object property.

    Only keys under ``properties.`` are considered. Entries belonging to a
    ``required`` list and entries whose value is ``"object"`` are skipped.
    The remaining keys are grouped by the field name ``key_cleaner`` returns:
    the first entry seen for a name creates the record, and any later entry
    for the same name (typically ``...items.type`` of an array) supplies its
    ``type``.

    Args:
        flattened: Mapping of "."-separated schema keys to leaf values.

    Returns:
        A list of dicts, in first-seen order, each with the keys ``uid``
        (fresh random UUID string), ``name`` (dotted field path), ``array``
        (``True`` when the first entry was ``"array"``) and ``type``
        (``None`` for an array whose element type is never given).
        Returns ``[]`` when there are no property keys.
    """
    # Insertion-ordered and keyed by the exact field name. This avoids the
    # substring comparison against the previous key, always has a current
    # record available, and emits every field exactly once -- including an
    # untyped array that happens to be the last key.
    fields = {}

    for key, entry in flattened.items():
        # The trailing dot also excludes a bare "properties" key, which has
        # no field name to extract.
        if not key.startswith("properties."):
            continue
        # Match "required" as a whole segment so that property names which
        # only contain the word (e.g. "required_by") are not dropped.
        if "required" in key.split(".") or entry == "object":
            continue

        name = key_cleaner(key)
        field = fields.get(name)
        if field is None:
            is_array = entry == "array"
            fields[name] = {
                "uid": str(uuid4()),
                "name": name,
                "array": is_array,
                # For arrays the element type arrives with a later key, if at all.
                "type": None if is_array else entry,
            }
        else:
            field["type"] = entry

    return list(fields.values())

def csv_string_writer(clean_array: list):
    """Serialise a list of dict rows to a CSV string with a header line.

    Args:
        clean_array: Rows to write; each row is a dict of column -> value.

    Returns:
        The CSV text as produced by ``csv.DictWriter`` (rows end in
        ``\\r\\n``). Returns ``""`` for an empty list.

    The header is the union of all row keys in first-seen order, so a row
    carrying a key that the first row lacks no longer makes ``DictWriter``
    raise; missing values are written as empty cells.
    """
    if not clean_array:
        # Nothing to derive a header from.
        return ""

    # dict.fromkeys keeps first-seen order while dropping duplicates.
    fieldnames = list(dict.fromkeys(key for row in clean_array for key in row))

    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(clean_array)
    return output.getvalue()


def schema_to_flat_csv(json_schema:dict):
    """Convert a JSON schema dict into a CSV string with one row per field.

    The schema is flattened with "." as the key separator, reduced to field
    records by ``flat_schema_adjuster``, and written out by
    ``csv_string_writer``.

    Args:
        json_schema: The (nested) JSON schema to convert.

    Returns:
        The CSV text returned by ``csv_string_writer``.

    NOTE(review): every record is stamped ``Required=True`` and
    ``Default=None`` regardless of the schema's own ``required`` lists.
    """
    rows = flat_schema_adjuster(flatten(json_schema, separator="."))
    for row in rows:
        # Same two columns, added in the same order, on the records themselves.
        row.update({"Required": True, "Default": None})
    return csv_string_writer(rows)




In [137]:
# Convert the sample schema to a CSV string; the bare name on the last line
# makes the notebook display it as the cell output.
csv_data = schema_to_flat_csv(test_schema)
csv_data

'uid,name,array,type,Required,Default\r\nec10ca83-7683-45ff-a0e2-8015c8a4b719,hello,True,string,True,\r\n69fade43-91a3-4016-9ba1-a992a1e4f4db,goodbye.lovers,True,,True,\r\nf076da1c-01ea-44d6-9ec8-b7db6acfe0aa,goodbye.friends,True,string,True,\r\nf1bacd06-b195-4356-91d1-0b5fadffaf26,fu,False,string,True,\r\n'

'uid,name,array,type\r\nbcf2c8ad-90b2-4faa-afef-0b0003243cba,hello,True,string\r\n26b34398-c5b8-4658-83a3-94bc573ad88d,goodbye.lovers,True,\r\nacd5e440-e5c8-46b8-bf64-fcf2a19e2dff,goodbye.friends,True,string\r\nfb1f6495-fdbf-4dcf-8888-00394002390b,fu,False,string\r\n'

In [31]:
# Select the datamodel-code-generator classes for Pydantic v2 output
# targeting Python 3.12.
data_model_types = get_data_model_types(
    DataModelType.PydanticV2BaseModel,
    target_python_version=PythonVersion.PY_312,
)

# Feed the sample schema (serialised to JSON text) to the JSON-schema parser,
# wiring in the classes selected above.
parser = JsonSchemaParser(
    json.dumps(test_schema),
    data_model_type=data_model_types.data_model,
    data_model_root_type=data_model_types.root_model,
    data_model_field_type=data_model_types.field_model,
    data_type_manager_type=data_model_types.data_type_manager,
    dump_resolve_reference_action=data_model_types.dump_resolve_reference_action,
)

# parse() returns the generated source; print it so it shows as plain text.
result = parser.parse()
print(result)

from __future__ import annotations

from typing import List, Optional

from pydantic import BaseModel


class Goodbye(BaseModel):
    lovers: Optional[List] = None
    friends: List[str]


class Model(BaseModel):
    hello: List[str]
    goodbye: Goodbye
    fu: str



In [53]:
# Scratch cell: shows the string form of a random UUID, the same expression
# flat_schema_adjuster uses to fill each record's "uid".
str(uuid4())

'0fb0a94f-70ed-4739-aeea-750c567517e6'