In [1]:
import json
import types

import pandas as pd
from pandas_to_pydantic import (
    dataframe_to_pydantic,
    get_model_columns,
    serialize_dataframe,
)
from pydantic import BaseModel, RootModel
from pydantic._internal._model_construction import ModelMetaclass

from pandas_to_pydantic_test.config import BOOK_DATA_PATH, DATA_PATH, LIBRARY_DATA_PATH
from pandas_to_pydantic_test.libraryTypes import Library

In [2]:
# Load the two flat CSV fixtures; both paths are imported from
# pandas_to_pydantic_test.config in the first cell.
libraryData = pd.read_csv(LIBRARY_DATA_PATH)
bookData = pd.read_csv(BOOK_DATA_PATH)

In [11]:
# NOTE(review): a later cell defines another `Book` model with an extra `Genre`
# field; once that cell has run, it replaces this definition in the kernel.
class Book(BaseModel):
    """Leaf model for one book: id, title and year of publication."""

    BookID: int
    Title: str
    PublishedYear: int


# NOTE(review): a later cell defines another `Author` model with different
# fields (no `BookList`); once that cell has run, it replaces this definition.
class Author(BaseModel):
    """An author together with the list of their `Book` entries."""

    AuthorName: str
    BookList: list[Book]


class Genre(BaseModel):
    """Top-level model for this example: a genre name and its authors."""

    # The field intentionally has the same name as the class.
    Genre: str
    AuthorList: list[Author]


# Build nested Genre -> Author -> Book models from the flat book table and
# display them as plain Python data. In the stored output, rows are grouped by
# the "Genre" column and, inside each genre, by "AuthorName".
dataframe_to_pydantic(
    data=bookData,
    model=Genre,
    id_column_map={"Genre": "Genre", "AuthorList": "AuthorName"},
).model_dump()

[{'Genre': 'Fantasy',
  'AuthorList': [{'AuthorName': 'J.K. Rowling',
    'BookList': [{'BookID': 1,
      'Title': "Harry Potter and the Philosopher's Stone",
      'PublishedYear': 1997},
     {'BookID': 2,
      'Title': 'Harry Potter and the Chamber of Secrets',
      'PublishedYear': 1998}]},
   {'AuthorName': 'J.R.R. Tolkien',
    'BookList': [{'BookID': 11, 'Title': 'The Hobbit', 'PublishedYear': 1937},
     {'BookID': 12,
      'Title': 'The Lord of the Rings',
      'PublishedYear': 1954}]}]},
 {'Genre': 'Dystopian Fiction',
  'AuthorList': [{'AuthorName': 'George Orwell',
    'BookList': [{'BookID': 3, 'Title': '1984', 'PublishedYear': 1949}]}]},
 {'Genre': 'Political Satire',
  'AuthorList': [{'AuthorName': 'George Orwell',
    'BookList': [{'BookID': 4,
      'Title': 'Animal Farm',
      'PublishedYear': 1945}]}]},
 {'Genre': 'Romance',
  'AuthorList': [{'AuthorName': 'Jane Austen',
    'BookList': [{'BookID': 5,
      'Title': 'Pride and Prejudice',
      'PublishedYear':

In [3]:
# Preview the first flat row as a dict; slicing with head(1) first means only
# that one row is converted instead of the whole frame.
libraryData.head(1).to_dict(orient="records")[0]

{'LibraryID': 1,
 'LibraryName': 'City Central Library',
 'Location': 'Cityville',
 'EstablishedYear': 1950,
 'BookCollectionSize': 50000,
 'AuthorID': 1,
 'AuthorName': 'J.K. Rowling',
 'AuthorBirthdate': '1965-07-31',
 'BookID': 1,
 'Title': "Harry Potter and the Philosopher's Stone",
 'Genre': 'Fantasy',
 'PublishedYear': 1997,
 'AvailableCopies': 5}

In [14]:
# NOTE(review): `Author`, `Book` and `Library` below reuse names that already
# exist in this notebook (`Author`/`Book` from an earlier cell, `Library` from
# the `pandas_to_pydantic_test.libraryTypes` import). Once this cell has run,
# these definitions are the ones every later cell sees.


class LibraryDetail(BaseModel):
    """Descriptive attributes of a library, nested under `Library.Detail`."""

    LibraryName: str
    Location: str
    EstablishedYear: int
    BookCollectionSize: int


# Backward-compatible alias for the original, misspelled class name.
LibaryDetail = LibraryDetail


class Author(BaseModel):
    """Flat author record: id, name and birthdate (kept as a string)."""

    AuthorID: int
    AuthorName: str
    AuthorBirthdate: str


class Book(BaseModel):
    """Flat book record: id, title, genre and year of publication."""

    BookID: int
    Title: str
    Genre: str
    PublishedYear: int


class Library(BaseModel):
    """A library with one nested detail object plus author and book lists."""

    LibraryID: int
    Detail: LibraryDetail
    AuthorList: list[Author]
    BookList: list[Book]

In [15]:
# Group the flat library rows into nested `Library` models. Each entry of the
# id map pairs a model / list-field name with the column that identifies it.
library_list_root = dataframe_to_pydantic(
    data=libraryData,
    model=Library,
    id_column_map={
        "Library": "LibraryID",
        "BookList": "BookID",
        "AuthorList": "AuthorID",
    },
)

In [17]:
# Display the nested models as plain dicts and lists.
library_list_root.model_dump()

[{'LibraryID': 1,
  'Detail': {'LibraryName': 'City Central Library',
   'Location': 'Cityville',
   'EstablishedYear': 1950,
   'BookCollectionSize': 50000},
  'AuthorList': [{'AuthorID': 1,
    'AuthorName': 'J.K. Rowling',
    'AuthorBirthdate': '1965-07-31'},
   {'AuthorID': 5,
    'AuthorName': 'Mark Twain',
    'AuthorBirthdate': '1835-11-30'}],
  'BookList': [{'BookID': 1,
    'Title': "Harry Potter and the Philosopher's Stone",
    'Genre': 'Fantasy',
    'PublishedYear': 1997},
   {'BookID': 2,
    'Title': 'Harry Potter and the Chamber of Secrets',
    'Genre': 'Fantasy',
    'PublishedYear': 1998},
   {'BookID': 10,
    'Title': 'The Adventures of Tom Sawyer',
    'Genre': 'Adventure',
    'PublishedYear': 1876}]},
 {'LibraryID': 2,
  'Detail': {'LibraryName': 'Greenwood Public Library',
   'Location': 'Greenwood',
   'EstablishedYear': 1975,
   'BookCollectionSize': 35000},
  'AuthorList': [{'AuthorID': 2,
    'AuthorName': 'George Orwell',
    'AuthorBirthdate': '1903-06-2

In [56]:
# Lower-level route: resolve the column layout for the model with
# get_model_columns, then serialize the dataframe with it. The stored output
# is plain nested dicts and lists.
#
# The model passed here used to be `DetailLibrary`, a name that is neither
# defined nor imported anywhere in this notebook, so the cell failed with
# NameError on a fresh kernel. `Library` (with its nested `Detail`) matches
# the shape of the stored output.
serialized_data = serialize_dataframe(
    libraryData,
    get_model_columns(
        Library,
        {
            "Library": "LibraryID",
            "BookList": "BookID",
            "AuthorList": "AuthorID",
        },
    ),
)

In [59]:
# Display the serialized result for inspection.
serialized_data

[{'LibraryID': 1,
  'AuthorList': [{'AuthorID': 1,
    'AuthorName': 'J.K. Rowling',
    'AuthorBirthdate': '1965-07-31'},
   {'AuthorID': 5,
    'AuthorName': 'Mark Twain',
    'AuthorBirthdate': '1835-11-30'}],
  'BookList': [{'BookID': 1,
    'Title': "Harry Potter and the Philosopher's Stone",
    'Genre': 'Fantasy',
    'PublishedYear': 1997},
   {'BookID': 2,
    'Title': 'Harry Potter and the Chamber of Secrets',
    'Genre': 'Fantasy',
    'PublishedYear': 1998},
   {'BookID': 10,
    'Title': 'The Adventures of Tom Sawyer',
    'Genre': 'Adventure',
    'PublishedYear': 1876}],
  'Detail': {'LibraryName': 'City Central Library',
   'Location': 'Cityville',
   'EstablishedYear': 1950,
   'BookCollectionSize': 50000}},
 {'LibraryID': 2,
  'AuthorList': [{'AuthorID': 2,
    'AuthorName': 'George Orwell',
    'AuthorBirthdate': '1903-06-25'},
   {'AuthorID': 6,
    'AuthorName': 'J.R.R. Tolkien',
    'AuthorBirthdate': '1892-01-03'}],
  'BookList': [{'BookID': 3,
    'Title': '198

In [60]:
# Persist the serialized records as JSON, streaming straight into the file
# handle rather than building the full string first.
with open(DATA_PATH + "test.json", "w") as outfile:
    json.dump(serialized_data, outfile)

In [10]:
# Convert the flat library table without an explicit id column map.
# NOTE(review): the stored output of the next cell shows a flat `Library`
# (with `LibraryName` directly on it), which matches the model imported from
# `pandas_to_pydantic_test.libraryTypes`, not the `Library` class defined
# earlier in this notebook. Confirm which one is meant to be in scope here.
libraryListRoot = dataframe_to_pydantic(data=libraryData, model=Library)

In [11]:
# Show the `.root` attribute of the result; in the stored output it is a list
# of `Library` instances.
libraryListRoot.root

[Library(LibraryID=1, LibraryName='City Central Library', Location='Cityville', EstablishedYear=1950, BookCollectionSize=50000, AuthorList=[Author(AuthorID=1, AuthorName='J.K. Rowling', AuthorBirthdate='1965-07-31', BookList=[Book(BookID=1, Title="Harry Potter and the Philosopher's Stone", Genre='Fantasy', PublishedYear=1997, AvailableCopies=5), Book(BookID=2, Title='Harry Potter and the Chamber of Secrets', Genre='Fantasy', PublishedYear=1998, AvailableCopies=3)]), Author(AuthorID=5, AuthorName='Mark Twain', AuthorBirthdate='1835-11-30', BookList=[Book(BookID=10, Title='The Adventures of Tom Sawyer', Genre='Adventure', PublishedYear=1876, AvailableCopies=2)])]),
 Library(LibraryID=2, LibraryName='Greenwood Public Library', Location='Greenwood', EstablishedYear=1975, BookCollectionSize=35000, AuthorList=[Author(AuthorID=2, AuthorName='George Orwell', AuthorBirthdate='1903-06-25', BookList=[Book(BookID=3, Title='1984', Genre='Dystopian Fiction', PublishedYear=1949, AvailableCopies=7)]),

In [13]:
# Write the models out as JSON text. This targets the same
# DATA_PATH + "test.json" file as the earlier json.dumps cell, so whichever
# cell runs last determines the file's content.
with open(DATA_PATH + "test.json", "w") as outfile:
    outfile.write(libraryListRoot.model_dump_json())

In [14]:
# Parametrized root model type for "a list of Library" (a type, not an instance).
libraryJsonRoot = RootModel[list[Library]]

# Read the JSON file written above back in as plain Python data.
# NOTE(review): no validation of `jsonData` against `libraryJsonRoot` is
# visible in this cell -- presumably it happens in a later step; confirm.
with open(DATA_PATH + "test.json") as f:
    jsonData = json.load(f)