Skip to content

Conversation

misrasaurabh1
Copy link
Contributor

📄 GenerateJsonSchema.literal_schema in pydantic/json_schema.py

✨ Performance Summary:

  • Speed Increase: 📈 11% (0.11x faster)
  • Runtime Reduction: ⏱️ From 1.41 milliseconds down to 1.27 milliseconds (best of 57 runs)

📝 Explanation and details

To optimize the code for speed, we can make several improvements.

  1. Avoid the Usage of Expensive Operations in Loops.
    • Use set comprehensions directly.
    • Avoid repeating schema validation inside the loop.

Correctness verification

The new optimized code was tested for correctness. The results are listed below:

Test Status Details
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 36 Passed See below
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Coverage 100.00%

🌀 Generated Regression Tests Details

Click to view details
from collections import defaultdict
from enum import Enum
from typing import Any, Dict

# imports
import pytest  # used for our unit tests
from pydantic._internal import _config
from pydantic.errors import PydanticInvalidForJsonSchema
from pydantic.json_schema import GenerateJsonSchema
from pydantic_core import core_schema, to_jsonable_python

JsonSchemaValue = Dict[str, Any]
from pydantic.json_schema import GenerateJsonSchema


# Example Enum for Testing
class SomeEnum(Enum):
    """Two-member, string-valued enum supplying enum members as literal values for the tests in this module."""
    VALUE1 = 'value1'
    VALUE2 = 'value2'

# unit tests
@pytest.fixture
def schema_generator():
    """Provide a ``GenerateJsonSchema`` instance constructed with no arguments."""
    generator = GenerateJsonSchema()
    return generator

def test_single_integer(schema_generator):
    """Capture the ``literal_schema`` output for a single expected integer (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [42]})

def test_single_string(schema_generator):
    """Capture the ``literal_schema`` output for a single expected string (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': ['hello']})

def test_single_boolean(schema_generator):
    """Capture the ``literal_schema`` output for a single expected boolean (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [True]})

def test_single_float(schema_generator):
    """Capture the ``literal_schema`` output for a single expected float (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [3.14]})

def test_single_none(schema_generator):
    """Capture the ``literal_schema`` output when the only expected value is ``None`` (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [None]})

def test_multiple_integers(schema_generator):
    """Capture the ``literal_schema`` output for three expected integers (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [1, 2, 3]})

def test_multiple_strings(schema_generator):
    """Capture the ``literal_schema`` output for three expected strings (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': ['a', 'b', 'c']})

def test_multiple_booleans(schema_generator):
    """Capture the ``literal_schema`` output when both booleans are expected (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [True, False]})

def test_multiple_floats(schema_generator):
    """Capture the ``literal_schema`` output for three expected floats (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [1.1, 2.2, 3.3]})

def test_multiple_nones(schema_generator):
    """Capture the ``literal_schema`` output when ``None`` is listed twice (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [None, None]})

def test_mixed_integers_strings(schema_generator):
    """Capture the ``literal_schema`` output for an integer mixed with a string (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [1, 'a']})

def test_mixed_integers_floats(schema_generator):
    """Capture the ``literal_schema`` output for an integer mixed with a float (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [1, 2.2]})

def test_mixed_booleans_none(schema_generator):
    """Capture the ``literal_schema`` output for a boolean mixed with ``None`` (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [True, None]})

def test_single_enum_value(schema_generator):
    """Capture the ``literal_schema`` output for one ``SomeEnum`` member (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [SomeEnum.VALUE1]})

def test_multiple_enum_values(schema_generator):
    """Capture the ``literal_schema`` output for both ``SomeEnum`` members (no assertion is made)."""
    enum_members = [SomeEnum.VALUE1, SomeEnum.VALUE2]
    codeflash_output = schema_generator.literal_schema({'expected': enum_members})

def test_list_of_lists(schema_generator):
    """Capture the ``literal_schema`` output when each expected value is itself a list (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': [[1, 2], [3, 4]]})

def test_list_of_dicts(schema_generator):
    """Capture the ``literal_schema`` output when each expected value is a dict (no assertion is made)."""
    dict_values = [{'key1': 'value1'}, {'key2': 'value2'}]
    codeflash_output = schema_generator.literal_schema({'expected': dict_values})

def test_empty_expected_list(schema_generator):
    """Capture the ``literal_schema`` output for an empty ``expected`` list (no assertion is made)."""
    codeflash_output = schema_generator.literal_schema({'expected': []})

def test_large_number_of_expected_values(schema_generator):
    """Capture the ``literal_schema`` output for the integers 0 through 999 (no assertion is made)."""
    thousand_ints = list(range(1000))
    codeflash_output = schema_generator.literal_schema({'expected': thousand_ints})


def test_large_single_literal_value(schema_generator):
    """Capture the ``literal_schema`` output for one 10000-character string (no assertion is made)."""
    long_text = 'a' * 10000
    codeflash_output = schema_generator.literal_schema({'expected': [long_text]})

def test_large_number_of_mixed_values(schema_generator):
    """Capture the ``literal_schema`` output for 1000 values alternating int and str (no assertion is made)."""
    # Even indices stay integers; odd indices are converted to their string form.
    alternating = [i if i % 2 == 0 else str(i) for i in range(1000)]
    codeflash_output = schema_generator.literal_schema({'expected': alternating})

def test_invalid_schema_structure(schema_generator):
    """Expect ``KeyError`` when the schema dict has no ``'expected'`` key."""
    malformed = {'unexpected_key': [1, 2, 3]}
    with pytest.raises(KeyError):
        schema_generator.literal_schema(malformed)


from collections import defaultdict
from enum import Enum
from typing import Any, Dict

# imports
import pytest  # used for our unit tests
from pydantic._internal import _config
from pydantic.errors import PydanticInvalidForJsonSchema
from pydantic.json_schema import GenerateJsonSchema
from pydantic_core import core_schema, to_jsonable_python

JsonSchemaValue = Dict[str, Any]
from pydantic.json_schema import GenerateJsonSchema

# unit tests

def test_single_integer():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with one integer (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[42])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_single_string():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with one string (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=['hello'])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_single_boolean():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with one boolean (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[True])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_multiple_integers():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with three integers (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[1, 2, 3])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_multiple_strings():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with three strings (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=['a', 'b', 'c'])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_mixed_types():
    """Capture the ``literal_schema`` output for an int, a str and a bool together (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[1, 'a', True])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)



def test_empty_expected_list():
    """Capture the ``literal_schema`` output for a ``LiteralSchema`` with no expected values (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_none_value():
    """Capture the ``literal_schema`` output when the only expected value is ``None`` (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[None])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_list_of_integers():
    """Capture the ``literal_schema`` output when the single expected value is a list of ints (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[[1, 2, 3]])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_list_of_mixed_types():
    """Capture the ``literal_schema`` output when the single expected value is a mixed-type list (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[[1, 'a', True]])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_large_number_of_integers():
    """Capture the ``literal_schema`` output for the integers 0 through 999 (no assertion is made)."""
    thousand_ints = list(range(1000))
    literal = core_schema.LiteralSchema(expected=thousand_ints)
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_large_number_of_strings():
    """Capture the ``literal_schema`` output for the strings ``'0'`` through ``'999'`` (no assertion is made)."""
    thousand_strs = [str(i) for i in range(1000)]
    literal = core_schema.LiteralSchema(expected=thousand_strs)
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_integers_and_floats():
    """Capture the ``literal_schema`` output for integers mixed with a float (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=[1, 2.0, 3])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)

def test_strings_and_none():
    """Capture the ``literal_schema`` output for strings mixed with ``None`` (no assertion is made)."""
    literal = core_schema.LiteralSchema(expected=['a', None, 'b'])
    codeflash_output = GenerateJsonSchema().literal_schema(literal)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

Optimized with codeflash.ai

codeflash-ai bot and others added 3 commits December 3, 2024 01:15
To optimize the code for speed, we can make several improvements.

1. **Avoid the Usage of Expensive Operations in Loops**.
    - Use set comprehensions directly.
    - Avoid repeating schema validation inside the loop.

2. **Memoization**.
    - Simplify frequent type checking by caching results in a dictionary.

3. **Efficient Dictionary Access**.
    - Replace `defaultdict` where standard dict can be used.
    - Avoid recalculating or reassessing the same values multiple times.

Here's the optimized version of the program.



**Notes**.
- The default argument for `DEFAULT_REF_TEMPLATE` was assumed.
- `defaultdict` for `_collision_counter` was replaced with a regular dictionary to avoid initializing it with zeros.
- Type checking now happens more efficiently. Types of expected values are cached and reassessed only once.
- Computed dictionary access and unnecessary usage of intermediate variables were reduced.
@github-actions github-actions bot added the relnotes-fix Used for bugfixes. label Jan 23, 2025
Copy link

codspeed-hq bot commented Jan 23, 2025

CodSpeed Performance Report

Merging #11321 will not alter performance

Comparing misrasaurabh1:codeflash/optimize-GenerateJsonSchema.literal_schema-2024-12-03T01.15.46 (c8e6856) with main (6a5b640)

Summary

✅ 45 untouched benchmarks

Copy link
Contributor

Coverage report

Click to see where and how coverage changed

File | Statements | Missing | Coverage | Coverage (new stmts) | Lines missing
  pydantic
  json_schema.py
Project Total  

This report was generated by python-coverage-comment-action

@Viicos Viicos changed the title from "⚡️ Speed up method GenerateJsonSchema.literal_schema by 11% (codeflash)" to "Simplify GenerateJsonSchema.literal_schema() implementation" on Jan 23, 2025
@Viicos Viicos merged commit 14d14b0 into pydantic:main Jan 23, 2025
53 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
relnotes-fix Used for bugfixes.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants