In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Baseline book model used to demonstrate `BaseModel.find`.

    `__scope__` combines (`&`) a `div` type selector with a `book` class
    selector; every field is a selector piped (`|`) into operations.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Element selected by class "title", piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Element selected by class "price", piped into `Text()` and then `int`.
    price = ClassSelector("price") | Text() | Operation(int)


# Sample markup: a single div with class "book" holding a title, a numeric
# price and an unclassed author paragraph.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
"""
# Parse the markup with the lxml parser.
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays whatever `Book.find` returns.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model for the "scope not found" demo (default, non-strict `find`).

    The scope is `div` combined (`&`) with class "book"; the markup in this
    cell only provides a div with class "ebook".
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)


# The only div carries class "ebook" rather than "book".
text = """
    <div class="ebook" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# No `strict` argument is passed; the cell asserts that `find` returns None
# for this markup instead of raising.
result = Book.find(soup)
assert result is None

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.exceptions import ModelScopeNotFoundException
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model for the strict-mode demo (`find(..., strict=True)`).

    Declares the same `div` & class "book" scope as the other cells; the
    markup in this cell has no element with class "book".
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)


# The only div carries class "ebook" rather than "book".
text = """
    <div class="ebook" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")

# With `strict=True` the cell expects `ModelScopeNotFoundException`; the
# exception is caught so its message can be printed as the cell output.
# Nothing is printed if no exception is raised.
try:
    Book.find(soup, strict=True)
except ModelScopeNotFoundException as e:
    print(e)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.exceptions import FieldExtractionException
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model for the "field extraction fails" demo.

    `price` has no guard around its operations; the markup in this cell
    contains no element with class "price".
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Unguarded pipeline: selector -> `Text()` -> `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)


# The div.book has a title and an author line but no class "price" element.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")

# The cell expects `FieldExtractionException` here and prints its message.
# Nothing is printed if no exception is raised.
try:
    Book.find(soup)
except FieldExtractionException as e:
    print(e)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel, SkipNone
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating the `SkipNone` wrapper on a field pipeline.

    NOTE(review): `SkipNone` presumably bypasses the wrapped operations when
    the selector yields nothing, leaving the field as None -- confirm against
    the soupsavvy documentation.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Only the operations (`Text()` then `int`) are wrapped in `SkipNone`;
    # the selector itself sits outside the wrapper.
    price = ClassSelector("price") | SkipNone(Text() | Operation(int))


# The div.book has no class "price" element.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# No try/except here: the cell relies on `find` completing despite the
# missing price element. The returned value is the cell's displayed output.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel, Suppress
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating the `Suppress` wrapper around an operation.

    NOTE(review): `Suppress` presumably swallows an error raised by the
    wrapped operation and yields None instead -- confirm against the
    soupsavvy documentation.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Only the `int` conversion is wrapped in `Suppress`; selection and
    # `Text()` run unguarded.
    price = ClassSelector("price") | Text() | Suppress(Operation(int))


# The price text is the word "hundred", which `int()` cannot parse.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">hundred</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# No try/except here: the cell relies on `find` completing despite the
# unparsable price. The returned value is the cell's displayed output.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel, Default, Suppress
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating the `Default` wrapper with a fallback of 0.

    NOTE(review): `Default` presumably substitutes its second argument when
    the wrapped pipeline produces None -- confirm against the soupsavvy
    documentation.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # The whole pipeline (with the `int` step under `Suppress`) is wrapped in
    # `Default`, whose second argument is 0.
    price = Default(ClassSelector("price") | Text() | Suppress(Operation(int)), 0)


# The price text is the word "hundred", which `int()` cannot parse.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">hundred</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.exceptions import FieldExtractionException
from soupsavvy.models import BaseModel, Required, SkipNone
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating the `Required` wrapper.

    NOTE(review): `Required` presumably turns a None result of the wrapped
    pipeline into an extraction error -- confirm against the soupsavvy
    documentation. The cell's try/except expects `FieldExtractionException`.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # `SkipNone` guards the operations; `Required` wraps the full pipeline.
    price = Required(ClassSelector("price") | SkipNone(Text() | Operation(int)))


# The div.book has no class "price" element.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")

# The cell expects `FieldExtractionException` here and prints its message.
# Nothing is printed if no exception is raised.
try:
    Book.find(soup)
except FieldExtractionException as e:
    print(e)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.nth import NthLastOfSelector


class Book(BaseModel):
    """Book model that picks one of several "price" elements by position.

    NOTE(review): `NthLastOfSelector(..., nth="1")` presumably selects the
    last element matching the inner selector -- confirm against the
    soupsavvy documentation.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Positional selector over class "price" elements, then `Text()` and `int`.
    price = NthLastOfSelector(ClassSelector("price"), nth="1") | Text() | Operation(int)


# Three class "price" elements: the first two wrap their value in <s>
# (100 and 80), the third holds 60 directly.
text = """
    <div class="book" href="www.book.com">
        <p class="price"><s>100</s></p>
        <p class="price"><s>80</s></p>
        <p class="title">Animal Farm</p>
        <p class="price">60</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import All, BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating the `All` wrapper on a field pipeline.

    NOTE(review): `All` presumably applies the wrapped pipeline to every
    matching element and collects the results -- confirm against the
    soupsavvy documentation.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # The selector -> `Text()` -> `int` pipeline wrapped in `All`.
    price = All(ClassSelector("price") | Text() | Operation(int))


# Three class "price" elements with the values 100, 80 and 60; the first two
# wrap their value in <s>.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price"><s>100</s></p>
        <p class="price"><s>80</s></p>
        <p class="price">60</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model demonstrating several operations chained on one field.

    The price pipeline cleans the extracted string before converting it.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Pipeline steps, in order: select by class "price", `Text()`, strip any
    # leading/trailing "$" characters via `str.strip`, convert with `int`.
    price = (
        ClassSelector("price")
        | Text()
        | Operation(lambda x: x.strip("$"))
        | Operation(int)
    )


# The price text carries a trailing currency sign ("100$").
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100$</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
import re
from datetime import datetime

from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, PatternSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import FirstChild


class Author(BaseModel):
    """Author model, scoped by the class "author" selector.

    Assigned to the `author` field of the `Book` model in this cell.
    """

    __scope__ = ClassSelector("author")

    # Pattern selector for a YYYY-MM-DD shaped string, piped into `Text()`
    # and parsed into a `datetime` with the "%Y-%m-%d" format.
    birth = (
        PatternSelector(re.compile(r"\d{4}-\d{2}-\d{2}"))
        | Text()
        | Operation(lambda x: datetime.strptime(x, "%Y-%m-%d"))
    )
    # `FirstChild()` selector piped into `Text()`.
    # NOTE(review): presumably resolved within the author scope -- confirm.
    name = FirstChild() | Text()


class Book(BaseModel):
    """Book model with a nested model as one of its fields.

    `author` is assigned the `Author` model class itself rather than a
    selector pipeline.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Nested model: the `Author` class is used directly as the field value.
    author = Author
    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()


# div.book containing a title and a nested div.author with three paragraphs:
# a name, a free-text remark and a date in YYYY-MM-DD form.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <div class="author">
            <p>George Orwell</p>
            <p>Great author</p>
            <p>1903-06-25</p>
        </div>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
import re

from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, PatternSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Href, Operation, Text


class Book(BaseModel):
    """Base book model that `eBook` subclasses in this cell.

    Scoped to `div` combined (`&`) with class "book".
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)


class eBook(Book):
    # Subclass of `Book` that replaces the scope with `div` & class "ebook"
    # and declares two additional fields.
    # NOTE(review): presumably also inherits `title` and `price` from `Book`
    # -- confirm against the soupsavvy documentation.
    __scope__ = TypeSelector("div") & ClassSelector("ebook")

    # `Href()` operation used on its own, without a preceding selector.
    link = Href()
    # Pattern selector for an H:MM / HH:MM shaped string, piped into `Text()`.
    duration = PatternSelector(re.compile(r"\d{1,2}:\d{2}")) | Text()


# A div with class "ebook" and an href attribute, holding a title, a price,
# an author line and a duration-like "2:30" paragraph.
text = """
    <div class="ebook" href="www.ebook.com">
        <p class="title">Animal Farm</p>
        <p class="price">50</p>
        <p>George Orwell</p>
        <p>2:30</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# The search runs on the subclass; the result is the cell's displayed output.
eBook.find(soup)

In [None]:
import re

from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, PatternSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Href, Operation, Text


class Book(BaseModel):
    """Base book model for the `__inherit_fields__` demo.

    Unlike the other cells, the scope here is the bare `div` type selector
    with no class restriction.
    """

    __scope__ = TypeSelector("div")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)


class eBook(Book):
    # Subclass of `Book` that sets `__inherit_fields__` to False and declares
    # no `__scope__` of its own.
    # NOTE(review): presumably this keeps the parent's scope while dropping
    # the inherited `title`/`price` fields -- confirm against the soupsavvy
    # documentation.
    __inherit_fields__ = False

    # `Href()` operation used on its own, without a preceding selector.
    link = Href()
    # Pattern selector for an H:MM / HH:MM shaped string, piped into `Text()`.
    duration = PatternSelector(re.compile(r"\d{1,2}:\d{2}")) | Text()


# A div with class "ebook" and an href attribute, holding a title, a price,
# an author line and a duration-like "2:30" paragraph.
text = """
    <div class="ebook" href="www.ebook.com">
        <p class="title">Animal Farm</p>
        <p class="price">50</p>
        <p>George Orwell</p>
        <p>2:30</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# The search runs on the subclass; the result is the cell's displayed output.
eBook.find(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import LastOfType


class Book(BaseModel):
    """Book model with title, price and author, used with `find_all`."""

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)
    # `LastOfType()` combined (`&`) with the `p` type selector, then `Text()`.
    author = (LastOfType() & TypeSelector("p")) | Text()


# Three sibling divs: the first has class "ebook", the other two have class
# "book". In each, the author line is the last <p>.
text = """
    <div class="ebook" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
    <div class="book">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
    <div class="book">
        <p class="title">The Giver</p>
        <p class="price">80</p>
        <p>Lois Lowry</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# `find_all` instead of `find`; its return value is the cell's displayed output.
Book.find_all(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import LastOfType


class Book(BaseModel):
    """Book model for the `find_all` demo where no scope element exists.

    The markup in this cell contains no div with class "book".
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)
    # `LastOfType()` combined (`&`) with the `p` type selector, then `Text()`.
    author = (LastOfType() & TypeSelector("p")) | Text()


# One div with class "ebook", followed by title/price/author paragraphs that
# sit outside any div. There is no div with class "book".
text = """
    <div class="ebook" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
    <p class="title">Animal Farm</p>
    <p class="price">100</p>
    <p>George Orwell</p>
"""
soup = BeautifulSoup(text, features="lxml")
# NOTE(review): presumably yields an empty collection here, since nothing
# matches the scope -- confirm against the soupsavvy documentation.
Book.find_all(soup)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import LastOfType


class Book(BaseModel):
    """Book model for the `recursive=False` demo parsed with html.parser.

    In this cell's markup the field elements are nested inside a <span>
    rather than being direct children of the scope div.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)
    # `LastOfType()` combined (`&`) with the `p` type selector, then `Text()`.
    author = (LastOfType() & TypeSelector("p")) | Text()


# div.book whose content is wrapped in a <span>; the author paragraph is
# nested one level deeper, inside span.author.
# NOTE(review): `<p class="price">` is itself the last <p> among its
# siblings and precedes the author <p> in document order -- confirm which
# element `Book.author` resolves to with this markup.
text = """
    <div class="book" href="www.book.com">
        <span>
            <p class="title">Animal Farm</p>
            <p class="price">100</p>
            <span class="author">
                <p>George Orwell</p>
            </span>
        </span>
    </div>
"""
# html.parser, unlike lxml, does not wrap the fragment in <html>/<body>, so
# the div.book is a direct child of `soup`.
soup = BeautifulSoup(text, features="html.parser")
# NOTE(review): `recursive=False` presumably restricts only the scope search,
# while fields are still looked up among nested descendants -- confirm.
Book.find(soup, recursive=False)

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import LastOfType


class Book(BaseModel):
    """Book model for the `recursive=False` demo on different search roots.

    The cell calls `find` on `soup.body` and on `soup.span`.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` and `Operation(int)`.
    price = ClassSelector("price") | Text() | Operation(int)
    # `LastOfType()` combined (`&`) with the `p` type selector, then `Text()`.
    author = (LastOfType() & TypeSelector("p")) | Text()


# The div.book is wrapped in a <span>.
text = """
    <span>
        <div class="book" href="www.book.com">
            <p class="title">Animal Farm</p>
            <p class="price">100</p>
            <p>George Orwell</p>
        </div>
    </span>
"""
# lxml wraps the fragment in <html>/<body>, so the direct child of
# `soup.body` is the <span>, not the div.book.
soup = BeautifulSoup(text, features="lxml")
# Searching from <body> with `recursive=False`: the cell asserts None.
result = Book.find(soup.body, recursive=False)  # type: ignore
assert result is None

# Searching from the <span>, whose direct child is the div.book; the return
# value is the cell's displayed output.
Book.find(soup.span, recursive=False)  # type: ignore

In [None]:
from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text
from soupsavvy.selectors.css import LastOfType


class Book(BaseModel):
    """Book model that post-processes its fields in `__post_init__`.

    Unlike the other cells, `price` has no `Operation(int)` step; the
    conversion happens in `__post_init__` instead.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`.
    title = ClassSelector("title") | Text()
    # Selector for class "price" piped into `Text()` only (no `int` step).
    price = ClassSelector("price") | Text()
    # `LastOfType()` combined (`&`) with the `p` type selector, then `Text()`.
    author = (LastOfType() & TypeSelector("p")) | Text()

    def __post_init__(self) -> None:
        """Normalise `price` and `title` on the instance.

        NOTE(review): presumably invoked by soupsavvy once the instance's
        fields are populated -- confirm against the soupsavvy documentation.
        """
        # Strip leading/trailing "$" from the price string, then convert to int.
        self.price = int(str(self.price).strip("$"))
        # Store the title upper-cased.
        self.title = str(self.title).upper()


# The price text carries a trailing currency sign ("100$").
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100$</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)

In [None]:
from typing import cast

from bs4 import BeautifulSoup

from soupsavvy import ClassSelector, TypeSelector
from soupsavvy.models import BaseModel
from soupsavvy.operations import Operation, Text


class Book(BaseModel):
    """Book model whose field declarations are wrapped in `typing.cast`.

    `cast` returns its second argument unchanged at runtime, so the field
    pipelines are the same objects as without it; the wrapper only tells
    static type checkers to treat `title` as `str` and `price` as `int`.
    """

    __scope__ = TypeSelector("div") & ClassSelector("book")

    # Selector for class "title" piped into `Text()`, typed as `str`.
    title = cast(str, ClassSelector("title") | Text())
    # Selector -> `Text()` -> `int` pipeline, typed as `int`.
    price = cast(int, ClassSelector("price") | Text() | Operation(int))


# Sample markup: a single div with class "book" holding a title, a numeric
# price and an unclassed author paragraph.
text = """
    <div class="book" href="www.book.com">
        <p class="title">Animal Farm</p>
        <p class="price">100</p>
        <p>George Orwell</p>
    </div>
"""
soup = BeautifulSoup(text, features="lxml")
# Trailing expression: the notebook displays the model returned by `find`.
Book.find(soup)