In [36]:
from pydantic import BaseModel

class User(BaseModel):
    id: int
    name: str
    email: str

# Example input
user = User(id=1, name="Alice", email="alice@example.com")
print(user)

id=1 name='Alice' email='alice@example.com'


In [38]:
User(id="not-an-int", name=123, email=True)


ValidationError: 3 validation errors for User
id
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='not-an-int', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing
name
  Input should be a valid string [type=string_type, input_value=123, input_type=int]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type
email
  Input should be a valid string [type=string_type, input_value=True, input_type=bool]
    For further information visit https://errors.pydantic.dev/2.11/v/string_type

In [14]:
user = User(id='123', name='Bob', email='bob@example.com')
print(user.id)  # 123 as int
# Pydantic automatically converts types where possible (e.g., string to int), which is great for APIs and file inputs.


123


In [39]:
from pydantic import BaseModel, EmailStr, constr

class User(BaseModel):
    id: int
    name: constr(min_length=2)
    email: EmailStr  # Checks for proper email format

# Will raise an error: name too short or invalid email
User(id=1, name="A", email="invalid")


ValidationError: 2 validation errors for User
name
  String should have at least 2 characters [type=string_too_short, input_value='A', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/string_too_short
email
  value is not a valid email address: An email address must have an @-sign. [type=value_error, input_value='invalid', input_type=str]

In [40]:
class Address(BaseModel):
    city: str
    zip_code: str

class User(BaseModel):
    id: int
    name: str
    address: Address

user = User(id=1, name="Alice", address={"city": "New York", "zip_code": "10001"})
# Pydantic automatically parses nested dictionaries into model instances.


In [41]:
import pandas as pd
from pydantic import BaseModel, EmailStr, ValidationError

class UserRow(BaseModel):
    id: int
    name: str
    email: EmailStr

# Load sample data
df = pd.DataFrame([
    {"id": 1, "name": "Alice", "email": "alice@example.com"},
    {"id": "two", "name": "Bob", "email": "bob[at]email.com"},
])

df

Unnamed: 0,id,name,email
0,1,Alice,alice@example.com
1,two,Bob,bob[at]email.com


In [42]:
# Validate each row
def validate_row(row):
    try:
        user = UserRow(**row)
        return user
    except ValidationError as e:
        print(f"Validation failed for row {row}:\n{e}\n")
        return None

validated = df.apply(validate_row, axis=1)

Validation failed for row id                    two
name                  Bob
email    bob[at]email.com
Name: 1, dtype: object:
2 validation errors for UserRow
id
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='two', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing
email
  value is not a valid email address: An email address must have an @-sign. [type=value_error, input_value='bob[at]email.com', input_type=str]



# Logging

In [43]:
# --- Part 2: Basic Logging with Python's logging module ---
import logging
import os

# Configure logging
# Create a logs directory if it doesn't exist
LOG_DIR = "logs"
if not os.path.exists(LOG_DIR):
    os.makedirs(LOG_DIR)

LOG_FILE = os.path.join(LOG_DIR, "ml_course_app.log")

# Basic configuration:
# - Level: DEBUG, INFO, WARNING, ERROR, CRITICAL
# - Format: How the log messages will look
# - Handlers: Where the log messages go (e.g., file, console)

logging.basicConfig(
    level=logging.INFO, # Log messages of INFO level and above
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(LOG_FILE), # Log to a file
        logging.StreamHandler()        # Log to the console
    ]
)

# Get a logger instance (best practice to use __name__ for module-level logger)
logger = logging.getLogger(__name__)

# If you want a specific logger for a component:
# data_loader_logger = logging.getLogger("data_loader")
# model_trainer_logger = logging.getLogger("model.trainer")

print("\n--- Python Logging ---")
logger.debug("This is a debug message. (Will not be shown with INFO level)") # Not shown due to level=INFO
logger.info("Application started. This is an informational message.")
logger.warning("A potential issue was detected, but the application can continue.")
logger.error("An error occurred. Something went wrong.")
logger.critical("A critical error occurred. The application might be unable to continue.")




INFO:__main__:Application started. This is an informational message.
ERROR:__main__:An error occurred. Something went wrong.
CRITICAL:__main__:A critical error occurred. The application might be unable to continue.



--- Python Logging ---


In [23]:
import logging

logging.basicConfig(level=logging.INFO)
logging.info("This is an info message")

INFO:root:This is an info message


In [24]:
logging.debug("Debug message")    # Not shown by default
logging.info("Info message")      # Visible if level is INFO or lower
logging.warning("Warning!")       # Always visible at INFO level or lower
logging.error("An error occurred")
logging.critical("Critical error")


INFO:root:Info message
ERROR:root:An error occurred
CRITICAL:root:Critical error


In [25]:
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)

logging.info("Formatted info message")


INFO:root:Formatted info message


In [32]:

# Example usage in a function
def process_data_with_logging(data_item):
    logger.info(f"Starting to process data item: {data_item}")
    try:
        # Simulate some processing
        if not isinstance(data_item, dict):
            logger.error(f"Invalid data type for item: {type(data_item)}. Expected dict.")
            raise TypeError("Data item must be a dictionary.")
        result = data_item.get("value", 0) * 2
        logger.info(f"Successfully processed data item. Result: {result}")
        return result
    except Exception as e:
        logger.error(f"Error processing data item {data_item}: {e}", exc_info=True) # exc_info=True logs stack trace
        # raise # Optionally re-raise the exception


In [33]:

process_data_with_logging({"value": 10})
process_data_with_logging(None) # This will cause an error

print(f"\nLog messages are being written to console and to '{LOG_FILE}'.")
print("Check the log file for more details.")

INFO:__main__:Starting to process data item: {'value': 10}
INFO:__main__:Successfully processed data item. Result: 20
INFO:__main__:Starting to process data item: None
ERROR:__main__:Invalid data type for item: <class 'NoneType'>. Expected dict.
ERROR:__main__:Error processing data item None: Data item must be a dictionary.
Traceback (most recent call last):
  File "/var/folders/7j/qr6tvgr97s3b65k1tx46fry80000gn/T/ipykernel_83481/3953309450.py", line 8, in process_data_with_logging
    raise TypeError("Data item must be a dictionary.")
TypeError: Data item must be a dictionary.



Log messages are being written to console and to 'logs/ml_course_app.log'.
Check the log file for more details.
