# Data Formats and Persistence

## Pickle

In [None]:
import pickle

# Sample record used for both the file-based and the in-memory round trip.
car = {"brand": "Ford", "model": "Mustang", "year": 1965}

# [1] File round trip: dump() serialises the object into a binary file.
with open("data.pckl", "wb") as f:
    pickle.dump(car, f)
# Report the object that was just written; `car_loaded` is not bound yet here.
print("[1] Dumped car to file:", car)

# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("data.pckl", "rb") as f:
    car_loaded = pickle.load(f)
print("[1] Loaded car from file:", car_loaded, '\n')

# [2] In-memory round trip: dumps() returns the pickled bytes directly.
data_bytes = pickle.dumps(car)
print("[2] Type after dumps:", type(data_bytes).__name__)

car_from_memory = pickle.loads(data_bytes)
print("[2] Restored from memory:", car_from_memory, '\n')

class Employee:
    """Simple employee record used to show that pickle handles custom classes."""

    def __init__(self, name, position, salary):
        # Plain attribute storage; no validation or conversion is performed.
        self.name, self.position, self.salary = name, position, salary

    def promote(self, new_position, raise_amount):
        """Switch to ``new_position`` and increase the salary by ``raise_amount``."""
        self.position = new_position
        self.salary += raise_amount

    def __repr__(self):
        return "Employee({}, {}, {})".format(self.name, self.position, self.salary)


# Build the team from plain (name, position, salary) records.
roster = (
    ("Alice", "Developer", 70000),
    ("Bob", "Designer", 65000),
    ("Charlie", "Manager", 85000),
)
team = [Employee(*record) for record in roster]

# Custom objects are pickled together with ordinary values in one container.
bundle = dict(team=team, department="R&D", year=2025)

# [3] Round-trip the whole bundle through a pickle file.
with open("employees.pkl", "wb") as f:
    pickle.dump(bundle, f)

with open("employees.pkl", "rb") as f:
    bundle_loaded = pickle.load(f)

first_employee = bundle_loaded["team"][0]
print("[3] Loaded bundle keys:", list(bundle_loaded))
print("[3] Type of first employee:", type(first_employee).__name__)
print("[3] Example restored object:", first_employee)

[1] Dumped car to file: {'brand': 'Ford', 'model': 'Mustang', 'year': 1965}
[1] Loaded car from file: {'brand': 'Ford', 'model': 'Mustang', 'year': 1965} 

[2] Type after dumps: bytes
[2] Restored from memory: {'brand': 'Ford', 'model': 'Mustang', 'year': 1965} 

[3] Loaded bundle keys: ['team', 'department', 'year']
[3] Type of first employee: Employee
[3] Example restored object: Employee(Alice, Developer, 70000)


## Shelve

In [14]:
import shelve

class Employee:
    """Employee record stored in the shelf: a name, a role and a salary."""

    def __init__(self, name: str, role: str, salary: int):
        self.name, self.role, self.salary = name, role, salary

    def promote(self, new_role: str, raise_amount: int):
        """Replace the role with ``new_role`` and add ``raise_amount`` to the salary."""
        self.role = new_role
        self.salary += raise_amount

    def __repr__(self):
        # Each field is shown through repr(), so strings appear quoted.
        shown = ", ".join(repr(value) for value in (self.name, self.role, self.salary))
        return f"Employee({shown})"


def create_shelf(path: str = "company"):
    """Create a fresh shelf at ``path`` and fill it with the demo entries.

    The shelf is opened with flag "n"; afterwards the stored keys are printed.
    """
    with shelve.open(path, flag="n") as shelf:
        # Values of several kinds: strings, a list, a dict and custom objects.
        entries = {
            "name": "Apple",
            "country": "USA",
            "currency": ["USD", "$"],
            "meta": {"founded": 1976, "public": True},
            "employees": [
                Employee("Alice", "Developer", 70000),
                Employee("Bob", "Designer", 65000),
                Employee("Charlie", "Manager", 85000),
            ],
        }
        for key, value in entries.items():
            shelf[key] = value

        print("[CREATE] Wrote initial entries:", list(shelf), '\n')

def inspect_shelf(path: str = "company"):
    """Print the contents of the shelf stored at ``path`` without modifying it.

    Shows the key list, a few individual entries, a membership test, a
    ``get()`` call with a default, and every item of the "employees" entry.

    Args:
        path: File name (without extension) of an existing shelf.

    Raises:
        KeyError: If one of the entries read by subscript ("name",
            "currency", "meta", "employees") is missing from the shelf.
        dbm.error: If no shelf exists at ``path`` (raised by ``shelve.open``).
    """
    # Read-only access: flag="r" never creates an empty shelf as a side
    # effect when the path is wrong, and cannot alter the stored data.
    with shelve.open(path, flag="r") as db:
        print("[INSPECT] keys ->", list(db.keys()))
        print("[INSPECT] name ->", db["name"])
        print("[INSPECT] currency ->", db["currency"])
        print("[INSPECT] country exists? ->", "country" in db)
        # get() returns the supplied default instead of raising for absent keys.
        print("[INSPECT] get('missing', 'N/A') ->", db.get("missing", "N/A"))
        print("[INSPECT] meta ->", db["meta"])

        print("[INSPECT] employees ->")
        for e in db["employees"]:
            print("   ", e)
        print()

def update_with_write_flag(path: str = "company") -> None:
    """Open the existing shelf at ``path`` with flag "w" and modify two entries.

    The "currency" entry is replaced, and the first item of "employees" is
    promoted; both changes are printed.
    """
    with shelve.open(path, flag="w") as shelf:
        shelf["currency"] = ["USD", "$", "cents"]
        print("[UPDATE] currency updated to:", shelf["currency"])

        # The list is fetched, changed, then assigned back under the same key
        # so the promotion is stored in the shelf.
        staff = shelf["employees"]
        promoted = staff[0]
        promoted.promote("Senior Developer", 5000)
        shelf["employees"] = staff
        print("[UPDATE] promoted:", promoted, '\n')

# Run the demo end to end: build the shelf, show it, modify it, show it again.
for step in (create_shelf, inspect_shelf, update_with_write_flag, inspect_shelf):
    step("company")

[CREATE] Wrote initial entries: ['country', 'currency', 'employees', 'meta', 'name'] 

[INSPECT] keys -> ['country', 'currency', 'employees', 'meta', 'name']
[INSPECT] name -> Apple
[INSPECT] currency -> ['USD', '$']
[INSPECT] country exists? -> True
[INSPECT] get('missing', 'N/A') -> N/A
[INSPECT] meta -> {'founded': 1976, 'public': True}
[INSPECT] employees ->
    Employee('Alice', 'Developer', 70000)
    Employee('Bob', 'Designer', 65000)
    Employee('Charlie', 'Manager', 85000)

[UPDATE] currency updated to: ['USD', '$', 'cents']
[UPDATE] promoted: Employee('Alice', 'Senior Developer', 75000) 

[INSPECT] keys -> ['country', 'currency', 'employees', 'meta', 'name']
[INSPECT] name -> Apple
[INSPECT] currency -> ['USD', '$', 'cents']
[INSPECT] country exists? -> True
[INSPECT] get('missing', 'N/A') -> N/A
[INSPECT] meta -> {'founded': 1976, 'public': True}
[INSPECT] employees ->
    Employee('Alice', 'Senior Developer', 75000)
    Employee('Bob', 'Designer', 65000)
    Employee('Charlie', 'Manager', 85000)

## JSON

In [94]:
import json
from pathlib import Path
from typing import Any, Dict

class Employee:
    """Employee record for the JSON examples; the salary is kept as a float."""

    def __init__(self, name, position, salary):
        self.name, self.position = name, position
        # float() normalises integer salaries coming from JSON numbers.
        self.salary = float(salary)

    def promote(self, new_position, raise_amount):
        """Set ``new_position`` and add ``raise_amount`` (converted to float) to the salary."""
        self.position = new_position
        self.salary += float(raise_amount)

    def __repr__(self) -> str:
        shown = ", ".join(map(repr, (self.name, self.position, self.salary)))
        return "Employee(" + shown + ")"

class EmployeeEncoder(json.JSONEncoder):
    """JSON encoder that turns Employee objects into tagged dictionaries."""

    def default(self, obj):
        """Return a JSON-ready dict for an Employee; defer any other type to the base class."""
        if not isinstance(obj, Employee):
            return super().default(obj)
        # The "__type__" tag lets a decoder recognise the dict as an Employee.
        payload = {"__type__": "Employee"}
        for field in ("name", "position", "salary"):
            payload[field] = getattr(obj, field)
        return payload


class EmployeeDecoder(json.JSONDecoder):
    """JSON decoder that rebuilds Employee objects from tagged dictionaries."""

    def __init__(self, *args, **kwargs):
        # Install the hook that is applied to every decoded JSON object.
        super().__init__(*args, object_hook=self._object_hook, **kwargs)

    @staticmethod
    def _object_hook(obj):
        """Return an Employee for dicts tagged ``"__type__": "Employee"``, else the dict itself."""
        if obj.get("__type__") != "Employee":
            return obj
        fields = {key: obj[key] for key in ("name", "position", "salary")}
        return Employee(**fields)


# Inline JSON document; the objects tagged "__type__": "Employee" are meant
# to be revived as Employee instances by EmployeeDecoder.
json_input = """
{
  "team": [
    {"__type__": "Employee", "name": "Alice", "position": "Developer", "salary": 70000},
    {"__type__": "Employee", "name": "Bob", "position": "Designer", "salary": 65000},
    {"__type__": "Employee", "name": "Charlie", "position": "Manager", "salary": 85000}
  ],
  "department": "R&D",
  "year": 2025
}
"""

# loads(): JSON text -> Python objects.
bundle = json.loads(json_input, cls=EmployeeDecoder)
first_member = bundle["team"][0]
print("[LOADS] type(bundle):", type(bundle).__name__)
print("[LOADS] type(bundle['team'][0]):", type(first_member).__name__)
print("[LOADS] exemple Employee object:", first_member, '\n')

first_member.promote("Senior Developer", 5000)

# dump(): Python objects -> JSON file.
json_path = Path("employees.json")
with open(json_path, "w") as f:
    json.dump(bundle, f, cls=EmployeeEncoder, indent=2)
print(f"[DUMP] wrote JSON to: {json_path.resolve()}\n")

# load(): JSON file -> Python objects.
with open(json_path, "r") as f:
    bundle_loaded = json.load(f, cls=EmployeeDecoder)
restored_member = bundle_loaded["team"][0]
print("[LOAD] type(bundle_loaded):", type(bundle_loaded).__name__)
print("[LOAD] restored Employee sample:", restored_member, '\n')

# dumps(): Python objects -> JSON text.
json_output = json.dumps(bundle_loaded, cls=EmployeeEncoder, indent=2)
print("[DUMPS] type(json_output):", type(json_output).__name__)
print("[DUMPS] JSON string length:", len(json_output))
print("[DUMPS] preview:")
print(json_output)

[LOADS] type(bundle): dict
[LOADS] type(bundle['team'][0]): Employee
[LOADS] exemple Employee object: Employee('Alice', 'Developer', 70000.0) 

[DUMP] wrote JSON to: C:\Endava\EndevLocal\pcpp\employees.json

[LOAD] type(bundle_loaded): dict
[LOAD] restored Employee sample: Employee('Alice', 'Senior Developer', 75000.0) 

[DUMPS] type(json_output): str
[DUMPS] JSON string length: 424
[DUMPS] preview:
{
  "team": [
    {
      "__type__": "Employee",
      "name": "Alice",
      "position": "Senior Developer",
      "salary": 75000.0
    },
    {
      "__type__": "Employee",
      "name": "Bob",
      "position": "Designer",
      "salary": 65000.0
    },
    {
      "__type__": "Employee",
      "name": "Charlie",
      "position": "Manager",
      "salary": 85000.0
    }
  ],
  "department": "R&D",
  "year": 2025
}


## XML, DTD and XPath

In [36]:
from xml.etree import ElementTree as ET
from lxml import etree
from pathlib import Path

# DTD for the bookstore document: <bookstore> holds any number of <book>
# elements; each <book> carries a required "category" attribute and exactly
# one title, author, year and price child (in that order), all plain text.
DTD_TEXT = """\
<!ELEMENT bookstore (book*)>
<!ELEMENT book (title, author, year, price)>
<!ATTLIST book category CDATA #REQUIRED>
<!ELEMENT title (#PCDATA)>
<!ELEMENT author (#PCDATA)>
<!ELEMENT year (#PCDATA)>
<!ELEMENT price (#PCDATA)>
"""

# File names (relative paths) used by the functions below for the DTD and XML.
DTD_PATH = Path("bookstore.dtd")
XML_PATH = Path("books.xml")

def write_dtd(dtd_path, dtd_text):
    """Write ``dtd_text`` to the file at ``dtd_path`` and print its absolute location."""
    with dtd_path.open("w") as handle:
        handle.write(dtd_text)
    location = dtd_path.resolve()
    print(f"[WRITE] DTD -> {location}")

def generate_xml(xml_path, dtd_path):
    """Build a three-book <bookstore> document and write it to ``xml_path``.

    The file is written pretty-printed with an XML declaration and a DOCTYPE
    line that refers to the DTD by the file name of ``dtd_path``.
    """
    # (category, title, author, year, price) for every book in the store.
    catalogue = (
        ("fiction", "The Great Gatsby", "F. Scott Fitzgerald", 1925, 10.99),
        ("non-fiction", "Sapiens", "Yuval Noah Harari", 2011, 14.99),
        ("fiction", "Dune", "Frank Herbert", 1965, 9.99),
    )
    child_tags = ("title", "author", "year", "price")

    root = etree.Element("bookstore")
    for category, *values in catalogue:
        book = etree.SubElement(root, "book", category=category)
        # Children are created in the order title, author, year, price.
        for tag, value in zip(child_tags, values):
            etree.SubElement(book, tag).text = str(value)

    doctype_line = f'<!DOCTYPE bookstore SYSTEM "{dtd_path.name}">'
    etree.ElementTree(root).write(
        str(xml_path),
        pretty_print=True,
        xml_declaration=True,
        doctype=doctype_line,
    )

    print(f"[WRITE] XML -> {xml_path.resolve()}\n")

def print_dtd_and_xml(dtd_path, xml_path):
    """Print the text of the DTD file, then the text of the XML file."""
    for source in (dtd_path, xml_path):
        print(source.read_text())

def process_bookstore(xml_path):
    """Discount fiction books by 10% and write the result back to ``xml_path``.

    Every <book> gets an ``updated`` attribute: "true" when its price was
    actually rewritten, "false" otherwise. The modified tree is serialised
    to bytes, parsed again, dumped to stdout and written over the input file.

    Args:
        xml_path: Path of the XML file to read and overwrite.

    Returns:
        None.
    """
    tree = etree.parse(str(xml_path))

    root = tree.getroot()
    print(f"[INFO] Root tag: {root.tag}")

    for book in root.findall("book"):
        # Start from "false"; flipped only after the price was really changed.
        book.set("updated", "false")

        if book.get("category") != "fiction":
            continue
        price = book.find("price")
        if price is None:
            continue

        try:
            old_price = float(price.text)
        except (TypeError, ValueError):
            # Empty or non-numeric price text: leave the book untouched, so
            # it stays marked updated="false".
            continue

        new_price = round(old_price * 0.9, 2)
        price.text = f"{new_price:.2f}"
        book.set("updated", "true")

    # Round trip through bytes: serialise the modified root, then parse it again.
    xml_bytes = etree.tostring(
        root,
        pretty_print=True,
        xml_declaration=True,
    )

    new_root = etree.fromstring(xml_bytes)

    etree.dump(new_root)

    etree.ElementTree(new_root).write(
        str(xml_path),
        pretty_print=True,
        xml_declaration=True,
    )
    print(f"[WRITE] XML -> {xml_path.resolve()}", '\n')

def xpath_with_lxml(file_path):
    """Parse the XML file at ``file_path``, run three XPath queries and print the results."""
    doc = etree.parse(str(file_path))
    queries = (
        ("Cheap fiction", "//book[@category='fiction' and number(price) < 11]/title/text()"),
        ("Modern authors", "//book[number(year) >= 2000]/author/text()"),
        ("Categories", "//book/@category"),
    )
    # Evaluate every query first, then print, so no output precedes a failing query.
    results = [(label, doc.xpath(expression)) for label, expression in queries]
    for label, matches in results:
        print(f"[XPath] {label}:", matches)

# Create both files and show them, then transform the XML and query it.
write_dtd(dtd_path=DTD_PATH, dtd_text=DTD_TEXT)
generate_xml(xml_path=XML_PATH, dtd_path=DTD_PATH)
print_dtd_and_xml(dtd_path=DTD_PATH, xml_path=XML_PATH)

# Both functions print their results; the assigned values are their return values.
et_report = process_bookstore(xml_path=XML_PATH)

xpath_report = xpath_with_lxml(file_path=XML_PATH)

[WRITE] DTD -> C:\Endava\EndevLocal\pcpp\bookstore.dtd
[WRITE] XML -> C:\Endava\EndevLocal\pcpp\books.xml

<!ELEMENT bookstore (book*)>
<!ELEMENT book (title, author, year, price)>
<!ATTLIST book category CDATA #REQUIRED>
<!ELEMENT title (#PCDATA)>
<!ELEMENT author (#PCDATA)>
<!ELEMENT year (#PCDATA)>
<!ELEMENT price (#PCDATA)>

<?xml version='1.0' encoding='ASCII'?>
<!DOCTYPE bookstore SYSTEM "bookstore.dtd">
<bookstore>
  <book category="fiction">
    <title>The Great Gatsby</title>
    <author>F. Scott Fitzgerald</author>
    <year>1925</year>
    <price>10.99</price>
  </book>
  <book category="non-fiction">
    <title>Sapiens</title>
    <author>Yuval Noah Harari</author>
    <year>2011</year>
    <price>14.99</price>
  </book>
  <book category="fiction">
    <title>Dune</title>
    <author>Frank Herbert</author>
    <year>1965</year>
    <price>9.99</price>
  </book>
</bookstore>

[INFO] Root tag: bookstore
<bookstore>
  <book category="fiction" updated="true">
    <title>The Gre

## CSV

In [60]:
import csv
from pathlib import Path

CSV_FILE = Path("items.csv")

# --- Write with DictWriter: rows are dicts, columns follow `fieldnames` ---
fieldnames = ["name", "quantity"]
initial_rows = [
    {"name": "laptop", "quantity": 3},
    {"name": "mobile phone", "quantity": 2},
    {"quantity": 1, "name": "car"},  # key order in the dict does not matter
]
with open(CSV_FILE, "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(initial_rows)
print(f"[WRITE DictWriter] File created at: {CSV_FILE.resolve()}")

# --- Read with DictReader: each row comes back as a dict keyed by the header ---
with open(CSV_FILE, "r", newline="") as csvfile:
    reader = csv.DictReader(csvfile)
    print("[READ DictReader]", reader.fieldnames)
    for row in reader:
        print("[READ DictReader]", row)
    print()

# --- Append with the plain writer: rows are sequences ---
extra_rows = [["pen", 10], ["notebook", 5]]
with open(CSV_FILE, "a", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(extra_rows)
print("[APPEND writer] Added 2 new rows.")

# --- Read with the plain reader: each row is a list, header included ---
with open(CSV_FILE, "r", newline="") as csvfile:
    reader = csv.reader(csvfile)
    for line in reader:
        print("[READ reader]", line)

[WRITE DictWriter] File created at: C:\Endava\EndevLocal\pcpp\items.csv
[READ DictReader] ['name', 'quantity']
[READ DictReader] {'name': 'laptop', 'quantity': '3'}
[READ DictReader] {'name': 'mobile phone', 'quantity': '2'}
[READ DictReader] {'name': 'car', 'quantity': '1'}

[APPEND writer] Added 2 new rows.
[READ reader] ['name', 'quantity']
[READ reader] ['laptop', '3']
[READ reader] ['mobile phone', '2']
[READ reader] ['car', '1']
[READ reader] ['pen', '10']
[READ reader] ['notebook', '5']


## INI

In [81]:
import configparser
from pathlib import Path
from io import StringIO

# --- Create an INI file that only contains a [DEFAULT] section ---
config_file = Path("settings.ini")
default_text = """
[DEFAULT]
company = TechCorp
location = EU
"""
config_file.write_text(default_text)
print(f"[WRITE] Created {config_file.resolve()} with default content.\n")

# --- Load configuration from three sources: file, string and dict ---
config = configparser.ConfigParser()
read_files = config.read(config_file)
print("[READ] Files successfully loaded:", read_files, '\n')

config.read_string("""
[database]
host = localhost
port = 3306
timeout = 3.5
secure = yes
""")

config.read_dict({
    "api": {"endpoint": "https://api.example.com", "retries": "5", "active": "true"}
})

# --- Build a section programmatically (set() and mapping access are equivalent) ---
config.add_section("app")
config.set("app", "debug", "True")
config.set("app", "language", "Python")
config["app"]["version"] = "1.0.0"

# Sections also expose the [DEFAULT] keys, so those are filtered out when printing.
print("[app after creation]")
for k, v in config["app"].items():
    if k not in config.defaults():
        print(f"{k} = {v}")
print()

# --- Modify and remove options ---
config.set("app", "debug", "False")
config["app"]["language"] = "C++"
config.remove_option("app", "version")

print("[app after modification]")
for k, v in config["app"].items():
    if k not in config.defaults():
        print(f"{k} = {v}")
print()

# --- Typed getters convert the stored strings ---
port = config.getint("database", "port")
timeout = config.getfloat("database", "timeout")
secure = config.getboolean("database", "secure")
debug = config.getboolean("app", "debug")

print("[getint port]:", port)
print("[getfloat timeout]:", timeout)
print("[getboolean secure]:", secure)
print("[getboolean debug]:", debug)
# The label names the section that is actually checked.
print("[has_section 'app']:", config.has_section("app"))
print("[has_option 'database','host']:", config.has_option("database", "host"), '\n')

# --- Dump the whole configuration: defaults first, then each section's own keys ---
if config.defaults():
    print("[DEFAULT]")
    for k, v in config.defaults().items():
        print(f"{k} = {v}")
    print()

for section in config.sections():
    print(f"[{section}]")
    for k in config[section]:
        if k not in config.defaults():
            print(f"{k} = {config[section][k]}")
    print()

# --- Persist the combined configuration back to the same file ---
with open(config_file, "w") as f:
    config.write(f)
print(f"[WRITE] Combined configuration saved to: {config_file.resolve()}")


[WRITE] Created C:\Endava\EndevLocal\pcpp\settings.ini with default content.

[READ] Files successfully loaded: ['settings.ini'] 

[app after creation]
debug = True
language = Python
version = 1.0.0

[app after modification]
debug = False
language = C++

[getint port]: 3306
[getfloat timeout]: 3.5
[getboolean secure]: True
[getboolean debug]: False
[has_section 'user']: True
[has_option 'database','host']: True 

[DEFAULT]
company = TechCorp
location = EU

[database]
host = localhost
port = 3306
timeout = 3.5
secure = yes

[api]
endpoint = https://api.example.com
retries = 5
active = true

[app]
debug = False
language = C++

[WRITE] Combined configuration saved to: C:\Endava\EndevLocal\pcpp\settings.ini


## SQLite3

In [93]:
import sqlite3
from pathlib import Path

DB_PATH = Path("db.sqlite3")

conn = sqlite3.connect(DB_PATH)
print("[CONNECT] database at:", DB_PATH.resolve())

# Bound before the try block so the cleanup code can tell whether a cursor
# was ever created, instead of relying on a swallowed NameError.
cur = None
try:
    cur = conn.cursor()
    print("[CURSOR] cursor created")

    # Start from a clean table so the demo can be re-run.
    cur.execute("DROP TABLE IF EXISTS users")
    cur.execute("""
        CREATE TABLE users (
            id      INTEGER PRIMARY KEY AUTOINCREMENT,
            name    TEXT    NOT NULL,
            age     INTEGER,
            country TEXT
        )
    """)
    print("[EXECUTE] table created")

    # executemany(): one parameterised statement, many rows.
    users = [
        ("Alice", 30, "USA"),
        ("Bob",   25, "UK"),
        ("Carla", 28, "RO"),
    ]
    cur.executemany(
        "INSERT INTO users(name, age, country) VALUES (?, ?, ?)",
        users
    )
    print("[EXECUTEMANY] inserted", len(users), "rows")

    # execute(): a single parameterised insert.
    cur.execute(
        "INSERT INTO users(name, age, country) VALUES (?, ?, ?)",
        ("Dumitru", 33, "MD")
    )
    print("[EXECUTE] inserted 1 row")

    conn.commit()
    print("[COMMIT] changes saved")

    cur.execute("SELECT id, name, age, country FROM users WHERE name = ?", ("Alice",))
    one = cur.fetchone()
    print("[FETCHONE]", one)

    cur.execute("SELECT id, name, age, country FROM users ORDER BY id")
    all_rows = cur.fetchall()
    print("[FETCHALL] total rows:", len(all_rows))
    for r in all_rows:
        print("  ", r)
finally:
    # The connection is closed even if closing the cursor fails; a failure
    # while closing the cursor is no longer silently discarded.
    try:
        if cur is not None:
            cur.close()
            print("[CLOSE] cursor closed")
    finally:
        conn.close()
        print("[CLOSE] connection closed")

[CONNECT] database at: C:\Endava\EndevLocal\pcpp\db.sqlite3
[CURSOR] cursor created
[EXECUTE] table created
[EXECUTEMANY] inserted 3 rows
[EXECUTE] inserted 1 row
[COMMIT] changes saved
[FETCHONE] (1, 'Alice', 30, 'USA')
[FETCHALL] total rows: 4
   (1, 'Alice', 30, 'USA')
   (2, 'Bob', 25, 'UK')
   (3, 'Carla', 28, 'RO')
   (4, 'Dumitru', 33, 'MD')
[CLOSE] cursor closed
[CLOSE] connection closed
