In [2]:
!pip install lxml xmlschema

import xml.etree.ElementTree as ET
from lxml import etree
import xmlschema
import requests
from io import StringIO, BytesIO
from google.colab import files
import os

print("Libraries installed and imported successfully!")

Libraries installed and imported successfully!


In [4]:
# Banner printed above the Colab upload prompt.
banner_rule = "=" * 50
print("Upload your XML and XSD files:")
print(banner_rule)

# The returned mapping is keyed by file name; later cells look the XML and
# XSD contents up in it by name.
uploaded = files.upload()

# Echo the names under which the uploads were registered.
uploaded_files = [name for name in uploaded.keys()]
print("\nUploaded files: " + str(uploaded_files))

Upload your XML and XSD files:


Saving Library.sxd to Library.sxd
Saving Library.xml to Library (1).xml

Uploaded files: ['Library.sxd', 'Library (1).xml']


In [6]:
def validate_xml_with_xsd(xml_content, xsd_content):
    """
    Check that an XML document is well-formed and valid against an XSD schema.

    The check runs in three separate stages (parse the XML, load the schema,
    validate the XML against the schema) so that a failure is attributed to
    the input that actually caused it. In particular, a broken XSD is no
    longer reported as a problem with the XML document.

    Args:
        xml_content (bytes): The content of the XML file as bytes.
        xsd_content (bytes): The content of the XSD file as bytes.

    Returns:
        tuple: A tuple containing:
            - bool: True if the XML is well-formed and valid, False otherwise.
            - str: An empty string if valid, or an error message if not.

    Raises:
        Nothing: every exception is converted into a (False, message) result.
    """
    # Stage 1: well-formedness of the XML document itself, using ElementTree.
    try:
        ET.fromstring(xml_content)
    except ET.ParseError as e:
        return False, f"XML is not well-formed: {e}"
    except Exception as e:
        return False, f"An unexpected error occurred: {e}"

    # Stage 2: load the schema. Both an XML syntax error inside the XSD and a
    # schema-level error are reported as schema problems, not XML problems.
    try:
        schema = xmlschema.XMLSchema(xsd_content)
    except (ET.ParseError, xmlschema.XMLSchemaException) as e:
        return False, f"XSD schema could not be loaded: {e}"
    except Exception as e:
        return False, f"An unexpected error occurred: {e}"

    # Stage 3: validate the (already well-formed) document against the schema.
    try:
        schema.validate(xml_content)
    except xmlschema.XMLSchemaException as e:
        return False, f"XML is not valid against XSD: {e}"
    except Exception as e:
        return False, f"An unexpected error occurred: {e}"

    return True, ""

# Locate the uploaded documents by extension instead of by a fixed name.
# Colab may register an upload under a different name than the original file
# (the recorded run saved 'Library.xml' as 'Library (1).xml'), so a hard-coded
# name only matches on some runs.
XML_SUFFIXES = ('.xml',)
XSD_SUFFIXES = ('.xsd', '.sxd')  # '.sxd' is the extension of the uploaded schema file

# First matching upload wins; None when no upload has a matching extension.
xml_filename = next(
    (name for name in uploaded if name.lower().endswith(XML_SUFFIXES)), None)
xsd_filename = next(
    (name for name in uploaded if name.lower().endswith(XSD_SUFFIXES)), None)

if xml_filename is not None and xsd_filename is not None:
    xml_content = uploaded[xml_filename]
    xsd_content = uploaded[xsd_filename]

    is_valid, message = validate_xml_with_xsd(xml_content, xsd_content)

    if is_valid:
        print(f"'{xml_filename}' is well-formed and valid against '{xsd_filename}'.")
        # Parse the validated document with lxml so that the query cells below
        # can call root.xpath(...); without this, `root` is undefined on a
        # fresh kernel.
        root = etree.fromstring(xml_content)
    else:
        print(f"Validation failed: {message}")
else:
    print("XML or XSD file not found in uploaded files.")

'Library (1).xml' is well-formed and valid against 'Library.sxd'.


In [14]:
print("QUERY 1: Get All Book Titles")
print("=" * 40)

# Text nodes of every <title> directly under a <book>.
titles = root.xpath('//book/title/text()')

# Number the titles starting from 1.
position = 0
for book_title in titles:
    position += 1
    print(f"{position}. {book_title}")

print(f"\n Total books: {len(titles)}")

QUERY 1: Get All Book Titles
1. 1984
2. To Kill a Mockingbird
3. The Great Gatsby
4. The Lord of the Rings
5. Brave New World
6. Pride and Prejudice

 Total books: 6


In [15]:
print("QUERY 2: Available Books with Details")
print("=" * 40)

# Select only <book> elements whose `available` attribute is exactly "true".
available_books = root.xpath('//book[@available="true"]')
print("📖 Available Books:")
for book in available_books:
    # Each lookup takes the first text node of the named child element.
    title = book.xpath('title/text()')[0]
    author = book.xpath('author/text()')[0]
    price = book.xpath('price/text()')[0]
    print(f"   • {title} by {author} - ${price}")

print(f"\n Available: {len(available_books)} books")

QUERY 2: Available Books with Details
📖 Available Books:
   • 1984 by George Orwell - $8.99
   • The Great Gatsby by F. Scott Fitzgerald - $10.25
   • The Lord of the Rings by J.R.R. Tolkien - $22.99
   • Pride and Prejudice by Jane Austen - $7.99

 Available: 4 books


In [16]:
print("QUERY 3: Books Published After 1950")
print("=" * 40)

# The predicate compares the <year> child numerically inside XPath.
recent_books = root.xpath('//book[year > 1950]')
print("🆕 Books after 1950:")
for book in recent_books:
    # Each lookup takes the first text node of the named child element.
    title = book.xpath('title/text()')[0]
    author = book.xpath('author/text()')[0]
    year = book.xpath('year/text()')[0]
    print(f"   • {title} ({year}) by {author}")

print(f"\n Count: {len(recent_books)} books")

QUERY 3: Books Published After 1950
🆕 Books after 1950:
   • To Kill a Mockingbird (1960) by Harper Lee
   • The Lord of the Rings (1954) by J.R.R. Tolkien

 Count: 2 books


In [17]:
print("QUERY 4: Books Sorted by Price (High to Low)")
print("=" * 40)

# Get all books and sort by price. The price text is converted to float for
# the sort key only, so ordering is numeric rather than lexicographic.
books = root.xpath('//book')
books_sorted = sorted(books,
                     key=lambda x: float(x.xpath('price/text()')[0]),
                     reverse=True)

print("Books by price:")
for book in books_sorted:
    title = book.xpath('title/text()')[0]
    price = book.xpath('price/text()')[0]
    author = book.xpath('author/text()')[0]
    # `price` is still text here; `:5` pads it to five characters so the
    # separators line up for prices of different lengths.
    print(f"   • ${price:5} - {title} by {author}")

QUERY 4: Books Sorted by Price (High to Low)
Books by price:
   • $22.99 - The Lord of the Rings by J.R.R. Tolkien
   • $12.50 - To Kill a Mockingbird by Harper Lee
   • $10.25 - The Great Gatsby by F. Scott Fitzgerald
   • $9.75  - Brave New World by Aldous Huxley
   • $8.99  - 1984 by George Orwell
   • $7.99  - Pride and Prejudice by Jane Austen


In [18]:
print("QUERY 5: Statistical Analysis of Book Collection")
print("=" * 40)

# Convert the text nodes to numbers once, then aggregate in Python.
prices = [float(price) for price in root.xpath('//book/price/text()')]
years = [int(year) for year in root.xpath('//book/year/text()')]
pages = [int(page) for page in root.xpath('//book/pages/text()')]

print(" Collection Statistics:")
print(f"   • Average Price: ${sum(prices)/len(prices):.2f}")
print(f"   • Most Expensive: ${max(prices):.2f}")
print(f"   • Cheapest: ${min(prices):.2f}")
print(f"   • Oldest Book: {min(years)}")
print(f"   • Newest Book: {max(years)}")
print(f"   • Average Pages: {sum(pages)/len(pages):.0f}")
print(f"   • Longest Book: {max(pages)} pages")

QUERY 5: Statistical Analysis of Book Collection
 Collection Statistics:
   • Average Price: $12.08
   • Most Expensive: $22.99
   • Cheapest: $7.99
   • Oldest Book: 1813
   • Newest Book: 1960
   • Average Pages: 448
   • Longest Book: 1178 pages


In [19]:
print("QUERY 6: Books Grouped by Genre")
print("=" * 40)

# Distinct genre names found in the document.
genres = set(root.xpath('//book/genre/text()'))
print(" Books by Genre:")

for genre in sorted(genres):
    # Pass the genre as an XPath variable instead of splicing it into the
    # expression text, so a genre containing a quote character cannot break
    # (or alter) the query.
    genre_books = root.xpath('//book[genre=$genre]', genre=str(genre))
    print(f"\n  {genre} ({len(genre_books)} books):")
    for book in genre_books:
        title = book.xpath('title/text()')[0]
        author = book.xpath('author/text()')[0]
        print(f"   • {title} - {author}")

QUERY 6: Books Grouped by Genre
 Books by Genre:

  Classic (1 books):
   • The Great Gatsby - F. Scott Fitzgerald

  Dystopian (1 books):
   • 1984 - George Orwell

  Fantasy (1 books):
   • The Lord of the Rings - J.R.R. Tolkien

  Fiction (1 books):
   • To Kill a Mockingbird - Harper Lee

  Romance (1 books):
   • Pride and Prejudice - Jane Austen

  Science Fiction (1 books):
   • Brave New World - Aldous Huxley


In [20]:
print("QUERY 7: Affordable Classic Books (Price < $15, Available)")
print("=" * 40)

# All three conditions are evaluated inside a single XPath predicate:
# numeric price below 15, genre exactly "Classic", and available="true".
affordable_classics = root.xpath('//book[price < 15 and genre="Classic" and @available="true"]')

if affordable_classics:
    print("  Affordable Classic Books:")
    for book in affordable_classics:
        title = book.xpath('title/text()')[0]
        author = book.xpath('author/text()')[0]
        price = book.xpath('price/text()')[0]
        year = book.xpath('year/text()')[0]
        print(f"   • {title} by {author}")
        print(f"     Year: {year}, Price: ${price}")
else:
    print(" No affordable classic books available")

QUERY 7: Affordable Classic Books (Price < $15, Available)
  Affordable Classic Books:
   • The Great Gatsby by F. Scott Fitzgerald
     Year: 1925, Price: $10.25


In [21]:
print("QUERY 8: Books Containing 'World' or 'New' in Title")
print("=" * 40)

# Using contains() function for text search; the match is a case-sensitive
# substring test on the <title> text.
world_books = root.xpath('//book[contains(title, "World") or contains(title, "New")]')

if world_books:
    print("Books with 'World' or 'New' in title:")
    for book in world_books:
        title = book.xpath('title/text()')[0]
        author = book.xpath('author/text()')[0]
        year = book.xpath('year/text()')[0]
        print(f"   • {title} by {author} ({year})")
else:
    print("No books found with search terms")

QUERY 8: Books Containing 'World' or 'New' in Title
Books with 'World' or 'New' in title:
   • Brave New World by Aldous Huxley (1932)


In [22]:
# Fraction taken off the list price. The headings and the per-book label are
# derived from this one value so they cannot drift apart from the arithmetic.
DISCOUNT_RATE = 0.20

print(f"QUERY 9: Books with {DISCOUNT_RATE:.0%} Discount Calculation")
print("=" * 40)

print(f"Books with {DISCOUNT_RATE:.0%} discount applied:")
all_books = root.xpath('//book')

for book in all_books:
    title = book.xpath('title/text()')[0]
    original_price = float(book.xpath('price/text()')[0])
    discount_price = original_price * (1 - DISCOUNT_RATE)
    available = book.get('available')

    # Anything other than the literal "true" is reported as checked out.
    status = "Available" if available == "true" else "Checked Out"

    print(f"   • {title}")
    print(f"     Original: ${original_price:.2f}")
    print(f"     Discount: ${discount_price:.2f} ({DISCOUNT_RATE:.0%} off)")
    print(f"     Status: {status}")
    print()

QUERY 9: Books with 20% Discount Calculation
Books with 20% discount applied:
   • 1984
     Original: $8.99
     Discount: $7.19 (20% off)
     Status: Available

   • To Kill a Mockingbird
     Original: $12.50
     Discount: $10.00 (20% off)
     Status: Checked Out

   • The Great Gatsby
     Original: $10.25
     Discount: $8.20 (20% off)
     Status: Available

   • The Lord of the Rings
     Original: $22.99
     Discount: $18.39 (20% off)
     Status: Available

   • Brave New World
     Original: $9.75
     Discount: $7.80 (20% off)
     Status: Checked Out

   • Pride and Prejudice
     Original: $7.99
     Discount: $6.39 (20% off)
     Status: Available



In [23]:
print("QUERY 10: Comprehensive Library Report")
print("=" * 40)

# Multiple aggregations and analyses. The genre text nodes are fetched once
# and reused for both the distinct-genre count and the distribution below.
genres = root.xpath('//book/genre/text()')

total_books = len(root.xpath('//book'))
available_count = len(root.xpath('//book[@available="true"]'))
unavailable_count = len(root.xpath('//book[@available="false"]'))
genres_count = len(set(genres))
authors_count = len(set(root.xpath('//book/author/text()')))
total_value = sum(float(price) for price in root.xpath('//book/price/text()'))

print(" COMPREHENSIVE LIBRARY REPORT")
print("=" * 30)
print(f"Total Books: {total_books}")
print(f"Available: {available_count}")
print(f"Checked Out: {unavailable_count}")
print(f"Different Genres: {genres_count}")
print(f"Unique Authors: {authors_count}")
print(f"Total Collection Value: ${total_value:.2f}")

# Genre distribution: number of books per genre and share of the collection.
print("\n Genre Distribution:")
genre_counts = {}
for genre in genres:
    genre_counts[genre] = genre_counts.get(genre, 0) + 1

for genre, count in sorted(genre_counts.items()):
    # total_books is non-zero here: this loop only runs if a genre was found.
    percentage = (count / total_books) * 100
    print(f"   • {genre}: {count} books ({percentage:.1f}%)")

# Number of books published per decade (year floored to a multiple of 10).
print("\n Publication Timeline:")
decades = {}
for year in root.xpath('//book/year/text()'):
    decade = (int(year) // 10) * 10
    decades[decade] = decades.get(decade, 0) + 1

for decade in sorted(decades.keys()):
    print(f"   * {decade}s: {decades[decade]} books")

QUERY 10: Comprehensive Library Report
 COMPREHENSIVE LIBRARY REPORT
Total Books: 6
Available: 4
Checked Out: 2
Different Genres: 6
Unique Authors: 6
Total Collection Value: $72.47

 Genre Distribution:
   • Classic: 1 books (16.7%)
   • Dystopian: 1 books (16.7%)
   • Fantasy: 1 books (16.7%)
   • Fiction: 1 books (16.7%)
   • Romance: 1 books (16.7%)
   • Science Fiction: 1 books (16.7%)

 Publication Timeline:
   * 1810s: 1 books
   * 1920s: 1 books
   * 1930s: 1 books
   * 1940s: 1 books
   * 1950s: 1 books
   * 1960s: 1 books
